On 2022/10/28 15:00, Yang Shen wrote:
For performance reasons, uadk leaves many configuration options to users. This gives users great flexibility, but it also makes the current initialization interface highly complex. Therefore, to help users adapt quickly, a new set of interfaces is provided.
'wd_alg_init2_()' completes all initialization steps. It takes 4 parameters that describe the user's configuration requirements. @alg: The algorithm the user wants to use. @sched_type: The scheduling type the user wants to use. @numa_bitmask: The bitmask provided by libnuma. Users can use this parameter to control which devices ctxs are requested from in NUMA-binding scenarios. @ctx_nums: The requested ctx number for each numa node. Because users may have different requirements for different types of ctxs, a two-dimensional array is needed as input.
If users think 'wd_alg_init2_()' is too complex, 'wd_alg_init2()' is a simplified wrapper that uses the default values of numa_bitmask and ctx_nums.
Signed-off-by: Yang Shen shenyang39@huawei.com
Makefile.am | 4 +- include/wd.h | 24 ++++ include/wd_alg_common.h | 24 ++++ include/wd_comp.h | 27 +++++ include/wd_util.h | 19 ++++ wd.c | 62 +++++++++++ wd_comp.c | 86 +++++++++++++++ wd_util.c | 236 +++++++++++++++++++++++++++++++++++++++- 8 files changed, 479 insertions(+), 3 deletions(-)
diff --git a/Makefile.am b/Makefile.am index 53f36f9..5465b64 100644 --- a/Makefile.am +++ b/Makefile.am @@ -86,7 +86,7 @@ AM_CFLAGS += -DWD_NO_LOG
libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma
-libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_comp_la_DEPENDENCIES = libwd.la
libhisi_zip_la_LIBADD = -ldl @@ -103,7 +103,7 @@ else libwd_la_LDFLAGS=$(UADK_VERSION) libwd_la_LIBADD= -lnuma
-libwd_comp_la_LIBADD= -lwd -ldl +libwd_comp_la_LIBADD= -lwd -ldl -lnuma libwd_comp_la_LDFLAGS=$(UADK_VERSION) libwd_comp_la_DEPENDENCIES= libwd.la
diff --git a/include/wd.h b/include/wd.h index e1a87de..facd992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev); */ struct uacce_dev_list *wd_get_accel_list(const char *alg_name);
+/**
- wd_find_dev_by_numa() - get device with max available ctx number from an
device list according to numa id.
- @list: The device list.
- @numa_id: The numa_id.
- Return device if succeed and other error number if fail.
- */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id);
/**
- wd_get_accel_dev() - Get device supporting the algorithm with smallest numa distance to current numa node.
@@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev); */ void wd_add_dev_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node);
+/**
- wd_create_device_nodemask() - create a numa node mask of device list.
- @list: The devices list.
- Return a pointer value if succeed, and error number if fail.
- */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list);
+/**
- wd_free_device_nodemask() - free a numa node mask.
- @bmp: A numa node mask.
- */
+void wd_free_device_nodemask(struct bitmask *bmp);
/**
- wd_ctx_get_dev_name() - Get the device name about task.
- @h_ctx: The handle of context.
diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index c455dc3..5f63215 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -63,6 +63,30 @@ struct wd_ctx_config { void *priv; };
+/**
- struct wd_ctx_nums - Define the ctx sets numbers.
- @sync_ctx_num: The ctx numbers which are used for sync mode for each
- ctx sets.
- @async_ctx_num: The ctx numbers which are used for async mode for each
- ctx sets.
- */
+struct wd_ctx_nums {
- __u32 sync_ctx_num;
- __u32 async_ctx_num;
+};
+/**
- struct wd_ctx_params - Define the ctx sets params which are used for init
- algorithms.
- @ctx_set_size: Number of ctx sets to be created. Usually users can
- set it according to <alg>_op_type.
- @ctx_set_num: Each ctx sets numbers.
- */
+struct wd_ctx_params {
- __u32 ctx_set_size;
- struct wd_ctx_nums *ctx_set_num;
+};
struct wd_ctx_internal { handle_t ctx; __u8 op_type; diff --git a/include/wd_comp.h b/include/wd_comp.h index e043a83..d96110e 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -7,6 +7,7 @@ #ifndef __WD_COMP_H #define __WD_COMP_H
+#include <numa.h> #include "wd.h" #include "wd_alg_common.h"
@@ -113,6 +114,32 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched); */ void wd_comp_uninit(void);
+/**
- wd_comp_init2_() - A simplified interface to initialize uadk
- compression/decompression. This interface keeps most functions of
- wd_comp_init(). Users just need to describe the deployment of
- business scenarios. Then the initialization will request appropriate
- resources to support the business scenarios.
- To make the initialization simpler, bmp and cparams may be set to NULL,
- in which case the function uses their default values.
- @alg: The selected algorithm.
- @sched_type: The scheduler type.
- @bmp: Node mask of the required devices.
- @cparams: The ctx number settings.
- Return 0 if succeed and others if fail.
- */
+int wd_comp_init2_(char *alg, __u32 sched_type, struct bitmask *bmp, struct wd_ctx_params *cparams);
+#define wd_comp_init2(alg, sched_type) \
- wd_comp_init2_(alg, sched_type, NULL, NULL)
+/**
- wd_comp_uninit2() - Uninitialise ctx configuration and scheduler.
- */
+void wd_comp_uninit2(void);
struct wd_comp_sess_setup { enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */ enum wd_comp_level comp_lv; /* Denoted by enum wd_comp_level */ diff --git a/include/wd_util.h b/include/wd_util.h index eafe3ce..8ae70f1 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -7,6 +7,7 @@ #ifndef __WD_UTIL_H #define __WD_UTIL_H
+#include <numa.h> #include <stdbool.h> #include <sys/ipc.h> #include <sys/shm.h> @@ -112,6 +113,15 @@ struct wd_msg_handle { int (*recv)(handle_t sess, void *msg); };
+struct wd_init_attrs {
- __u32 sched_type;
- char *alg;
- struct bitmask *bmp;
- struct wd_sched *sched;
- struct wd_ctx_params *cparams;
- struct wd_ctx_config *ctx_config;
+};
/*
- wd_init_ctx_config() - Init internal ctx configuration.
- @in: ctx configuration in global setting.
@@ -394,6 +404,15 @@ static inline void wd_alg_clear_init(enum wd_status *status) __atomic_store(status, &setting, __ATOMIC_RELAXED); }
+/**
- wd_alg_pre_init() - Request the ctxs and initialize the sched_domain
with the given devices list, ctxs number and numa mask.
- @attrs: the algorithm initialization parameters.
- Return 0 if succeed and a negative error number if fail.
- */
+int wd_alg_pre_init(struct wd_init_attrs *attrs);
/**
- wd_dfx_msg_cnt() - Message counter interface for ctx
- @msg: Shared memory addr.
diff --git a/wd.c b/wd.c index 78094d8..9eb69d2 100644 --- a/wd.c +++ b/wd.c @@ -727,6 +727,35 @@ free_list: return NULL; }
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id) +{
- struct uacce_dev *dev = WD_ERR_PTR(-WD_ENODEV);
- struct uacce_dev_list *p = list;
- int ctx_num, ctx_max = 0;
- if (!list) {
WD_ERR("invalid: list is NULL!\n");
return WD_ERR_PTR(-WD_EINVAL);
- }
- while (p) {
if (numa_id != p->dev->numa_id) {
p = p->next;
continue;
}
ctx_num = wd_get_avail_ctx(p->dev);
if (ctx_num > ctx_max) {
dev = p->dev;
ctx_max = ctx_num;
}
p = p->next;
- }
- return dev;
+}
void wd_free_list_accels(struct uacce_dev_list *list) { struct uacce_dev_list *curr, *next; @@ -793,6 +822,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg) return ioctl(ctx->fd, cmd, arg); }
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list) +{
- struct uacce_dev_list *p;
- struct bitmask *bmp;
- if (!list) {
WD_ERR("invalid: list is NULL!\n");
return WD_ERR_PTR(-WD_EINVAL);
- }
- bmp = numa_allocate_nodemask();
- if (!bmp) {
WD_ERR("failed to alloc bitmask(%d)!\n", errno);
return WD_ERR_PTR(-WD_ENOMEM);
- }
- p = list;
- while (p) {
numa_bitmask_setbit(bmp, p->dev->numa_id);
p = p->next;
- }
- return bmp;
+}
/* Release a node mask; a NULL @bmp is accepted and ignored. */
void wd_free_device_nodemask(struct bitmask *bmp)
{
	if (bmp)
		numa_free_nodemask(bmp);
}
void wd_get_version(void) { const char *wd_released_time = UADK_RELEASED_TIME; diff --git a/wd_comp.c b/wd_comp.c index 44593a6..ea80d13 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -14,6 +14,7 @@
#include "config.h" #include "drv/wd_comp_drv.h" +#include "wd_sched.h" #include "wd_util.h" #include "wd_comp.h"
@@ -21,6 +22,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024)
+#define SCHED_RR_NAME "sched_rr"
#define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ @@ -42,6 +45,7 @@ struct wd_comp_sess {
struct wd_comp_setting { enum wd_status status;
- enum wd_status status2; struct wd_ctx_config_internal config; struct wd_sched sched; struct wd_comp_driver *driver;
@@ -52,6 +56,19 @@ struct wd_comp_setting {
struct wd_env_config wd_comp_env_config;
+static struct wd_init_attrs wd_comp_init_attrs; +static struct wd_ctx_config wd_comp_ctx; +static struct wd_sched *wd_comp_sched;
+static struct wd_ctx_nums wd_comp_ctx_num[] = {
- {1, 1}, {1, 1}, {}
+};
+static struct wd_ctx_params wd_comp_cparams = {
- .ctx_set_size = WD_DIR_MAX,
- .ctx_set_num = wd_comp_ctx_num
+};
#ifdef WD_STATIC_DRV static void wd_comp_set_static_drv(void) { @@ -178,6 +195,73 @@ void wd_comp_uninit(void) wd_alg_clear_init(&wd_comp_setting.status); }
+int wd_comp_init2_(char *alg, __u32 sched_type, struct bitmask *bmp, struct wd_ctx_params *cparams)
It is recommended to merge bmp into cparams, because it is also a parameter that affects the number of ctxs.
+{
- bool flag;
- int ret;
- flag = wd_alg_try_init(&wd_comp_setting.status2);
- if (!flag)
return 0;
- if (!alg) {
WD_ERR("invalid: alg is NULL!\n");
ret = -WD_EINVAL;
goto out_uninit;
- }
- wd_comp_init_attrs.alg = alg;
- wd_comp_init_attrs.sched_type = sched_type;
- wd_comp_init_attrs.bmp = bmp;
- wd_comp_init_attrs.cparams = cparams ? cparams : &wd_comp_cparams;
It is recommended not to use this parameter structure; instead, use cparams directly and pass alg and sched_type to wd_util.c.
- wd_comp_init_attrs.ctx_config = &wd_comp_ctx;
- wd_comp_sched = wd_sched_rr_alloc(sched_type, wd_comp_init_attrs.cparams->ctx_set_size,
numa_max_node() + 1, wd_comp_poll_ctx);
- if (!wd_comp_sched) {
ret = -WD_EINVAL;
goto out_uninit;
- }
- wd_comp_sched->name = SCHED_RR_NAME;
- wd_comp_init_attrs.sched = wd_comp_sched;
- ret = wd_alg_pre_init(&wd_comp_init_attrs);
- if (ret)
goto out_freesched;
- ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched);
- if (ret)
goto out_freesched;
- wd_alg_set_init(&wd_comp_setting.status2);
- return 0;
+out_freesched:
- wd_sched_rr_release(wd_comp_sched);
+out_uninit:
- wd_alg_clear_init(&wd_comp_setting.status2);
- return ret;
+}
+void wd_comp_uninit2(void) +{
- int i;
- wd_comp_uninit();
- for (i = 0; i < wd_comp_ctx.ctx_num; i++)
if (wd_comp_ctx.ctxs[i].ctx) {
wd_release_ctx(wd_comp_ctx.ctxs[i].ctx);
wd_comp_ctx.ctxs[i].ctx = 0;
- }
- wd_sched_rr_release(wd_comp_sched);
- wd_alg_clear_init(&wd_comp_setting.status2);
+}
struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag) { return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag); @@ -289,6 +373,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->comp_lv = setup->comp_lv; sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW;
- /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param);
@@ -318,6 +403,7 @@ void wd_comp_free_sess(handle_t h_sess)
if (sess->sched_key) free(sess->sched_key);
- free(sess);
}
diff --git a/wd_util.c b/wd_util.c index fa77b46..d618776 100644 --- a/wd_util.c +++ b/wd_util.c @@ -5,7 +5,6 @@ */
#define _GNU_SOURCE -#include <numa.h> #include <pthread.h> #include <semaphore.h> #include <string.h> @@ -1801,3 +1800,238 @@ bool wd_alg_try_init(enum wd_status *status)
return true; }
+static __u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end) +{
- __u32 count = 0;
- int i;
- for (i = 0; i < end; i++) {
count += cparams.ctx_set_num[i].sync_ctx_num;
count += cparams.ctx_set_num[i].async_ctx_num;
- }
- return count;
+}
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +{
- struct uacce_dev_list *p, *node, *result = NULL;
- struct uacce_dev *dev;
- int numa_id, ret;
- if (!bmp) {
WD_ERR("invalid: bmp is NULL!\n");
return WD_ERR_PTR(-WD_EINVAL);
- }
- p = list;
- while (p) {
dev = p->dev;
numa_id = dev->numa_id;
ret = numa_bitmask_isbitset(bmp, numa_id);
if (!ret) {
p = p->next;
continue;
}
node = calloc(1, sizeof(*node));
if (!node) {
result = WD_ERR_PTR(-WD_ENOMEM);
goto out_free_list;
}
node->dev = wd_clone_dev(dev);
if (!node->dev) {
result = WD_ERR_PTR(-WD_ENOMEM);
goto out_free_node;
}
if (!result)
result = node;
else
wd_add_dev_to_list(result, node);
p = p->next;
- }
- return result;
+out_free_node:
- free(node);
+out_free_list:
- wd_free_list_accels(result);
- return result;
+}
+static int wd_init_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *list,
int idx, int numa_id, int op_type)
+{
- struct wd_ctx_nums ctx_nums = attrs->cparams->ctx_set_num[op_type];
- __u32 ctx_set_num = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num;
- struct wd_ctx_config *ctx_config = attrs->ctx_config;
- struct uacce_dev *dev;
- int i;
- dev = wd_find_dev_by_numa(list, numa_id);
- if (WD_IS_ERR(dev))
return WD_PTR_ERR(dev);
- for (i = idx; i < idx + ctx_set_num; i++) {
ctx_config->ctxs[i].ctx = wd_request_ctx(dev);
if (errno == WD_EBUSY) {
dev = wd_find_dev_by_numa(list, numa_id);
if (WD_IS_ERR(dev))
return WD_PTR_ERR(dev);
i--;
}
ctx_config->ctxs[i].op_type = op_type;
ctx_config->ctxs[i].ctx_mode =
((i - idx) < ctx_nums.sync_ctx_num) ?
CTX_MODE_SYNC : CTX_MODE_ASYNC;
- }
- return 0;
+}
+static void wd_release_ctx_set(struct wd_ctx_config *ctx_config) +{
- int i;
- for (i = 0; i < ctx_config->ctx_num; i++)
if (ctx_config->ctxs[i].ctx) {
wd_release_ctx(ctx_config->ctxs[i].ctx);
ctx_config->ctxs[i].ctx = 0;
}
+}
+static int wd_instance_sched_set(struct wd_sched *sched, struct wd_ctx_nums ctx_nums,
int idx, int numa_id, int op_type)
+{
- struct sched_params sparams;
- int i, ret = 0;
- for (i = 0; i < CTX_MODE_MAX; i++) {
sparams.numa_id = numa_id;
sparams.type = op_type;
sparams.mode = i;
sparams.begin = idx + ctx_nums.sync_ctx_num * i;
sparams.end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i;
if (sparams.begin > sparams.end)
continue;
ret = wd_sched_rr_instance(sched, &sparams);
if (ret)
goto out;
- }
+out:
- return ret;
+}
+static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bmp,
struct uacce_dev_list *list)
+{
- struct wd_ctx_params *cparams = attrs->cparams;
- __u32 ctx_set_size = cparams->ctx_set_size;
- int max_node = numa_max_node() + 1;
- struct wd_ctx_nums ctx_nums;
- int i, j, ret;
- int idx = 0;
- for (i = 0; i < max_node; i++) {
if (!numa_bitmask_isbitset(bmp, i))
continue;
for (j = 0; j < ctx_set_size; j++) {
ctx_nums = cparams->ctx_set_num[j];
ret = wd_init_ctx_set(attrs, list, idx, i, j);
if (ret)
goto free_ctxs;
ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j);
if (ret)
goto free_ctxs;
idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num);
}
- }
- return 0;
+free_ctxs:
- wd_release_ctx_set(attrs->ctx_config);
- return ret;
+}
+int wd_alg_pre_init(struct wd_init_attrs *attrs) +{
- struct wd_ctx_config *ctx_config = attrs->ctx_config;
- struct wd_ctx_params *cparams = attrs->cparams;
- struct uacce_dev_list *list, *used_list = NULL;
- struct bitmask *used_bmp, *bmp = attrs->bmp;
- __u32 ctx_set_num, ctx_set_size;
- int numa_cnt, ret;
- list = wd_get_accel_list(attrs->alg);
- if (!list) {
WD_ERR("failed to get devices!\n");
return -WD_ENODEV;
- }
- ctx_set_size = cparams->ctx_set_size;
- ctx_set_num = wd_get_ctx_numbers(*cparams, ctx_set_size);
- if (!ctx_set_num || !ctx_set_size) {
WD_ERR("invalid: ctx_set_num is %d, ctx_set_size is %d!\n",
ctx_set_num, ctx_set_size);
ret = -WD_EINVAL;
goto out_freelist;
- }
- /*
* Not every numa has a device. Therefore, the first thing is to
* filter the devices in the selected numa node, and the second
* thing is to obtain the distribution of devices.
*/
- if (bmp) {
used_list = wd_get_usable_list(list, bmp);
if (WD_IS_ERR(used_list)) {
ret = WD_PTR_ERR(used_list);
WD_ERR("failed to get usable devices(%d)!\n", ret);
goto out_freelist;
}
- }
- used_bmp = wd_create_device_nodemask(used_list ? used_list : list);
- if (WD_IS_ERR(bmp)) {
ret = WD_PTR_ERR(bmp);
goto out_freeusedlist;
- }
- numa_cnt = numa_bitmask_weight(used_bmp);
- if (!numa_cnt) {
ret = numa_cnt;
WD_ERR("invalid: bmp is clear!\n");
goto out_freenodemask;
- }
- ctx_config->ctx_num = ctx_set_num * numa_cnt;
- ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx));
- if (!ctx_config->ctxs) {
ret = -WD_ENOMEM;
WD_ERR("failed to alloc ctxs!\n");
goto out_freenodemask;
- }
- ret = wd_init_ctx_and_sched(attrs, used_bmp, used_list ? used_list : list);
- if (ret)
free(ctx_config->ctxs);
+out_freenodemask:
- wd_free_device_nodemask(used_bmp);
+out_freeusedlist:
- wd_free_list_accels(used_list);
+out_freelist:
- wd_free_list_accels(list);
- return ret;
+}