On 2022/9/24 18:18, Yang Shen Wrote:
For performance reasons, uadk leaves many configuration options to users. This gives users great flexibility, but it also makes the current initialization interface highly complex. Therefore, to help users adapt quickly, a new set of interfaces is provided.
The 'wd_alg_init2()' interface completes all initialization steps. Four parameters describe the user's configuration requirements. @device_list: The list of available uacce devices. Users can get it with wd_get_accel_list(). @numa_bitmask: The bitmask provided by libnuma. Users can use this parameter to control which devices ctxs are requested from in the NUMA-binding scenario. @ctx_nums: The number of ctxs requested for each NUMA node. Because users may need different numbers of ctxs for different ctx types, a two-dimensional array is required as input. @sched_type: The scheduling type the user wants to use.
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Makefile.am | 4 +- include/wd.h | 24 +++++ include/wd_alg_common.h | 24 +++++ include/wd_comp.h | 27 +++++ include/wd_util.h | 19 ++++ wd.c | 62 ++++++++++++ wd_comp.c | 213 ++++++++++++++++++++++++++++++++++++++++ wd_util.c | 59 ++++++++++- 8 files changed, 429 insertions(+), 3 deletions(-)
diff --git a/Makefile.am b/Makefile.am index b3f07df..6cfb6b3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -86,7 +86,7 @@ AM_CFLAGS += -DWD_NO_LOG
libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma
-libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_comp_la_DEPENDENCIES = libwd.la
libhisi_zip_la_LIBADD = -ldl @@ -103,7 +103,7 @@ else libwd_la_LDFLAGS=$(UADK_VERSION) libwd_la_LIBADD= -lnuma
-libwd_comp_la_LIBADD= -lwd -ldl +libwd_comp_la_LIBADD= -lwd -ldl -lnuma libwd_comp_la_LDFLAGS=$(UADK_VERSION) libwd_comp_la_DEPENDENCIES= libwd.la
diff --git a/include/wd.h b/include/wd.h index e1a87de..facd992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev); */ struct uacce_dev_list *wd_get_accel_list(const char *alg_name);
+/**
- wd_find_dev_by_numa() - get device with max available ctx number from an
device list according to numa id.
- @list: The device list.
- @numa_id: The numa_id.
- Return device if succeed and other error number if fail.
- */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id);
/**
- wd_get_accel_dev() - Get device supporting the algorithm with smallest numa distance to current numa node.
@@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev); */ void wd_add_dev_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node);
+/**
- wd_create_device_nodemask() - create a numa node mask of device list.
- @list: The devices list.
- Return a pointer value if succeed, and error number if fail.
- */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list);
+/**
- wd_free_device_nodemask() - free a numa node mask.
- @bmp: A numa node mask.
- */
+void wd_free_device_nodemask(struct bitmask *bmp);
/**
- wd_ctx_get_dev_name() - Get the device name about task.
- @h_ctx: The handle of context.
diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index c455dc3..f261830 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -63,6 +63,30 @@ struct wd_ctx_config { void *priv; };
+/**
- struct wd_ctx_nums - Define the ctx sets numbers.
- @sync_ctx_num: The ctx numbers which are used for sync mode for each
- ctx sets.
- @async_ctx_num: The ctx numbers which are used for async mode for each
- ctx sets.
- */
+struct wd_ctx_nums {
- __u32 sync_ctx_num;
- __u32 async_ctx_num;
+};
+/**
- struct wd_ctx_params - Define the ctx sets params which are used for init
- algorithms.
- @ctx_set_num: Number of ctx sets to be created. Usually users can
- set it according to <alg>_op_type.
- @ctx_set_size: Each ctx sets numbers.
- */
+struct wd_ctx_params {
- __u32 ctx_set_num;
- struct wd_ctx_nums *ctx_set_size;
+};
struct wd_ctx_internal { handle_t ctx; __u8 op_type; diff --git a/include/wd_comp.h b/include/wd_comp.h index e043a83..9cd50dd 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -7,6 +7,7 @@ #ifndef __WD_COMP_H #define __WD_COMP_H
+#include <numa.h> #include "wd.h" #include "wd_alg_common.h"
@@ -113,6 +114,32 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched); */ void wd_comp_uninit(void);
+/**
- wd_comp_init2() - A simplify interface to initializate uadk
- compression/decompression. Users can use wd_get_accel_list() to
- get the usable device list with the algrithms. Users should provide
- a device numa node mask to show which numa devices will be
- selected. wd_create_device_nodemask() can create a node mask
- according the list. If all numa devices on the list are match
- the requirement, just use the return of it. Otherwise, users can
- use the function in libnuma to set the node mask.
- To make the initializate simpler, bmp and cparams support set NULL.
- And then the function will set them as default.
- @list: The device list.
- @bmp: Node mask of the required devices.
- @cparams: The ctx settings.
- @sched_type: The scheduler type.
- Return 0 if succeed and others if fail.
- */
+int wd_comp_init2(const char *alg_name, __u32 sched_type);
+/**
- wd_comp_uninit2() - Uninitialise ctx configuration and scheduler.
- */
+void wd_comp_uninit2(void);
struct wd_comp_sess_setup { enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */ enum wd_comp_level comp_lv; /* Denoted by enum wd_comp_level */ diff --git a/include/wd_util.h b/include/wd_util.h index eafe3ce..4a2e102 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -7,6 +7,7 @@ #ifndef __WD_UTIL_H #define __WD_UTIL_H
+#include <numa.h> #include <stdbool.h> #include <sys/ipc.h> #include <sys/shm.h> @@ -394,6 +395,24 @@ static inline void wd_alg_clear_init(enum wd_status *status) __atomic_store(status, &setting, __ATOMIC_RELAXED); }
+/**
- wd_get_usable_list() - choose the devices according bitmask.
- @list: The device list.
- @bmp: The devices node mask.
- Return a list that meet user's requirement if succeed, and error number if fail.
- */
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp);
+/**
- wd_get_ctx_numbers() - count the ctx number for first to end.
- @cparams: the input ctx setting numbers.
- @end: the end index of cparams.
- Return the sum of top '@end' cparams ctx number.
- */
+__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end);
/**
- wd_dfx_msg_cnt() - Message counter interface for ctx
- @msg: Shared memory addr.
diff --git a/wd.c b/wd.c index d99d4ec..c63805c 100644 --- a/wd.c +++ b/wd.c @@ -741,6 +741,35 @@ free_list: return NULL; }
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id) +{
- struct uacce_dev *dev = WD_ERR_PTR(-WD_ENODEV);
- struct uacce_dev_list *p = list;
- int ctx_num, ctx_max = 0;
- if (!list) {
WD_ERR("invalid: list is NULL!\n");
return WD_ERR_PTR(-WD_EINVAL);
- }
- while (p) {
if (numa_id != p->dev->numa_id) {
p = p->next;
continue;
}
ctx_num = wd_get_avail_ctx(p->dev);
if (ctx_num > ctx_max) {
dev = p->dev;
ctx_max = ctx_num;
}
p = p->next;
- }
- return dev;
+}
void wd_free_list_accels(struct uacce_dev_list *list) { struct uacce_dev_list *curr, *next; @@ -807,6 +836,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg) return ioctl(ctx->fd, cmd, arg); }
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list) +{
- struct uacce_dev_list *p;
- struct bitmask *bmp;
- if (!list) {
WD_ERR("invalid: list is NULL!\n");
return WD_ERR_PTR(-WD_EINVAL);
- }
- bmp = numa_allocate_nodemask();
- if (!bmp) {
WD_ERR("failed to alloc bitmask(%d)!\n", errno);
return WD_ERR_PTR(-WD_EINVAL);
- }
- p = list;
- while (p) {
numa_bitmask_setbit(bmp, p->dev->numa_id);
p = p->next;
- }
- return bmp;
+}
+void wd_free_device_nodemask(struct bitmask *bmp) +{
- if (!bmp)
return;
- numa_free_nodemask(bmp);
+}
void wd_get_version(void) { const char *wd_released_time = UADK_RELEASED_TIME; diff --git a/wd_comp.c b/wd_comp.c index 44593a6..ba79838 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -14,6 +14,7 @@
#include "config.h" #include "drv/wd_comp_drv.h" +#include "wd_sched.h" #include "wd_util.h" #include "wd_comp.h"
@@ -21,6 +22,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024)
+#define SCHED_RR_NAME "sched_rr"
#define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ @@ -42,6 +45,7 @@ struct wd_comp_sess {
struct wd_comp_setting { enum wd_status status;
- enum wd_status status2; struct wd_ctx_config_internal config; struct wd_sched sched; struct wd_comp_driver *driver;
@@ -52,6 +56,10 @@ struct wd_comp_setting {
struct wd_env_config wd_comp_env_config;
+static struct wd_ctx_config wd_comp_ctx; +static struct wd_sched *wd_comp_sched; +static int wd_comp_numa_count;
#ifdef WD_STATIC_DRV static void wd_comp_set_static_drv(void) { @@ -178,6 +186,209 @@ void wd_comp_uninit(void) wd_alg_clear_init(&wd_comp_setting.status); }
+static int wd_comp_request_ctx(struct uacce_dev_list *list,
struct wd_ctx_nums ctx_nums,
int idx, int numa_id, int op_type)
+{
- int ctx_set_size = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num;
- struct uacce_dev *dev;
- int i;
- dev = wd_find_dev_by_numa(list, numa_id);
- if (!dev)
return -WD_EBUSY;
- for (i = idx; i < idx + ctx_set_size; i++) {
wd_comp_ctx.ctxs[i].ctx = wd_request_ctx(dev);
if (errno == WD_EBUSY) {
dev = wd_find_dev_by_numa(list, numa_id);
if (!dev)
return -WD_EBUSY;
i--;
}
wd_comp_ctx.ctxs[i].op_type = op_type;
wd_comp_ctx.ctxs[i].ctx_mode =
((i - idx) < ctx_nums.sync_ctx_num) ?
CTX_MODE_SYNC : CTX_MODE_ASYNC;
- }
- return 0;
+}
+static void wd_comp_release_ctx(void) +{
- int i;
- for (i = 0; i < wd_comp_ctx.ctx_num; i++)
if (wd_comp_ctx.ctxs[i].ctx) {
wd_release_ctx(wd_comp_ctx.ctxs[i].ctx);
wd_comp_ctx.ctxs[i].ctx = 0;
}
+}
+static int wd_comp_instance_sched(struct wd_ctx_nums ctx_nums, int idx,
int numa_id, int op_type)
+{
- struct sched_params sparams;
- int i, ret = 0;
- for (i = 0; i < CTX_MODE_MAX; i++) {
sparams.numa_id = numa_id;
sparams.type = op_type;
sparams.mode = i;
sparams.begin = idx + ctx_nums.sync_ctx_num * i;
sparams.end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i;
if (sparams.begin > sparams.end)
continue;
ret = wd_sched_rr_instance(wd_comp_sched, &sparams);
if (ret)
goto out;
- }
+out:
- return ret;
+}
+static int __wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp,
struct wd_ctx_params cparams)
+{
- int ctx_set_num = cparams.ctx_set_num;
- int max_node = numa_max_node() + 1;
- struct wd_ctx_nums ctx_nums;
- int i, j, ret;
- int idx = 0;
- for (i = 0; i < max_node; i++) {
if (!numa_bitmask_isbitset(bmp, i))
continue;
for (j = 0; j < ctx_set_num; j++) {
ctx_nums = cparams.ctx_set_size[j];
ret = wd_comp_request_ctx(list, ctx_nums, idx, i, j);
if (ret)
goto free_ctxs;
ret = wd_comp_instance_sched(ctx_nums, idx, i, j);
if (ret)
goto free_ctxs;
idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num);
}
- }
- ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched);
- if (ret)
goto free_ctxs;
- return 0;
+free_ctxs:
- wd_comp_release_ctx();
- return ret;
+}
+static struct wd_ctx_nums comp_default_ctxsize[] = {
- {1, 1}, {1, 1}, { }
+};
+static struct wd_ctx_params comp_default_cparams = {
- .ctx_set_num = WD_DIR_MAX,
- .ctx_set_size = comp_default_ctxsize,
+};
This way of implementing the default parameters is not very suitable. Would it be better to pass in a setup parameter structure, or to get the defaults directly from the driver?
Thanks Longfang
+int wd_comp_init2(const char *alg_name, __u32 sched_type) +{
- struct uacce_dev_list *dev_list = NULL;
- __u32 ctx_set_num, ctx_set_size;
- struct bitmask *dev_bmp;
- bool flag;
- int ret;
- flag = wd_alg_try_init(&wd_comp_setting.status2);
- if (!flag)
return 0;
- if (!alg_name) {
WD_ERR("invalid: alg_name is NULL!\n");
ret = -WD_EINVAL;
goto out_uninit;
- }
- dev_list = wd_get_accel_list(alg_name);
- if (!dev_list) {
WD_ERR("invalid: alg_name is not support!\n");
ret = -WD_EINVAL;
goto out_uninit;
- }
- dev_bmp = wd_create_device_nodemask(dev_list);
- if (WD_IS_ERR(dev_bmp)) {
ret = WD_PTR_ERR(dev_bmp);
goto out_freelist;
- }
- wd_comp_numa_count = numa_bitmask_weight(dev_bmp);
- if (!wd_comp_numa_count) {
WD_ERR("invalid: bmp is clear!\n");
ret = -WD_ENODEV;
goto out_freebmp;
- }
- ctx_set_num = comp_default_cparams.ctx_set_num;
- ctx_set_size = wd_get_ctx_numbers(comp_default_cparams, ctx_set_num);
- wd_comp_ctx.ctx_num = ctx_set_size * wd_comp_numa_count;
- wd_comp_ctx.ctxs = calloc(wd_comp_ctx.ctx_num, sizeof(struct wd_ctx));
- if (!wd_comp_ctx.ctxs) {
ret = -WD_ENOMEM;
WD_ERR("failed to alloc ctxs!\n");
goto out_freebmp;
- }
- wd_comp_sched = wd_sched_rr_alloc(sched_type, ctx_set_num,
numa_max_node() + 1, wd_comp_poll_ctx);
- if (!wd_comp_sched) {
ret = -WD_EINVAL;
goto out_freectxs;
- }
- wd_comp_sched->name = SCHED_RR_NAME;
- ret = __wd_comp_init2(dev_list, dev_bmp, comp_default_cparams);
- if (ret)
goto out_freesched;
- wd_free_device_nodemask(dev_bmp);
- wd_free_list_accels(dev_list);
- wd_alg_set_init(&wd_comp_setting.status2);
- return ret;
+out_freesched:
- wd_sched_rr_release(wd_comp_sched);
- wd_comp_sched = NULL;
+out_freectxs:
- free(wd_comp_ctx.ctxs);
- wd_comp_ctx.ctxs = NULL;
+out_freebmp:
- wd_free_device_nodemask(dev_bmp);
+out_freelist:
- wd_free_list_accels(dev_list);
+out_uninit:
- wd_alg_clear_init(&wd_comp_setting.status2);
- return ret;
+}
+void wd_comp_uninit2(void) +{
- wd_comp_uninit();
- wd_comp_release_ctx();
- wd_sched_rr_release(wd_comp_sched);
- wd_alg_clear_init(&wd_comp_setting.status2);
+}
struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag) { return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag); @@ -289,6 +500,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->comp_lv = setup->comp_lv; sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW;
- /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param);
@@ -318,6 +530,7 @@ void wd_comp_free_sess(handle_t h_sess)
if (sess->sched_key) free(sess->sched_key);
- free(sess);
}
diff --git a/wd_util.c b/wd_util.c index efc0d41..471ca07 100644 --- a/wd_util.c +++ b/wd_util.c @@ -5,7 +5,6 @@ */
#define _GNU_SOURCE -#include <numa.h> #include <pthread.h> #include <semaphore.h> #include <string.h> @@ -1801,3 +1800,61 @@ bool wd_alg_try_init(enum wd_status *status)
return true; }
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +{
- struct uacce_dev_list *p, *node, *result = NULL;
- struct uacce_dev *dev;
- int numa_id, ret;
- p = list;
- while (p) {
dev = p->dev;
numa_id = dev->numa_id;
ret = numa_bitmask_isbitset(bmp, numa_id);
if (!ret) {
p = p->next;
continue;
}
node = calloc(1, sizeof(*node));
if (!node) {
result = WD_ERR_PTR(-WD_ENOMEM);
goto out_free_list;
}
node->dev = wd_clone_dev(dev);
if (!node->dev) {
result = WD_ERR_PTR(-WD_ENOMEM);
goto out_free_node;
}
if (!result)
result = node;
else
wd_add_dev_to_list(result, node);
p = p->next;
- }
- return result;
+out_free_node:
- free(node);
+out_free_list:
- wd_free_list_accels(result);
- return result;
+}
+__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end) +{
- __u32 count = 0;
- int i;
- for (i = 0; i < end; i++) {
count += cparams.ctx_set_size[i].sync_ctx_num;
count += cparams.ctx_set_size[i].async_ctx_num;
- }
- return count;
+}