在 2022/10/31 21:25, fanghao (A) 写道:
在 2022/10/29 18:19, Yang Shen 写道:
For performance reasons, uadk leaves many configuration options to users. This gives users great flexibility, but it also makes the current initialization interface highly complex. Therefore, to help users adapt quickly, a new set of interfaces is provided.
The 'wd_alg_init2_()' interface completes all initialization steps. It takes 4 parameters that describe the user's configuration requirements. @alg: The algorithm users want to use. @sched_type: The scheduling type users want to use. @task_tp: Reserved.
task_type ? 同步或者异步吗,建议说明下。
给龙芳的切软算预留的接口,当前代码用不上,为了避免后续修改接口,所以这里提前加上, 在后续特性使能的时候会加上描述。
@ctx_params: The ctxs resources users want to use. Include per operation type ctx numbers and business process run numa.
If users find 'wd_alg_init2_()' too complex, wd_alg_init2() is a simplified wrapper that uses the default values of numa_bitmask and ctx_nums.
Signed-off-by: Yang Shen shenyang39@huawei.com
Makefile.am | 4 +- include/wd.h | 24 ++++ include/wd_alg_common.h | 27 +++++ include/wd_comp.h | 30 +++++ include/wd_util.h | 18 +++ wd.c | 62 +++++++++++ wd_comp.c | 94 ++++++++++++++++ wd_util.c | 236 +++++++++++++++++++++++++++++++++++++++- 8 files changed, 492 insertions(+), 3 deletions(-)
diff --git a/Makefile.am b/Makefile.am index 457af43..c5637e5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -87,7 +87,7 @@ AM_CFLAGS += -DWD_NO_LOG libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma -libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_comp_la_DEPENDENCIES = libwd.la libhisi_zip_la_LIBADD = -ldl @@ -104,7 +104,7 @@ else libwd_la_LDFLAGS=$(UADK_VERSION) libwd_la_LIBADD= -lnuma -libwd_comp_la_LIBADD= -lwd -ldl +libwd_comp_la_LIBADD= -lwd -ldl -lnuma libwd_comp_la_LDFLAGS=$(UADK_VERSION) libwd_comp_la_DEPENDENCIES= libwd.la diff --git a/include/wd.h b/include/wd.h index e1a87de..facd992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev); */ struct uacce_dev_list *wd_get_accel_list(const char *alg_name); +/**
+ * wd_find_dev_by_numa() - Get the device with the most available ctxs from a
+ *			   device list according to numa id.
+ * @list: The device list.
+ * @numa_id: The numa_id.
+ *
+ * Return the device on success, or an error pointer on failure.
+ */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id);
/** * wd_get_accel_dev() - Get device supporting the algorithm with smallest numa distance to current numa node. @@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev); */ void wd_add_dev_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node); +/**
+ * wd_create_device_nodemask() - Create a numa node mask of a device list.
+ * @list: The device list.
+ *
+ * Return a pointer to the node mask on success, or an error pointer on failure.
+ */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list);
+/**
+ * wd_free_device_nodemask() - Free a numa node mask.
+ * @bmp: A numa node mask.
+ */
+void wd_free_device_nodemask(struct bitmask *bmp);
/** * wd_ctx_get_dev_name() - Get the device name about task. * @h_ctx: The handle of context. diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index c455dc3..96e908f 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -63,6 +63,33 @@ struct wd_ctx_config { void *priv; }; +/**
+ * struct wd_ctx_nums - Define the ctx numbers of a ctx set.
+ * @sync_ctx_num: The number of ctxs used for sync mode in each ctx set.
+ * @async_ctx_num: The number of ctxs used for async mode in each ctx set.
+ */
+struct wd_ctx_nums {
+	__u32 sync_ctx_num;	/* number of ctxs used in sync mode */
+	__u32 async_ctx_num;	/* number of ctxs used in async mode */
+};
+/**
+ * struct wd_ctx_params - Define the ctx set parameters which are used to
+ *			  initialize algorithms.
+ * @op_type_num: Used to index ctx_set_num; the order is the same as
+ *		 wd_<alg>_op_type.
+ * @ctx_set_num: The ctx set numbers of each operation type.
+ * @bmp: Ctx distribution. It means users want to run the business process on
+ *	 these numa nodes, or request ctxs from devices located on these numa
+ *	 nodes.
+ */
+struct wd_ctx_params {
+	__u32 op_type_num;			/* number of operation types, bounds ctx_set_num */
+	struct wd_ctx_nums *ctx_set_num;	/* per operation type sync/async ctx numbers */
+	struct bitmask *bmp;			/* numa node mask; NULL is accepted by wd_alg_pre_init() */
+};
struct wd_ctx_internal { handle_t ctx; __u8 op_type; diff --git a/include/wd_comp.h b/include/wd_comp.h index e043a83..13a3e6a 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -7,6 +7,7 @@ #ifndef __WD_COMP_H #define __WD_COMP_H +#include <numa.h> #include "wd.h" #include "wd_alg_common.h" @@ -113,6 +114,35 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched); */ void wd_comp_uninit(void); +/**
+ * wd_comp_init2_() - A simplified interface to initialize uadk
+ * compression/decompression. This interface keeps most functions of
+ * wd_comp_init(). Users just need to describe the deployment of
+ * business scenarios. Then the initialization will request appropriate
+ * resources to support the business scenarios.
+ * To make the initialization simpler, ctx_params may be set to NULL,
+ * and then the function will use the default values.
+ * Please do not use this interface together with wd_comp_init(), or
+ * some resources may be leaked.
+ *
+ * @alg: The algorithm users want to use.
+ * @sched_type: The scheduling type users want to use.
+ * @task_tp: Reserved.
+ * @ctx_params: The ctx resources users want to use, including the ctx
+ *		numbers of each operation type and the numa nodes on which
+ *		the business process runs.
+ *
+ * Return 0 if succeed and others if fail.
+ */
+int wd_comp_init2_(char *alg, __u32 sched_type, int task_tp, struct wd_ctx_params *ctx_params);
+/* Shorthand for wd_comp_init2_() with ctx_params passed as NULL. */
+#define wd_comp_init2(alg, sched_type, task_tp) \
+	wd_comp_init2_(alg, sched_type, task_tp, NULL)
+/**
+ * wd_comp_uninit2() - Uninitialise ctx configuration and scheduler.
+ */
+void wd_comp_uninit2(void);
struct wd_comp_sess_setup { enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */ enum wd_comp_level comp_lv; /* Denoted by enum wd_comp_level */ diff --git a/include/wd_util.h b/include/wd_util.h index cd0e112..a51a35d 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -7,6 +7,7 @@ #ifndef __WD_UTIL_H #define __WD_UTIL_H +#include <numa.h> #include <stdbool.h> #include <sys/ipc.h> #include <sys/shm.h> @@ -112,6 +113,14 @@ struct wd_msg_handle { int (*recv)(handle_t sess, void *msg); };
+/* Parameter bundle handed to wd_alg_pre_init(). */
+struct wd_init_attrs {
+	__u32 sched_type;			/* scheduling type chosen by the caller */
+	char *alg;				/* algorithm name used to look up devices */
+	struct wd_sched *sched;			/* scheduler whose domains get instantiated */
+	struct wd_ctx_params *ctx_params;	/* requested ctx numbers and numa mask */
+	struct wd_ctx_config *ctx_config;	/* ctx_num and ctxs are filled by wd_alg_pre_init() */
+};
/* * wd_init_ctx_config() - Init internal ctx configuration. * @in: ctx configuration in global setting. @@ -404,6 +413,15 @@ static inline void wd_alg_clear_init(enum wd_status *status) __atomic_store(status, &setting, __ATOMIC_RELAXED); } +/**
+ * wd_alg_pre_init() - Request the ctxs and initialize the sched_domain
+ *		       with the given devices list, ctxs number and numa mask.
+ * @attrs: The algorithm initialization parameters.
+ *
+ * Return 0 if succeed and an error number if fail.
+ */
+int wd_alg_pre_init(struct wd_init_attrs *attrs);
/** * wd_dfx_msg_cnt() - Message counter interface for ctx * @msg: Shared memory addr. diff --git a/wd.c b/wd.c index 78094d8..9eb69d2 100644 --- a/wd.c +++ b/wd.c @@ -727,6 +727,35 @@ free_list: return NULL; }
+/*
+ * Among the devices of @list whose numa_id equals @numa_id, pick the one for
+ * which wd_get_avail_ctx() reports the largest positive value.
+ *
+ * Returns WD_ERR_PTR(-WD_EINVAL) when @list is NULL and
+ * WD_ERR_PTR(-WD_ENODEV) when no matching device reports an available ctx.
+ */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id)
+{
+	struct uacce_dev *best = WD_ERR_PTR(-WD_ENODEV);
+	struct uacce_dev_list *node;
+	int best_avail = 0;
+	int avail;
+
+	if (!list) {
+		WD_ERR("invalid: list is NULL!\n");
+		return WD_ERR_PTR(-WD_EINVAL);
+	}
+
+	for (node = list; node; node = node->next) {
+		if (node->dev->numa_id != numa_id)
+			continue;
+
+		avail = wd_get_avail_ctx(node->dev);
+		if (avail > best_avail) {
+			best_avail = avail;
+			best = node->dev;
+		}
+	}
+
+	return best;
+}
void wd_free_list_accels(struct uacce_dev_list *list) { struct uacce_dev_list *curr, *next; @@ -793,6 +822,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg) return ioctl(ctx->fd, cmd, arg); }
+/*
+ * Allocate a numa node mask and set one bit for the numa_id of every device
+ * in @list. The caller releases it with wd_free_device_nodemask().
+ *
+ * Returns WD_ERR_PTR(-WD_EINVAL) for a NULL list and
+ * WD_ERR_PTR(-WD_ENOMEM) when the mask cannot be allocated.
+ */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list)
+{
+	struct uacce_dev_list *node;
+	struct bitmask *mask;
+
+	if (!list) {
+		WD_ERR("invalid: list is NULL!\n");
+		return WD_ERR_PTR(-WD_EINVAL);
+	}
+
+	mask = numa_allocate_nodemask();
+	if (!mask) {
+		WD_ERR("failed to alloc bitmask(%d)!\n", errno);
+		return WD_ERR_PTR(-WD_ENOMEM);
+	}
+
+	for (node = list; node; node = node->next)
+		numa_bitmask_setbit(mask, node->dev->numa_id);
+
+	return mask;
+}
+/* Release a node mask created by wd_create_device_nodemask(); NULL is ignored. */
+void wd_free_device_nodemask(struct bitmask *bmp)
+{
+	if (bmp)
+		numa_free_nodemask(bmp);
+}
void wd_get_version(void) { const char *wd_released_time = UADK_RELEASED_TIME; diff --git a/wd_comp.c b/wd_comp.c index 44593a6..487fd02 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -14,6 +14,7 @@ #include "config.h" #include "drv/wd_comp_drv.h" +#include "wd_sched.h" #include "wd_util.h" #include "wd_comp.h" @@ -21,6 +22,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024) +#define SCHED_RR_NAME "sched_rr"
#define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ @@ -42,6 +45,7 @@ struct wd_comp_sess { struct wd_comp_setting { enum wd_status status; + enum wd_status status2; struct wd_ctx_config_internal config; struct wd_sched sched; struct wd_comp_driver *driver; @@ -52,6 +56,20 @@ struct wd_comp_setting { struct wd_env_config wd_comp_env_config; +static struct wd_init_attrs wd_comp_init_attrs; +static struct wd_ctx_config wd_comp_ctx; +static struct wd_sched *wd_comp_sched;
+/*
+ * Default ctx numbers: each of the first two entries requests one sync and
+ * one async ctx; the trailing entry is left zero-initialized.
+ */
+static struct wd_ctx_nums wd_comp_ctx_num[] = {
+	{1, 1}, {1, 1}, {}
+};
+
+/* Parameters used by wd_comp_init2_() when the caller passes ctx_params as NULL. */
+static struct wd_ctx_params wd_comp_ctx_params = {
+	.op_type_num = WD_DIR_MAX,
+	.ctx_set_num = wd_comp_ctx_num,
+	.bmp = NULL,
+};
#ifdef WD_STATIC_DRV static void wd_comp_set_static_drv(void) { @@ -178,6 +196,80 @@ void wd_comp_uninit(void) wd_alg_clear_init(&wd_comp_setting.status); }
+/*
+ * wd_comp_init2_() - one-call initialization of the comp algorithm.
+ * @alg: algorithm name, must not be NULL.
+ * @sched_type: scheduling type passed to wd_sched_rr_alloc().
+ * @task_tp: reserved, currently unused.
+ * @ctx_params: requested ctx numbers and numa mask; NULL selects
+ *		wd_comp_ctx_params.
+ *
+ * Allocates a round-robin scheduler, lets wd_alg_pre_init() request the
+ * ctxs and then calls wd_comp_init(). Returns 0 on success, and also 0 when
+ * comp was already initialized through wd_comp_init() or when
+ * wd_alg_try_init() reports that no initialization is needed. Returns a
+ * negative error number on failure; in that case everything acquired here is
+ * released again.
+ */
+int wd_comp_init2_(char *alg, __u32 sched_type, int task_tp, struct wd_ctx_params *ctx_params)
+{
+	enum wd_status status;
+	bool flag;
+	__u32 i;
+	int ret;
+
+	wd_alg_get_init(&wd_comp_setting.status, &status);
+	if (status == WD_INIT) {
+		WD_INFO("UADK comp has been initialized with wd_comp_init()!\n");
+		return 0;
+	}
+
+	flag = wd_alg_try_init(&wd_comp_setting.status2);
+	if (!flag)
+		return 0;
+
+	if (!alg) {
+		WD_ERR("invalid: alg is NULL!\n");
+		ret = -WD_EINVAL;
+		goto out_uninit;
+	}
+
+	wd_comp_init_attrs.alg = alg;
+	wd_comp_init_attrs.sched_type = sched_type;
+	wd_comp_init_attrs.ctx_params = ctx_params ? ctx_params : &wd_comp_ctx_params;
+	wd_comp_init_attrs.ctx_config = &wd_comp_ctx;
+
+	wd_comp_sched = wd_sched_rr_alloc(sched_type, wd_comp_init_attrs.ctx_params->op_type_num,
+					  numa_max_node() + 1, wd_comp_poll_ctx);
+	if (!wd_comp_sched) {
+		ret = -WD_EINVAL;
+		goto out_uninit;
+	}
+	wd_comp_sched->name = SCHED_RR_NAME;
+	wd_comp_init_attrs.sched = wd_comp_sched;
+
+	ret = wd_alg_pre_init(&wd_comp_init_attrs);
+	if (ret)
+		goto out_freesched;
+
+	ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched);
+	if (ret)
+		goto out_freectx;
+
+	wd_alg_set_init(&wd_comp_setting.status2);
+
+	return 0;
+
+out_freectx:
+	/*
+	 * wd_alg_pre_init() succeeded, so the ctxs it requested and the ctxs
+	 * array it allocated must be given back here, otherwise they leak.
+	 */
+	for (i = 0; i < wd_comp_ctx.ctx_num; i++) {
+		if (wd_comp_ctx.ctxs[i].ctx) {
+			wd_release_ctx(wd_comp_ctx.ctxs[i].ctx);
+			wd_comp_ctx.ctxs[i].ctx = 0;
+		}
+	}
+	free(wd_comp_ctx.ctxs);
+	wd_comp_ctx.ctxs = NULL;
+	wd_comp_ctx.ctx_num = 0;
+
+out_freesched:
+	wd_sched_rr_release(wd_comp_sched);
+	/* Do not keep a dangling scheduler pointer around for wd_comp_uninit2(). */
+	wd_comp_sched = NULL;
+
+out_uninit:
+	wd_alg_clear_init(&wd_comp_setting.status2);
+
+	return ret;
+}
+/*
+ * wd_comp_uninit2() - undo wd_comp_init2_().
+ *
+ * Calls wd_comp_uninit(), releases every ctx recorded in wd_comp_ctx, frees
+ * the ctxs array that wd_alg_pre_init() allocated, releases the scheduler
+ * and clears the init2 status. All bookkeeping is reset afterwards so that a
+ * repeated call does not release anything twice.
+ */
+void wd_comp_uninit2(void)
+{
+	__u32 i;
+
+	wd_comp_uninit();
+
+	for (i = 0; i < wd_comp_ctx.ctx_num; i++) {
+		if (wd_comp_ctx.ctxs[i].ctx) {
+			wd_release_ctx(wd_comp_ctx.ctxs[i].ctx);
+			wd_comp_ctx.ctxs[i].ctx = 0;
+		}
+	}
+
+	/* The array itself was calloc'ed by wd_alg_pre_init(). */
+	free(wd_comp_ctx.ctxs);
+	wd_comp_ctx.ctxs = NULL;
+	wd_comp_ctx.ctx_num = 0;
+
+	if (wd_comp_sched) {
+		wd_sched_rr_release(wd_comp_sched);
+		wd_comp_sched = NULL;
+	}
+
+	wd_alg_clear_init(&wd_comp_setting.status2);
+}
struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag) { return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag); @@ -289,6 +381,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->comp_lv = setup->comp_lv; sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW;
/* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param); @@ -318,6 +411,7 @@ void wd_comp_free_sess(handle_t h_sess) if (sess->sched_key) free(sess->sched_key);
free(sess); } diff --git a/wd_util.c b/wd_util.c index fa77b46..8fed8ac 100644 --- a/wd_util.c +++ b/wd_util.c @@ -5,7 +5,6 @@ */ #define _GNU_SOURCE -#include <numa.h> #include <pthread.h> #include <semaphore.h> #include <string.h> @@ -1801,3 +1800,238 @@ bool wd_alg_try_init(enum wd_status *status) return true; }
+/*
+ * Sum the sync and async ctx numbers of the first @end entries of
+ * ctx_params.ctx_set_num.
+ */
+static __u32 wd_get_ctx_numbers(struct wd_ctx_params ctx_params, int end)
+{
+	const struct wd_ctx_nums *set = ctx_params.ctx_set_num;
+	__u32 total = 0;
+	int k;
+
+	for (k = 0; k < end; k++)
+		total += set[k].sync_ctx_num + set[k].async_ctx_num;
+
+	return total;
+}
+/*
+ * wd_get_usable_list() - build a new list with clones of the devices of
+ * @list whose numa_id bit is set in @bmp.
+ * @list: the source device list.
+ * @bmp: the numa node mask to filter with, must not be NULL.
+ *
+ * Returns the new list, which the caller frees with wd_free_list_accels().
+ * Returns WD_ERR_PTR(-WD_EINVAL) for a NULL mask, WD_ERR_PTR(-WD_ENODEV)
+ * when no device matches, and WD_ERR_PTR(-WD_ENOMEM) when a node or a
+ * device clone cannot be allocated.
+ */
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp)
+{
+	struct uacce_dev_list *p, *node, *result = NULL;
+
+	if (!bmp) {
+		WD_ERR("invalid: bmp is NULL!\n");
+		return WD_ERR_PTR(-WD_EINVAL);
+	}
+
+	for (p = list; p; p = p->next) {
+		if (!numa_bitmask_isbitset(bmp, p->dev->numa_id))
+			continue;
+
+		node = calloc(1, sizeof(*node));
+		if (!node)
+			goto out_free_list;
+
+		node->dev = wd_clone_dev(p->dev);
+		if (!node->dev) {
+			free(node);
+			goto out_free_list;
+		}
+
+		if (!result)
+			result = node;
+		else
+			wd_add_dev_to_list(result, node);
+	}
+
+	return result ? result : WD_ERR_PTR(-WD_ENODEV);
+
+out_free_list:
+	/*
+	 * Free the partially built list itself. The error code must not be
+	 * stored in 'result' first, or an error pointer would be handed to
+	 * wd_free_list_accels() and the real list would leak.
+	 */
+	wd_free_list_accels(result);
+	return WD_ERR_PTR(-WD_ENOMEM);
+}
+/*
+ * wd_init_ctx_set() - request the ctxs of one operation type on one numa node.
+ * @attrs: the initialization parameters; ctx_config->ctxs is filled.
+ * @list: the devices to request ctxs from.
+ * @idx: index of the first slot of ctx_config->ctxs to fill.
+ * @numa_id: the numa node whose devices are used.
+ * @op_type: the operation type, also the index into ctx_set_num.
+ *
+ * The first sync_ctx_num slots are marked CTX_MODE_SYNC, the remaining ones
+ * CTX_MODE_ASYNC. When a device reports WD_EBUSY, the device with the most
+ * available ctxs on the node is looked up again and the same slot is retried.
+ *
+ * Returns 0 on success and a negative error number on failure. Ctxs that
+ * were already requested stay in ctx_config for the caller to release.
+ */
+static int wd_init_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *list,
+			   int idx, int numa_id, int op_type)
+{
+	struct wd_ctx_nums ctx_nums = attrs->ctx_params->ctx_set_num[op_type];
+	__u32 ctx_set_num = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num;
+	struct wd_ctx_config *ctx_config = attrs->ctx_config;
+	struct uacce_dev *dev;
+	handle_t ctx;
+	int i, err;
+
+	/* Nothing requested for this operation type, so no device is needed. */
+	if (!ctx_set_num)
+		return 0;
+
+	dev = wd_find_dev_by_numa(list, numa_id);
+	if (WD_IS_ERR(dev))
+		return WD_PTR_ERR(dev);
+
+	i = idx;
+	while (i < idx + ctx_set_num) {
+		/* Clear errno so a stale value is never mistaken for this call's result. */
+		errno = 0;
+		ctx = wd_request_ctx(dev);
+		if (!ctx) {
+			/* NOTE(review): assumes wd_request_ctx() returns 0 on failure - confirm. */
+			err = errno;
+			if (err != WD_EBUSY) {
+				WD_ERR("failed to request ctx(%d)!\n", err);
+				return err ? -err : -WD_ENODEV;
+			}
+
+			/* The device ran out of ctxs: pick another one and retry this slot. */
+			dev = wd_find_dev_by_numa(list, numa_id);
+			if (WD_IS_ERR(dev))
+				return WD_PTR_ERR(dev);
+			continue;
+		}
+
+		ctx_config->ctxs[i].ctx = ctx;
+		ctx_config->ctxs[i].op_type = op_type;
+		ctx_config->ctxs[i].ctx_mode =
+			((i - idx) < ctx_nums.sync_ctx_num) ?
+			CTX_MODE_SYNC : CTX_MODE_ASYNC;
+		i++;
+	}
+
+	return 0;
+}
+/* Release every ctx still recorded in @ctx_config and clear its slot. */
+static void wd_release_ctx_set(struct wd_ctx_config *ctx_config)
+{
+	int n;
+
+	for (n = 0; n < ctx_config->ctx_num; n++) {
+		if (!ctx_config->ctxs[n].ctx)
+			continue;
+
+		wd_release_ctx(ctx_config->ctxs[n].ctx);
+		ctx_config->ctxs[n].ctx = 0;
+	}
+}
+/*
+ * wd_instance_sched_set() - register the ctx index ranges of one operation
+ * type on one numa node with the round-robin scheduler.
+ * @sched: the scheduler to instantiate.
+ * @ctx_nums: the sync/async ctx numbers of this operation type.
+ * @idx: index of the first ctx of this set.
+ * @numa_id: the numa node the ctxs belong to.
+ * @op_type: the operation type.
+ *
+ * One range is registered per ctx mode; with mode 0 covering the sync ctxs
+ * and mode 1 the async ctxs that follow them. Empty ranges are skipped.
+ *
+ * Returns 0 on success or the error of wd_sched_rr_instance().
+ */
+static int wd_instance_sched_set(struct wd_sched *sched, struct wd_ctx_nums ctx_nums,
+				 int idx, int numa_id, int op_type)
+{
+	struct sched_params sparams = {0};
+	long long first, last;
+	int i, ret;
+
+	for (i = 0; i < CTX_MODE_MAX; i++) {
+		/*
+		 * Compute the bounds in a wide signed type: with idx == 0 and
+		 * no ctx for this mode, 'last' is -1, which must be detected
+		 * here whatever the signedness of the sched_params fields is.
+		 */
+		first = (long long)idx + (long long)ctx_nums.sync_ctx_num * i;
+		last = (long long)idx - 1 + ctx_nums.sync_ctx_num +
+		       (long long)ctx_nums.async_ctx_num * i;
+		if (first > last)
+			continue;
+
+		sparams.numa_id = numa_id;
+		sparams.type = op_type;
+		sparams.mode = i;
+		sparams.begin = first;
+		sparams.end = last;
+
+		ret = wd_sched_rr_instance(sched, &sparams);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+/*
+ * For every numa node set in @bmp and every operation type, request the ctx
+ * set and register its index range with the scheduler. Slots of
+ * ctx_config->ctxs are consumed in order. On any failure all ctxs recorded
+ * in attrs->ctx_config are released and the error is returned.
+ */
+static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bmp,
+				 struct uacce_dev_list *list)
+{
+	struct wd_ctx_params *params = attrs->ctx_params;
+	__u32 type_cnt = params->op_type_num;
+	int node_cnt = numa_max_node() + 1;
+	struct wd_ctx_nums nums;
+	int node, type, ret;
+	int next = 0;
+
+	for (node = 0; node < node_cnt; node++) {
+		if (!numa_bitmask_isbitset(bmp, node))
+			continue;
+
+		for (type = 0; type < type_cnt; type++) {
+			nums = params->ctx_set_num[type];
+
+			ret = wd_init_ctx_set(attrs, list, next, node, type);
+			if (!ret)
+				ret = wd_instance_sched_set(attrs->sched, nums, next, node, type);
+			if (ret) {
+				wd_release_ctx_set(attrs->ctx_config);
+				return ret;
+			}
+
+			next += (nums.sync_ctx_num + nums.async_ctx_num);
+		}
+	}
+
+	return 0;
+}
+/*
+ * wd_alg_pre_init() - request the ctxs and instantiate the scheduler.
+ * @attrs: the algorithm initialization parameters.
+ *
+ * Looks up the devices supporting attrs->alg, optionally restricts them to
+ * the numa nodes of ctx_params->bmp, allocates ctx_config->ctxs with one ctx
+ * set per used numa node and fills it through wd_init_ctx_and_sched().
+ *
+ * Returns 0 on success; ctx_config->ctxs is then owned by the caller, who
+ * must release the ctxs and free the array. Returns a negative error number
+ * on failure, in which case ctx_config holds no ctxs.
+ */
+int wd_alg_pre_init(struct wd_init_attrs *attrs)
+{
+	struct wd_ctx_config *ctx_config = attrs->ctx_config;
+	struct wd_ctx_params *ctx_params = attrs->ctx_params;
+	struct bitmask *used_bmp, *bmp = ctx_params->bmp;
+	struct uacce_dev_list *list, *used_list = NULL;
+	__u32 ctx_set_num, op_type_num;
+	int numa_cnt, ret;
+
+	list = wd_get_accel_list(attrs->alg);
+	if (!list) {
+		WD_ERR("failed to get devices!\n");
+		return -WD_ENODEV;
+	}
+
+	op_type_num = ctx_params->op_type_num;
+	if (op_type_num && !ctx_params->ctx_set_num) {
+		WD_ERR("invalid: ctx_set_num is NULL!\n");
+		ret = -WD_EINVAL;
+		goto out_freelist;
+	}
+
+	ctx_set_num = wd_get_ctx_numbers(*ctx_params, op_type_num);
+	if (!ctx_set_num || !op_type_num) {
+		WD_ERR("invalid: ctx_set_num is %u, op_type_num is %u!\n",
+		       ctx_set_num, op_type_num);
+		ret = -WD_EINVAL;
+		goto out_freelist;
+	}
+
+	/*
+	 * Not every numa has a device. Therefore, the first thing is to
+	 * filter the devices in the selected numa node, and the second
+	 * thing is to obtain the distribution of devices.
+	 */
+	if (bmp) {
+		used_list = wd_get_usable_list(list, bmp);
+		if (WD_IS_ERR(used_list)) {
+			ret = WD_PTR_ERR(used_list);
+			WD_ERR("failed to get usable devices(%d)!\n", ret);
+			/* Never hand an error pointer to wd_free_list_accels(). */
+			goto out_freelist;
+		}
+	}
+
+	used_bmp = wd_create_device_nodemask(used_list ? used_list : list);
+	if (WD_IS_ERR(used_bmp)) {
+		ret = WD_PTR_ERR(used_bmp);
+		goto out_freeusedlist;
+	}
+
+	numa_cnt = numa_bitmask_weight(used_bmp);
+	if (!numa_cnt) {
+		/* Must be a real error code: returning numa_cnt (0) would signal success. */
+		ret = -WD_EINVAL;
+		WD_ERR("invalid: bmp is clear!\n");
+		goto out_freenodemask;
+	}
+
+	ctx_config->ctx_num = ctx_set_num * numa_cnt;
+	ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx));
+	if (!ctx_config->ctxs) {
+		ret = -WD_ENOMEM;
+		ctx_config->ctx_num = 0;
+		WD_ERR("failed to alloc ctxs!\n");
+		goto out_freenodemask;
+	}
+
+	ret = wd_init_ctx_and_sched(attrs, used_bmp, used_list ? used_list : list);
+	if (ret) {
+		/* Leave no dangling array behind for later release loops. */
+		free(ctx_config->ctxs);
+		ctx_config->ctxs = NULL;
+		ctx_config->ctx_num = 0;
+	}
+
+out_freenodemask:
+	wd_free_device_nodemask(used_bmp);
+
+out_freeusedlist:
+	wd_free_list_accels(used_list);
+
+out_freelist:
+	wd_free_list_accels(list);
+
+	return ret;
+}