Due to performance, uadk tries to leave many configuration options to users. This gives users great flexibility, but it also leads to a problem that the current initialization interface has high complexity. Therefore, in order to facilitate users to adapt quickly, a new set of interfaces is provided.
The 'wd_alg_init2_()' will complete all initialization steps. There are 4 parameters to describe the user configuration requirements. @alg: The algorithm users want to use. @sched_type: Scheduling type the user wants to use. @numa_bitmask: The bitmask provided by libnuma. Users can use this parameter to control requesting ctxs devices in the bind NUMA scenario. @ctx_nums: The requested ctx number for each numa node. Due to users may have different requirements for different types of ctx numbers, needs a two-dimensional array as input.
If users think 'wd_alg_init2_()' is too complex, wd_alg_init2() is a simplified packaging and will use the default value of numa_bitmask and ctx_nums.
Signed-off-by: Yang Shen shenyang39@huawei.com --- Makefile.am | 4 +- include/wd.h | 24 ++++ include/wd_alg_common.h | 24 ++++ include/wd_comp.h | 27 +++++ include/wd_util.h | 19 ++++ wd.c | 62 +++++++++++ wd_comp.c | 86 +++++++++++++++ wd_util.c | 236 +++++++++++++++++++++++++++++++++++++++- 8 files changed, 479 insertions(+), 3 deletions(-)
diff --git a/Makefile.am b/Makefile.am index 53f36f9..5465b64 100644 --- a/Makefile.am +++ b/Makefile.am @@ -86,7 +86,7 @@ AM_CFLAGS += -DWD_NO_LOG
libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma
-libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_comp_la_DEPENDENCIES = libwd.la
libhisi_zip_la_LIBADD = -ldl @@ -103,7 +103,7 @@ else libwd_la_LDFLAGS=$(UADK_VERSION) libwd_la_LIBADD= -lnuma
-libwd_comp_la_LIBADD= -lwd -ldl +libwd_comp_la_LIBADD= -lwd -ldl -lnuma libwd_comp_la_LDFLAGS=$(UADK_VERSION) libwd_comp_la_DEPENDENCIES= libwd.la
diff --git a/include/wd.h b/include/wd.h index e1a87de..facd992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev); */ struct uacce_dev_list *wd_get_accel_list(const char *alg_name);
+/** + * wd_find_dev_by_numa() - get device with max available ctx number from an + * device list according to numa id. + * @list: The device list. + * @numa_id: The numa_id. + * + * Return device if succeed and other error number if fail. + */ +struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id); + /** * wd_get_accel_dev() - Get device supporting the algorithm with smallest numa distance to current numa node. @@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev); */ void wd_add_dev_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node);
+/** + * wd_create_device_nodemask() - create a numa node mask of device list. + * @list: The devices list. + * + * Return a pointer value if succeed, and error number if fail. + */ +struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list); + +/** + * wd_free_device_nodemask() - free a numa node mask. + * @bmp: A numa node mask. + */ +void wd_free_device_nodemask(struct bitmask *bmp); + /** * wd_ctx_get_dev_name() - Get the device name about task. * @h_ctx: The handle of context. diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index c455dc3..5f63215 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -63,6 +63,30 @@ struct wd_ctx_config { void *priv; };
+/** + * struct wd_ctx_nums - Define the ctx sets numbers. + * @sync_ctx_num: The ctx numbers which are used for sync mode for each + * ctx sets. + * @async_ctx_num: The ctx numbers which are used for async mode for each + * ctx sets. + */ +struct wd_ctx_nums { + __u32 sync_ctx_num; + __u32 async_ctx_num; +}; + +/** + * struct wd_ctx_params - Define the ctx sets params which are used for init + * algorithms. + * @ctx_set_size: Number of ctx sets to be created. Usually users can + * set it according to <alg>_op_type. + * @ctx_set_num: Each ctx sets numbers. + */ +struct wd_ctx_params { + __u32 ctx_set_size; + struct wd_ctx_nums *ctx_set_num; +}; + struct wd_ctx_internal { handle_t ctx; __u8 op_type; diff --git a/include/wd_comp.h b/include/wd_comp.h index e043a83..d96110e 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -7,6 +7,7 @@ #ifndef __WD_COMP_H #define __WD_COMP_H
+#include <numa.h> #include "wd.h" #include "wd_alg_common.h"
@@ -113,6 +114,32 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched); */ void wd_comp_uninit(void);
+/** + * wd_comp_init2_() - A simplify interface to initializate uadk + * compression/decompression. This interface keeps most functions of + * wd_comp_init(). Users just need to descripe the deployment of + * business scenarios. Then the initialization will request appropriate + * resources to support the business scenarios. + * To make the initializate simpler, bmp and cparams support set NULL. + * And then the function will set them as default. + * + * @alg: The selected algorithm. + * @sched_type: The scheduler type. + * @bmp: Node mask of the required devices. + * @cparams: The ctx number settings. + * + * Return 0 if succeed and others if fail. + */ +int wd_comp_init2_(char *alg, __u32 sched_type, struct bitmask *bmp, struct wd_ctx_params *cparams); + +#define wd_comp_init2(alg, sched_type) \ + wd_comp_init2_(alg, sched_type, NULL, NULL) + +/** + * wd_comp_uninit2() - Uninitialise ctx configuration and scheduler. + */ +void wd_comp_uninit2(void); + struct wd_comp_sess_setup { enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */ enum wd_comp_level comp_lv; /* Denoted by enum wd_comp_level */ diff --git a/include/wd_util.h b/include/wd_util.h index eafe3ce..8ae70f1 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -7,6 +7,7 @@ #ifndef __WD_UTIL_H #define __WD_UTIL_H
+#include <numa.h> #include <stdbool.h> #include <sys/ipc.h> #include <sys/shm.h> @@ -112,6 +113,15 @@ struct wd_msg_handle { int (*recv)(handle_t sess, void *msg); };
+struct wd_init_attrs { + __u32 sched_type; + char *alg; + struct bitmask *bmp; + struct wd_sched *sched; + struct wd_ctx_params *cparams; + struct wd_ctx_config *ctx_config; +}; + /* * wd_init_ctx_config() - Init internal ctx configuration. * @in: ctx configuration in global setting. @@ -394,6 +404,15 @@ static inline void wd_alg_clear_init(enum wd_status *status) __atomic_store(status, &setting, __ATOMIC_RELAXED); }
+/** + * wd_alg_pre_init() - Request the ctxs and initialize the sched_domain + * with the given devices list, ctxs number and numa mask. + * @attrs: the algorithm initialization parameters. + * + * Return device if succeed and other error number if fail. + */ +int wd_alg_pre_init(struct wd_init_attrs *attrs); + /** * wd_dfx_msg_cnt() - Message counter interface for ctx * @msg: Shared memory addr. diff --git a/wd.c b/wd.c index 78094d8..9eb69d2 100644 --- a/wd.c +++ b/wd.c @@ -727,6 +727,35 @@ free_list: return NULL; }
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id) +{ + struct uacce_dev *dev = WD_ERR_PTR(-WD_ENODEV); + struct uacce_dev_list *p = list; + int ctx_num, ctx_max = 0; + + if (!list) { + WD_ERR("invalid: list is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + while (p) { + if (numa_id != p->dev->numa_id) { + p = p->next; + continue; + } + + ctx_num = wd_get_avail_ctx(p->dev); + if (ctx_num > ctx_max) { + dev = p->dev; + ctx_max = ctx_num; + } + + p = p->next; + } + + return dev; +} + void wd_free_list_accels(struct uacce_dev_list *list) { struct uacce_dev_list *curr, *next; @@ -793,6 +822,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg) return ioctl(ctx->fd, cmd, arg); }
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list) +{ + struct uacce_dev_list *p; + struct bitmask *bmp; + + if (!list) { + WD_ERR("invalid: list is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + bmp = numa_allocate_nodemask(); + if (!bmp) { + WD_ERR("failed to alloc bitmask(%d)!\n", errno); + return WD_ERR_PTR(-WD_ENOMEM); + } + + p = list; + while (p) { + numa_bitmask_setbit(bmp, p->dev->numa_id); + p = p->next; + } + + return bmp; +} + +void wd_free_device_nodemask(struct bitmask *bmp) +{ + if (!bmp) + return; + + numa_free_nodemask(bmp); +} + void wd_get_version(void) { const char *wd_released_time = UADK_RELEASED_TIME; diff --git a/wd_comp.c b/wd_comp.c index 44593a6..ea80d13 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -14,6 +14,7 @@
#include "config.h" #include "drv/wd_comp_drv.h" +#include "wd_sched.h" #include "wd_util.h" #include "wd_comp.h"
@@ -21,6 +22,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024)
+#define SCHED_RR_NAME "sched_rr" + #define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ @@ -42,6 +45,7 @@ struct wd_comp_sess {
struct wd_comp_setting { enum wd_status status; + enum wd_status status2; struct wd_ctx_config_internal config; struct wd_sched sched; struct wd_comp_driver *driver; @@ -52,6 +56,19 @@ struct wd_comp_setting {
struct wd_env_config wd_comp_env_config;
+static struct wd_init_attrs wd_comp_init_attrs; +static struct wd_ctx_config wd_comp_ctx; +static struct wd_sched *wd_comp_sched; + +static struct wd_ctx_nums wd_comp_ctx_num[] = { + {1, 1}, {1, 1}, {} +}; + +static struct wd_ctx_params wd_comp_cparams = { + .ctx_set_size = WD_DIR_MAX, + .ctx_set_num = wd_comp_ctx_num +}; + #ifdef WD_STATIC_DRV static void wd_comp_set_static_drv(void) { @@ -178,6 +195,73 @@ void wd_comp_uninit(void) wd_alg_clear_init(&wd_comp_setting.status); }
+int wd_comp_init2_(char *alg, __u32 sched_type, struct bitmask *bmp, struct wd_ctx_params *cparams) +{ + bool flag; + int ret; + + flag = wd_alg_try_init(&wd_comp_setting.status2); + if (!flag) + return 0; + + if (!alg) { + WD_ERR("invalid: alg is NULL!\n"); + ret = -WD_EINVAL; + goto out_uninit; + } + + wd_comp_init_attrs.alg = alg; + wd_comp_init_attrs.sched_type = sched_type; + wd_comp_init_attrs.bmp = bmp; + wd_comp_init_attrs.cparams = cparams ? cparams : &wd_comp_cparams; + wd_comp_init_attrs.ctx_config = &wd_comp_ctx; + + wd_comp_sched = wd_sched_rr_alloc(sched_type, wd_comp_init_attrs.cparams->ctx_set_size, + numa_max_node() + 1, wd_comp_poll_ctx); + if (!wd_comp_sched) { + ret = -WD_EINVAL; + goto out_uninit; + } + wd_comp_sched->name = SCHED_RR_NAME; + wd_comp_init_attrs.sched = wd_comp_sched; + + ret = wd_alg_pre_init(&wd_comp_init_attrs); + if (ret) + goto out_freesched; + + ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched); + if (ret) + goto out_freesched; + + wd_alg_set_init(&wd_comp_setting.status2); + + return 0; + +out_freesched: + wd_sched_rr_release(wd_comp_sched); + +out_uninit: + wd_alg_clear_init(&wd_comp_setting.status2); + + return ret; +} + +void wd_comp_uninit2(void) +{ + int i; + + wd_comp_uninit(); + + for (i = 0; i < wd_comp_ctx.ctx_num; i++) + if (wd_comp_ctx.ctxs[i].ctx) { + wd_release_ctx(wd_comp_ctx.ctxs[i].ctx); + wd_comp_ctx.ctxs[i].ctx = 0; + } + + wd_sched_rr_release(wd_comp_sched); + wd_alg_clear_init(&wd_comp_setting.status2); +} + struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag) { return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag); @@ -289,6 +373,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->comp_lv = setup->comp_lv; sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param); @@ -318,6 +403,7 @@ void wd_comp_free_sess(handle_t h_sess)
if (sess->sched_key) free(sess->sched_key); + free(sess); }
diff --git a/wd_util.c b/wd_util.c index fa77b46..d618776 100644 --- a/wd_util.c +++ b/wd_util.c @@ -5,7 +5,6 @@ */
#define _GNU_SOURCE -#include <numa.h> #include <pthread.h> #include <semaphore.h> #include <string.h> @@ -1801,3 +1800,238 @@ bool wd_alg_try_init(enum wd_status *status)
return true; } + +static __u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end) +{ + __u32 count = 0; + int i; + + for (i = 0; i < end; i++) { + count += cparams.ctx_set_num[i].sync_ctx_num; + count += cparams.ctx_set_num[i].async_ctx_num; + } + + return count; +} + +struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +{ + struct uacce_dev_list *p, *node, *result = NULL; + struct uacce_dev *dev; + int numa_id, ret; + + if (!bmp) { + WD_ERR("invalid: bmp is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + p = list; + while (p) { + dev = p->dev; + numa_id = dev->numa_id; + ret = numa_bitmask_isbitset(bmp, numa_id); + if (!ret) { + p = p->next; + continue; + } + + node = calloc(1, sizeof(*node)); + if (!node) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_list; + } + + node->dev = wd_clone_dev(dev); + if (!node->dev) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_node; + } + + if (!result) + result = node; + else + wd_add_dev_to_list(result, node); + + p = p->next; + } + + return result; + +out_free_node: + free(node); +out_free_list: + wd_free_list_accels(result); + return result; +} + +static int wd_init_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *list, + int idx, int numa_id, int op_type) +{ + struct wd_ctx_nums ctx_nums = attrs->cparams->ctx_set_num[op_type]; + __u32 ctx_set_num = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num; + struct wd_ctx_config *ctx_config = attrs->ctx_config; + struct uacce_dev *dev; + int i; + + dev = wd_find_dev_by_numa(list, numa_id); + if (WD_IS_ERR(dev)) + return WD_PTR_ERR(dev); + + for (i = idx; i < idx + ctx_set_num; i++) { + ctx_config->ctxs[i].ctx = wd_request_ctx(dev); + if (errno == WD_EBUSY) { + dev = wd_find_dev_by_numa(list, numa_id); + if (WD_IS_ERR(dev)) + return WD_PTR_ERR(dev); + i--; + } + ctx_config->ctxs[i].op_type = op_type; + ctx_config->ctxs[i].ctx_mode = + ((i - idx) < ctx_nums.sync_ctx_num) ? + CTX_MODE_SYNC : CTX_MODE_ASYNC; + } + + return 0; +} + +static void wd_release_ctx_set(struct wd_ctx_config *ctx_config) +{ + int i; + + for (i = 0; i < ctx_config->ctx_num; i++) + if (ctx_config->ctxs[i].ctx) { + wd_release_ctx(ctx_config->ctxs[i].ctx); + ctx_config->ctxs[i].ctx = 0; + } +} + +static int wd_instance_sched_set(struct wd_sched *sched, struct wd_ctx_nums ctx_nums, + int idx, int numa_id, int op_type) +{ + struct sched_params sparams; + int i, ret = 0; + + for (i = 0; i < CTX_MODE_MAX; i++) { + sparams.numa_id = numa_id; + sparams.type = op_type; + sparams.mode = i; + sparams.begin = idx + ctx_nums.sync_ctx_num * i; + sparams.end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i; + if (sparams.begin > sparams.end) + continue; + ret = wd_sched_rr_instance(sched, &sparams); + if (ret) + goto out; + } + +out: + return ret; +} + +static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bmp, + struct uacce_dev_list *list) +{ + struct wd_ctx_params *cparams = attrs->cparams; + __u32 ctx_set_size = cparams->ctx_set_size; + int max_node = numa_max_node() + 1; + struct wd_ctx_nums ctx_nums; + int i, j, ret; + int idx = 0; + + for (i = 0; i < max_node; i++) { + if (!numa_bitmask_isbitset(bmp, i)) + continue; + for (j = 0; j < ctx_set_size; j++) { + ctx_nums = cparams->ctx_set_num[j]; + ret = wd_init_ctx_set(attrs, list, idx, i, j); + if (ret) + goto free_ctxs; + ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j); + if (ret) + goto free_ctxs; + idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num); + } + } + + return 0; + +free_ctxs: + wd_release_ctx_set(attrs->ctx_config); + + return ret; +} + +int wd_alg_pre_init(struct wd_init_attrs *attrs) +{ + struct wd_ctx_config *ctx_config = attrs->ctx_config; + struct wd_ctx_params *cparams = attrs->cparams; + struct uacce_dev_list *list, *used_list = NULL; + struct bitmask *used_bmp, *bmp = attrs->bmp; + __u32 ctx_set_num, ctx_set_size; + int numa_cnt, ret; + + list = wd_get_accel_list(attrs->alg); + if (!list) { + WD_ERR("failed to get devices!\n"); + return -WD_ENODEV; + } + + ctx_set_size = cparams->ctx_set_size; + ctx_set_num = wd_get_ctx_numbers(*cparams, ctx_set_size); + if (!ctx_set_num || !ctx_set_size) { + WD_ERR("invalid: ctx_set_num is %d, ctx_set_size is %d!\n", + ctx_set_num, ctx_set_size); + ret = -WD_EINVAL; + goto out_freelist; + } + + /* + * Not every numa has a device. Therefore, the first thing is to + * filter the devices in the selected numa node, and the second + * thing is to obtain the distribution of devices. + */ + if (bmp) { + used_list = wd_get_usable_list(list, bmp); + if (WD_IS_ERR(used_list)) { + ret = WD_PTR_ERR(used_list); + WD_ERR("failed to get usable devices(%d)!\n", ret); + goto out_freelist; + } + } + + used_bmp = wd_create_device_nodemask(used_list ? used_list : list); + if (WD_IS_ERR(bmp)) { + ret = WD_PTR_ERR(bmp); + goto out_freeusedlist; + } + + numa_cnt = numa_bitmask_weight(used_bmp); + if (!numa_cnt) { + ret = numa_cnt; + WD_ERR("invalid: bmp is clear!\n"); + goto out_freenodemask; + } + + ctx_config->ctx_num = ctx_set_num * numa_cnt; + ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); + if (!ctx_config->ctxs) { + ret = -WD_ENOMEM; + WD_ERR("failed to alloc ctxs!\n"); + goto out_freenodemask; + } + + ret = wd_init_ctx_and_sched(attrs, used_bmp, used_list ? used_list : list); + if (ret) + free(ctx_config->ctxs); + +out_freenodemask: + wd_free_device_nodemask(used_bmp); + +out_freeusedlist: + wd_free_list_accels(used_list); + +out_freelist: + wd_free_list_accels(list); + + return ret; +}