For performance reasons, uadk leaves many configuration options to users. This gives users great flexibility, but it also makes the current initialization interface highly complex. Therefore, to help users adapt quickly, a new set of interfaces is provided.
'wd_alg_init2()' completes all initialization steps. Four parameters describe the user's configuration requirements. @device_list: The list of available uacce devices. Users can get it from wd_get_accel_list(). @numa_bitmask: The bitmask provided by libnuma. Users can use this parameter to control which NUMA nodes' devices the ctxs are requested from in the NUMA-binding scenario. @ctx_nums: The number of ctxs requested for each NUMA node. Because users may need different numbers of ctxs for different ctx types, a two-dimensional array is required as input. @sched_type: The scheduling type the user wants to use.
Signed-off-by: Yang Shen shenyang39@huawei.com --- Makefile.am | 4 +- include/wd.h | 24 +++++ include/wd_alg_common.h | 24 +++++ include/wd_comp.h | 28 +++++ include/wd_util.h | 19 ++++ wd.c | 62 +++++++++++ wd_comp.c | 222 ++++++++++++++++++++++++++++++++++++++++ wd_util.c | 59 ++++++++++- 8 files changed, 439 insertions(+), 3 deletions(-)
diff --git a/Makefile.am b/Makefile.am index 05d6bc7..2a2c0a7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -86,7 +86,7 @@ AM_CFLAGS += -DWD_NO_LOG
libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma
-libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_comp_la_DEPENDENCIES = libwd.la
libhisi_zip_la_LIBADD = -ldl @@ -103,7 +103,7 @@ else libwd_la_LDFLAGS=$(UADK_VERSION) libwd_la_LIBADD= -lnuma
-libwd_comp_la_LIBADD= -lwd -ldl +libwd_comp_la_LIBADD= -lwd -ldl -lnuma libwd_comp_la_LDFLAGS=$(UADK_VERSION) libwd_comp_la_DEPENDENCIES= libwd.la
diff --git a/include/wd.h b/include/wd.h index 4f3a32f..9893f43 100644 --- a/include/wd.h +++ b/include/wd.h @@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev); */ struct uacce_dev_list *wd_get_accel_list(const char *alg_name);
+/** + * wd_find_dev_by_numa() - get device with max available ctx number from an + * device list according to numa id. + * @list: The device list. + * @numa_id: The numa_id. + * + * Return device if succeed and other error number if fail. + */ +struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id); + /** * wd_get_accel_dev() - Get device supporting the algorithm with smallest numa distance to current numa node. @@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev); */ void wd_add_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node);
+/** + * wd_create_device_nodemask() - create a numa node mask of device list. + * @list: The devices list. + * + * Return a pointer value if succeed, and error number if fail. + */ +struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list); + +/** + * wd_free_device_nodemask() - free a numa node mask. + * @bmp: A numa node mask. + */ +void wd_free_device_nodemask(struct bitmask *bmp); + /** * wd_ctx_get_dev_name() - Get the device name about task. * @h_ctx: The handle of context. diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index c455dc3..f261830 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -63,6 +63,30 @@ struct wd_ctx_config { void *priv; };
+/** + * struct wd_ctx_nums - Define the ctx sets numbers. + * @sync_ctx_num: The ctx numbers which are used for sync mode for each + * ctx sets. + * @async_ctx_num: The ctx numbers which are used for async mode for each + * ctx sets. + */ +struct wd_ctx_nums { + __u32 sync_ctx_num; + __u32 async_ctx_num; +}; + +/** + * struct wd_ctx_params - Define the ctx sets params which are used for init + * algorithms. + * @ctx_set_num: Number of ctx sets to be created. Usually users can + * set it according to <alg>_op_type. + * @ctx_set_size: Each ctx sets numbers. + */ +struct wd_ctx_params { + __u32 ctx_set_num; + struct wd_ctx_nums *ctx_set_size; +}; + struct wd_ctx_internal { handle_t ctx; __u8 op_type; diff --git a/include/wd_comp.h b/include/wd_comp.h index e043a83..1d4f32c 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -7,6 +7,7 @@ #ifndef __WD_COMP_H #define __WD_COMP_H
+#include <numa.h> #include "wd.h" #include "wd_alg_common.h"
@@ -113,6 +114,33 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched); */ void wd_comp_uninit(void);
+/** + * wd_comp_init2() - A simplified interface to initialize uadk + * compression/decompression. Users can use wd_get_accel_list() to + * get the list of usable devices for the algorithms. Users should provide + * a device numa node mask to show which numa devices will be + * selected. wd_create_device_nodemask() can create a node mask + * according to the list. If all numa devices on the list match + * the requirement, just use its return value. Otherwise, users can + * use the functions in libnuma to set the node mask. + * To make initialization simpler, bmp and cparams may be set to NULL, + * and then the function will use default values for them. + * + * @list: The device list. + * @bmp: Node mask of the required devices. + * @cparams: The ctx settings. + * @sched_type: The scheduler type. + * + * Return 0 if succeed and others if fail. + */ +int wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp, + struct wd_ctx_params *cparams, __u32 sched_type); + +/** + * wd_comp_uninit2() - Uninitialise ctx configuration and scheduler. + */ +void wd_comp_uninit2(void); + struct wd_comp_sess_setup { enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */ enum wd_comp_level comp_lv; /* Denoted by enum wd_comp_level */ diff --git a/include/wd_util.h b/include/wd_util.h index 3737f27..4ee03ce 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -7,6 +7,7 @@ #ifndef __WD_UTIL_H #define __WD_UTIL_H
+#include <numa.h> #include <stdbool.h> #include <sys/ipc.h> #include <sys/shm.h> @@ -394,6 +395,24 @@ static inline void wd_alg_clear_init(enum wd_status *status) __atomic_store(status, &setting, __ATOMIC_RELAXED); }
+/** + * wd_get_usable_list() - choose the devices according bitmask. + * @list: The device list. + * @bmp: The devices node mask. + * + * Return a list that meet user's requirement if succeed, and error number if fail. + */ +struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp); + +/** + * wd_get_ctx_numbers() - count the ctx number for first to end. + * @cparams: the input ctx setting numbers. + * @end: the end index of cparams. + * + * Return the sum of top '@end' cparams ctx number. + */ +__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end); + /** * wd_dfx_msg_cnt() - Message counter interface for ctx * @msg: Shared memory addr. diff --git a/wd.c b/wd.c index 66a6df3..21ddd62 100644 --- a/wd.c +++ b/wd.c @@ -741,6 +741,35 @@ free_list: return NULL; }
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id) +{ + struct uacce_dev *dev = WD_ERR_PTR(-WD_ENODEV); + struct uacce_dev_list *p = list; + int ctx_num, ctx_max = 0; + + if (!list) { + WD_ERR("invalid: list is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + while (p) { + if (numa_id != p->dev->numa_id) { + p = p->next; + continue; + } + + ctx_num = wd_get_avail_ctx(p->dev); + if (ctx_num > ctx_max) { + dev = p->dev; + ctx_max = ctx_num; + } + + p = p->next; + } + + return dev; +} + void wd_free_list_accels(struct uacce_dev_list *list) { struct uacce_dev_list *curr, *next; @@ -807,6 +836,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg) return ioctl(ctx->fd, cmd, arg); }
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list) +{ + struct uacce_dev_list *p; + struct bitmask *bmp; + + if (!list) { + WD_ERR("invalid: list is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + bmp = numa_allocate_nodemask(); + if (!bmp) { + WD_ERR("failed to alloc bitmask(%d)!\n", errno); + return WD_ERR_PTR(-WD_EINVAL); + } + + p = list; + while (p) { + numa_bitmask_setbit(bmp, p->dev->numa_id); + p = p->next; + } + + return bmp; +} + +void wd_free_device_nodemask(struct bitmask *bmp) +{ + if (!bmp) + return; + + numa_free_nodemask(bmp); +} + void wd_get_version(void) { const char *wd_released_time = UADK_RELEASED_TIME; diff --git a/wd_comp.c b/wd_comp.c index 44593a6..cd3b4f3 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -14,6 +14,7 @@
#include "config.h" #include "drv/wd_comp_drv.h" +#include "wd_sched.h" #include "wd_util.h" #include "wd_comp.h"
@@ -21,6 +22,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024)
+#define SCHED_RR_NAME "sched_rr" + #define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ @@ -42,6 +45,7 @@ struct wd_comp_sess {
struct wd_comp_setting { enum wd_status status; + enum wd_status status2; struct wd_ctx_config_internal config; struct wd_sched sched; struct wd_comp_driver *driver; @@ -52,6 +56,19 @@ struct wd_comp_setting {
struct wd_env_config wd_comp_env_config;
+static struct wd_ctx_config wd_comp_ctx; +static struct wd_sched *wd_comp_sched; +static int wd_comp_numa_count; + +static struct wd_ctx_nums wd_comp_ctx_num[] = { + {1, 1}, {1, 1}, {} +}; + +static struct wd_ctx_params wd_comp_cparams = { + .ctx_set_num = WD_DIR_MAX, + .ctx_set_size = wd_comp_ctx_num +}; + #ifdef WD_STATIC_DRV static void wd_comp_set_static_drv(void) { @@ -178,6 +195,209 @@ void wd_comp_uninit(void) wd_alg_clear_init(&wd_comp_setting.status); }
+static int wd_comp_request_ctx(struct uacce_dev_list *list, + struct wd_ctx_nums ctx_nums, + int idx, int numa_id, int op_type) +{ + int ctx_set_size = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num; + struct uacce_dev *dev; + int i; + + dev = wd_find_dev_by_numa(list, numa_id); + if (!dev) + return -WD_EBUSY; + + for (i = idx; i < idx + ctx_set_size; i++) { + wd_comp_ctx.ctxs[i].ctx = wd_request_ctx(dev); + if (errno == WD_EBUSY) { + dev = wd_find_dev_by_numa(list, numa_id); + if (!dev) + return -WD_EBUSY; + i--; + } + wd_comp_ctx.ctxs[i].op_type = op_type; + wd_comp_ctx.ctxs[i].ctx_mode = + ((i - idx) < ctx_nums.sync_ctx_num) ? + CTX_MODE_SYNC : CTX_MODE_ASYNC; + } + + return 0; +} + +static void wd_comp_release_ctx(void) +{ + int i; + + for (i = 0; i < wd_comp_ctx.ctx_num; i++) + if (wd_comp_ctx.ctxs[i].ctx) { + wd_release_ctx(wd_comp_ctx.ctxs[i].ctx); + wd_comp_ctx.ctxs[i].ctx = 0; + } +} + +static int wd_comp_instance_sched(struct wd_ctx_nums ctx_nums, int idx, + int numa_id, int op_type) +{ + struct sched_params sparams; + int i, ret = 0; + + for (i = 0; i < CTX_MODE_MAX; i++) { + sparams.numa_id = numa_id; + sparams.type = op_type; + sparams.mode = i; + sparams.begin = idx + ctx_nums.sync_ctx_num * i; + sparams.end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i; + if (sparams.begin > sparams.end) + continue; + ret = wd_sched_rr_instance(wd_comp_sched, &sparams); + if (ret) + goto out; + } + +out: + return ret; +} + +static int __wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp, + struct wd_ctx_params cparams) +{ + int ctx_set_num = cparams.ctx_set_num; + int max_node = numa_max_node() + 1; + struct wd_ctx_nums ctx_nums; + int i, j, ret; + int idx = 0; + + for (i = 0; i < max_node; i++) { + if (!numa_bitmask_isbitset(bmp, i)) + continue; + for (j = 0; j < ctx_set_num; j++) { + ctx_nums = cparams.ctx_set_size[j]; + ret = wd_comp_request_ctx(list, ctx_nums, idx, i, j); + if (ret) + goto free_ctxs; + ret = 
wd_comp_instance_sched(ctx_nums, idx, i, j); + if (ret) + goto free_ctxs; + idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num); + } + } + + ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched); + if (ret) + goto free_ctxs; + + return 0; + +free_ctxs: + wd_comp_release_ctx(); + + return ret; +} + +int wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp, + struct wd_ctx_params *cparams, __u32 sched_type) +{ + struct uacce_dev_list *used_list = NULL; + int ctx_set_num, ctx_set_size, ret; + struct bitmask *used_bmp; + bool flag; + + flag = wd_alg_try_init(&wd_comp_setting.status2); + if (!flag) + return 0; + + if (!list) { + WD_ERR("invalid: list is NULL!\n"); + ret = -WD_EINVAL; + goto out_uninit; + } + + if (!cparams) + cparams = &wd_comp_cparams; + + ctx_set_num = cparams->ctx_set_num; + ctx_set_size = wd_get_ctx_numbers(*cparams, ctx_set_num); + if (!ctx_set_num || !ctx_set_size) { + WD_ERR("invalid: ctx_set_num is %d, ctx_set_size is %d!\n", + ctx_set_num, ctx_set_size); + ret = -WD_EINVAL; + goto out_uninit; + } + + if (!bmp) { + used_bmp = wd_create_device_nodemask(list); + if (WD_IS_ERR(bmp)) { + ret = WD_PTR_ERR(bmp); + goto out_uninit; + } + } else { + used_list = wd_get_usable_list(list, bmp); + if (WD_IS_ERR(used_list)) { + ret = WD_PTR_ERR(used_list); + WD_ERR("failed to get usable devices(%d)!\n", ret); + goto out_uninit; + } + used_bmp = wd_create_device_nodemask(used_list); + } + + ret = numa_bitmask_weight(used_bmp); + if (!ret) { + WD_ERR("invalid: bmp is clear!\n"); + goto out_freenodemask; + } + wd_comp_numa_count = ret; + + wd_comp_ctx.ctx_num = ctx_set_size * wd_comp_numa_count; + wd_comp_ctx.ctxs = calloc(wd_comp_ctx.ctx_num, sizeof(struct wd_ctx)); + if (!wd_comp_ctx.ctxs) { + ret = -WD_ENOMEM; + WD_ERR("failed to alloc ctxs!\n"); + goto out_freenodemask; + } + + wd_comp_sched = wd_sched_rr_alloc(sched_type, ctx_set_num, + numa_max_node() + 1, wd_comp_poll_ctx); + if (!wd_comp_sched) { + ret = -WD_EINVAL; + goto out_freectxs; + } + 
wd_comp_sched->name = SCHED_RR_NAME; + + ret = __wd_comp_init2(!used_list ? list : used_list, used_bmp, *cparams); + if (ret) + goto out_freesched; + + wd_free_list_accels(used_list); + wd_free_device_nodemask(used_bmp); + + wd_alg_set_init(&wd_comp_setting.status2); + + return ret; + +out_freesched: + wd_sched_rr_release(wd_comp_sched); + +out_freectxs: + free(wd_comp_ctx.ctxs); + +out_freenodemask: + wd_free_device_nodemask(used_bmp); + wd_free_list_accels(used_list); + +out_uninit: + wd_alg_clear_init(&wd_comp_setting.status2); + + return ret; +} + +void wd_comp_uninit2(void) +{ + wd_comp_uninit(); + wd_comp_release_ctx(); + wd_sched_rr_release(wd_comp_sched); + wd_alg_clear_init(&wd_comp_setting.status2); +} + struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag) { return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag); @@ -289,6 +509,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->comp_lv = setup->comp_lv; sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param); @@ -318,6 +539,7 @@ void wd_comp_free_sess(handle_t h_sess)
if (sess->sched_key) free(sess->sched_key); + free(sess); }
diff --git a/wd_util.c b/wd_util.c index 00dea74..713261a 100644 --- a/wd_util.c +++ b/wd_util.c @@ -5,7 +5,6 @@ */
#define _GNU_SOURCE -#include <numa.h> #include <pthread.h> #include <semaphore.h> #include <string.h> @@ -1792,3 +1791,61 @@ bool wd_alg_try_init(enum wd_status *status)
return true; } + +struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +{ + struct uacce_dev_list *p, *node, *result = NULL; + struct uacce_dev *dev; + int numa_id, ret; + + p = list; + while (p) { + dev = p->dev; + numa_id = dev->numa_id; + ret = numa_bitmask_isbitset(bmp, numa_id); + if (!ret) { + p = p->next; + continue; + } + + node = calloc(1, sizeof(*node)); + if (!node) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_list; + } + + node->dev = wd_clone_dev(dev); + if (!node->dev) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_node; + } + + if (!result) + result = node; + else + wd_add_to_list(result, node); + + p = p->next; + } + + return result; + +out_free_node: + free(node); +out_free_list: + wd_free_list_accels(result); + return result; +} + +__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end) +{ + __u32 count = 0; + int i; + + for (i = 0; i < end; i++) { + count += cparams.ctx_set_size[i].sync_ctx_num; + count += cparams.ctx_set_size[i].async_ctx_num; + } + + return count; +}