[Acc] [PATCH v2 5/6] uadk/comp - add wd_comp_init2

11 Jul 2022

For performance reasons, uadk leaves many configuration options
to users. This gives users great flexibility, but it also makes
the current initialization interface highly complex.
Therefore, to help users adapt quickly, a new set
of interfaces is provided.

The 'wd_alg_init2()' will complete all initialization steps.
There are 4 parameters to describe the user configuration requirements.
@device_list: The available uacce device list. Users can get it by
wd_get_accel_list().
@numa_bitmask: The bitmask provided by libnuma. Users can use this
parameter to control which devices ctxs are requested from in the
NUMA-binding scenario.
@ctx_nums: The requested ctx number for each numa node. Because users
may have different requirements for different types of ctx numbers,
a two-dimensional array is needed as input.
@sched_type: Scheduling type the user wants to use.

Signed-off-by: Yang Shen <shenyang39@huawei.com>
---
 Makefile.am             |   4 +-
 include/wd.h            |  24 +++++
 include/wd_alg_common.h |  24 +++++
 include/wd_comp.h       |  28 +++++
 include/wd_util.h       |  19 ++++
 wd.c                    |  62 +++++++++++
 wd_comp.c               | 222 ++++++++++++++++++++++++++++++++++++++++
 wd_util.c               |  59 ++++++++++-
 8 files changed, 439 insertions(+), 3 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 05d6bc7..2a2c0a7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -86,7 +86,7 @@ AM_CFLAGS += -DWD_NO_LOG
 
 libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma
 
-libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl
+libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma
 libwd_comp_la_DEPENDENCIES = libwd.la
 
 libhisi_zip_la_LIBADD = -ldl
@@ -103,7 +103,7 @@ else
 libwd_la_LDFLAGS=$(UADK_VERSION)
 libwd_la_LIBADD= -lnuma
 
-libwd_comp_la_LIBADD= -lwd -ldl
+libwd_comp_la_LIBADD= -lwd -ldl -lnuma
 libwd_comp_la_LDFLAGS=$(UADK_VERSION)
 libwd_comp_la_DEPENDENCIES= libwd.la
 
diff --git a/include/wd.h b/include/wd.h
index 4f3a32f..9893f43 100644
--- a/include/wd.h
+++ b/include/wd.h
@@ -348,6 +348,16 @@ int wd_get_avail_ctx(struct uacce_dev *dev);
  */
 struct uacce_dev_list *wd_get_accel_list(const char *alg_name);
 
+/**
+ * wd_find_dev_by_numa() - Get the device with the most available ctxs on a
+ *			   given numa node from a device list.
+ * @list: The device list.
+ * @numa_id: The id of the numa node to search.
+ *
+ * Return the device if succeed and an error pointer if fail.
+ */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id);
+
 /**
  * wd_get_accel_dev() - Get device supporting the algorithm with
 			smallest numa distance to current numa node.
@@ -523,6 +533,20 @@ struct uacce_dev *wd_clone_dev(struct uacce_dev *dev);
  */
 void wd_add_to_list(struct uacce_dev_list *head, struct uacce_dev_list *node);
 
+/**
+ * wd_create_device_nodemask() - Create a numa node mask of a device list.
+ * @list: The device list.
+ *
+ * Return the node mask if succeed, and an error pointer if fail.
+ */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list);
+
+/**
+ * wd_free_device_nodemask() - Free a numa node mask. NULL is ignored.
+ * @bmp: The numa node mask to free.
+ */
+void wd_free_device_nodemask(struct bitmask *bmp);
+
 /**
  * wd_ctx_get_dev_name() - Get the device name about task.
  * @h_ctx: The handle of context.
diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h
index c455dc3..f261830 100644
--- a/include/wd_alg_common.h
+++ b/include/wd_alg_common.h
@@ -63,6 +63,30 @@ struct wd_ctx_config {
 	void *priv;
 };
 
+/**
+ * struct wd_ctx_nums - Define the number of ctxs in one ctx set.
+ * @sync_ctx_num: The number of ctxs used for sync mode in the
+ * ctx set.
+ * @async_ctx_num: The number of ctxs used for async mode in the
+ * ctx set.
+ */
+struct wd_ctx_nums {
+	__u32 sync_ctx_num;
+	__u32 async_ctx_num;
+};
+
+/**
+ * struct wd_ctx_params - Define the ctx set parameters which are used to
+ * initialize algorithms.
+ * @ctx_set_num: Number of ctx sets to be created. Usually users can
+ * set it according to <alg>_op_type.
+ * @ctx_set_size: Array of ctx numbers, one entry for each ctx set.
+ */
+struct wd_ctx_params {
+	__u32 ctx_set_num;
+	struct wd_ctx_nums *ctx_set_size;
+};
+
 struct wd_ctx_internal {
 	handle_t ctx;
 	__u8 op_type;
diff --git a/include/wd_comp.h b/include/wd_comp.h
index e043a83..1d4f32c 100644
--- a/include/wd_comp.h
+++ b/include/wd_comp.h
@@ -7,6 +7,7 @@
 #ifndef __WD_COMP_H
 #define __WD_COMP_H
 
+#include <numa.h>
 #include "wd.h"
 #include "wd_alg_common.h"
 
@@ -113,6 +114,33 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched);
  */
 void wd_comp_uninit(void);
 
+/**
+ * wd_comp_init2() - A simplified interface to initialize uadk
+ * compression/decompression. Users can use wd_get_accel_list() to
+ * get the list of usable devices for the algorithm. Users should provide
+ * a device numa node mask to show which numa nodes' devices will be
+ * selected. wd_create_device_nodemask() can create a node mask
+ * according to the list. If the devices of all numa nodes on the list
+ * meet the requirement, just use its return value. Otherwise, users can
+ * use the functions in libnuma to set the node mask.
+ * To make the initialization simpler, bmp and cparams can be set to NULL,
+ * and then the function will use default values for them.
+ *
+ * @list: The device list.
+ * @bmp: Node mask of the required devices.
+ * @cparams: The ctx settings.
+ * @sched_type: The scheduler type.
+ *
+ * Return 0 if succeed and others if fail.
+ */
+int wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp,
+		  struct wd_ctx_params *cparams, __u32 sched_type);
+
+/**
+ * wd_comp_uninit2() - Uninitialise the ctx configuration and scheduler
+ * which were set up by wd_comp_init2().
+ */
+void wd_comp_uninit2(void);
+
 struct wd_comp_sess_setup {
 	enum wd_comp_alg_type alg_type; /* Denoted by enum wd_comp_alg_type */
 	enum wd_comp_level comp_lv;     /* Denoted by enum wd_comp_level */
diff --git a/include/wd_util.h b/include/wd_util.h
index 3737f27..4ee03ce 100644
--- a/include/wd_util.h
+++ b/include/wd_util.h
@@ -7,6 +7,7 @@
 #ifndef __WD_UTIL_H
 #define __WD_UTIL_H
 
+#include <numa.h>
 #include <stdbool.h>
 #include <sys/ipc.h>
 #include <sys/shm.h>
@@ -394,6 +395,24 @@ static inline void wd_alg_clear_init(enum wd_status *status)
 	__atomic_store(status, &setting, __ATOMIC_RELAXED);
 }
 
+/**
+ * wd_get_usable_list() - Choose the devices according to a numa node mask.
+ * @list: The device list.
+ * @bmp: The devices node mask.
+ *
+ * Return a new list of cloned devices whose numa node is set in @bmp if
+ * succeed (NULL if no device matches), and an error pointer if fail.
+ */
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp);
+
+/**
+ * wd_get_ctx_numbers() - Count the ctxs of the first '@end' ctx sets.
+ * @cparams: The input ctx setting numbers.
+ * @end: The number of leading ctx sets of cparams to count.
+ *
+ * Return the sum of the sync and async ctx numbers of the first '@end' ctx sets.
+ */
+__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end);
+
 /**
  * wd_dfx_msg_cnt() - Message counter interface for ctx
  * @msg: Shared memory addr.
diff --git a/wd.c b/wd.c
index 66a6df3..21ddd62 100644
--- a/wd.c
+++ b/wd.c
@@ -741,6 +741,35 @@ free_list:
 	return NULL;
 }
 
+/*
+ * Walk the device list and pick, among the devices on numa node @numa_id,
+ * the one that reports the most available ctxs. A device is only chosen
+ * when it reports more than zero available ctxs; when none qualifies a
+ * -WD_ENODEV error pointer is returned. A NULL @list yields a -WD_EINVAL
+ * error pointer.
+ */
+struct uacce_dev *wd_find_dev_by_numa(struct uacce_dev_list *list, int numa_id)
+{
+	struct uacce_dev *best = WD_ERR_PTR(-WD_ENODEV);
+	struct uacce_dev_list *node;
+	int best_avail = 0;
+	int avail;
+
+	if (!list) {
+		WD_ERR("invalid: list is NULL!\n");
+		return WD_ERR_PTR(-WD_EINVAL);
+	}
+
+	for (node = list; node; node = node->next) {
+		/* devices on other numa nodes are not candidates */
+		if (node->dev->numa_id != numa_id)
+			continue;
+
+		avail = wd_get_avail_ctx(node->dev);
+		if (avail <= best_avail)
+			continue;
+
+		best = node->dev;
+		best_avail = avail;
+	}
+
+	return best;
+}
+
 void wd_free_list_accels(struct uacce_dev_list *list)
 {
 	struct uacce_dev_list *curr, *next;
@@ -807,6 +836,39 @@ int wd_ctx_set_io_cmd(handle_t h_ctx, unsigned long cmd, void *arg)
 	return ioctl(ctx->fd, cmd, arg);
 }
 
+/*
+ * Build a libnuma node mask that has one bit set for the numa node of
+ * every device in @list. The mask is released with
+ * wd_free_device_nodemask().
+ *
+ * Returns the mask on success, or an error pointer on failure:
+ * -WD_EINVAL when @list is NULL and -WD_ENOMEM when the mask cannot be
+ * allocated.
+ */
+struct bitmask *wd_create_device_nodemask(struct uacce_dev_list *list)
+{
+	struct uacce_dev_list *p;
+	struct bitmask *bmp;
+
+	if (!list) {
+		WD_ERR("invalid: list is NULL!\n");
+		return WD_ERR_PTR(-WD_EINVAL);
+	}
+
+	bmp = numa_allocate_nodemask();
+	if (!bmp) {
+		WD_ERR("failed to alloc bitmask(%d)!\n", errno);
+		/* an allocation failure is not an invalid argument */
+		return WD_ERR_PTR(-WD_ENOMEM);
+	}
+
+	for (p = list; p; p = p->next)
+		numa_bitmask_setbit(bmp, p->dev->numa_id);
+
+	return bmp;
+}
+
+/* Free a node mask with numa_free_nodemask(); a NULL @bmp is ignored. */
+void wd_free_device_nodemask(struct bitmask *bmp)
+{
+	if (bmp)
+		numa_free_nodemask(bmp);
+}
+
 void wd_get_version(void)
 {
 	const char *wd_released_time = UADK_RELEASED_TIME;
diff --git a/wd_comp.c b/wd_comp.c
index 44593a6..cd3b4f3 100644
--- a/wd_comp.c
+++ b/wd_comp.c
@@ -14,6 +14,7 @@
 
 #include "config.h"
 #include "drv/wd_comp_drv.h"
+#include "wd_sched.h"
 #include "wd_util.h"
 #include "wd_comp.h"
 
@@ -21,6 +22,8 @@
 #define HW_CTX_SIZE			(64 * 1024)
 #define STREAM_CHUNK			(128 * 1024)
 
+#define SCHED_RR_NAME			"sched_rr"
+
 #define swap_byte(x) \
 	((((x) & 0x000000ff) << 24) | \
 	(((x) & 0x0000ff00) <<  8) | \
@@ -42,6 +45,7 @@ struct wd_comp_sess {
 
 struct wd_comp_setting {
 	enum wd_status status;
+	enum wd_status status2;
 	struct wd_ctx_config_internal config;
 	struct wd_sched sched;
 	struct wd_comp_driver *driver;
@@ -52,6 +56,19 @@ struct wd_comp_setting {
 
 struct wd_env_config wd_comp_env_config;
 
+/* The ctx config and the scheduler which are built by wd_comp_init2(). */
+static struct wd_ctx_config wd_comp_ctx;
+static struct wd_sched *wd_comp_sched;
+/* The number of numa nodes set in the node mask used by wd_comp_init2(). */
+static int wd_comp_numa_count;
+
+/* Default ctx numbers: one sync ctx and one async ctx for each ctx set. */
+static struct wd_ctx_nums wd_comp_ctx_num[] = {
+	{1, 1}, {1, 1}, {}
+};
+
+/* Default ctx params, used when wd_comp_init2() gets a NULL cparams. */
+static struct wd_ctx_params wd_comp_cparams = {
+	.ctx_set_num = WD_DIR_MAX,
+	.ctx_set_size = wd_comp_ctx_num
+};
+
 #ifdef WD_STATIC_DRV
 static void wd_comp_set_static_drv(void)
 {
@@ -178,6 +195,209 @@ void wd_comp_uninit(void)
 	wd_alg_clear_init(&wd_comp_setting.status);
 }
 
+/*
+ * Request the ctxs of one ctx set on numa node @numa_id and store them in
+ * wd_comp_ctx.ctxs[], starting at slot @idx. The first sync_ctx_num slots
+ * are marked CTX_MODE_SYNC and the following async_ctx_num slots
+ * CTX_MODE_ASYNC; every slot gets @op_type.
+ *
+ * When the current device reports WD_EBUSY, another device of the same
+ * numa node is looked up and the same slot is retried.
+ *
+ * Returns 0 on success and a negative error number on failure. The ctxs
+ * requested before the failure stay in wd_comp_ctx.ctxs[] so that the
+ * caller can release them.
+ */
+static int wd_comp_request_ctx(struct uacce_dev_list *list,
+			       struct wd_ctx_nums ctx_nums,
+			       int idx, int numa_id, int op_type)
+{
+	int ctx_set_size = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num;
+	struct uacce_dev *dev;
+	handle_t ctx;
+	int i;
+
+	/* wd_find_dev_by_numa() reports failure as an error pointer, not NULL */
+	dev = wd_find_dev_by_numa(list, numa_id);
+	if (WD_IS_ERR(dev))
+		return -WD_EBUSY;
+
+	for (i = idx; i < idx + ctx_set_size; i++) {
+		/* only trust errno if it was set by this very request */
+		errno = 0;
+		ctx = wd_request_ctx(dev);
+		if (!ctx) {
+			if (errno != WD_EBUSY)
+				return errno ? -errno : -WD_EINVAL;
+
+			/* this device is used up: switch device, retry the slot */
+			dev = wd_find_dev_by_numa(list, numa_id);
+			if (WD_IS_ERR(dev))
+				return -WD_EBUSY;
+			i--;
+			continue;
+		}
+
+		wd_comp_ctx.ctxs[i].ctx = ctx;
+		wd_comp_ctx.ctxs[i].op_type = op_type;
+		wd_comp_ctx.ctxs[i].ctx_mode =
+			((i - idx) < ctx_nums.sync_ctx_num) ?
+			CTX_MODE_SYNC : CTX_MODE_ASYNC;
+	}
+
+	return 0;
+}
+
+/*
+ * Release every ctx that is still recorded in wd_comp_ctx.ctxs[] and zero
+ * its handle, so that a slot is never released twice.
+ */
+static void wd_comp_release_ctx(void)
+{
+	int slot;
+
+	for (slot = 0; slot < wd_comp_ctx.ctx_num; slot++) {
+		/* empty slots hold no ctx to release */
+		if (!wd_comp_ctx.ctxs[slot].ctx)
+			continue;
+
+		wd_release_ctx(wd_comp_ctx.ctxs[slot].ctx);
+		wd_comp_ctx.ctxs[slot].ctx = 0;
+	}
+}
+
+/*
+ * Register the ctx ranges of one ctx set with the scheduler, one range
+ * for each ctx mode. Mode 0 covers the first sync_ctx_num slots starting
+ * at @idx and mode 1 the async_ctx_num slots after them (presumably mode 0
+ * is CTX_MODE_SYNC - confirm against the enum). A range whose begin is
+ * greater than its end is skipped.
+ *
+ * Returns 0 on success, or the first non-zero result of
+ * wd_sched_rr_instance().
+ */
+static int wd_comp_instance_sched(struct wd_ctx_nums ctx_nums, int idx,
+				  int numa_id, int op_type)
+{
+	struct sched_params sparams;
+	int mode, ret;
+
+	for (mode = 0; mode < CTX_MODE_MAX; mode++) {
+		sparams.numa_id = numa_id;
+		sparams.type = op_type;
+		sparams.mode = mode;
+		sparams.begin = idx + ctx_nums.sync_ctx_num * mode;
+		sparams.end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * mode;
+		if (sparams.begin > sparams.end)
+			continue;
+
+		ret = wd_sched_rr_instance(wd_comp_sched, &sparams);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * For every numa node set in @bmp and every ctx set of @cparams, request
+ * the ctxs and register their range with the scheduler, then pass the
+ * resulting ctx config and scheduler to wd_comp_init().
+ *
+ * Returns 0 on success. On any failure the ctxs requested so far are
+ * released and the error number of the failing step is returned.
+ */
+static int __wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp,
+			   struct wd_ctx_params cparams)
+{
+	int set_cnt = cparams.ctx_set_num;
+	int node_cnt = numa_max_node() + 1;
+	struct wd_ctx_nums nums;
+	int node, set, ret;
+	int pos = 0;
+
+	for (node = 0; node < node_cnt; node++) {
+		if (!numa_bitmask_isbitset(bmp, node))
+			continue;
+
+		for (set = 0; set < set_cnt; set++) {
+			nums = cparams.ctx_set_size[set];
+
+			ret = wd_comp_request_ctx(list, nums, pos, node, set);
+			if (!ret)
+				ret = wd_comp_instance_sched(nums, pos, node, set);
+			if (ret)
+				goto free_ctxs;
+
+			/* the next ctx set starts right after this one */
+			pos += (nums.sync_ctx_num + nums.async_ctx_num);
+		}
+	}
+
+	ret = wd_comp_init(&wd_comp_ctx, wd_comp_sched);
+	if (!ret)
+		return 0;
+
+free_ctxs:
+	wd_comp_release_ctx();
+
+	return ret;
+}
+
+/*
+ * Initialise compression in one call: choose the devices, allocate the ctx
+ * array, allocate the scheduler and let __wd_comp_init2() request the ctxs
+ * and call wd_comp_init().
+ *
+ * @list: The devices to choose from, must not be NULL.
+ * @bmp: The numa nodes whose devices may be used, or NULL to use the numa
+ *	 nodes of all devices in @list.
+ * @cparams: The ctx numbers of each ctx set, or NULL to use wd_comp_cparams.
+ * @sched_type: The scheduler type passed to wd_sched_rr_alloc().
+ *
+ * Returns 0 on success, and also when wd_alg_try_init() returns false
+ * (presumably the algorithm is already initialised - confirm). Returns a
+ * negative error number on failure; in that case everything allocated here
+ * is released and the file-scope pointers are reset.
+ */
+int wd_comp_init2(struct uacce_dev_list *list, struct bitmask *bmp,
+		  struct wd_ctx_params *cparams, __u32 sched_type)
+{
+	struct uacce_dev_list *used_list = NULL;
+	int ctx_set_num, ctx_set_size, ret;
+	struct bitmask *used_bmp;
+	bool flag;
+
+	flag = wd_alg_try_init(&wd_comp_setting.status2);
+	if (!flag)
+		return 0;
+
+	if (!list) {
+		WD_ERR("invalid: list is NULL!\n");
+		ret = -WD_EINVAL;
+		goto out_uninit;
+	}
+
+	if (!cparams)
+		cparams = &wd_comp_cparams;
+
+	/* a missing size array counts as zero ctxs and is rejected below */
+	ctx_set_num = cparams->ctx_set_num;
+	ctx_set_size = cparams->ctx_set_size ?
+		       wd_get_ctx_numbers(*cparams, ctx_set_num) : 0;
+	if (!ctx_set_num || !ctx_set_size) {
+		WD_ERR("invalid: ctx_set_num is %d, ctx_set_size is %d!\n",
+		       ctx_set_num, ctx_set_size);
+		ret = -WD_EINVAL;
+		goto out_uninit;
+	}
+
+	if (bmp) {
+		used_list = wd_get_usable_list(list, bmp);
+		if (WD_IS_ERR(used_list)) {
+			ret = WD_PTR_ERR(used_list);
+			WD_ERR("failed to get usable devices(%d)!\n", ret);
+			/* nothing to free: do not hand an error pointer to the cleanup */
+			used_list = NULL;
+			goto out_uninit;
+		}
+		/* NULL means that no device sits on the requested numa nodes */
+		if (!used_list) {
+			ret = -WD_ENODEV;
+			WD_ERR("failed to get usable devices(%d)!\n", ret);
+			goto out_uninit;
+		}
+	}
+
+	/* check the mask that was just created, not the caller's bmp */
+	used_bmp = wd_create_device_nodemask(used_list ? used_list : list);
+	if (WD_IS_ERR(used_bmp)) {
+		ret = WD_PTR_ERR(used_bmp);
+		goto out_freelist;
+	}
+
+	ret = numa_bitmask_weight(used_bmp);
+	if (!ret) {
+		WD_ERR("invalid: bmp is clear!\n");
+		/* without this the failure would be returned as 0, i.e. success */
+		ret = -WD_EINVAL;
+		goto out_freenodemask;
+	}
+	wd_comp_numa_count = ret;
+
+	wd_comp_ctx.ctx_num = ctx_set_size * wd_comp_numa_count;
+	wd_comp_ctx.ctxs = calloc(wd_comp_ctx.ctx_num, sizeof(struct wd_ctx));
+	if (!wd_comp_ctx.ctxs) {
+		ret = -WD_ENOMEM;
+		WD_ERR("failed to alloc ctxs!\n");
+		goto out_freectxs;
+	}
+
+	wd_comp_sched = wd_sched_rr_alloc(sched_type, ctx_set_num,
+					  numa_max_node() + 1, wd_comp_poll_ctx);
+	if (!wd_comp_sched) {
+		ret = -WD_EINVAL;
+		goto out_freectxs;
+	}
+	wd_comp_sched->name = SCHED_RR_NAME;
+
+	ret = __wd_comp_init2(!used_list ? list : used_list, used_bmp, *cparams);
+	if (ret)
+		goto out_freesched;
+
+	wd_free_list_accels(used_list);
+	wd_free_device_nodemask(used_bmp);
+
+	wd_alg_set_init(&wd_comp_setting.status2);
+
+	return ret;
+
+out_freesched:
+	wd_sched_rr_release(wd_comp_sched);
+	wd_comp_sched = NULL;
+
+out_freectxs:
+	/* reset the config so that a later uninit never walks freed memory */
+	free(wd_comp_ctx.ctxs);
+	wd_comp_ctx.ctxs = NULL;
+	wd_comp_ctx.ctx_num = 0;
+
+out_freenodemask:
+	wd_free_device_nodemask(used_bmp);
+
+out_freelist:
+	wd_free_list_accels(used_list);
+
+out_uninit:
+	wd_alg_clear_init(&wd_comp_setting.status2);
+
+	return ret;
+}
+
+/*
+ * Undo wd_comp_init2(): call wd_comp_uninit(), release the requested
+ * ctxs, free the ctx array, release the scheduler and clear the init
+ * status.
+ */
+void wd_comp_uninit2(void)
+{
+	wd_comp_uninit();
+	wd_comp_release_ctx();
+
+	/* the ctx array is allocated by wd_comp_init2() and is freed only here */
+	free(wd_comp_ctx.ctxs);
+	wd_comp_ctx.ctxs = NULL;
+	wd_comp_ctx.ctx_num = 0;
+
+	wd_sched_rr_release(wd_comp_sched);
+	wd_comp_sched = NULL;
+
+	wd_alg_clear_init(&wd_comp_setting.status2);
+}
+
 struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag)
 {
 	return wd_find_msg_in_pool(&wd_comp_setting.pool, idx, tag);
@@ -289,6 +509,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup)
 	sess->comp_lv = setup->comp_lv;
 	sess->win_sz = setup->win_sz;
 	sess->stream_pos = WD_COMP_STREAM_NEW;
+
 	/* Some simple scheduler don't need scheduling parameters */
 	sess->sched_key = (void *)wd_comp_setting.sched.sched_init(
 		     wd_comp_setting.sched.h_sched_ctx, setup->sched_param);
@@ -318,6 +539,7 @@ void wd_comp_free_sess(handle_t h_sess)
 
 	if (sess->sched_key)
 		free(sess->sched_key);
+
 	free(sess);
 }
 
diff --git a/wd_util.c b/wd_util.c
index 00dea74..713261a 100644
--- a/wd_util.c
+++ b/wd_util.c
@@ -5,7 +5,6 @@
  */
 
 #define _GNU_SOURCE
-#include <numa.h>
 #include <pthread.h>
 #include <semaphore.h>
 #include <string.h>
@@ -1792,3 +1791,61 @@ bool wd_alg_try_init(enum wd_status *status)
 
 	return true;
 }
+
+/*
+ * Build a new device list which holds a clone of every device in @list
+ * whose numa node is set in @bmp.
+ *
+ * Returns the new list, or NULL when no device matches. On failure an
+ * error pointer is returned: -WD_EINVAL when @bmp is NULL and -WD_ENOMEM
+ * when a list node or a device clone cannot be allocated; the partially
+ * built list is freed before returning.
+ */
+struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp)
+{
+	struct uacce_dev_list *p, *node, *result = NULL;
+
+	if (!bmp)
+		return WD_ERR_PTR(-WD_EINVAL);
+
+	for (p = list; p; p = p->next) {
+		if (!numa_bitmask_isbitset(bmp, p->dev->numa_id))
+			continue;
+
+		node = calloc(1, sizeof(*node));
+		if (!node)
+			goto out_free_list;
+
+		node->dev = wd_clone_dev(p->dev);
+		if (!node->dev) {
+			free(node);
+			goto out_free_list;
+		}
+
+		if (!result)
+			result = node;
+		else
+			wd_add_to_list(result, node);
+	}
+
+	return result;
+
+out_free_list:
+	/*
+	 * Free the list built so far while 'result' still points to it, and
+	 * only then report the error. Overwriting 'result' first would leak
+	 * the list and pass an error pointer to wd_free_list_accels().
+	 */
+	wd_free_list_accels(result);
+	return WD_ERR_PTR(-WD_ENOMEM);
+}
+
+/*
+ * Sum the sync and async ctx numbers of the first @end ctx sets of
+ * @cparams. Returns 0 when @cparams has no ctx_set_size array or when
+ * @end is not positive.
+ */
+__u32 wd_get_ctx_numbers(struct wd_ctx_params cparams, int end)
+{
+	__u32 count = 0;
+	int i;
+
+	/* nothing to read: report zero ctxs instead of dereferencing NULL */
+	if (!cparams.ctx_set_size)
+		return 0;
+
+	for (i = 0; i < end; i++)
+		count += cparams.ctx_set_size[i].sync_ctx_num +
+			 cparams.ctx_set_size[i].async_ctx_num;
+
+	return count;
+}
-- 
2.24.0