Acc
Threads by month
- ----- 2025 -----
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
August 2025
- 4 participants
- 11 discussions
Remove the no-SVA limitation to permit running in no-SVA mode
Signed-off-by: Zhangfei Gao <zhangfei.gao(a)linaro.org>
---
wd.c | 4 ----
wd_alg.c | 44 +-------------------------------------------
wd_util.c | 4 ----
3 files changed, 1 insertion(+), 51 deletions(-)
diff --git a/wd.c b/wd.c
index 75a9469..5fa8feb 100644
--- a/wd.c
+++ b/wd.c
@@ -235,10 +235,6 @@ static int get_dev_info(struct uacce_dev *dev)
ret = get_int_attr(dev, "flags", &dev->flags);
if (ret < 0)
return ret;
- else if (!((unsigned int)dev->flags & UACCE_DEV_SVA)) {
- WD_ERR("skip none sva uacce device!\n");
- return -WD_ENODEV;
- }
ret = get_int_attr(dev, "region_mmio_size", &value);
if (ret < 0)
diff --git a/wd_alg.c b/wd_alg.c
index 08f0e2e..45619ba 100644
--- a/wd_alg.c
+++ b/wd_alg.c
@@ -23,47 +23,6 @@ static struct wd_alg_list alg_list_head;
static struct wd_alg_list *alg_list_tail = &alg_list_head;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static bool wd_check_dev_sva(const char *dev_name)
-{
- char dev_path[PATH_MAX] = {'\0'};
- char buf[DEV_SVA_SIZE] = {'\0'};
- unsigned int val;
- ssize_t ret;
- int fd;
-
- ret = snprintf(dev_path, PATH_STR_SIZE, "%s/%s/%s", SYS_CLASS_DIR,
- dev_name, SVA_FILE_NAME);
- if (ret < 0) {
- WD_ERR("failed to snprintf, device name: %s!\n", dev_name);
- return false;
- }
-
- /**
- * The opened file is the specified device driver file.
- * no need for realpath processing.
- */
- fd = open(dev_path, O_RDONLY, 0);
- if (fd < 0) {
- WD_ERR("failed to open %s(%d)!\n", dev_path, -errno);
- return false;
- }
-
- ret = read(fd, buf, DEV_SVA_SIZE - 1);
- if (ret <= 0) {
- WD_ERR("failed to read anything at %s!\n", dev_path);
- close(fd);
- return false;
- }
- close(fd);
-
- val = strtol(buf, NULL, STR_DECIMAL);
- if (val & UACCE_DEV_SVA)
- return true;
-
- return false;
-}
-
static bool wd_check_accel_dev(const char *dev_name)
{
struct dirent *dev_dir;
@@ -80,8 +39,7 @@ static bool wd_check_accel_dev(const char *dev_name)
!strncmp(dev_dir->d_name, "..", LINUX_PRTDIR_SIZE))
continue;
- if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name)) &&
- wd_check_dev_sva(dev_dir->d_name)) {
+ if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name))) {
closedir(wd_class);
return true;
}
diff --git a/wd_util.c b/wd_util.c
index f1b27bf..9675098 100644
--- a/wd_util.c
+++ b/wd_util.c
@@ -1883,10 +1883,6 @@ int wd_init_param_check(struct wd_ctx_config *config, struct wd_sched *sched)
return -WD_EINVAL;
}
- if (!wd_is_sva(config->ctxs[0].ctx)) {
- WD_ERR("invalid: the mode is non sva, please check system!\n");
- return -WD_EINVAL;
- }
return 0;
}
--
2.25.1
2
10

21 Aug '25
From: lizhi <lizhi206(a)huawei.com>
On some Ubuntu systems, the gcc compiler rejects use of the __DATE__ and __TIME__
macros via the -Wdate-time flag (promoted to an error by -Werror) during compilation.
To ensure the compilation timestamp feature remains functional, the timestamp is
instead generated in the Makefile and passed to the compiler as a macro definition.
Signed-off-by: Longfang Liu <liulongfang(a)huawei.com>
---
uadk_tool/Makefile.am | 6 ++++++
uadk_tool/dfx/uadk_dfx.c | 6 +++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/uadk_tool/Makefile.am b/uadk_tool/Makefile.am
index 6fd9d95..9ae9fd7 100644
--- a/uadk_tool/Makefile.am
+++ b/uadk_tool/Makefile.am
@@ -1,10 +1,16 @@
ACLOCAL_AMFLAGS = -I m4 -I./include
AUTOMAKE_OPTIONS = foreign subdir-objects
+
+# get UADK build time
+BUILD_DATETIME := $(shell date -u +"%Y-%m-%d_%H:%M:%S")
+
AM_CFLAGS=-Wall -Werror -fno-strict-aliasing -I$(top_srcdir) -I$(top_srcdir)/benchmark/include \
-pthread
AM_CFLAGS += -fPIC -fPIE -pie -fstack-protector-strong -D_FORTIFY_SOURCE=2 \
-O2 -ftrapv -Wl,-z,now -Wl,-s
+AM_CFLAGS += -DUADK_BUILD_DATETIME=\"$(BUILD_DATETIME)\"
+
#AUTOMAKE_OPTIONS = subdir-objects
bin_PROGRAMS=uadk_tool
diff --git a/uadk_tool/dfx/uadk_dfx.c b/uadk_tool/dfx/uadk_dfx.c
index 9c54b7b..8afcb15 100644
--- a/uadk_tool/dfx/uadk_dfx.c
+++ b/uadk_tool/dfx/uadk_dfx.c
@@ -14,7 +14,11 @@
#include "include/wd.h"
#include "uadk_dfx.h"
-#define uadk_build_date() printf("built on: %s %s\n", __DATE__, __TIME__)
+#ifndef UADK_BUILD_DATETIME
+#define UADK_BUILD_DATETIME "unknown"
+#endif
+
+#define uadk_build_date() printf("built on: %s\n", UADK_BUILD_DATETIME)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define PRIVILEGE_FLAG 0666
--
2.33.0
1
0

19 Aug '25
From: Longfang Liu <liulongfang(a)huawei.com>
After removing the shared queue memory allocation interface, the UADK
test tools must also eliminate the shared memory functionality.
For individual memory reservations, the wd_reserve_memory interface
should be used. When allocating memory for multiple queues, each queue
should independently request its own reserved memory allocation.
Signed-off-by: Longfang Liu <liulongfang(a)huawei.com>
---
v1/test/hisi_hpre_test/hpre_test_tools.c | 392 -----------------------
v1/test/hisi_zip_test_sgl/wd_sched_sgl.c | 310 +++++++++---------
v1/test/test_mm/test_wd_mem.c | 8 +-
v1/test/wd_sched.c | 247 +++++++-------
4 files changed, 300 insertions(+), 657 deletions(-)
diff --git a/v1/test/hisi_hpre_test/hpre_test_tools.c b/v1/test/hisi_hpre_test/hpre_test_tools.c
index 7f562f34..10a4ade9 100755
--- a/v1/test/hisi_hpre_test/hpre_test_tools.c
+++ b/v1/test/hisi_hpre_test/hpre_test_tools.c
@@ -644,317 +644,6 @@ int application_release_multiple_queue(char *dev, char *alg_type, unsigned int q
printf("application_release_multiple_queue test end!\n");
return 0;
}
-
-/***
-
-***/
-int hpre_dev_queue_share(char *dev, char * share_dev, char *alg_type, unsigned long m_size)
-{
- void *addr=NULL;
- int ret = 0;
- struct wd_queue q;
- struct wd_queue target_q;
- unsigned long memory_size;
-
- memset((void *)&q, 0, sizeof(q));
- q.capa.alg = alg_type;
- snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev);
- printf("queue path:%s\n", q.dev_path);
-
- ret = wd_request_queue(&q);
- if(ret)
- {
- printf("wd request queue fail!\n");
- return 1;
- }
- printf("wd request queue success!\n");
- memory_size = m_size;
- addr = wd_reserve_memory(&q, memory_size);
- if(!addr)
- {
- wd_release_queue(&q);
- printf("wd reserve memory fail!\n");
- return 1;
- }
- printf("wd reserve memory success!\n");
- memset(addr, 0, memory_size);
-
- memset((void *)&target_q, 0, sizeof(target_q));
- target_q.capa.alg = alg_type;
- snprintf(target_q.dev_path, sizeof(target_q.dev_path), "%s", share_dev);
- printf("target queue path:%s\n", target_q.dev_path);
-
- ret = wd_request_queue(&target_q);
- if(ret)
- {
- wd_release_queue(&q);
- printf("wd request target_q queue fail!\n");
- return 1;
- }
- printf("wd request target_q queue success!\n");
- //target_q队列共享q队列预留内存;
- ret = wd_share_reserved_memory(&q, &target_q);
- if(ret)
- {
- wd_release_queue(&q);
- wd_release_queue(&target_q);
- printf("wd target_q queue share reserved memory fail!\n");
- return 1;
- }
- printf("wd target_q queue share reserved memory success!\n");
- wd_release_queue(&target_q);
- wd_release_queue(&q);
-
- return 0;
-}
-/***
-
-***/
-int hpre_node_queue_share(char *dev, unsigned int node, unsigned int share_node, char *alg_type, unsigned long m_size)
-{
- void *addr=NULL;
- int ret = 0;
- struct wd_queue q;
- struct wd_queue target_q;
- unsigned long memory_size;
-
- memset((void *)&q, 0, sizeof(q));
- q.capa.alg = alg_type;
- snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev);
- printf("queue path:%s\n", q.dev_path);
- q.node_mask = node;
-
- ret = wd_request_queue(&q);
- if(ret)
- {
- printf("wd request queue fail!\n");
- return 1;
- }
- printf("wd request queue success!\n");
- memory_size = m_size;
- addr = wd_reserve_memory(&q, memory_size);
- if(!addr)
- {
- wd_release_queue(&q);
- printf("wd reserve memory fail!\n");
- return 1;
- }
- printf("wd reserve memory success!\n");
- memset(addr, 0, memory_size);
-
- memset((void *)&target_q, 0, sizeof(target_q));
- target_q.capa.alg = alg_type;
- target_q.node_mask = node;
-
- ret = wd_request_queue(&target_q);
- if(ret)
- {
- wd_release_queue(&q);
- printf("wd request target_q queue fail!\n");
- return 1;
- }
- printf("wd request target_q queue success!\n");
- //target_q队列共享q队列预留内存;
- ret = do_dh(&q);
- if(ret)
- {
- printf("do dh on q fail!\n");
- return 1;
- }
- ret = do_dh(&target_q);
- if(ret)
- {
- printf("do dh on target q fail!\n");
- return 1;
- }
-
- ret = wd_share_reserved_memory(&q, &target_q);
-
- if(ret)
- {
- wd_release_queue(&q);
- wd_release_queue(&target_q);
- printf("wd target_q queue share reserved memory fail!\n");
- return 1;
- }
- printf("wd target_q queue share reserved memory success!\n");
- ret = do_dh(&q);
- if(ret)
- {
- printf("do dh on share q fail!\n");
- return 1;
- }
- ret = do_dh(&target_q);
- if(ret)
- {
- printf("do dh on share target q fail!\n");
- return 1;
- }
-
- wd_release_queue(&target_q);
- wd_release_queue(&q);
-
- return 0;
-}
-/***
-
-***/
-int hpre_dev_queue_interact_share(char *dev, char * share_dev, char *alg_type, unsigned long m_size)
-{
- void *addr=NULL;
- int ret = 0;
- struct wd_queue q;
- struct wd_queue target_q;
- unsigned long memory_size;
-
- memset((void *)&q, 0, sizeof(q));
- q.capa.alg = alg_type;
- snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev);
- printf("queue path:%s\n", q.dev_path);
-
- ret = wd_request_queue(&q);
- if(ret)
- {
- printf("wd request queue fail!\n");
- return ret;
- }
- printf("wd request queue success!\n");
- memory_size = m_size;
- addr = wd_reserve_memory(&q, memory_size);
- if(!addr)
- {
- wd_release_queue(&q);
- printf("wd reserve memory fail!\n");
- return 1;
- }
- printf("wd reserve memory success!\n");
- memset(addr, 0, memory_size);
-
- memset((void *)&target_q, 0, sizeof(target_q));
- target_q.capa.alg = alg_type;
- snprintf(target_q.dev_path, sizeof(target_q.dev_path), "%s", share_dev);
- printf("target queue path:%s\n", target_q.dev_path);
-
- ret = wd_request_queue(&target_q);
- if(ret)
- {
- wd_release_queue(&q);
- printf("wd request target_q queue fail!\n");
- return 1;
- }
- printf("wd request target_q queue success!\n");
- addr = wd_reserve_memory(&target_q, memory_size);
- if(!addr)
- {
- wd_release_queue(&q);
- wd_release_queue(&target_q);
- printf("wd reserve memory fail!\n");
- return 1;
- }
- printf("wd reserve memory success!\n");
- memset(addr, 0, memory_size);
-
- //target_q
- ret = wd_share_reserved_memory(&q, &target_q);
- if(ret)
- {
- wd_release_queue(&q);
- wd_release_queue(&target_q);
- printf("wd target_q queue share reserved memory fail!\n");
- return 1;
- }
- printf("wd target_q queue share reserved memory success!\n");
-
- wd_release_queue(&target_q);
- wd_release_queue(&q);
-
- return 0;
-}
-
-/***
-
-***/
-int hpre_dev_queue_cross_proc_share(char *dev, char *alg_type, unsigned long m_size)
-{
- void *addr=NULL;
- int ret = 0;
- pid_t pid;
- struct wd_queue q;
- struct wd_queue target_q;
- unsigned long memory_size=0;
-
- pid = fork();
- if(pid < 0)
- {
- printf("Creation process failed, pid:%d\n",pid);
- return 1;
- }
- else if(pid == 0)
- {
- printf("child process:%d\n", pid);
- memset((void *)&q, 0, sizeof(q));
- q.capa.alg = alg_type;
- snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev);
- printf("queue path:%s\n", q.dev_path);
-
- ret = wd_request_queue(&q);
- if(ret)
- {
- printf("request queue fail!\n");
- exit(1);
- }
- printf("wd request queue success!\n");
- memory_size = m_size;
- addr = wd_reserve_memory(&q, memory_size);
- if(!addr)
- {
- wd_release_queue(&q);
- printf("queue reserve memory fail!\n");
- exit(2);
- }
- printf("queue reserve memory success!\n");
- memset(addr, 0, memory_size);
- exit(0);
- }
- printf("parent process:%d\n", pid);
- pid_t wpid;
- int status = -1;
- wpid = waitpid(pid, &status, WUNTRACED | WCONTINUED);
- if( wpid < 0)
- {
- printf("exited, status=%d\n", WEXITSTATUS(status));
- return(status);
- }
-
- memset((void *)&target_q, 0, sizeof(target_q));
- target_q.capa.alg = alg_type;
- snprintf(target_q.dev_path, sizeof(target_q.dev_path), "%s", dev);
- printf("target queue path:%s\n", target_q.dev_path);
-
- ret = wd_request_queue(&target_q);
- if(ret)
- {
- wd_release_queue(&q);
- printf("wd request target_q queue fail!\n");
- return 1;
- }
- printf("wd request target_q queue success!\n");
- ret = wd_share_reserved_memory(&q, &target_q);
- if(ret)
- {
- wd_release_queue(&target_q);
- wd_release_queue(&q);
- printf("wd target_q queue share reserved memory fail!\n");
- return 1;
- }
- printf("wd target_q queue share reserved memory success!\n");
-
- wd_release_queue(&target_q);
- wd_release_queue(&q);
-
- return 0;
-}
-
/***
***/
@@ -1696,87 +1385,6 @@ int main(int arc, char *argv[])
return 1;
}
}
- else if(!strcmp(argv[1], "queue-share"))
- {
- /***
- argv[2] - 表示算法类型
- argv[3] - 表示申请队列设备
- argv[4] - 表示共享预留内存的设备
- argv[5] - 表示申请队列的预留内存大小
- ***/
- //申请单个队列,预留内存,与其它队列共享预留内存
- snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]);
- snprintf(dev, sizeof(dev), "%s", argv[3]);
- snprintf(share_dev, sizeof(share_dev), "%s", argv[4]);
- memory_size = strtoul(argv[5], NULL, 10);
-
- ret = hpre_dev_queue_share(dev, share_dev, algorithm_type, memory_size);
- if(0 != ret)
- {
- return 1;
- }
- }
- else if(!strcmp(argv[1], "node-queue-share"))
- {
- /***
- argv[2] - 表示算法类型
- argv[3] - 表示申请队列设备
- argv[4] - 表示设备node
- argv[5] - 表示共享内存设备node
- argv[6] - 表示申请队列的预留内存大小
- ***/
- //申请单个队列,预留内存,与其它队列共享预留内存
- snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]);
- snprintf(dev, sizeof(dev), "%s", argv[3]);
- unsigned int node=0;
- node = strtoul(argv[4], NULL, 16);
- unsigned int share_node=0;
- share_node = strtoul(argv[5], NULL, 16);
- memory_size = strtoul(argv[6], NULL, 10);
-
- ret = hpre_node_queue_share(dev, node, share_node, algorithm_type, memory_size);
- if(0 != ret)
- {
- return 1;
- }
- }
- else if(!strcmp(argv[1], "queue-interact-share"))
- {
- /***
- argv[2] - 表示算法类型
- argv[3] - 表示申请队列设备
- argv[4] - 表示共享预留内存的设备
- argv[5] - 表示申请队列的预留内存大小
- ***/
- //队列预留内存后作为共享的目标队列
- snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]);
- snprintf(dev, sizeof(dev), "%s", argv[3]);
- snprintf(share_dev, sizeof(share_dev), "%s", argv[4]);
- memory_size = strtoul(argv[5], NULL, 10);
-
- ret = hpre_dev_queue_interact_share(dev, share_dev, algorithm_type, memory_size);
- if(0 != ret)
- {
- return 1;
- }
- }
- else if(!strcmp(argv[1], "queue-cross-proc-share"))
- {
- /***
- argv[2] - 表示算法类型
- argv[3] - 表示申请队列设备
- argv[4] - 表示申请队列的预留内存大小
- ***/
- //跨进程进行队列共享
- snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]);
- snprintf(dev, sizeof(dev), "%s", argv[3]);
- memory_size = strtoul(argv[4], NULL, 10);
- ret = hpre_dev_queue_cross_proc_share(dev, algorithm_type, memory_size);
- if(0 != ret)
- {
- return 1;
- }
- }
else if(!strcmp(argv[1], "mult-thread-queue"))
{
/***
diff --git a/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c b/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c
index 31637565..7a3be22c 100644
--- a/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c
+++ b/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c
@@ -23,96 +23,33 @@
#define EXTRA_SIZE 4096
#define WD_WAIT_MS 1000
-static int __init_cache(struct wd_scheduler *sched, int data_fmt)
+static int wd_sched_pre_uninit(struct wd_scheduler *sched, int data_fmt)
{
- int i;
- int ret = -ENOMEM;
+ unsigned int flags = 0;
struct q_info *qinfo;
void *pool;
+ int i;
- sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs));
- if (!sched->msgs) {
- WD_ERR("calloc for sched->msgs fail!\n");
- return ret;
- }
- sched->stat = calloc(sched->q_num, sizeof(*sched->stat));
- if (!sched->stat) {
- WD_ERR("calloc for sched->stat fail!\n");
- goto err_with_msgs;
- }
qinfo = sched->qs[0].qinfo;
- pool = qinfo->br.usr;
- for (i = 0; i < sched->msg_cache_num; i++) {
- if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */
- sched->msgs[i].data_in = wd_alloc_blk(pool);
- sched->msgs[i].data_out = wd_alloc_blk(pool);
- if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) {
- dbg("not enough data ss_region memory "
- "for cache %d (bs=%d)\n", i, sched->msg_data_size);
- goto err_with_stat;
- }
- } else { /* use sgl */
- sched->msgs[i].data_in = wd_alloc_sgl(pool, sched->msg_data_size);
- sched->msgs[i].data_out = wd_alloc_sgl(pool, sched->msg_data_size);
- if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) {
- dbg("not enough data ss_region memory "
- "for cache %d (bs=%d)\n", i, sched->msg_data_size);
- goto err_with_stat;
- }
+ flags = qinfo->dev_flags;
+ if (flags & WD_UACCE_DEV_PASID) {
+ if (sched->ss_region) {
+ free(sched->ss_region);
+ sched->ss_region = NULL;
}
+ return 0;
+ }
- if (sched->init_cache)
- sched->init_cache(sched, i, data_fmt);
+ for (i = 0; i < sched->q_num; i++) {
+ wd_release_queue(&sched->qs[i]);
+ qinfo = sched->qs[i].qinfo;
+ if (data_fmt == WD_FLAT_BUF)
+ wd_blkpool_destroy(qinfo->br.usr);
+ else
+ wd_sglpool_destroy(qinfo->br.usr);
}
return 0;
-
-err_with_stat:
- free(sched->stat);
- sched->stat = NULL;
-err_with_msgs:
- free(sched->msgs);
- sched->msgs = NULL;
- return ret;
-}
-
-static void __fini_cache(struct wd_scheduler *sched, int data_fmt)
-{
- struct q_info *qinfo = sched->qs[0].qinfo;
- unsigned int flags = qinfo->dev_flags;
- void *pool;
- int i;
-
- if (sched->stat) {
- free(sched->stat);
- sched->stat = NULL;
- }
- if (!(flags & WD_UACCE_DEV_PASID)) {
- pool = qinfo->br.usr;
- if (pool) {
- if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */
- for (i = 0; i < sched->msg_cache_num; i++) {
- if (sched->msgs[i].data_in)
- wd_free_blk(pool, sched->msgs[i].data_in);
- if (sched->msgs[i].data_out)
- wd_free_blk(pool, sched->msgs[i].data_out);
- }
- wd_blkpool_destroy(pool);
- } else { /* use sgl */
- for (i = 0; i < sched->msg_cache_num; i++) {
- if (sched->msgs[i].data_in)
- wd_free_sgl(pool, sched->msgs[i].data_in);
- if (sched->msgs[i].data_out)
- wd_free_sgl(pool, sched->msgs[i].data_out);
- }
- wd_sglpool_destroy(pool);
- }
- }
- }
- if (sched->msgs) {
- free(sched->msgs);
- sched->msgs = NULL;
- }
}
static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt)
@@ -124,14 +61,6 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt)
struct wd_sglpool_setup sp;
void *pool;
- for (i = 0; i < sched->q_num; i++) {
- ret = wd_request_queue(&sched->qs[i]);
- if (ret) {
- WD_ERR("fail to request queue!\n");
- goto out_with_queues;
- }
- }
-
if (!sched->ss_region_size)
sched->ss_region_size = EXTRA_SIZE + /* add 1 page extra */
sched->msg_cache_num * (sched->msg_data_size << 0x1);
@@ -145,12 +74,22 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt)
ret = -ENOMEM;
goto out_with_queues;
}
- } else {
- if (data_fmt == WD_FLAT_BUF) { /* use pbuffer*/
- memset(&mm_setup, 0, sizeof(mm_setup));
- mm_setup.block_size = sched->msg_data_size;
- mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */
- mm_setup.align_size = 128;
+ return 0;
+ }
+
+ if (data_fmt == WD_FLAT_BUF) { /* use pbuffer*/
+ memset(&mm_setup, 0, sizeof(mm_setup));
+ mm_setup.block_size = sched->msg_data_size;
+ mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */
+ mm_setup.align_size = 128;
+ for (i = 0; i < sched->q_num; i++) {
+ ret = wd_request_queue(&sched->qs[i]);
+ if (ret) {
+ WD_ERR("fail to request queue!\n");
+ goto out_with_queues;
+ }
+
+ qinfo = sched->qs[i].qinfo;
pool = wd_blkpool_create(&sched->qs[0], &mm_setup);
if (!pool) {
WD_ERR("%s(): create pool fail!\n", __func__);
@@ -162,15 +101,18 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt)
qinfo->br.iova_map = (void *)wd_blk_iova_map;
qinfo->br.iova_unmap = (void *)wd_blk_iova_unmap;
qinfo->br.usr = pool;
- } else { /* use sgl*/
- memset(&sp, 0, sizeof(sp));
- sp.buf_size = sched->msg_data_size / 10;
- sp.align_size = 64;
- sp.sge_num_in_sgl = 60;
- sp.buf_num_in_sgl = sp.sge_num_in_sgl;
- sp.sgl_num = 3 * sched->msg_cache_num;
- sp.buf_num = sp.buf_num_in_sgl * sp.sgl_num + sp.sgl_num * 2;
-
+ }
+ } else { /* use sgl*/
+ memset(&sp, 0, sizeof(sp));
+ sp.buf_size = sched->msg_data_size / 10;
+ sp.align_size = 64;
+ sp.sge_num_in_sgl = 60;
+ sp.buf_num_in_sgl = sp.sge_num_in_sgl;
+ sp.sgl_num = 3 * sched->msg_cache_num;
+ sp.buf_num = sp.buf_num_in_sgl * sp.sgl_num + sp.sgl_num * 2;
+
+ for (i = 0; i < sched->q_num; i++) {
+ qinfo = sched->qs[i].qinfo;
pool = wd_sglpool_create(&sched->qs[0], &sp);
if (!pool) {
WD_ERR("%s(): create pool fail!\n", __func__);
@@ -189,17 +131,132 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt)
return 0;
out_with_queues:
+ for (j = i-1; j >= 0; j--) {
+ wd_release_queue(&sched->qs[j]);
+ qinfo = sched->qs[j].qinfo;
+ if (data_fmt == WD_FLAT_BUF)
+ wd_blkpool_destroy(qinfo->br.usr);
+ else
+ wd_sglpool_destroy(qinfo->br.usr);
+ }
+
if (flags & WD_UACCE_DEV_PASID) {
if (sched->ss_region) {
free(sched->ss_region);
sched->ss_region = NULL;
}
}
- for (j = i-1; j >= 0; j--)
- wd_release_queue(&sched->qs[j]);
+
return ret;
}
+static void __fini_cache(struct wd_scheduler *sched, int data_fmt)
+{
+ struct q_info *qinfo = sched->qs[0].qinfo;
+ unsigned int flags = qinfo->dev_flags;
+ void *pool;
+ int i, j;
+
+ if (sched->stat) {
+ free(sched->stat);
+ sched->stat = NULL;
+ }
+
+ if (sched->msgs) {
+ free(sched->msgs);
+ sched->msgs = NULL;
+ }
+
+ if (!(flags & WD_UACCE_DEV_PASID)) {
+ for (j = 0; j < sched->q_num; j++) {
+ qinfo = sched->qs[j].qinfo;
+ pool = qinfo->br.usr;
+ if (!pool)
+ continue;
+
+ if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */
+ for (i = 0; i < sched->msg_cache_num; i++) {
+ if (sched->msgs[i].data_in)
+ wd_free_blk(pool, sched->msgs[i].data_in);
+ if (sched->msgs[i].data_out)
+ wd_free_blk(pool, sched->msgs[i].data_out);
+ }
+ } else { /* use sgl */
+ for (i = 0; i < sched->msg_cache_num; i++) {
+ if (sched->msgs[i].data_in)
+ wd_free_sgl(pool, sched->msgs[i].data_in);
+ if (sched->msgs[i].data_out)
+ wd_free_sgl(pool, sched->msgs[i].data_out);
+ }
+ }
+ }
+ }
+}
+
+static int __init_cache(struct wd_scheduler *sched, int data_fmt)
+{
+ struct q_info *qinfo;
+ unsigned int flags;
+ int ret = -ENOMEM;
+ int i, j;
+ void *pool;
+
+ sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs));
+ if (!sched->msgs) {
+ WD_ERR("calloc for sched->msgs fail!\n");
+ return ret;
+ }
+ sched->stat = calloc(sched->q_num, sizeof(*sched->stat));
+ if (!sched->stat) {
+ WD_ERR("calloc for sched->stat fail!\n");
+ goto err_with_msgs;
+ }
+ qinfo = sched->qs[0].qinfo;
+ pool = qinfo->br.usr;
+ flags = qinfo->dev_flags;
+ if ((flags & WD_UACCE_DEV_PASID))
+ return 0;
+
+ for (i = 0; i < sched->q_num; i++) {
+ qinfo = sched->qs[i].qinfo;
+ pool = qinfo->br.usr;
+ for (j = 0; j < sched->msg_cache_num; j++) {
+ if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */
+ sched->msgs[j].data_in = wd_alloc_blk(pool);
+ sched->msgs[j].data_out = wd_alloc_blk(pool);
+ if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) {
+ dbg("not enough data ss_region memory "
+ "for cache %d (bs=%d)\n", j, sched->msg_data_size);
+ goto err_with_stat;
+ }
+ } else { /* use sgl */
+ sched->msgs[j].data_in = wd_alloc_sgl(pool, sched->msg_data_size);
+ sched->msgs[j].data_out = wd_alloc_sgl(pool, sched->msg_data_size);
+ if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) {
+ dbg("not enough data ss_region memory "
+ "for cache %d (bs=%d)\n", j, sched->msg_data_size);
+ goto err_with_stat;
+ }
+ }
+
+ if (sched->init_cache)
+ sched->init_cache(sched, j, data_fmt);
+ }
+ }
+
+ return 0;
+
+err_with_stat:
+ free(sched->stat);
+ sched->stat = NULL;
+ __fini_cache(sched, data_fmt);
+err_with_msgs:
+ if (sched->msgs) {
+ free(sched->msgs);
+ sched->msgs = NULL;
+ }
+ return ret;
+}
int wd_sched_init(struct wd_scheduler *sched, int data_fmt)
{
@@ -211,57 +268,22 @@ int wd_sched_init(struct wd_scheduler *sched, int data_fmt)
if (ret < 0)
return -EINVAL;
- qinfo = sched->qs[0].qinfo;
- flags = qinfo->dev_flags;
- if (!(flags & WD_UACCE_DEV_PASID)) {
- for (k = 1; k < sched->q_num; k++) {
- ret = wd_share_reserved_memory(&sched->qs[0],
- &sched->qs[k]);
- if (ret) {
- WD_ERR("fail to share queue reserved mem!\n");
- goto out_with_queues;
- }
- }
- }
-
sched->cl = sched->msg_cache_num;
ret = __init_cache(sched, data_fmt);
if (ret) {
WD_ERR("fail to init caches!\n");
- goto out_with_queues;
+ wd_sched_pre_uninit(sched, data_fmt);
+ return -EINVAL;
}
return 0;
-
-out_with_queues:
- if (flags & WD_UACCE_DEV_PASID) {
- if (sched->ss_region) {
- free(sched->ss_region);
- sched->ss_region = NULL;
- }
- }
- for (j = sched->q_num - 1; j >= 0; j--)
- wd_release_queue(&sched->qs[j]);
- return ret;
}
void wd_sched_fini(struct wd_scheduler *sched, int data_fmt)
{
- int i;
- struct q_info *qinfo = sched->qs[0].qinfo;
- unsigned int flags = qinfo->dev_flags;
-
__fini_cache(sched, data_fmt);
- if (flags & WD_UACCE_DEV_PASID) {
- if (sched->ss_region) {
- free(sched->ss_region);
- sched->ss_region = NULL;
- }
- }
-
- for (i = sched->q_num - 1; i >= 0; i--)
- wd_release_queue(&sched->qs[i]);
+ wd_sched_pre_uninit(sched, data_fmt);
}
static int __sync_send(struct wd_scheduler *sched)
@@ -350,4 +372,4 @@ int wd_sched_work(struct wd_scheduler *sched, int remained)
}
return sched->cl;
-}
\ No newline at end of file
+}
diff --git a/v1/test/test_mm/test_wd_mem.c b/v1/test/test_mm/test_wd_mem.c
index 09824b99..e2eec60e 100644
--- a/v1/test/test_mm/test_wd_mem.c
+++ b/v1/test/test_mm/test_wd_mem.c
@@ -208,10 +208,10 @@ void *mmt_sys_test_thread(void *data)
return NULL;
}
- ret = wd_share_reserved_memory(pdata->qinfo1.q, &rsa_q);
+ ret = wd_request_queue(&pdata->qinfo1.q);
if (ret) {
wd_release_queue(&rsa_q);
- MMT_PRT("Proc-%d, thrd-%d:share mem on rsa queue fail!\n",
+ MMT_PRT("Proc-%d, thrd-%d:rsa queue fail!\n",
pid, thread_id);
return NULL;
}
@@ -226,9 +226,9 @@ void *mmt_sys_test_thread(void *data)
return NULL;
}
- ret = wd_share_reserved_memory(pdata->qinfo2.q, &zlib_q);
+ ret = wd_request_queue(&pdata->qinfo2.q);
if (ret) {
- MMT_PRT("Proc-%d, thrd-%d:share mem on zlib queue fail!\n",
+ MMT_PRT("Proc-%d, thrd-%d:zlib queue fail!\n",
pid, thread_id);
goto fail_release;
diff --git a/v1/test/wd_sched.c b/v1/test/wd_sched.c
index f5e46699..ce1d2604 100644
--- a/v1/test/wd_sched.c
+++ b/v1/test/wd_sched.c
@@ -22,94 +22,40 @@
#define EXTRA_SIZE 4096
#define WD_WAIT_MS 1000
-static int __init_cache(struct wd_scheduler *sched)
+static int wd_sched_pre_uninit(struct wd_scheduler *sched)
{
- int i;
- int ret = -ENOMEM;
+ unsigned int flags = 0;
struct q_info *qinfo;
void *pool;
+ int i;
- sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs));
- if (!sched->msgs) {
- WD_ERR("calloc for sched->msgs fail!\n");
- return ret;
- }
- sched->stat = calloc(sched->q_num, sizeof(*sched->stat));
- if (!sched->stat) {
- WD_ERR("calloc for sched->stat fail!\n");
- goto err_with_msgs;
- }
qinfo = sched->qs[0].qinfo;
- pool = qinfo->br.usr;
- for (i = 0; i < sched->msg_cache_num; i++) {
- sched->msgs[i].data_in = wd_alloc_blk(pool);
- sched->msgs[i].data_out = wd_alloc_blk(pool);
- if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) {
- dbg("not enough data ss_region memory "
- "for cache %d (bs=%d)\n", i, sched->msg_data_size);
- goto err_with_stat;
+ flags = qinfo->dev_flags;
+ if (flags & WD_UACCE_DEV_PASID) {
+ if (sched->ss_region) {
+ free(sched->ss_region);
+ sched->ss_region = NULL;
}
+ return 0;
+ }
- if (sched->init_cache)
- sched->init_cache(sched, i);
+ for (i = 0; i < sched->q_num; i++) {
+ wd_release_queue(&sched->qs[i]);
+ qinfo = sched->qs[i].qinfo;
+ wd_blkpool_destroy(qinfo->br.usr);
}
return 0;
-
-err_with_stat:
- free(sched->stat);
- sched->stat = NULL;
-err_with_msgs:
- free(sched->msgs);
- sched->msgs = NULL;
- return ret;
-}
-
-static void __fini_cache(struct wd_scheduler *sched)
-{
- struct q_info *qinfo = sched->qs[0].qinfo;
- unsigned int flags = qinfo->dev_flags;
- void *pool;
- int i;
-
- if (sched->stat) {
- free(sched->stat);
- sched->stat = NULL;
- }
- if (!(flags & WD_UACCE_DEV_PASID)) {
- pool = qinfo->br.usr;
- if (pool) {
- for (i = 0; i < sched->msg_cache_num; i++) {
- if (sched->msgs[i].data_in)
- wd_free_blk(pool, sched->msgs[i].data_in);
- if (sched->msgs[i].data_out)
- wd_free_blk(pool, sched->msgs[i].data_out);
- }
- wd_blkpool_destroy(pool);
- }
- }
- if (sched->msgs) {
- free(sched->msgs);
- sched->msgs = NULL;
- }
}
static int wd_sched_preinit(struct wd_scheduler *sched)
{
- int ret, i, j;
+ struct wd_blkpool_setup mm_setup;
unsigned int flags = 0;
struct q_info *qinfo;
- struct wd_blkpool_setup mm_setup;
+ int ret, i, j;
void *pool;
- for (i = 0; i < sched->q_num; i++) {
- ret = wd_request_queue(&sched->qs[i]);
- if (ret) {
- WD_ERR("fail to request queue!\n");
- goto out_with_queues;
- }
- }
-
if (!sched->ss_region_size)
sched->ss_region_size = EXTRA_SIZE + /* add 1 page extra */
sched->msg_cache_num * (sched->msg_data_size << 0x1);
@@ -120,18 +66,29 @@ static int wd_sched_preinit(struct wd_scheduler *sched)
sched->ss_region = malloc(sched->ss_region_size);
if (!sched->ss_region) {
WD_ERR("fail to alloc sched ss region mem!\n");
+ return -ENOMEM;
+ }
+ return 0;
+ }
+
+ memset(&mm_setup, 0, sizeof(mm_setup));
+ mm_setup.block_size = sched->msg_data_size;
+ mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */
+ mm_setup.align_size = 128;
+ for (i = 0; i < sched->q_num; i++) {
+ ret = wd_request_queue(&sched->qs[i]);
+ if (ret) {
+ WD_ERR("fail to request queue!\n");
ret = -ENOMEM;
goto out_with_queues;
}
- } else {
- memset(&mm_setup, 0, sizeof(mm_setup));
- mm_setup.block_size = sched->msg_data_size;
- mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */
- mm_setup.align_size = 128;
- pool = wd_blkpool_create(&sched->qs[0], &mm_setup);
+
+ qinfo = sched->qs[i].qinfo;
+ pool = wd_blkpool_create(&sched->qs[i], &mm_setup);
if (!pool) {
WD_ERR("%s(): create pool fail!\n", __func__);
ret = -ENOMEM;
+ wd_release_queue(&sched->qs[i]);
goto out_with_queues;
}
qinfo->br.alloc = (void *)wd_alloc_blk;
@@ -144,79 +101,135 @@ static int wd_sched_preinit(struct wd_scheduler *sched)
return 0;
out_with_queues:
+ for (j = i-1; j >= 0; j--) {
+ wd_release_queue(&sched->qs[j]);
+ qinfo = sched->qs[j].qinfo;
+ wd_blkpool_destroy(qinfo->br.usr);
+ }
+
if (flags & WD_UACCE_DEV_PASID) {
if (sched->ss_region) {
free(sched->ss_region);
sched->ss_region = NULL;
}
}
- for (j = i-1; j >= 0; j--)
- wd_release_queue(&sched->qs[j]);
+
return ret;
}
+static void __fini_cache(struct wd_scheduler *sched)
+{
+ struct q_info *qinfo = sched->qs[0].qinfo;
+ unsigned int flags = qinfo->dev_flags;
+ void *pool;
+ int i, j;
-int wd_sched_init(struct wd_scheduler *sched)
+ if (sched->stat) {
+ free(sched->stat);
+ sched->stat = NULL;
+ }
+
+ if (sched->msgs) {
+ free(sched->msgs);
+ sched->msgs = NULL;
+ }
+
+ if (!(flags & WD_UACCE_DEV_PASID)) {
+ for (j = 0; j < sched->q_num; j++) {
+ qinfo = sched->qs[j].qinfo;
+ pool = qinfo->br.usr;
+ if (!pool)
+ continue;
+
+ for (i = 0; i < sched->msg_cache_num; i++) {
+ if (sched->msgs[i].data_in)
+ wd_free_blk(pool, sched->msgs[i].data_in);
+ if (sched->msgs[i].data_out)
+ wd_free_blk(pool, sched->msgs[i].data_out);
+ }
+ }
+ }
+}
+
+static int __init_cache(struct wd_scheduler *sched)
{
- int ret, j, k;
- unsigned int flags;
struct q_info *qinfo;
+ unsigned int flags;
+ int ret = -ENOMEM;
+ int i, j;
+ void *pool;
- ret = wd_sched_preinit(sched);
- if (ret < 0)
- return -EINVAL;
+ sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs));
+ if (!sched->msgs) {
+ WD_ERR("calloc for sched->msgs fail!\n");
+ return ret;
+ }
+ sched->stat = calloc(sched->q_num, sizeof(*sched->stat));
+ if (!sched->stat) {
+ WD_ERR("calloc for sched->stat fail!\n");
+ goto err_with_msgs;
+ }
qinfo = sched->qs[0].qinfo;
+ pool = qinfo->br.usr;
flags = qinfo->dev_flags;
- if (!(flags & WD_UACCE_DEV_PASID)) {
- for (k = 1; k < sched->q_num; k++) {
- ret = wd_share_reserved_memory(&sched->qs[0],
- &sched->qs[k]);
- if (ret) {
- WD_ERR("fail to share queue reserved mem!\n");
- goto out_with_queues;
+ if ((flags & WD_UACCE_DEV_PASID))
+ return 0;
+
+ for (i = 0; i < sched->q_num; i++) {
+ qinfo = sched->qs[i].qinfo;
+ pool = qinfo->br.usr;
+ for (j = 0; j < sched->msg_cache_num; j++) {
+ sched->msgs[j].data_in = wd_alloc_blk(pool);
+ sched->msgs[j].data_out = wd_alloc_blk(pool);
+ if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) {
+ dbg("not enough data ss_region memory "
+ "for cache %d (bs=%d)\n", j, sched->msg_data_size);
+ goto err_with_alloc;
}
+
+ if (sched->init_cache)
+ sched->init_cache(sched, j);
}
}
- sched->cl = sched->msg_cache_num;
+ return 0;
+
+err_with_alloc:
+ free(sched->stat);
+ sched->stat = NULL;
+ __fini_cache(sched);
+err_with_msgs:
+ if (sched->msgs) {
+ free(sched->msgs);
+ sched->msgs = NULL;
+ }
+ return ret;
+}
+
+int wd_sched_init(struct wd_scheduler *sched)
+{
+ int ret;
+ ret = wd_sched_preinit(sched);
+ if (ret < 0)
+ return -EINVAL;
+
+ sched->cl = sched->msg_cache_num;
ret = __init_cache(sched);
if (ret) {
WD_ERR("fail to init caches!\n");
- goto out_with_queues;
+ wd_sched_pre_uninit(sched);
+ return -EINVAL;
}
return 0;
-
-out_with_queues:
- if (flags & WD_UACCE_DEV_PASID) {
- if (sched->ss_region) {
- free(sched->ss_region);
- sched->ss_region = NULL;
- }
- }
- for (j = sched->q_num - 1; j >= 0; j--)
- wd_release_queue(&sched->qs[j]);
- return ret;
}
void wd_sched_fini(struct wd_scheduler *sched)
{
- int i;
- struct q_info *qinfo = sched->qs[0].qinfo;
- unsigned int flags = qinfo->dev_flags;
-
__fini_cache(sched);
- if (flags & WD_UACCE_DEV_PASID) {
- if (sched->ss_region) {
- free(sched->ss_region);
- sched->ss_region = NULL;
- }
- }
-
- for (i = sched->q_num - 1; i >= 0; i--)
- wd_release_queue(&sched->qs[i]);
+ wd_sched_pre_uninit(sched);
}
static int __sync_send(struct wd_scheduler *sched)
--
2.33.0
1
0
From: Longfang Liu <liulongfang(a)huawei.com>
In the No-SVA mode of UADK, a feature was implemented to reserve
memory allocated for shared queues, which was exposed to external
users through the wd_share_reserved_memory interface.
After this functionality was removed from the kernel-space UACCE,
the corresponding external interface in UADK must also be deleted
to maintain consistency.
Signed-off-by: Longfang Liu <liulongfang(a)huawei.com>
---
v1/uacce.h | 1 -
v1/wd.c | 57 ++----------------------------------------------------
v1/wd.h | 2 --
3 files changed, 2 insertions(+), 58 deletions(-)
diff --git a/v1/uacce.h b/v1/uacce.h
index eef932c7..ffd576c1 100644
--- a/v1/uacce.h
+++ b/v1/uacce.h
@@ -79,7 +79,6 @@ enum uacce_qfrt {
* Optimization method since close fd may delay
*/
#define WD_UACCE_CMD_PUT_Q _IO('W', 1)
-#define WD_UACCE_CMD_SHARE_SVAS _IO('W', 2)
#define WD_UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long)
#endif
diff --git a/v1/wd.c b/v1/wd.c
index 13239b58..bab2712f 100644
--- a/v1/wd.c
+++ b/v1/wd.c
@@ -58,7 +58,6 @@ struct dev_info {
int node_id;
int numa_dis;
int flags;
- int ref;
int available_instances;
int iommu_type;
unsigned int weight;
@@ -531,7 +530,6 @@ static int get_queue_from_dev(struct wd_queue *q, const struct dev_info *dev)
qinfo->iommu_type = dev->iommu_type;
qinfo->dev_info = dev;
qinfo->head = &qinfo->ss_list;
- __atomic_clear(&qinfo->ref, __ATOMIC_RELEASE);
TAILQ_INIT(&qinfo->ss_list);
memcpy(qinfo->qfrs_offset, dev->qfrs_offset,
sizeof(qinfo->qfrs_offset));
@@ -618,23 +616,14 @@ err_with_dev:
void wd_release_queue(struct wd_queue *q)
{
- struct wd_ss_region_list *head;
- struct q_info *qinfo, *sqinfo;
+ struct q_info *qinfo;
if (!q || !q->qinfo) {
WD_ERR("release queue parameter error!\n");
return;
}
- qinfo = q->qinfo;
- if (__atomic_load_n(&qinfo->ref, __ATOMIC_RELAXED)) {
- WD_ERR("q(%s) is busy, release fail!\n", q->capa.alg);
- return;
- }
- head = qinfo->head;
- sqinfo = container_of(head, struct q_info, ss_list);
- if (sqinfo != qinfo) /* q_share */
- __atomic_sub_fetch(&sqinfo->ref, 1, __ATOMIC_RELAXED);
+ qinfo = q->qinfo;
if (ioctl(qinfo->fd, WD_UACCE_CMD_PUT_Q))
WD_ERR("failed to put queue!\n");
@@ -721,48 +710,6 @@ void *wd_reserve_memory(struct wd_queue *q, size_t size)
return drv_reserve_mem(q, size);
}
-int wd_share_reserved_memory(struct wd_queue *q,
- struct wd_queue *target_q)
-{
- const struct dev_info *info, *tgt_info;
- struct q_info *qinfo, *tqinfo;
- int ret;
-
- if (!q || !target_q || !q->qinfo || !target_q->qinfo) {
- WD_ERR("wd share reserved memory: parameter err!\n");
- return -WD_EINVAL;
- }
-
- qinfo = q->qinfo;
- tqinfo = target_q->qinfo;
- tgt_info = tqinfo->dev_info;
- info = qinfo->dev_info;
-
- /* Just share DMA memory from 'q' in NO-IOMMU mode */
- if (qinfo->iommu_type) {
- WD_ERR("IOMMU opened, not support share mem!\n");
- return -WD_EINVAL;
- }
-
- if (qinfo->iommu_type != tqinfo->iommu_type) {
- WD_ERR("IOMMU type mismatching as share mem!\n");
- return -WD_EINVAL;
- }
- if (info->node_id != tgt_info->node_id)
- WD_ERR("Warn: the 2 queues is not at the same node!\n");
-
- ret = ioctl(qinfo->fd, WD_UACCE_CMD_SHARE_SVAS, tqinfo->fd);
- if (ret) {
- WD_ERR("ioctl share dma memory fail!\n");
- return ret;
- }
-
- tqinfo->head = qinfo->head;
- __atomic_add_fetch(&qinfo->ref, 1, __ATOMIC_RELAXED);
-
- return 0;
-}
-
int wd_get_available_dev_num(const char *algorithm)
{
struct wd_queue q;
diff --git a/v1/wd.h b/v1/wd.h
index 79b8a2ad..35dcf31a 100644
--- a/v1/wd.h
+++ b/v1/wd.h
@@ -210,8 +210,6 @@ int wd_recv(struct wd_queue *q, void **resp);
int wd_wait(struct wd_queue *q, __u16 ms);
int wd_recv_sync(struct wd_queue *q, void **resp, __u16 ms);
void *wd_reserve_memory(struct wd_queue *q, size_t size);
-int wd_share_reserved_memory(struct wd_queue *q,
- struct wd_queue *target_q);
int wd_get_available_dev_num(const char *algorithm);
int wd_get_node_id(struct wd_queue *q);
void *wd_iova_map(struct wd_queue *q, void *va, size_t sz);
--
2.33.0
1
0
*** BLURB HERE ***
Chenghai Huang (7):
uadk: fix definition coding standard issues
uadk: add or remove some store buf condition judgments
uadk: add new alg called lz77_only
uadk: remove redundant checks on bit read results
uadk_tool: modify unrecv num in async benchmark test
uadk_tool: add lz77_only alg in zip benchmark test
uadk_tool: add lz4 alg in zip benchmark test
Longfang Liu (3):
uadk: resolve some code issues
uadk: removal of the Shared Memory Interface for Queues
uadk_tools: Removal of the Shared Memory Interface for Queues
Qi Tao (1):
uadk_tool: add aead algorithm
Qinxin Xia (3):
uadk: hisi_comp - abstract get sgl function and general deflate
functions
uadk: hisi_comp - support the new algorithm 'lz4'
uadk: wd_comp - support the new algorithm 'lz4'
Weili Qian (1):
uadk: support data move
Wenkai Lin (2):
uadk: fix for rehash invalid size
uadk: support hashjoin and gather algorithm
Zhushuai Yin (5):
uadk: Add max and min operations at the hash algorithm layer
uadk: hash agg adapter drv parameter
uadk: Add max, min, and rehash implementations
uadk: zip algorithm increases buffer len interception
uadk: fix dh prov segmentation issue
Makefile.am | 31 +-
drv/hisi_comp.c | 580 +++++--
drv/hisi_comp_huf.c | 11 +-
drv/hisi_dae.c | 827 +++-------
drv/hisi_dae.h | 229 +++
drv/hisi_dae_common.c | 387 +++++
drv/hisi_dae_join_gather.c | 1040 ++++++++++++
drv/hisi_qm_udrv.h | 3 +-
drv/hisi_udma.c | 566 +++++++
include/drv/wd_agg_drv.h | 10 +-
include/drv/wd_join_gather_drv.h | 52 +
include/drv/wd_udma_drv.h | 34 +
include/wd_agg.h | 9 +-
include/wd_alg.h | 4 +
include/wd_comp.h | 2 +
include/wd_dae.h | 12 +
include/wd_join_gather.h | 352 +++++
include/wd_udma.h | 124 ++
include/wd_util.h | 2 +
libwd_dae.map | 34 +-
uadk_tool/benchmark/sec_uadk_benchmark.c | 34 +
uadk_tool/benchmark/sec_wd_benchmark.c | 34 +
uadk_tool/benchmark/uadk_benchmark.c | 14 +
uadk_tool/benchmark/uadk_benchmark.h | 4 +
uadk_tool/benchmark/zip_uadk_benchmark.c | 24 +-
v1/drv/hisi_zip_huf.c | 11 +-
v1/drv/hisi_zip_udrv.c | 2 +-
v1/test/hisi_hpre_test/hpre_test_tools.c | 392 -----
v1/test/hisi_zip_test_sgl/wd_sched_sgl.c | 310 ++--
v1/test/test_mm/test_wd_mem.c | 8 +-
v1/test/wd_sched.c | 247 +--
v1/uacce.h | 1 -
v1/wd.c | 57 +-
v1/wd.h | 2 -
wd.c | 2 +-
wd_agg.c | 75 +-
wd_comp.c | 56 +-
wd_join_gather.c | 1823 ++++++++++++++++++++++
wd_sched.c | 3 +-
wd_udma.c | 511 ++++++
wd_util.c | 34 +-
41 files changed, 6435 insertions(+), 1518 deletions(-)
create mode 100644 drv/hisi_dae.h
create mode 100644 drv/hisi_dae_common.c
create mode 100644 drv/hisi_dae_join_gather.c
create mode 100644 drv/hisi_udma.c
create mode 100644 include/drv/wd_join_gather_drv.h
create mode 100644 include/drv/wd_udma_drv.h
create mode 100644 include/wd_join_gather.h
create mode 100644 include/wd_udma.h
create mode 100644 wd_join_gather.c
create mode 100644 wd_udma.c
--
2.33.0
1
22
From: Chenghai Huang <huangchenghai2(a)huawei.com>
Supports LZ77 encoding for LZ4 without additional offset
processing. The output includes literals and sequences (LitLength,
MatchLength, Offset).
Signed-off-by: Chenghai Huang <huangchenghai2(a)huawei.com>
---
drv/hisi_comp.c | 310 +++++++++++++++++++++++++++++++++++++---------
include/wd_comp.h | 1 +
wd_comp.c | 2 +-
wd_util.c | 1 +
4 files changed, 255 insertions(+), 59 deletions(-)
diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c
index 0c36301d..1c9f438f 100644
--- a/drv/hisi_comp.c
+++ b/drv/hisi_comp.c
@@ -84,6 +84,9 @@
#define OVERFLOW_DATA_SIZE 8
#define SEQ_DATA_SIZE_SHIFT 3
#define ZSTD_FREQ_DATA_SIZE 784
+#define ZSTD_MIN_OUT_SIZE 1000
+#define LZ77_MIN_OUT_SIZE 200
+#define PRICE_MIN_OUT_SIZE 4096
#define ZSTD_LIT_RESV_SIZE 16
#define REPCODE_SIZE 12
@@ -108,6 +111,8 @@ enum alg_type {
HW_GZIP,
HW_LZ77_ZSTD_PRICE = 0x42,
HW_LZ77_ZSTD,
+ HW_LZ77_ONLY = 0x40,
+ HW_LZ77_ONLY_PRICE,
};
enum hw_state {
@@ -616,31 +621,30 @@ static void fill_buf_addr_lz77_zstd(struct hisi_zip_sqe *sqe,
sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf);
}
-static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe,
- struct wd_comp_msg *msg)
+static int lz77_zstd_buf_check(struct wd_comp_msg *msg)
{
- struct wd_comp_req *req = &msg->req;
- struct wd_lz77_zstd_data *data = req->priv;
__u32 in_size = msg->req.src_len;
- __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE;
__u32 out_size = msg->avail_out;
- void *ctx_buf = NULL;
+ __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE;
+ __u32 seq_avail_out = out_size - lits_size;
- if (unlikely(!data)) {
- WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n");
- return -WD_EINVAL;
+ if (unlikely(in_size > ZSTD_MAX_SIZE)) {
+ WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", in_size);
+ return -WD_EINVAL;
}
- if (unlikely(in_size > ZSTD_MAX_SIZE)) {
- WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n",
- in_size);
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 &&
+ seq_avail_out <= ZSTD_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n",
+ out_size, ZSTD_MIN_OUT_SIZE + lits_size);
return -WD_EINVAL;
}
- if (unlikely(out_size > HZ_MAX_SIZE)) {
- WD_ERR("warning: avail_out(%u) is out of range , will set 8MB size max!\n",
- out_size);
- out_size = HZ_MAX_SIZE;
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 &&
+ seq_avail_out <= PRICE_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n",
+ out_size, PRICE_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
}
/*
@@ -653,14 +657,92 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe,
return -WD_EINVAL;
}
+ return 0;
+}
+
+static int lz77_only_buf_check(struct wd_comp_msg *msg)
+{
+ __u32 in_size = msg->req.src_len;
+ __u32 out_size = msg->avail_out;
+ __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE;
+ __u32 seq_avail_out = out_size - lits_size;
+
+ /* lits_size need to be less than 8M when use pbuffer */
+ if (unlikely(lits_size > HZ_MAX_SIZE)) {
+ WD_ERR("invalid: in_len(%u) of lz77_only is out of range!\n", in_size);
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 &&
+ seq_avail_out <= LZ77_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n",
+ out_size, LZ77_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 &&
+ seq_avail_out <= PRICE_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n",
+ out_size, PRICE_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ /* For lz77_only, the hardware needs 32 Bytes buffer to output the dfx information */
+ if (unlikely(out_size < ZSTD_LIT_RESV_SIZE + lits_size)) {
+ WD_ERR("invalid: output is not enough, %u bytes are minimum!\n",
+ ZSTD_LIT_RESV_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ return 0;
+}
+
+static int lz77_buf_check(struct wd_comp_msg *msg)
+{
+ enum wd_comp_alg_type alg_type = msg->alg_type;
+
+ if (alg_type == WD_LZ77_ZSTD)
+ return lz77_zstd_buf_check(msg);
+ else if (alg_type == WD_LZ77_ONLY)
+ return lz77_only_buf_check(msg);
+
+ return 0;
+}
+
+static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe,
+ struct wd_comp_msg *msg)
+{
+ struct wd_comp_req *req = &msg->req;
+ struct wd_lz77_zstd_data *data = req->priv;
+ __u32 in_size = msg->req.src_len;
+ __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE;
+ __u32 seq_avail_out = msg->avail_out - lits_size;
+ void *ctx_buf = NULL;
+ int ret;
+
+ if (unlikely(!data)) {
+ WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ ret = lz77_buf_check(msg);
+ if (ret)
+ return ret;
+
+ if (unlikely(seq_avail_out > HZ_MAX_SIZE)) {
+ WD_ERR("warning: sequence avail_out(%u) is out of range , will set 8MB size max!\n",
+ seq_avail_out);
+ seq_avail_out = HZ_MAX_SIZE;
+ }
+
if (msg->ctx_buf) {
ctx_buf = msg->ctx_buf + RSV_OFFSET;
- if (data->blk_type != COMP_BLK)
+ if (msg->alg_type == WD_LZ77_ZSTD && data->blk_type != COMP_BLK)
memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET,
msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE);
}
- fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size);
+ fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out);
fill_buf_addr_lz77_zstd(sqe, req->src, req->dst, req->dst + lits_size, ctx_buf);
@@ -685,6 +767,103 @@ static struct wd_datalist *get_seq_start_list(struct wd_comp_req *req)
return cur;
}
+static int lz77_zstd_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size)
+{
+ __u32 in_size = msg->req.src_len;
+ __u32 out_size = msg->avail_out;
+ __u32 seq_avail_out;
+
+ if (unlikely(in_size > ZSTD_MAX_SIZE)) {
+ WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", in_size);
+ return -WD_EINVAL;
+ }
+
+ /*
+ * For lz77_zstd, the hardware needs 784 Bytes buffer to output
+ * the frequency information about input data. The sequences
+ * and frequency data need to be written to an independent sgl
+ * splited from list_dst.
+ */
+ if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) {
+ WD_ERR("invalid: output is not enough for literals, at least %u bytes!\n",
+ ZSTD_FREQ_DATA_SIZE + lits_size);
+ return -WD_EINVAL;
+ } else if (unlikely(out_size < ZSTD_FREQ_DATA_SIZE + lits_size)) {
+ WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n",
+ ZSTD_FREQ_DATA_SIZE + lits_size - out_size);
+ return -WD_EINVAL;
+ }
+
+ seq_avail_out = out_size - lits_size;
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 &&
+ seq_avail_out <= ZSTD_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n",
+ out_size, ZSTD_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 &&
+ seq_avail_out <= PRICE_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n",
+ out_size, PRICE_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ return 0;
+}
+
+static int lz77_only_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size)
+{
+ __u32 in_size = msg->req.src_len;
+ __u32 out_size = msg->avail_out;
+ __u32 seq_avail_out;
+
+ /*
+ * For lz77_only, the hardware needs 32 Bytes buffer to output
+ * the dfx information. The literals and sequences data need to be written
+ * to an independent sgl splited from list_dst.
+ */
+ if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) {
+ WD_ERR("invalid: output is not enough for literals, at least %u bytes!\n",
+ ZSTD_LIT_RESV_SIZE + lits_size);
+ return -WD_EINVAL;
+ } else if (unlikely(out_size < ZSTD_LIT_RESV_SIZE + lits_size)) {
+ WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n",
+ ZSTD_LIT_RESV_SIZE + lits_size - out_size);
+ return -WD_EINVAL;
+ }
+
+ seq_avail_out = out_size - lits_size;
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 &&
+ seq_avail_out <= LZ77_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n",
+ out_size, LZ77_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 &&
+ seq_avail_out <= PRICE_MIN_OUT_SIZE)) {
+ WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n",
+ out_size, PRICE_MIN_OUT_SIZE + lits_size);
+ return -WD_EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int lz77_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size)
+{
+ enum wd_comp_alg_type alg_type = msg->alg_type;
+
+ if (alg_type == WD_LZ77_ZSTD)
+ return lz77_zstd_buf_check_sgl(msg, lits_size);
+ else if (alg_type == WD_LZ77_ONLY)
+ return lz77_only_buf_check_sgl(msg, lits_size);
+
+ return 0;
+}
+
static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
struct wd_comp_msg *msg)
{
@@ -698,12 +877,6 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
__u32 lits_size;
int ret;
- if (unlikely(in_size > ZSTD_MAX_SIZE)) {
- WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n",
- in_size);
- return -WD_EINVAL;
- }
-
if (unlikely(!data)) {
WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n");
return -WD_EINVAL;
@@ -715,26 +888,15 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
if (unlikely(!seq_start))
return -WD_EINVAL;
+ lits_size = hisi_qm_get_list_size(req->list_dst, seq_start);
+
+ ret = lz77_buf_check_sgl(msg, lits_size);
+ if (ret)
+ return ret;
+
data->literals_start = req->list_dst;
data->sequences_start = seq_start;
- /*
- * For lz77_zstd, the hardware needs 784 Bytes buffer to output
- * the frequency information about input data. The sequences
- * and frequency data need to be written to an independent sgl
- * splited from list_dst.
- */
- lits_size = hisi_qm_get_list_size(req->list_dst, seq_start);
- if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) {
- WD_ERR("invalid: output is not enough for literals, %u bytes are minimum!\n",
- ZSTD_FREQ_DATA_SIZE + lits_size);
- return -WD_EINVAL;
- } else if (unlikely(out_size < ZSTD_FREQ_DATA_SIZE + lits_size)) {
- WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n",
- ZSTD_FREQ_DATA_SIZE + lits_size - out_size);
- return -WD_EINVAL;
- }
-
fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size);
h_sgl_pool = hisi_qm_get_sglpool(h_qp);
@@ -824,6 +986,15 @@ static void fill_alg_lz77_zstd(struct hisi_zip_sqe *sqe)
sqe->dw9 = val;
}
+static void fill_alg_lz77_only(struct hisi_zip_sqe *sqe)
+{
+ __u32 val;
+
+ val = sqe->dw9 & ~HZ_REQ_TYPE_MASK;
+ val |= HW_LZ77_ONLY;
+ sqe->dw9 = val;
+}
+
static void fill_tag_v1(struct hisi_zip_sqe *sqe, __u32 tag)
{
sqe->dw13 = tag;
@@ -841,7 +1012,7 @@ static int fill_comp_level_deflate(struct hisi_zip_sqe *sqe, enum wd_comp_level
static int fill_comp_level_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_level comp_lv)
{
- __u32 val;
+ __u32 val, alg;
switch (comp_lv) {
case WD_COMP_L8:
@@ -851,8 +1022,12 @@ static int fill_comp_level_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_leve
*/
break;
case WD_COMP_L9:
+ alg = sqe->dw9 & HZ_REQ_TYPE_MASK;
val = sqe->dw9 & ~HZ_REQ_TYPE_MASK;
- val |= HW_LZ77_ZSTD_PRICE;
+ if (alg == HW_LZ77_ZSTD)
+ val |= HW_LZ77_ZSTD_PRICE;
+ else if (alg == HW_LZ77_ONLY)
+ val |= HW_LZ77_ONLY_PRICE;
sqe->dw9 = val;
break;
default:
@@ -911,18 +1086,22 @@ static void get_data_size_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_op_ty
if (unlikely(!data))
return;
+ recv_msg->in_cons = sqe->consumed;
data->lit_num = sqe->comp_data_length;
data->seq_num = sqe->produced;
- data->lit_length_overflow_cnt = sqe->dw31 >> LITLEN_OVERFLOW_CNT_SHIFT;
- data->lit_length_overflow_pos = sqe->dw31 & LITLEN_OVERFLOW_POS_MASK;
- data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) +
- OVERFLOW_DATA_SIZE;
-
- if (ctx_buf) {
- memcpy(ctx_buf + CTX_REPCODE2_OFFSET,
- ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE);
- memcpy(ctx_buf + CTX_REPCODE1_OFFSET,
- ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE);
+
+ if (recv_msg->alg_type == WD_LZ77_ZSTD) {
+ data->lit_length_overflow_cnt = sqe->dw31 >> LITLEN_OVERFLOW_CNT_SHIFT;
+ data->lit_length_overflow_pos = sqe->dw31 & LITLEN_OVERFLOW_POS_MASK;
+ data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) +
+ OVERFLOW_DATA_SIZE;
+
+ if (ctx_buf) {
+ memcpy(ctx_buf + CTX_REPCODE2_OFFSET,
+ ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE);
+ memcpy(ctx_buf + CTX_REPCODE1_OFFSET,
+ ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE);
+ }
}
}
@@ -970,6 +1149,16 @@ struct hisi_zip_sqe_ops ops[] = { {
.fill_comp_level = fill_comp_level_lz77_zstd,
.get_data_size = get_data_size_lz77_zstd,
.get_tag = get_tag_v3,
+ }, {
+ .alg_name = "lz77_only",
+ .fill_buf[WD_FLAT_BUF] = fill_buf_lz77_zstd,
+ .fill_buf[WD_SGL_BUF] = fill_buf_lz77_zstd_sgl,
+ .fill_sqe_type = fill_sqe_type_v3,
+ .fill_alg = fill_alg_lz77_only,
+ .fill_tag = fill_tag_v3,
+ .fill_comp_level = fill_comp_level_lz77_zstd,
+ .get_data_size = get_data_size_lz77_zstd,
+ .get_tag = get_tag_v3,
}
};
@@ -1079,10 +1268,6 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg,
return -WD_EINVAL;
}
- ret = ops[alg_type].fill_comp_level(sqe, msg->comp_lv);
- if (unlikely(ret))
- return ret;
-
ret = ops[alg_type].fill_buf[msg->req.data_fmt]((handle_t)qp, sqe, msg);
if (unlikely(ret))
return ret;
@@ -1091,6 +1276,10 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg,
ops[alg_type].fill_alg(sqe);
+ ret = ops[alg_type].fill_comp_level(sqe, msg->comp_lv);
+ if (unlikely(ret))
+ return ret;
+
ops[alg_type].fill_tag(sqe, msg->tag);
state = (msg->stream_mode == WD_COMP_STATEFUL) ? HZ_STATEFUL :
@@ -1132,7 +1321,7 @@ static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l);
hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out);
- if (alg_type == WD_LZ77_ZSTD) {
+ if (alg_type == WD_LZ77_ZSTD || alg_type == WD_LZ77_ONLY) {
hw_sgl_out = VA_ADDR(sqe->literals_addr_h,
sqe->literals_addr_l);
hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out);
@@ -1190,6 +1379,10 @@ static int get_alg_type(__u32 type)
case HW_LZ77_ZSTD_PRICE:
alg_type = WD_LZ77_ZSTD;
break;
+ case HW_LZ77_ONLY:
+ case HW_LZ77_ONLY_PRICE:
+ alg_type = WD_LZ77_ONLY;
+ break;
default:
break;
}
@@ -1369,6 +1562,7 @@ static struct wd_alg_driver zip_alg_driver[] = {
GEN_ZIP_ALG_DRIVER("deflate"),
GEN_ZIP_ALG_DRIVER("lz77_zstd"),
+ GEN_ZIP_ALG_DRIVER("lz77_only"),
};
#ifdef WD_STATIC_DRV
diff --git a/include/wd_comp.h b/include/wd_comp.h
index 45994ff6..0012ef6b 100644
--- a/include/wd_comp.h
+++ b/include/wd_comp.h
@@ -20,6 +20,7 @@ enum wd_comp_alg_type {
WD_ZLIB,
WD_GZIP,
WD_LZ77_ZSTD,
+ WD_LZ77_ONLY,
WD_COMP_ALG_MAX,
};
diff --git a/wd_comp.c b/wd_comp.c
index 647c320e..8e47a32f 100644
--- a/wd_comp.c
+++ b/wd_comp.c
@@ -27,7 +27,7 @@
#define cpu_to_be32(x) swap_byte(x)
static const char *wd_comp_alg_name[WD_COMP_ALG_MAX] = {
- "zlib", "gzip", "deflate", "lz77_zstd"
+ "zlib", "gzip", "deflate", "lz77_zstd", "lz77_only"
};
struct wd_comp_sess {
diff --git a/wd_util.c b/wd_util.c
index 669743cb..f21b3236 100644
--- a/wd_util.c
+++ b/wd_util.c
@@ -107,6 +107,7 @@ static struct acc_alg_item alg_options[] = {
{"gzip", "gzip"},
{"deflate", "deflate"},
{"lz77_zstd", "lz77_zstd"},
+ {"lz77_only", "lz77_only"},
{"hashagg", "hashagg"},
{"udma", "udma"},
--
2.33.0
1
5
From: Longfang Liu <liulongfang(a)huawei.com>
In the No-SVA mode of UADK, a feature was implemented to reserve
memory allocated for shared queues, which was exposed to external
users through the wd_share_reserved_memory interface.
After this functionality was removed from the kernel-space UACCE,
the corresponding external interface in UADK must also be deleted
to maintain consistency.
Signed-off-by: Longfang Liu <liulongfang(a)huawei.com>
---
v1/uacce.h | 1 -
v1/wd.c | 57 ++----------------------------------------------------
v1/wd.h | 2 --
3 files changed, 2 insertions(+), 58 deletions(-)
diff --git a/v1/uacce.h b/v1/uacce.h
index eef932c7..ffd576c1 100644
--- a/v1/uacce.h
+++ b/v1/uacce.h
@@ -79,7 +79,6 @@ enum uacce_qfrt {
* Optimization method since close fd may delay
*/
#define WD_UACCE_CMD_PUT_Q _IO('W', 1)
-#define WD_UACCE_CMD_SHARE_SVAS _IO('W', 2)
#define WD_UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long)
#endif
diff --git a/v1/wd.c b/v1/wd.c
index 13239b58..bab2712f 100644
--- a/v1/wd.c
+++ b/v1/wd.c
@@ -58,7 +58,6 @@ struct dev_info {
int node_id;
int numa_dis;
int flags;
- int ref;
int available_instances;
int iommu_type;
unsigned int weight;
@@ -531,7 +530,6 @@ static int get_queue_from_dev(struct wd_queue *q, const struct dev_info *dev)
qinfo->iommu_type = dev->iommu_type;
qinfo->dev_info = dev;
qinfo->head = &qinfo->ss_list;
- __atomic_clear(&qinfo->ref, __ATOMIC_RELEASE);
TAILQ_INIT(&qinfo->ss_list);
memcpy(qinfo->qfrs_offset, dev->qfrs_offset,
sizeof(qinfo->qfrs_offset));
@@ -618,23 +616,14 @@ err_with_dev:
void wd_release_queue(struct wd_queue *q)
{
- struct wd_ss_region_list *head;
- struct q_info *qinfo, *sqinfo;
+ struct q_info *qinfo;
if (!q || !q->qinfo) {
WD_ERR("release queue parameter error!\n");
return;
}
- qinfo = q->qinfo;
- if (__atomic_load_n(&qinfo->ref, __ATOMIC_RELAXED)) {
- WD_ERR("q(%s) is busy, release fail!\n", q->capa.alg);
- return;
- }
- head = qinfo->head;
- sqinfo = container_of(head, struct q_info, ss_list);
- if (sqinfo != qinfo) /* q_share */
- __atomic_sub_fetch(&sqinfo->ref, 1, __ATOMIC_RELAXED);
+ qinfo = q->qinfo;
if (ioctl(qinfo->fd, WD_UACCE_CMD_PUT_Q))
WD_ERR("failed to put queue!\n");
@@ -721,48 +710,6 @@ void *wd_reserve_memory(struct wd_queue *q, size_t size)
return drv_reserve_mem(q, size);
}
-int wd_share_reserved_memory(struct wd_queue *q,
- struct wd_queue *target_q)
-{
- const struct dev_info *info, *tgt_info;
- struct q_info *qinfo, *tqinfo;
- int ret;
-
- if (!q || !target_q || !q->qinfo || !target_q->qinfo) {
- WD_ERR("wd share reserved memory: parameter err!\n");
- return -WD_EINVAL;
- }
-
- qinfo = q->qinfo;
- tqinfo = target_q->qinfo;
- tgt_info = tqinfo->dev_info;
- info = qinfo->dev_info;
-
- /* Just share DMA memory from 'q' in NO-IOMMU mode */
- if (qinfo->iommu_type) {
- WD_ERR("IOMMU opened, not support share mem!\n");
- return -WD_EINVAL;
- }
-
- if (qinfo->iommu_type != tqinfo->iommu_type) {
- WD_ERR("IOMMU type mismatching as share mem!\n");
- return -WD_EINVAL;
- }
- if (info->node_id != tgt_info->node_id)
- WD_ERR("Warn: the 2 queues is not at the same node!\n");
-
- ret = ioctl(qinfo->fd, WD_UACCE_CMD_SHARE_SVAS, tqinfo->fd);
- if (ret) {
- WD_ERR("ioctl share dma memory fail!\n");
- return ret;
- }
-
- tqinfo->head = qinfo->head;
- __atomic_add_fetch(&qinfo->ref, 1, __ATOMIC_RELAXED);
-
- return 0;
-}
-
int wd_get_available_dev_num(const char *algorithm)
{
struct wd_queue q;
diff --git a/v1/wd.h b/v1/wd.h
index 79b8a2ad..35dcf31a 100644
--- a/v1/wd.h
+++ b/v1/wd.h
@@ -210,8 +210,6 @@ int wd_recv(struct wd_queue *q, void **resp);
int wd_wait(struct wd_queue *q, __u16 ms);
int wd_recv_sync(struct wd_queue *q, void **resp, __u16 ms);
void *wd_reserve_memory(struct wd_queue *q, size_t size);
-int wd_share_reserved_memory(struct wd_queue *q,
- struct wd_queue *target_q);
int wd_get_available_dev_num(const char *algorithm);
int wd_get_node_id(struct wd_queue *q);
void *wd_iova_map(struct wd_queue *q, void *va, size_t sz);
--
2.33.0
1
1
From: Weili Qian <qianweili(a)huawei.com>
UADK supports hardware acceleration for data movement.
Currently, data copying and initialization are supported.
Signed-off-by: Weili Qian <qianweili(a)huawei.com>
---
Makefile.am | 27 +-
drv/hisi_udma.c | 566 ++++++++++++++++++++++++++++++++++++++
include/drv/wd_udma_drv.h | 34 +++
include/wd_alg.h | 2 +
include/wd_udma.h | 124 +++++++++
include/wd_util.h | 1 +
libwd_dae.map | 15 +-
wd_udma.c | 511 ++++++++++++++++++++++++++++++++++
wd_util.c | 2 +
9 files changed, 1278 insertions(+), 4 deletions(-)
create mode 100644 drv/hisi_udma.c
create mode 100644 include/drv/wd_udma_drv.h
create mode 100644 include/wd_udma.h
create mode 100644 wd_udma.c
diff --git a/Makefile.am b/Makefile.am
index c4b9c526..df756f72 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,16 +36,17 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \
include/wd_comp.h include/wd_dh.h include/wd_digest.h \
include/wd_rsa.h include/uacce.h include/wd_alg_common.h \
include/wd_ecc.h include/wd_sched.h include/wd_alg.h \
- include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h
+ include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \
+ include/wd_udma.h
nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \
v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h
-lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la
+lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la libwd_udma.la
uadk_driversdir=$(libdir)/uadk
uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \
- libisa_ce.la libisa_sve.la libhisi_dae.la
+ libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la
libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \
v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \
@@ -69,6 +70,9 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \
v1/drv/hisi_sec_udrv.c v1/drv/hisi_sec_udrv.h \
v1/drv/hisi_rng_udrv.c v1/drv/hisi_rng_udrv.h
+libwd_udma_la_SOURCES=wd_udma.h wd_udma_drv.h wd_udma.c \
+ wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h
+
libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c \
wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h
@@ -110,6 +114,9 @@ endif
libhisi_dae_la_SOURCES=drv/hisi_dae.c drv/hisi_qm_udrv.c \
hisi_qm_udrv.h
+libhisi_udma_la_SOURCES=drv/hisi_udma.c drv/hisi_qm_udrv.c \
+ hisi_qm_udrv.h
+
if WD_STATIC_DRV
AM_CFLAGS += -DWD_STATIC_DRV -fPIC
AM_CFLAGS += -DWD_NO_LOG
@@ -124,6 +131,9 @@ libhisi_zip_la_LIBADD = -ldl
libwd_crypto_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma
libwd_crypto_la_DEPENDENCIES = libwd.la
+libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread
+libwd_udma_la_DEPENDENCIES = libwd.la
+
libwd_dae_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma
libwd_dae_la_DEPENDENCIES = libwd.la
@@ -139,6 +149,9 @@ libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la
libisa_sve_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS)
libisa_sve_la_DEPENDENCIES = libwd.la libwd_crypto.la
+libhisi_udma_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_udma_la_OBJECTS)
+libhisi_udma_la_DEPENDENCIES = libwd.la libwd_udma.la
+
libhisi_dae_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_dae_la_OBJECTS)
libhisi_dae_la_DEPENDENCIES = libwd.la libwd_dae.la
@@ -160,6 +173,10 @@ libwd_crypto_la_LIBADD= -lwd -ldl -lnuma
libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) -lpthread
libwd_crypto_la_DEPENDENCIES= libwd.la
+libwd_udma_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread
+libwd_udma_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL)
+libwd_udma_la_DEPENDENCIES= libwd.la
+
libwd_dae_la_LIBADD= -lwd -ldl -lnuma -lm
libwd_dae_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL)
libwd_dae_la_DEPENDENCIES= libwd.la
@@ -184,6 +201,10 @@ libisa_sve_la_LIBADD= -lwd -lwd_crypto
libisa_sve_la_LDFLAGS=$(UADK_VERSION)
libisa_sve_la_DEPENDENCIES= libwd.la libwd_crypto.la
+libhisi_udma_la_LIBADD= -lwd -lwd_udma
+libhisi_udma_la_LDFLAGS=$(UADK_VERSION)
+libhisi_udma_la_DEPENDENCIES= libwd.la libwd_udma.la
+
libhisi_dae_la_LIBADD= -lwd -lwd_dae
libhisi_dae_la_LDFLAGS=$(UADK_VERSION)
libhisi_dae_la_DEPENDENCIES= libwd.la libwd_dae.la
diff --git a/drv/hisi_udma.c b/drv/hisi_udma.c
new file mode 100644
index 00000000..57dae8cb
--- /dev/null
+++ b/drv/hisi_udma.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: Apache-2.0
+/* Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include "hisi_qm_udrv.h"
+#include "../include/drv/wd_udma_drv.h"
+
+#define BIT(nr) (1UL << (nr))
+#define UDMA_CTX_Q_NUM_DEF 1
+#define UDMA_TASK_TYPE 0x3
+#define UDMA_SQE_TYPE 0x1
+#define UDMA_ALG_TYPE 2
+/* Multi max data size is (16M -1) * 64 */
+#define UDMA_M_MAX_ADDR_SIZE 1073741760
+/* Single max data size is (16M - 1) */
+#define UDMA_S_MAX_ADDR_SIZE 16777215
+#define UDMA_MAX_ADDR_NUM 64
+#define UDMA_ADDR_NUM_SHIFT 6
+#define UDMA_MULTI_ADDR_EN BIT(14)
+#define UDMA_ADDR_NUM_SHIFT 6
+#define UDMA_SVA_PREFETCH_EN BIT(15)
+#define UDMA_ADDR_RESV_NUM 16
+#define UDMA_ADDR_ALIGN_SIZE 128
+
+enum {
+ DATA_MEMCPY = 0x0,
+ DATA_MEMSET = 0x7,
+};
+
+enum {
+ UDMA_TASK_DONE = 0x1,
+ UDMA_TASK_ERROR = 0x2,
+};
+
+struct udma_addr {
+ __u64 addr;
+ __u64 data_size;
+};
+
+struct udma_addr_array {
+ __u64 resv_addr[UDMA_ADDR_RESV_NUM];
+ struct udma_addr src_addr[UDMA_MAX_ADDR_NUM];
+ struct udma_addr dst_addr[UDMA_MAX_ADDR_NUM];
+};
+
+struct udma_sqe {
+ __u32 bd_type : 6;
+ __u32 resv1 : 2;
+ __u32 task_type : 6;
+ __u32 resv2 : 2;
+ __u32 task_type_ext : 6;
+ __u32 resv3 : 9;
+ __u32 bd_invlid : 1;
+ __u32 rsv4[2];
+ __u32 low_tag;
+ __u32 hi_tag;
+ /* The number of bytes to be copied or filled for single address. */
+ __u32 data_size;
+ __u32 rsv5;
+ /*
+ * 0 ~ 13 bits: reserved,
+ * 14 bit: single address or multi addresses,
+ * 15 bit: sva prefetch en.
+ */
+ __u16 dw0;
+ /*
+ * 0 ~5 bits: reserved,
+ * 6 ~ 13 bits: address num,
+ * 14 ~15 bits: reserved.
+ */
+ __u16 dw1;
+ __u64 init_val;
+ __u32 rsv6[12];
+ /* dst addr for single address task */
+ __u64 dst_addr;
+ __u32 rsv7[2];
+ /* src addr for single address task, addr array for multi addresses. */
+ __u64 addr_array;
+ __u32 done_flag : 3;
+ __u32 rsv8 : 1;
+ __u32 ext_err_type : 12;
+ __u32 err_type : 8;
+ __u32 wtype : 8;
+ __u32 rsv9[3];
+};
+
+struct udma_internal_addr {
+ struct udma_addr_array *addr_array;
+ __u8 *addr_status;
+ __u16 addr_count;
+ __u16 tail;
+};
+
+struct hisi_udma_ctx {
+ struct wd_ctx_config_internal config;
+};
+
+static int get_free_inter_addr(struct udma_internal_addr *inter_addr)
+{
+ __u16 addr_count = inter_addr->addr_count;
+ __u16 idx = inter_addr->tail;
+ __u16 cnt = 0;
+
+ if (unlikely(!addr_count)) {
+ WD_ERR("invalid: internal addr count is 0!\n");
+ return -WD_EINVAL;
+ }
+
+ while (__atomic_test_and_set(&inter_addr->addr_status[idx], __ATOMIC_ACQUIRE)) {
+ idx = (idx + 1) % addr_count;
+ cnt++;
+ if (cnt == addr_count)
+ return -WD_EBUSY;
+ }
+
+ inter_addr->tail = (idx + 1) % addr_count;
+
+ return idx;
+}
+
+static void put_inter_addr(struct udma_internal_addr *inter_addr, int idx)
+{
+ __atomic_clear(&inter_addr->addr_status[idx], __ATOMIC_RELEASE);
+}
+
+static int check_udma_param(struct wd_udma_msg *msg)
+{
+ int i;
+
+ if (unlikely(!msg)) {
+ WD_ERR("invalid: input udma msg is NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(msg->addr_num > UDMA_MAX_ADDR_NUM)) {
+ WD_ERR("invalid: input addr_num is more than %d!\n", UDMA_MAX_ADDR_NUM);
+ return -WD_EINVAL;
+ }
+
+ /*
+ * When the single address length exceeds UDMA_S_MAX_ADDR_SIZE,
+ * the driver will split the address into multiple addresses and
+ * send them to the hardware.
+ */
+ if (msg->addr_num == 1) {
+ if (unlikely(msg->dst->data_size > UDMA_M_MAX_ADDR_SIZE)) {
+ WD_ERR("invalid: input size %lu is more than %d!\n",
+ msg->dst->data_size, UDMA_M_MAX_ADDR_SIZE);
+ return -WD_EINVAL;
+ }
+
+ return WD_SUCCESS;
+ }
+
+ for (i = 0; i < msg->addr_num; i++) {
+ if (unlikely(msg->dst[i].data_size > UDMA_S_MAX_ADDR_SIZE)) {
+ WD_ERR("invalid: addr %d input size %lu is more than %d!\n",
+ i, msg->dst[i].data_size, UDMA_S_MAX_ADDR_SIZE);
+ return -WD_EINVAL;
+ }
+ }
+
+ return WD_SUCCESS;
+}
+
+static void fill_long_size_memcpy_info(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ __u32 addr_num = 0;
+ __u64 count;
+
+ for (count = 0; count < msg->src->data_size; count += UDMA_S_MAX_ADDR_SIZE) {
+ addr_array->src_addr[addr_num].addr = (__u64)(uintptr_t)msg->src->addr + count;
+ addr_array->dst_addr[addr_num].addr = (__u64)(uintptr_t)msg->dst->addr + count;
+ if (count + UDMA_S_MAX_ADDR_SIZE <= msg->src->data_size) {
+ addr_array->src_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE;
+ addr_array->dst_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE;
+ } else {
+ addr_array->src_addr[addr_num].data_size = msg->src->data_size - count;
+ addr_array->dst_addr[addr_num].data_size = msg->dst->data_size - count;
+ }
+ addr_num++;
+ }
+ sqe->dw1 |= (addr_num - 1) << UDMA_ADDR_NUM_SHIFT;
+}
+
+static void fill_long_size_memset_info(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ __u32 addr_num = 0;
+ __u64 count;
+
+ for (count = 0; count < msg->dst->data_size; count += UDMA_S_MAX_ADDR_SIZE) {
+ addr_array->dst_addr[addr_num].addr = (__u64)(uintptr_t)msg->dst->addr + count;
+ if (count + UDMA_S_MAX_ADDR_SIZE <= msg->dst->data_size)
+ addr_array->dst_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE;
+ else
+ addr_array->dst_addr[addr_num].data_size = msg->dst->data_size - count;
+ addr_num++;
+ }
+
+ sqe->dw1 |= (addr_num - 1) << UDMA_ADDR_NUM_SHIFT;
+}
+
+static void fill_mulit_memset_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ int i;
+
+ for (i = 0; i < msg->addr_num; i++) {
+ addr_array->dst_addr[i].addr = (__u64)(uintptr_t)msg->dst[i].addr;
+ addr_array->dst_addr[i].data_size = (__u64)(uintptr_t)msg->dst[i].data_size;
+ }
+
+ sqe->dw1 |= ((__u32)msg->addr_num - 1) << UDMA_ADDR_NUM_SHIFT;
+}
+
+static void fill_multi_memcpy_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ int i;
+
+ for (i = 0; i < msg->addr_num; i++) {
+ addr_array->src_addr[i].addr = (__u64)(uintptr_t)msg->src[i].addr;
+ addr_array->src_addr[i].data_size = (__u64)(uintptr_t)msg->src[i].data_size;
+ addr_array->dst_addr[i].addr = (__u64)(uintptr_t)msg->dst[i].addr;
+ addr_array->dst_addr[i].data_size = (__u64)(uintptr_t)msg->dst[i].data_size;
+ }
+
+ sqe->dw1 |= ((__u32)msg->addr_num - 1) << UDMA_ADDR_NUM_SHIFT;
+}
+
+static void fill_multi_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ if (msg->addr_num == 1) {
+ if (msg->op_type == WD_UDMA_MEMCPY)
+ fill_long_size_memcpy_info(sqe, msg, addr_array);
+ else
+ fill_long_size_memset_info(sqe, msg, addr_array);
+ } else {
+ if (msg->op_type == WD_UDMA_MEMCPY)
+ fill_multi_memcpy_addr_info(sqe, msg, addr_array);
+ else
+ fill_mulit_memset_addr_info(sqe, msg, addr_array);
+ }
+
+ sqe->addr_array = (__u64)(uintptr_t)addr_array;
+ sqe->dw0 |= UDMA_MULTI_ADDR_EN;
+}
+
+static void fill_single_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg)
+{
+ if (msg->op_type == WD_UDMA_MEMCPY)
+ sqe->addr_array = (__u64)(uintptr_t)msg->src->addr;
+ sqe->dst_addr = (__u64)(uintptr_t)msg->dst->addr;
+ sqe->data_size = msg->dst->data_size;
+}
+
+static void fill_udma_sqe_addr(struct udma_sqe *sqe, struct wd_udma_msg *msg,
+ struct udma_addr_array *addr_array)
+{
+ if (!addr_array)
+ fill_single_addr_info(sqe, msg);
+ else
+ fill_multi_addr_info(sqe, msg, addr_array);
+}
+
+static void fill_sqe_type(struct udma_sqe *sqe, struct wd_udma_msg *msg)
+{
+ sqe->bd_type = UDMA_SQE_TYPE;
+ sqe->task_type = UDMA_TASK_TYPE;
+ if (msg->op_type == WD_UDMA_MEMCPY)
+ sqe->task_type_ext = DATA_MEMCPY;
+ else
+ sqe->task_type_ext = DATA_MEMSET;
+}
+
+static void fill_init_value(struct udma_sqe *sqe, struct wd_udma_msg *msg)
+{
+ if (msg->op_type == WD_UDMA_MEMSET)
+ memset(&sqe->init_val, msg->value, sizeof(__u64));
+}
+
+static int udma_send(struct wd_alg_driver *drv, handle_t ctx, void *udma_msg)
+{
+ handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx);
+ struct hisi_qp *qp = (struct hisi_qp *)h_qp;
+ struct udma_internal_addr *inter_addr = qp->priv;
+ struct udma_addr_array *addr_array = NULL;
+ struct wd_udma_msg *msg = udma_msg;
+ struct udma_sqe sqe = {0};
+ __u16 send_cnt = 0;
+ int idx = 0;
+ int ret;
+
+ ret = check_udma_param(msg);
+ if (unlikely(ret))
+ return ret;
+
+ if (msg->addr_num > 1 || msg->dst->data_size > UDMA_S_MAX_ADDR_SIZE) {
+ idx = get_free_inter_addr(inter_addr);
+ if (idx < 0)
+ return -WD_EBUSY;
+
+ addr_array = &inter_addr->addr_array[idx];
+ memset(addr_array, 0, sizeof(struct udma_addr_array));
+ }
+
+ fill_sqe_type(&sqe, msg);
+ fill_init_value(&sqe, msg);
+ fill_udma_sqe_addr(&sqe, msg, addr_array);
+
+ hisi_set_msg_id(h_qp, &msg->tag);
+ sqe.low_tag = msg->tag;
+ sqe.hi_tag = (__u32)idx;
+ sqe.dw0 |= UDMA_SVA_PREFETCH_EN;
+
+ ret = hisi_qm_send(h_qp, &sqe, 1, &send_cnt);
+ if (unlikely(ret)) {
+ if (ret != -WD_EBUSY)
+ WD_ERR("failed to send to hardware, ret = %d!\n", ret);
+ if (addr_array)
+ put_inter_addr(inter_addr, idx);
+ return ret;
+ }
+
+ return WD_SUCCESS;
+}
+
+static void dump_udma_msg(struct udma_sqe *sqe, struct wd_udma_msg *msg)
+{
+ WD_ERR("dump UDMA message after a task error occurs.\n"
+ "op_type:%u addr_num:%d.\n", msg->op_type, msg->addr_num);
+}
+
+static int udma_recv(struct wd_alg_driver *drv, handle_t ctx, void *udma_msg)
+{
+ handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx);
+ struct hisi_qp *qp = (struct hisi_qp *)h_qp;
+ struct udma_internal_addr *inter_addr = qp->priv;
+ struct wd_udma_msg *msg = udma_msg;
+ struct wd_udma_msg *temp_msg = msg;
+ struct udma_sqe sqe = {0};
+ __u16 recv_cnt = 0;
+ int ret;
+
+ ret = hisi_qm_recv(h_qp, &sqe, 1, &recv_cnt);
+ if (ret)
+ return ret;
+
+ ret = hisi_check_bd_id(h_qp, msg->tag, sqe.low_tag);
+ if (ret)
+ goto out;
+
+ msg->tag = sqe.low_tag;
+ if (qp->q_info.qp_mode == CTX_MODE_ASYNC) {
+ temp_msg = wd_udma_get_msg(qp->q_info.idx, msg->tag);
+ if (!temp_msg) {
+ WD_ERR("failed to get send msg! idx = %u, tag = %u.\n",
+ qp->q_info.idx, msg->tag);
+ ret = -WD_EINVAL;
+ goto out;
+ }
+ }
+
+ msg->result = WD_SUCCESS;
+ if (sqe.done_flag != UDMA_TASK_DONE ||
+ sqe.err_type || sqe.ext_err_type || sqe.wtype) {
+ WD_ERR("failed to do udma task! done=0x%x, err_type=0x%x\n"
+ "ext_err_type=0x%x, wtype=0x%x!\n",
+ (__u32)sqe.done_flag, (__u32)sqe.err_type,
+ (__u32)sqe.ext_err_type, (__u32)sqe.wtype);
+ msg->result = WD_IN_EPARA;
+ }
+
+ if (unlikely(msg->result != WD_SUCCESS))
+ dump_udma_msg(&sqe, temp_msg);
+
+out:
+ if (sqe.dw0 & UDMA_MULTI_ADDR_EN)
+ put_inter_addr(inter_addr, sqe.hi_tag);
+ return ret;
+}
+
+static void udma_uninit_qp_priv(handle_t h_qp)
+{
+ struct hisi_qp *qp = (struct hisi_qp *)h_qp;
+ struct udma_internal_addr *inter_addr;
+
+ if (!qp)
+ return;
+
+ inter_addr = (struct udma_internal_addr *)qp->priv;
+ if (!inter_addr)
+ return;
+
+ free(inter_addr->addr_array);
+ free(inter_addr->addr_status);
+ free(inter_addr);
+ qp->priv = NULL;
+}
+
+static int udma_init_qp_priv(handle_t h_qp)
+{
+ struct hisi_qp *qp = (struct hisi_qp *)h_qp;
+ __u16 sq_depth = qp->q_info.sq_depth;
+ struct udma_internal_addr *inter_addr;
+ int ret = -WD_ENOMEM;
+
+ inter_addr = calloc(1, sizeof(struct udma_internal_addr));
+ if (!inter_addr)
+ return ret;
+
+ inter_addr->addr_status = calloc(1, sizeof(__u8) * sq_depth);
+ if (!inter_addr->addr_status)
+ goto free_inter_addr;
+
+ inter_addr->addr_array = aligned_alloc(UDMA_ADDR_ALIGN_SIZE,
+ sizeof(struct udma_addr_array) * sq_depth);
+ if (!inter_addr->addr_array)
+ goto free_addr_status;
+
+ inter_addr->addr_count = sq_depth;
+ qp->priv = inter_addr;
+
+ return WD_SUCCESS;
+
+free_addr_status:
+ free(inter_addr->addr_status);
+free_inter_addr:
+ free(inter_addr);
+
+ return ret;
+}
+
+static int udma_init(struct wd_alg_driver *drv, void *conf)
+{
+ struct wd_ctx_config_internal *config = conf;
+ struct hisi_qm_priv qm_priv;
+ struct hisi_udma_ctx *priv;
+ handle_t h_qp = 0;
+ handle_t h_ctx;
+ __u32 i, j;
+ int ret;
+
+ if (!config || !config->ctx_num) {
+ WD_ERR("invalid: udma init config is null or ctx num is 0!\n");
+ return -WD_EINVAL;
+ }
+
+ priv = malloc(sizeof(struct hisi_udma_ctx));
+ if (!priv)
+ return -WD_ENOMEM;
+
+ qm_priv.op_type = UDMA_ALG_TYPE;
+ qm_priv.sqe_size = sizeof(struct udma_sqe);
+ /* Allocate qp for each context */
+ for (i = 0; i < config->ctx_num; i++) {
+ h_ctx = config->ctxs[i].ctx;
+ qm_priv.qp_mode = config->ctxs[i].ctx_mode;
+ /* Setting the epoll en to 0 for ASYNC ctx */
+ qm_priv.epoll_en = (qm_priv.qp_mode == CTX_MODE_SYNC) ?
+ config->epoll_en : 0;
+ qm_priv.idx = i;
+ h_qp = hisi_qm_alloc_qp(&qm_priv, h_ctx);
+ if (!h_qp) {
+ ret = -WD_ENOMEM;
+ goto out;
+ }
+ config->ctxs[i].sqn = qm_priv.sqn;
+ ret = udma_init_qp_priv(h_qp);
+ if (ret)
+ goto free_h_qp;
+ }
+ memcpy(&priv->config, config, sizeof(struct wd_ctx_config_internal));
+ drv->priv = priv;
+
+ return WD_SUCCESS;
+free_h_qp:
+ hisi_qm_free_qp(h_qp);
+out:
+ for (j = 0; j < i; j++) {
+ h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx);
+ udma_uninit_qp_priv(h_qp);
+ hisi_qm_free_qp(h_qp);
+ }
+ free(priv);
+ return ret;
+}
+
+static void udma_exit(struct wd_alg_driver *drv)
+{
+ struct wd_ctx_config_internal *config;
+ struct hisi_udma_ctx *priv;
+ handle_t h_qp;
+ __u32 i;
+
+ if (!drv || !drv->priv)
+ return;
+
+ priv = (struct hisi_udma_ctx *)drv->priv;
+ config = &priv->config;
+ for (i = 0; i < config->ctx_num; i++) {
+ h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx);
+ udma_uninit_qp_priv(h_qp);
+ hisi_qm_free_qp(h_qp);
+ }
+
+ free(priv);
+ drv->priv = NULL;
+}
+
+static int udma_get_usage(void *param)
+{
+ return 0;
+}
+
+static struct wd_alg_driver udma_driver = {
+ .drv_name = "hisi_zip",
+ .alg_name = "udma",
+ .calc_type = UADK_ALG_HW,
+ .priority = 100,
+ .queue_num = UDMA_CTX_Q_NUM_DEF,
+ .op_type_num = 1,
+ .fallback = 0,
+ .init = udma_init,
+ .exit = udma_exit,
+ .send = udma_send,
+ .recv = udma_recv,
+ .get_usage = udma_get_usage,
+};
+
+#ifdef WD_STATIC_DRV
+void hisi_udma_probe(void)
+#else
+static void __attribute__((constructor)) hisi_udma_probe(void)
+#endif
+{
+ int ret;
+
+ WD_INFO("Info: register UDMA alg drivers!\n");
+
+ ret = wd_alg_driver_register(&udma_driver);
+ if (ret && ret != -WD_ENODEV)
+ WD_ERR("failed to register UDMA driver, ret = %d!\n", ret);
+}
+
+#ifdef WD_STATIC_DRV
+void hisi_udma_remove(void)
+#else
+static void __attribute__((destructor)) hisi_udma_remove(void)
+#endif
+{
+ WD_INFO("Info: unregister UDMA alg drivers!\n");
+
+ wd_alg_driver_unregister(&udma_driver);
+}
diff --git a/include/drv/wd_udma_drv.h b/include/drv/wd_udma_drv.h
new file mode 100644
index 00000000..c8028f79
--- /dev/null
+++ b/include/drv/wd_udma_drv.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+/* Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. */
+
+#ifndef __WD_UDMA_DRV_H
+#define __WD_UDMA_DRV_H
+
+#include <asm/types.h>
+
+#include "../wd_udma.h"
+#include "../wd_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* udma message format */
+struct wd_udma_msg {
+ struct wd_udma_req req;
+ struct wd_data_addr *src;
+ struct wd_data_addr *dst;
+ int addr_num;
+ int value;
+ enum wd_udma_op_type op_type;
+ __u32 tag; /* User-defined request identifier */
+ __u8 result; /* alg op error code */
+};
+
+struct wd_udma_msg *wd_udma_get_msg(__u32 idx, __u32 tag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __WD_UDMA_DRV_H */
diff --git a/include/wd_alg.h b/include/wd_alg.h
index aba855d6..441b3bef 100644
--- a/include/wd_alg.h
+++ b/include/wd_alg.h
@@ -204,11 +204,13 @@ void hisi_sec2_probe(void);
void hisi_hpre_probe(void);
void hisi_zip_probe(void);
void hisi_dae_probe(void);
+void hisi_udma_probe(void);
void hisi_sec2_remove(void);
void hisi_hpre_remove(void);
void hisi_zip_remove(void);
void hisi_dae_remove(void);
+void hisi_udma_remove(void);
#endif
diff --git a/include/wd_udma.h b/include/wd_udma.h
new file mode 100644
index 00000000..d8a7964e
--- /dev/null
+++ b/include/wd_udma.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+/*
+ * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved.
+ */
+
+#ifndef __WD_UDMA_H
+#define __WD_UDMA_H
+
+#include <stdbool.h>
+
+#include "wd_alg_common.h"
+
+typedef void (*wd_udma_cb_t)(void *cb_param);
+
+/**
+ * wd_udma_op_type - Algorithm type of option.
+ */
+enum wd_udma_op_type {
+ WD_UDMA_MEMCPY,
+ WD_UDMA_MEMSET,
+ WD_UDMA_OP_MAX
+};
+
+/**
+ * wd_udma_sess_setup - udma session setup information.
+ * @sched_param: Parameters of the scheduling policy,
+ * usually allocated according to struct sched_params.
+ */
+struct wd_udma_sess_setup {
+ void *sched_param;
+};
+
+/**
+ * wd_data_addr - addr information of UDMA.
+ * @addr: Indicates the start address of the operation.
+ * @addr_size: Maximum size of the addr, in bytes.
+ * @data_size: Number of bytes of valid data; must not exceed @addr_size.
+ */
+struct wd_data_addr {
+ void *addr;
+ size_t addr_size;
+ size_t data_size;
+};
+
+/**
+ * wd_udma_req - udma operation request.
+ * @src: pointer to input address.
+ * @dst: pointer to output address, for WD_UDMA_MEMSET, only one of src and dst can be set.
+ * @addr_num: Number of address.
+ * @value: Value to be written for WD_UDMA_MEMSET.
+ * @op_type: udma operation type.
+ * @cb: Callback function.
+ * @cb_param: Parameters of the callback function.
+ * @status: operation result written back by the driver.
+ */
+struct wd_udma_req {
+ struct wd_data_addr *src;
+ struct wd_data_addr *dst;
+ int addr_num;
+ int value;
+ enum wd_udma_op_type op_type;
+ wd_udma_cb_t cb;
+ void *cb_param;
+ int status;
+};
+
+/**
+ * wd_udma_init() - A simplified interface to initialize udma.
+ * To make initialization simpler, ctx_params supports being set to NULL.
+ * And then the function will set them as driver's default.
+ *
+ * @alg: The algorithm users want to use.
+ * @sched_type: The scheduling type users want to use.
+ * @task_type: Task types, including soft computing, hardware and hybrid computing.
+ * @ctx_params: The ctxs resources users want to use. Include per operation
+ * type ctx numbers and business process run numa.
+ *
+ * Return 0 if succeed and others if fail.
+ */
+int wd_udma_init(const char *alg, __u32 sched_type,
+ int task_type, struct wd_ctx_params *ctx_params);
+
+/**
+ * wd_udma_uninit() - Uninitialise ctx configuration and scheduler.
+ */
+void wd_udma_uninit(void);
+
+/**
+ * wd_udma_alloc_sess() - Allocate a wd udma session.
+ * @setup: Parameters to setup this session.
+ *
+ * Return 0 if failed.
+ */
+handle_t wd_udma_alloc_sess(struct wd_udma_sess_setup *setup);
+
+/**
+ * wd_udma_free_sess() - Free a wd udma session.
+ * @sess: The session to be freed.
+ */
+void wd_udma_free_sess(handle_t sess);
+
+/**
+ * wd_do_udma_sync() - Send a sync udma request.
+ * @h_sess: The session which request will be sent to.
+ * @req: Request.
+ */
+int wd_do_udma_sync(handle_t h_sess, struct wd_udma_req *req);
+
+/**
+ * wd_do_udma_async() - Send an async udma request.
+ * @sess: The session which request will be sent to.
+ * @req: Request.
+ */
+int wd_do_udma_async(handle_t h_sess, struct wd_udma_req *req);
+
+/**
+ * wd_udma_poll() - Poll finished request.
+ *
+ * This function will call poll_policy function which is registered to wd udma
+ * by user.
+ */
+int wd_udma_poll(__u32 expt, __u32 *count);
+
+#endif /* __WD_UDMA_H */
diff --git a/include/wd_util.h b/include/wd_util.h
index 9e9d4e35..bbb18a7c 100644
--- a/include/wd_util.h
+++ b/include/wd_util.h
@@ -42,6 +42,7 @@ enum wd_type {
WD_DH_TYPE,
WD_ECC_TYPE,
WD_AGG_TYPE,
+ WD_UDMA_TYPE,
WD_TYPE_MAX,
};
diff --git a/libwd_dae.map b/libwd_dae.map
index 4c51b856..6597ff98 100644
--- a/libwd_dae.map
+++ b/libwd_dae.map
@@ -1,4 +1,4 @@
-UADK_CRYPTO_2.0 {
+UADK_DAE_2.0 {
global:
wd_agg_alloc_sess;
wd_agg_free_sess;
@@ -17,5 +17,18 @@ global:
wd_sched_rr_instance;
wd_sched_rr_alloc;
wd_sched_rr_release;
+
+ wd_udma_alloc_sess;
+ wd_udma_free_sess;
+ wd_udma_init;
+ wd_udma_uninit;
+ wd_do_udma_sync;
+ wd_do_udma_async;
+ wd_udma_poll;
+ wd_udma_get_msg;
+
+ wd_sched_rr_instance;
+ wd_sched_rr_alloc;
+ wd_sched_rr_release;
local: *;
};
diff --git a/wd_udma.c b/wd_udma.c
new file mode 100644
index 00000000..7675df30
--- /dev/null
+++ b/wd_udma.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <limits.h>
+#include "include/drv/wd_udma_drv.h"
+#include "wd_udma.h"
+
+struct wd_udma_sess {
+ const char *alg_name;
+ wd_dev_mask_t *dev_mask;
+ void *priv;
+ void *sched_key;
+};
+
+static struct wd_udma_setting {
+ enum wd_status status;
+ struct wd_ctx_config_internal config;
+ struct wd_sched sched;
+ struct wd_async_msg_pool pool;
+ struct wd_alg_driver *driver;
+ void *dlhandle;
+ void *dlh_list;
+} wd_udma_setting;
+
+static struct wd_init_attrs wd_udma_init_attrs;
+
+static void wd_udma_close_driver(void)
+{
+#ifndef WD_STATIC_DRV
+ wd_dlclose_drv(wd_udma_setting.dlh_list);
+ wd_udma_setting.dlh_list = NULL;
+#else
+ wd_release_drv(wd_udma_setting.driver);
+ hisi_udma_remove();
+#endif
+}
+
+static int wd_udma_open_driver(void)
+{
+#ifndef WD_STATIC_DRV
+ /*
+ * The driver lib file path can be set by an env param;
+ * the libs are then opened by wd_dlopen_drv().
+ * Passing NULL means the path is queried dynamically.
+ */
+ wd_udma_setting.dlh_list = wd_dlopen_drv(NULL);
+ if (!wd_udma_setting.dlh_list) {
+ WD_ERR("fail to open driver lib files.\n");
+ return -WD_EINVAL;
+ }
+#else
+ hisi_udma_probe();
+#endif
+ return WD_SUCCESS;
+}
+
+void wd_udma_free_sess(handle_t sess)
+{
+ struct wd_udma_sess *sess_t = (struct wd_udma_sess *)sess;
+
+ if (!sess_t) {
+ WD_ERR("invalid: free udma sess param NULL!\n");
+ return;
+ }
+
+ if (sess_t->sched_key)
+ free(sess_t->sched_key);
+ free(sess_t);
+}
+
+handle_t wd_udma_alloc_sess(struct wd_udma_sess_setup *setup)
+{
+ struct wd_udma_sess *sess;
+
+ if (!setup) {
+ WD_ERR("invalid: alloc udma sess setup NULL!\n");
+ return (handle_t)0;
+ }
+
+ sess = calloc(1, sizeof(struct wd_udma_sess));
+ if (!sess)
+ return (handle_t)0;
+
+ sess->alg_name = "udma";
+	/* Some simple schedulers don't need scheduling parameters */
+ sess->sched_key = (void *)wd_udma_setting.sched.sched_init(
+ wd_udma_setting.sched.h_sched_ctx, setup->sched_param);
+ if (WD_IS_ERR(sess->sched_key)) {
+ WD_ERR("failed to init session schedule key!\n");
+ goto free_sess;
+ }
+
+ return (handle_t)sess;
+
+free_sess:
+ free(sess);
+ return (handle_t)0;
+}
+
+static int wd_udma_addr_check(struct wd_data_addr *data_addr)
+{
+ if (unlikely(!data_addr->addr)) {
+ WD_ERR("invalid: udma addr is NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(!data_addr->data_size ||
+ data_addr->data_size > data_addr->addr_size)) {
+ WD_ERR("invalid: udma size is error, data_size %lu, addr_size is %lu!\n",
+ data_addr->data_size, data_addr->addr_size);
+ return -WD_EINVAL;
+ }
+
+ return WD_SUCCESS;
+}
+
+static int wd_udma_param_check(struct wd_udma_sess *sess,
+ struct wd_udma_req *req)
+{
+ struct wd_data_addr *src, *dst;
+ int i, ret;
+
+ if (unlikely(!sess || !req)) {
+ WD_ERR("invalid: input param NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(req->addr_num <= 0)) {
+ WD_ERR("invalid: addr num is error %d!\n", req->addr_num);
+ return -WD_EINVAL;
+ }
+
+ src = req->src;
+ dst = req->dst;
+ if (unlikely(req->op_type >= WD_UDMA_OP_MAX)) {
+ WD_ERR("invalid: op_type is error %u!\n", req->op_type);
+ return -WD_EINVAL;
+ } else if (unlikely(req->op_type == WD_UDMA_MEMCPY && (!src || !dst))) {
+ WD_ERR("invalid: memcpy src or dst is NULL!\n");
+ return -WD_EINVAL;
+ } else if (unlikely(req->op_type == WD_UDMA_MEMSET &&
+ ((!src && !dst) || (src && dst)))) {
+ WD_ERR("invalid: memset src and dst is error!\n");
+ return -WD_EINVAL;
+ }
+
+ if (req->op_type == WD_UDMA_MEMSET)
+ dst = !req->src ? req->dst : req->src;
+
+ for (i = 0; i < req->addr_num; i++) {
+ if (req->op_type == WD_UDMA_MEMCPY) {
+ ret = wd_udma_addr_check(&src[i]);
+ if (unlikely(ret)) {
+ WD_ERR("invalid: udma memcpy src addr is error!\n");
+ return -WD_EINVAL;
+ }
+
+ ret = wd_udma_addr_check(&dst[i]);
+ if (unlikely(ret)) {
+ WD_ERR("invalid: udma memcpy dst addr is error!\n");
+ return -WD_EINVAL;
+ }
+
+ if (unlikely(dst[i].data_size != src[i].data_size)) {
+ WD_ERR("invalid: udma memcpy data_size is error!\n"
+ "src %lu, dst %lu!\n",
+ dst[i].data_size, src[i].data_size);
+ return -WD_EINVAL;
+ }
+ } else {
+ ret = wd_udma_addr_check(&dst[i]);
+ if (unlikely(ret)) {
+ WD_ERR("invalid: udma memset addr is error!\n");
+ return -WD_EINVAL;
+ }
+ }
+ }
+
+ return WD_SUCCESS;
+}
+
+static void fill_udma_msg(struct wd_udma_msg *msg, struct wd_udma_req *req)
+{
+ msg->result = WD_EINVAL;
+
+ memcpy(&msg->req, req, sizeof(*req));
+ msg->op_type = req->op_type;
+ msg->addr_num = req->addr_num;
+ msg->value = req->value;
+ if (req->op_type == WD_UDMA_MEMSET) {
+ msg->dst = !req->src ? req->dst : req->src;
+ } else {
+ msg->src = req->src;
+ msg->dst = req->dst;
+ }
+}
+
+int wd_do_udma_sync(handle_t h_sess, struct wd_udma_req *req)
+{
+ struct wd_ctx_config_internal *config = &wd_udma_setting.config;
+ handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx;
+ struct wd_udma_sess *sess_t = (struct wd_udma_sess *)h_sess;
+ struct wd_msg_handle msg_handle;
+ struct wd_ctx_internal *ctx;
+ struct wd_udma_msg msg = {0};
+ __u32 idx;
+ int ret;
+
+ ret = wd_udma_param_check(sess_t, req);
+ if (unlikely(ret))
+ return ret;
+
+ idx = wd_udma_setting.sched.pick_next_ctx(h_sched_ctx,
+ sess_t->sched_key,
+ CTX_MODE_SYNC);
+ ret = wd_check_ctx(config, CTX_MODE_SYNC, idx);
+ if (unlikely(ret))
+ return ret;
+
+ wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx);
+ ctx = config->ctxs + idx;
+
+ fill_udma_msg(&msg, req);
+
+ msg_handle.send = wd_udma_setting.driver->send;
+ msg_handle.recv = wd_udma_setting.driver->recv;
+ pthread_spin_lock(&ctx->lock);
+ ret = wd_handle_msg_sync(wd_udma_setting.driver, &msg_handle, ctx->ctx,
+ &msg, NULL, wd_udma_setting.config.epoll_en);
+ pthread_spin_unlock(&ctx->lock);
+ if (unlikely(ret))
+ return ret;
+
+ req->status = msg.result;
+
+ return GET_NEGATIVE(msg.result);
+}
+
+int wd_do_udma_async(handle_t sess, struct wd_udma_req *req)
+{
+ struct wd_ctx_config_internal *config = &wd_udma_setting.config;
+ handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx;
+ struct wd_udma_sess *sess_t = (struct wd_udma_sess *)sess;
+ struct wd_udma_msg *msg = NULL;
+ struct wd_ctx_internal *ctx;
+ int ret, mid;
+ __u32 idx;
+
+ ret = wd_udma_param_check(sess_t, req);
+ if (unlikely(ret))
+ return ret;
+
+ if (unlikely(!req->cb)) {
+ WD_ERR("invalid: udma input req cb is NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ idx = wd_udma_setting.sched.pick_next_ctx(h_sched_ctx,
+ sess_t->sched_key,
+ CTX_MODE_ASYNC);
+ ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx);
+ if (unlikely(ret))
+ return ret;
+ ctx = config->ctxs + idx;
+
+ mid = wd_get_msg_from_pool(&wd_udma_setting.pool, idx, (void **)&msg);
+ if (unlikely(mid < 0)) {
+ WD_ERR("failed to get msg from pool!\n");
+ return mid;
+ }
+
+ fill_udma_msg(msg, req);
+ msg->tag = mid;
+
+ ret = wd_alg_driver_send(wd_udma_setting.driver, ctx->ctx, msg);
+ if (unlikely(ret)) {
+ if (ret != -WD_EBUSY)
+ WD_ERR("failed to send udma BD, hw is err!\n");
+
+ goto fail_with_msg;
+ }
+
+ wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx);
+
+ return WD_SUCCESS;
+
+fail_with_msg:
+ wd_put_msg_to_pool(&wd_udma_setting.pool, idx, mid);
+
+ return ret;
+}
+
+static int wd_udma_poll_ctx(__u32 idx, __u32 expt, __u32 *count)
+{
+ struct wd_ctx_config_internal *config = &wd_udma_setting.config;
+ struct wd_udma_msg rcv_msg = {0};
+ struct wd_ctx_internal *ctx;
+ struct wd_udma_req *req;
+ struct wd_udma_msg *msg;
+ __u32 rcv_cnt = 0;
+ __u32 tmp = expt;
+ int ret;
+
+ *count = 0;
+
+ ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx);
+ if (ret)
+ return ret;
+
+ ctx = config->ctxs + idx;
+
+ do {
+ ret = wd_alg_driver_recv(wd_udma_setting.driver, ctx->ctx, &rcv_msg);
+ if (ret == -WD_EAGAIN) {
+ return ret;
+ } else if (unlikely(ret)) {
+ WD_ERR("failed to async recv, ret = %d!\n", ret);
+ *count = rcv_cnt;
+ wd_put_msg_to_pool(&wd_udma_setting.pool, idx,
+ rcv_msg.tag);
+ return ret;
+ }
+ rcv_cnt++;
+ msg = wd_find_msg_in_pool(&wd_udma_setting.pool, idx, rcv_msg.tag);
+ if (!msg) {
+ WD_ERR("failed to find udma msg!\n");
+ return -WD_EINVAL;
+ }
+
+ msg->req.status = rcv_msg.result;
+ req = &msg->req;
+ req->cb(req);
+ wd_put_msg_to_pool(&wd_udma_setting.pool, idx, rcv_msg.tag);
+ *count = rcv_cnt;
+ } while (--tmp);
+
+ return ret;
+}
+
+int wd_udma_poll(__u32 expt, __u32 *count)
+{
+ handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx;
+
+ if (unlikely(!count || !expt)) {
+ WD_ERR("invalid: udma poll count is NULL or expt is 0!\n");
+ return -WD_EINVAL;
+ }
+
+ return wd_udma_setting.sched.poll_policy(h_sched_ctx, expt, count);
+}
+
+static void wd_udma_clear_status(void)
+{
+ wd_alg_clear_init(&wd_udma_setting.status);
+}
+
+static void wd_udma_alg_uninit(void)
+{
+ /* Uninit async request pool */
+ wd_uninit_async_request_pool(&wd_udma_setting.pool);
+ /* Unset config, sched, driver */
+ wd_clear_sched(&wd_udma_setting.sched);
+ wd_alg_uninit_driver(&wd_udma_setting.config, wd_udma_setting.driver);
+}
+
+void wd_udma_uninit(void)
+{
+ enum wd_status status;
+
+ wd_alg_get_init(&wd_udma_setting.status, &status);
+ if (status == WD_UNINIT)
+ return;
+
+ wd_udma_alg_uninit();
+ wd_alg_attrs_uninit(&wd_udma_init_attrs);
+ wd_alg_drv_unbind(wd_udma_setting.driver);
+ wd_udma_close_driver();
+ wd_alg_clear_init(&wd_udma_setting.status);
+}
+
+static int wd_udma_alg_init(struct wd_ctx_config *config, struct wd_sched *sched)
+{
+ int ret;
+
+ ret = wd_set_epoll_en("WD_UDMA_EPOLL_EN", &wd_udma_setting.config.epoll_en);
+ if (ret < 0)
+ return ret;
+
+ ret = wd_init_ctx_config(&wd_udma_setting.config, config);
+ if (ret < 0)
+ return ret;
+
+ ret = wd_init_sched(&wd_udma_setting.sched, sched);
+ if (ret < 0)
+ goto out_clear_ctx_config;
+
+ /* Allocate async pool for every ctx */
+ ret = wd_init_async_request_pool(&wd_udma_setting.pool, config, WD_POOL_MAX_ENTRIES,
+ sizeof(struct wd_udma_msg));
+ if (ret < 0)
+ goto out_clear_sched;
+
+ ret = wd_alg_init_driver(&wd_udma_setting.config, wd_udma_setting.driver);
+ if (ret)
+ goto out_clear_pool;
+
+ return WD_SUCCESS;
+
+out_clear_pool:
+ wd_uninit_async_request_pool(&wd_udma_setting.pool);
+out_clear_sched:
+ wd_clear_sched(&wd_udma_setting.sched);
+out_clear_ctx_config:
+ wd_clear_ctx_config(&wd_udma_setting.config);
+ return ret;
+}
+
+int wd_udma_init(const char *alg, __u32 sched_type, int task_type,
+ struct wd_ctx_params *ctx_params)
+{
+ struct wd_ctx_nums udma_ctx_num[WD_UDMA_OP_MAX] = {0};
+ struct wd_ctx_params udma_ctx_params = {0};
+ int state, ret = -WD_EINVAL;
+
+ pthread_atfork(NULL, NULL, wd_udma_clear_status);
+
+ state = wd_alg_try_init(&wd_udma_setting.status);
+ if (state)
+ return state;
+
+ if (!alg || sched_type >= SCHED_POLICY_BUTT ||
+ task_type < 0 || task_type >= TASK_MAX_TYPE) {
+ WD_ERR("invalid: input param is wrong!\n");
+ goto out_clear_init;
+ }
+
+ if (strcmp(alg, "udma")) {
+ WD_ERR("invalid: the alg %s not support!\n", alg);
+ goto out_clear_init;
+ }
+
+ state = wd_udma_open_driver();
+ if (state)
+ goto out_clear_init;
+
+ while (ret) {
+ memset(&wd_udma_setting.config, 0, sizeof(struct wd_ctx_config_internal));
+
+ /* Get alg driver and dev name */
+ wd_udma_setting.driver = wd_alg_drv_bind(task_type, alg);
+ if (!wd_udma_setting.driver) {
+ WD_ERR("fail to bind a valid driver.\n");
+ ret = -WD_EINVAL;
+ goto out_dlopen;
+ }
+
+ udma_ctx_params.ctx_set_num = udma_ctx_num;
+ ret = wd_ctx_param_init(&udma_ctx_params, ctx_params,
+ wd_udma_setting.driver, WD_UDMA_TYPE, WD_UDMA_OP_MAX);
+ if (ret) {
+ if (ret == -WD_EAGAIN) {
+ wd_disable_drv(wd_udma_setting.driver);
+ wd_alg_drv_unbind(wd_udma_setting.driver);
+ continue;
+ }
+ goto out_driver;
+ }
+
+ wd_udma_init_attrs.alg = alg;
+ wd_udma_init_attrs.sched_type = sched_type;
+ wd_udma_init_attrs.driver = wd_udma_setting.driver;
+ wd_udma_init_attrs.ctx_params = &udma_ctx_params;
+ wd_udma_init_attrs.alg_init = wd_udma_alg_init;
+ wd_udma_init_attrs.alg_poll_ctx = wd_udma_poll_ctx;
+ ret = wd_alg_attrs_init(&wd_udma_init_attrs);
+ if (ret) {
+ if (ret == -WD_ENODEV) {
+ wd_disable_drv(wd_udma_setting.driver);
+ wd_alg_drv_unbind(wd_udma_setting.driver);
+ wd_ctx_param_uninit(&udma_ctx_params);
+ continue;
+ }
+ WD_ERR("failed to init alg attrs!\n");
+ goto out_params_uninit;
+ }
+ }
+
+ wd_alg_set_init(&wd_udma_setting.status);
+ wd_ctx_param_uninit(&udma_ctx_params);
+
+ return WD_SUCCESS;
+
+out_params_uninit:
+ wd_ctx_param_uninit(&udma_ctx_params);
+out_driver:
+ wd_alg_drv_unbind(wd_udma_setting.driver);
+out_dlopen:
+ wd_udma_close_driver();
+out_clear_init:
+ wd_alg_clear_init(&wd_udma_setting.status);
+ return ret;
+}
+
+struct wd_udma_msg *wd_udma_get_msg(__u32 idx, __u32 tag)
+{
+ return wd_find_msg_in_pool(&wd_udma_setting.pool, idx, tag);
+}
diff --git a/wd_util.c b/wd_util.c
index f1b27bf8..38d2d375 100644
--- a/wd_util.c
+++ b/wd_util.c
@@ -63,6 +63,7 @@ static const char *wd_env_name[WD_TYPE_MAX] = {
"WD_DH_CTX_NUM",
"WD_ECC_CTX_NUM",
"WD_AGG_CTX_NUM",
+ "WD_UDMA_CTX_NUM",
};
struct async_task {
@@ -107,6 +108,7 @@ static struct acc_alg_item alg_options[] = {
{"deflate", "deflate"},
{"lz77_zstd", "lz77_zstd"},
{"hashagg", "hashagg"},
+ {"udma", "udma"},
{"rsa", "rsa"},
{"dh", "dh"},
--
2.33.0
1
0
*** BLURB HERE ***
Chenghai Huang (9):
uadk: fix definition coding standard issues
uadk: add or remove some store buf condition judgments
uadk: add new alg called lz77_only
uadk: modify the condition for copying repcode
uadk: remove redundant checks on bit read results
uadk_tool: modify unrecv num in async benchmark test
uadk_tool: add lz77_only alg in zip benchmark test
uadk_tool: add lz4 alg in zip benchmark test
uadk: fix a sgl pool memery issue in lz77_only
Longfang Liu (1):
uadk: resolve some code issues
Qi Tao (1):
uadk_tool: add aead algorithm
Qinxin Xia (5):
uadk: hisi_comp - abstract get sgl function and general deflate
functions
uadk: hisi_comp - support the new algorithm 'lz4'
uadk: wd_comp - support the new algorithm 'lz4'
uadk: hisi_comp: reduce some invalid spaces.
uadk: rectify the incorrect boundary value judgment of LZ4
Weili Qian (1):
uadk: support data move
Wenkai Lin (9):
uadk: support hashjoin and gather algorithm
uadk: fix for build and probe index check
uadk: fix for check_key_cols_info
uadk: reduce repeated dae code
uadk: modify print info for wd_gather_get_batch_rowsize
uadk: fix for rehash invalid size
uadk: optimize for rehash performance
uadk: fix for the segment fault in gather param check
uadk: fix for the hashjoin task runtime error
Zhushuai Yin (6):
uadk: Add max and min operations at the hash algorithm layer
uadk: hash agg adapter drv parameter
uadk:Add max,min,and rehash implementations
uadk:zip algorithm increases buffer len interception
uadk: Fix the problem of failing to intercept the new comp stream
scene
uadk:fix dh prov segmentation issue
Makefile.am | 31 +-
drv/hisi_comp.c | 565 +++++--
drv/hisi_comp_huf.c | 11 +-
drv/hisi_dae.c | 826 +++-------
drv/hisi_dae.h | 229 +++
drv/hisi_dae_common.c | 387 +++++
drv/hisi_dae_join_gather.c | 1040 ++++++++++++
drv/hisi_qm_udrv.h | 3 +-
drv/hisi_udma.c | 566 +++++++
include/drv/wd_agg_drv.h | 10 +-
include/drv/wd_join_gather_drv.h | 52 +
include/drv/wd_udma_drv.h | 34 +
include/wd_agg.h | 9 +-
include/wd_alg.h | 4 +
include/wd_comp.h | 2 +
include/wd_dae.h | 12 +
include/wd_join_gather.h | 352 +++++
include/wd_udma.h | 124 ++
include/wd_util.h | 2 +
libwd_dae.map | 34 +-
uadk_tool/benchmark/sec_uadk_benchmark.c | 34 +
uadk_tool/benchmark/sec_wd_benchmark.c | 34 +
uadk_tool/benchmark/uadk_benchmark.c | 14 +
uadk_tool/benchmark/uadk_benchmark.h | 4 +
uadk_tool/benchmark/zip_uadk_benchmark.c | 24 +-
v1/drv/hisi_zip_huf.c | 11 +-
v1/drv/hisi_zip_udrv.c | 2 +-
wd.c | 2 +-
wd_agg.c | 76 +-
wd_comp.c | 56 +-
wd_join_gather.c | 1823 ++++++++++++++++++++++
wd_sched.c | 3 +-
wd_udma.c | 511 ++++++
wd_util.c | 34 +-
34 files changed, 6125 insertions(+), 796 deletions(-)
create mode 100644 drv/hisi_dae.h
create mode 100644 drv/hisi_dae_common.c
create mode 100644 drv/hisi_dae_join_gather.c
create mode 100644 drv/hisi_udma.c
create mode 100644 include/drv/wd_join_gather_drv.h
create mode 100644 include/drv/wd_udma_drv.h
create mode 100644 include/wd_join_gather.h
create mode 100644 include/wd_udma.h
create mode 100644 wd_join_gather.c
create mode 100644 wd_udma.c
--
2.33.0
1
32
From: JiangShui Yang <yangjiangshui(a)h-partners.com>
Chenghai Huang (1):
uadk_provider: add reset for part of ctx after final
Qi Tao (1):
uadk_provider/rsa: bugfix for rsa digest interface
Weili Qian (3):
uadk_engine/sm2: set the default user id
uadk_provider/sm2: delete duplicate applications sess
uadk_engine/digest: small packet directly uses soft computing
Wenkai Lin (2):
uadk_engine: optimize for sec cipher and digest init
uadk_engine/aead: fix for set key problem
Zhushuai Yin (3):
uadk_provider:Improved SM3 hmac performance
uadk_engine:Fix DH algorithm shared key comparison failure
uadk_provider/sm2:fix the issue of SM2 authentication failure
lizhi (2):
uadk_provider: clean up unused functions
uadk_prov: fix a cleancode issue
src/uadk_aead.c | 65 ++++++++++----------
src/uadk_async.c | 5 ++
src/uadk_async.h | 1 +
src/uadk_cipher.c | 98 +++++++++++++-----------------
src/uadk_dh.c | 8 ++-
src/uadk_digest.c | 120 ++++++++++++++++++-------------------
src/uadk_prov_der_writer.c | 69 ---------------------
src/uadk_prov_der_writer.h | 4 --
src/uadk_prov_digest.c | 29 ++++++---
src/uadk_prov_ffc.c | 3 +-
src/uadk_prov_hmac.c | 18 ++++--
src/uadk_prov_rsa.c | 107 +++++++++++++++++++++------------
src/uadk_prov_sm2.c | 108 +++++++++++++++++----------------
src/uadk_sm2.c | 95 +++++++++++++++--------------
14 files changed, 356 insertions(+), 374 deletions(-)
--
2.43.0
1
12