[PATCH 01/30] uadk: add reserved memory handling functionality to uadk
From: Longfang Liu <liulongfang@huawei.com> Add kernel-state reserved memory handling to the UADK SVA framework to adapt to the No-SVA feature, including functions to request and initialize a reserved memory pool and to allocate, use, and free memory within the pool. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- Makefile.am | 9 +- include/uacce.h | 7 + include/wd.h | 23 + include/wd_alg_common.h | 36 +- include/wd_bmm.h | 44 ++ include/wd_internal.h | 70 +++ include/wd_util.h | 2 + libwd.map | 14 + wd.c | 38 +- wd_bmm.c | 1057 +++++++++++++++++++++++++++++++++++++++ wd_util.c | 107 +++- 11 files changed, 1355 insertions(+), 52 deletions(-) create mode 100644 include/wd_bmm.h create mode 100644 include/wd_internal.h create mode 100644 wd_bmm.c diff --git a/Makefile.am b/Makefile.am index f897533..0e1203a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,12 +32,13 @@ YEAR = 2025 AM_CFLAGS+= -DUADK_VERSION_NUMBER="\"UADK version: ${MAJOR}.${MINOR}.${REVISION}\"" AM_CFLAGS+= -DUADK_RELEASED_TIME="\"Released ${MONTH} ${DAY}, ${YEAR}\"" -pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ +pkginclude_HEADERS = include/wd.h include/wd_internal.h include/wd_cipher.h include/wd_aead.h \ include/wd_comp.h include/wd_dh.h include/wd_digest.h \ include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ - include/wd_udma.h include/wd_join_gather.h + include/wd_udma.h include/wd_join_gather.h \ + include/wd_bmm.h nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h @@ -48,7 +49,7 @@ uadk_driversdir=$(libdir)/uadk uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la -libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ +libwd_la_SOURCES=wd.c wd_mempool.c wd_bmm.c wd_bmm.h wd.h wd_alg.c wd_alg.h \ v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ v1/wd_rsa.c v1/wd_rsa.h \ v1/wd_aead.c v1/wd_aead.h \ @@ -126,7 +127,7 @@ libwd_comp_la_DEPENDENCIES = libwd.la libhisi_zip_la_LIBADD = -ldl -libwd_crypto_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma +libwd_crypto_la_LIBADD = -lwd -ldl -lnuma -lm -lpthread libwd_crypto_la_DEPENDENCIES = libwd.la libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread diff --git a/include/uacce.h b/include/uacce.h index f7fae27..c6bb4fb 100644 --- a/include/uacce.h +++ b/include/uacce.h @@ -15,6 +15,12 @@ extern "C" { #define UACCE_CMD_START _IO('W', 0) #define UACCE_CMD_PUT_Q _IO('W', 1) +#define UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long) + +/* Pass DMA SS region slice size by granularity 64KB */ +#define UACCE_GRAN_SIZE 0x10000ull +#define UACCE_GRAN_SHIFT 16 +#define UACCE_GRAN_NUM_MASK 0xfffull /** * UACCE Device flags: @@ -33,6 +39,7 @@ enum { enum uacce_qfrt { UACCE_QFRT_MMIO = 0, /* device mmio region */ UACCE_QFRT_DUS = 1, /* device user share */ + UACCE_QFRT_SS, /* static share memory */ UACCE_QFRT_MAX, }; diff --git a/include/wd.h b/include/wd.h index b62d355..b97e5c7 100644 --- a/include/wd.h +++ b/include/wd.h @@ -38,6 +38,7 @@ typedef unsigned long long __u64; /* Required compiler attributes */ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define handle_t uintptr_t typedef
struct wd_dev_mask wd_dev_mask_t; @@ -115,6 +116,28 @@ enum wd_alg_type { WD_AEAD, }; +/* Memory APIs for UADK API Layer */ +typedef void *(*wd_alloc)(void *usr, size_t size); +typedef void (*wd_free)(void *usr, void *va); + + /* Memory VA to DMA address map and unmap */ +typedef void *(*wd_map)(void *usr, void *va, size_t sz); +typedef void (*wd_unmap)(void *usr, void *va, void *dma, size_t sz); +typedef __u32 (*wd_bufsize)(void *usr); + +/* Memory from user, it is given at ctx creating. */ +struct wd_mm_ops { + wd_alloc alloc; /* Memory allocation */ + wd_free free; /* Memory free */ + wd_map iova_map; /* Get iova from user space VA */ + + /* Destroy the mapping between the PA of VA and iova */ + wd_unmap iova_unmap; + wd_bufsize get_bufsize; /* Optional */ + void *usr; /* Data for the above operations */ + bool sva_mode; /* Record whether the OS is SVA or No-SVA mode */ +}; + /* * If the actual size of data is inconsistent * with dsize, undefined behavior occurs. diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index fd77426..a294877 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -12,6 +12,7 @@ #include <numa.h> #include "wd.h" #include "wd_alg.h" +#include "wd_internal.h" #ifdef __cplusplus extern "C" { @@ -24,7 +25,6 @@ extern "C" { #define BITS_TO_BYTES(bits) (((bits) + 7) >> 3) #define BYTES_TO_BITS(bytes) ((bytes) << 3) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define MAX_STR_LEN 256 #define CTX_TYPE_INVALID 9999 #define POLL_TIME 1000 @@ -60,6 +60,13 @@ enum wd_init_type { WD_TYPE_V2, }; +enum wd_mem_type { + UADK_MEM_AUTO, + UADK_MEM_USER, + UADK_MEM_PROXY, + UADK_MEM_MAX, +}; + /* * struct wd_ctx - Define one ctx and related type. * @ctx: The ctx itself. @@ -132,27 +139,6 @@ struct wd_ctx_params { struct wd_cap_config *cap; }; -struct wd_soft_ctx { - void *priv; -}; - -struct wd_ctx_internal { - handle_t ctx; - __u8 op_type; - __u8 ctx_mode; - __u16 sqn; - pthread_spinlock_t lock; -}; - -struct wd_ctx_config_internal { - __u32 ctx_num; - int shmid; - struct wd_ctx_internal *ctxs; - void *priv; - bool epoll_en; - unsigned long *msg_cnt; -}; - /* * struct wd_comp_sched - Define a scheduler. * @name: Name of this scheduler. @@ -181,12 +167,6 @@ struct wd_sched { typedef int (*wd_alg_init)(struct wd_ctx_config *config, struct wd_sched *sched); typedef int (*wd_alg_poll_ctx)(__u32 idx, __u32 expt, __u32 *count); -struct wd_datalist { - void *data; - __u32 len; - struct wd_datalist *next; -}; - #ifdef __cplusplus } #endif diff --git a/include/wd_bmm.h b/include/wd_bmm.h new file mode 100644 index 0000000..76b56a0 --- /dev/null +++ b/include/wd_bmm.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#ifndef _WD_SVA_BMM_H +#define _WD_SVA_BMM_H + +#include <stdint.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Memory pool creating parameters */ +struct wd_mempool_setup { + __u32 block_size; /* Block buffer size */ + __u32 block_num; /* Block buffer number */ + __u32 align_size; /* Block buffer starting address align size */ + struct wd_mm_ops ops; /* memory from user if don't use UADK memory */ +}; + +void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup); +void wd_mempool_free(handle_t h_ctx, void *pool); +void *wd_mem_alloc(void *pool, size_t size); +void wd_mem_free(void *pool, void *buf); + +void *wd_mem_map(void *pool, void *buf, size_t sz); +void wd_mem_unmap(void *pool, void *buf_dma, void *buf, size_t sz); +int wd_get_free_num(void *pool, __u32 *free_num); +int wd_get_fail_num(void *pool, __u32 *fail_num); +__u32 wd_get_bufsize(void *pool); + +handle_t wd_find_ctx(const char *alg_name); +void wd_remove_ctx_list(void); +int wd_insert_ctx_list(handle_t h_ctx, char *alg_name); +__u32 wd_get_dev_id(void *pool); + +#ifdef __cplusplus +} +#endif + +#endif /* _WD_SVA_BMM_H */ diff --git a/include/wd_internal.h b/include/wd_internal.h new file mode 100644 index 0000000..cd90ebf --- /dev/null +++ b/include/wd_internal.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. + */ + +#ifndef WD_INTERNAL_H +#define WD_INTERNAL_H + +#include <pthread.h> +#include <stdbool.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DECIMAL_NUMBER 10 +#define MAX_FD_NUM 65535 + +struct wd_ctx_h { + int fd; + char dev_path[MAX_DEV_NAME_LEN]; + char *dev_name; + char *drv_name; + unsigned long qfrs_offs[UACCE_QFRT_MAX]; + void *qfrs_base[UACCE_QFRT_MAX]; + struct uacce_dev *dev; + void *priv; +}; + +struct wd_soft_ctx { + int fd; + void *priv; +}; + +struct wd_ce_ctx { + int fd; + char *drv_name; + void *priv; +}; + +struct wd_ctx_internal { + handle_t ctx; + __u8 op_type; + __u8 ctx_mode; + __u16 sqn; + pthread_spinlock_t lock; +}; + +struct wd_ctx_config_internal { + __u32 ctx_num; + int shmid; + struct wd_ctx_internal *ctxs; + void *priv; + bool epoll_en; + unsigned long *msg_cnt; + char *alg_name; +}; + +struct wd_datalist { + void *data; + __u32 len; + struct wd_datalist *next; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/wd_util.h b/include/wd_util.h index 4a5204d..a337284 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -553,6 +553,8 @@ static inline void wd_ctx_spin_unlock(struct wd_ctx_internal *ctx, int type) pthread_spin_unlock(&ctx->lock); } +int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type); + #ifdef __cplusplus } #endif diff --git a/libwd.map b/libwd.map index 5522ec0..b1b90b3 100644 --- a/libwd.map +++ b/libwd.map @@ -49,5 +49,19 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + + wd_find_ctx; + wd_get_dev_id; + wd_remove_ctx_list; + wd_insert_ctx_list; + wd_mempool_alloc; + wd_mempool_free; + wd_mem_alloc; + wd_mem_free; + wd_mem_map; + wd_mem_unmap; + wd_get_free_num; + wd_get_fail_num; + wd_get_bufsize; local: *; }; diff --git a/wd.c b/wd.c index c1cc282..3e867b6 100644 --- a/wd.c +++ b/wd.c @@ -20,6 +20,7 @@ #include "wd.h" #include "wd_alg.h" +#include "wd_internal.h" #define SYS_CLASS_DIR "/sys/class/uacce" #define FILE_MAX_SIZE (8 << 20) @@ -33,16 +34,18 @@ enum UADK_LOG_LEVEL { static int uadk_log_level = WD_LOG_INVALID; -struct wd_ctx_h { - int fd; - char 
dev_path[MAX_DEV_NAME_LEN]; - char *dev_name; - char *drv_name; - unsigned long qfrs_offs[UACCE_QFRT_MAX]; - void *qfrs_base[UACCE_QFRT_MAX]; - struct uacce_dev *dev; - void *priv; -}; +static int wd_check_ctx_type(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + + /* A simple and efficient method to check the queue type */ + if (ctx->fd < 0 || ctx->fd > MAX_FD_NUM) { + WD_INFO("Invalid: this ctx not HW ctx.\n"); + return -WD_HW_EACCESS; + } + + return 0; +} static void wd_parse_log_level(void) { @@ -446,7 +449,7 @@ void wd_release_ctx(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; - if (!ctx) + if (!ctx || wd_check_ctx_type(h_ctx)) return; close(ctx->fd); @@ -461,7 +464,7 @@ int wd_ctx_start(handle_t h_ctx) struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; int ret; - if (!ctx) + if (!ctx || wd_check_ctx_type(h_ctx)) return -WD_EINVAL; ret = wd_ctx_set_io_cmd(h_ctx, UACCE_CMD_START, NULL); @@ -527,6 +530,7 @@ void wd_ctx_unmap_qfr(handle_t h_ctx, enum uacce_qfrt qfrt) unsigned long wd_ctx_get_region_size(handle_t h_ctx, enum uacce_qfrt qfrt) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + if (!ctx || qfrt >= UACCE_QFRT_MAX) return 0; return ctx->qfrs_offs[qfrt]; @@ -585,8 +589,16 @@ int wd_ctx_wait(handle_t h_ctx, __u16 ms) int wd_is_sva(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + int ret; - if (!ctx || !ctx->dev) + if (!ctx) + return -WD_EINVAL; + + ret = wd_check_ctx_type(h_ctx); + if (ret) + return ret; + + if (!ctx->dev) return -WD_EINVAL; if ((unsigned int)ctx->dev->flags & UACCE_DEV_SVA) diff --git a/wd_bmm.c b/wd_bmm.c new file mode 100644 index 0000000..21c46ca --- /dev/null +++ b/wd_bmm.c @@ -0,0 +1,1057 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +/* Block Memory Management (lib): Adapted for SVA mode */ +#define _GNU_SOURCE +#include <dirent.h> +#include <numa.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "wd_internal.h" +#include "wd_bmm.h" +#include "uacce.h" +#include "wd.h" + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define UACCE_DEV_IOMMU (1<<7) + +#define TAG_FREE 0x12345678 /* block is free */ +#define TAG_USED 0x87654321 /* block is busy */ +#define MAX_ALIGN_SIZE 0x1000 /* 4KB */ +#define MAX_BLOCK_SIZE 0x10000000 /* 256MB */ +#define BLK_BALANCE_SZ 0x100000ul +#define NUM_TIMES(x) (87 * (x) / 100) + +#define BYTE_SIZE 8 +#define BIT_SHIFT 3 + +struct wd_ss_region { + unsigned long long pa; + void *va; + size_t size; + TAILQ_ENTRY(wd_ss_region) next; +}; +TAILQ_HEAD(wd_ss_region_list, wd_ss_region); + +struct ctx_info { + int fd; + int iommu_type; + void *ss_va; + size_t ss_mm_size; + struct wd_ss_region_list ss_list; + struct wd_ss_region_list *head; + unsigned long qfrs_offset[UACCE_QFRT_MAX]; +}; + +struct wd_blk_hd { + unsigned int blk_tag; + unsigned int blk_num; + void *blk_dma; + void *blk; +}; + +struct wd_blkpool { + pthread_spinlock_t pool_lock; + unsigned int free_blk_num; + unsigned int alloc_failures; + struct ctx_info *cinfo; + struct wd_blk_hd *blk_array; // memory blk array + unsigned int total_blocks; // total blk number + unsigned char *free_bitmap; // free blk bitmap, 0 means unused + unsigned int bitmap_size; // bitmap's memory size + void *usr_mem_start; + void *act_start; + unsigned int act_hd_sz; + unsigned int act_blk_sz; + unsigned long act_mem_sz; + unsigned int dev_id; + struct wd_mempool_setup setup; +}; + +struct mem_ctx_node { + char alg_name[CRYPTO_MAX_ALG_NAME]; + handle_t h_ctx; + int numa_id; + bool used; + TAILQ_ENTRY(mem_ctx_node) list_node; +}; +static TAILQ_HEAD(, mem_ctx_node) g_mem_ctx_list = TAILQ_HEAD_INITIALIZER(g_mem_ctx_list); +static pthread_mutex_t g_mem_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + +handle_t wd_find_ctx(const char *alg_name) +{ + struct mem_ctx_node *close_node = NULL; + struct mem_ctx_node *node; + int min_distance = 0xFFFF; + int cpu = sched_getcpu(); + int nid = numa_node_of_cpu(cpu); + handle_t h_ctx = 0; + int numa_dis; + + if (!alg_name) { + WD_ERR("Invalid: alg_name is NULL!\n"); + return 0; + } + + pthread_mutex_lock(&g_mem_ctx_mutex); + TAILQ_FOREACH(node, &g_mem_ctx_list, list_node) { + if (node->used == false && strstr(node->alg_name, alg_name)) { + if (node->numa_id == nid) { + h_ctx = node->h_ctx; + node->used = true; + break; + } + + /* Query the queue with the shortest NUMA distance */ + numa_dis = numa_distance(nid, node->numa_id); + if (numa_dis < min_distance) { + min_distance = numa_dis; + close_node = node; + } + } + } + + /* If no ctx matches the NUMA ID, use the shortest-distance ctx instead */ + if (!h_ctx && close_node) { + h_ctx = close_node->h_ctx; + close_node->used = true; + } + pthread_mutex_unlock(&g_mem_ctx_mutex); + + if (!h_ctx) + WD_ERR("Failed to find mem ctx for alg: %s\n", alg_name); + + return h_ctx; +} + +void wd_remove_ctx_list(void) +{ + struct mem_ctx_node *node; + + pthread_mutex_lock(&g_mem_ctx_mutex); + /* Free all list nodes */ + while ((node = TAILQ_FIRST(&g_mem_ctx_list)) != NULL) { + /* Use TAILQ_REMOVE to remove list node
*/ + TAILQ_REMOVE(&g_mem_ctx_list, node, list_node); + free(node); + } + + pthread_mutex_unlock(&g_mem_ctx_mutex); +} + +int wd_insert_ctx_list(handle_t h_ctx, char *alg_name) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct mem_ctx_node *new_node; + int numa_id; + + if (!alg_name || !h_ctx) { + WD_ERR("Invalid: input params is NULL!\n"); + return -WD_EINVAL; + } + + /* A simple and efficient method to check the queue type */ + if (ctx->fd < 0 || ctx->fd > MAX_FD_NUM) { + WD_INFO("Invalid ctx: this ctx not HW ctx.\n"); + return 0; + } + + numa_id = ctx->dev->numa_id; + new_node = malloc(sizeof(struct mem_ctx_node)); + if (new_node) { + pthread_mutex_lock(&g_mem_ctx_mutex); + strncpy(new_node->alg_name, alg_name, CRYPTO_MAX_ALG_NAME - 1); + new_node->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + new_node->numa_id = numa_id; + new_node->h_ctx = h_ctx; + new_node->used = false; + TAILQ_INSERT_TAIL(&g_mem_ctx_list, new_node, list_node); + pthread_mutex_unlock(&g_mem_ctx_mutex); + return 0; + } + + return -WD_ENOMEM; +} + +static void wd_free_slice(struct ctx_info *cinfo) +{ + struct wd_ss_region *rgn; + + while (true) { + rgn = TAILQ_FIRST(&cinfo->ss_list); + if (!rgn) + break; + TAILQ_REMOVE(&cinfo->ss_list, rgn, next); + free(rgn); + } +} + +static void wd_add_slice(struct ctx_info *cinfo, struct wd_ss_region *rgn) +{ + struct wd_ss_region *rg; + + rg = TAILQ_LAST(&cinfo->ss_list, wd_ss_region_list); + if (rg) { + if (rg->pa + rg->size == rgn->pa) { + rg->size += rgn->size; + free(rgn); + return; + } + } + + TAILQ_INSERT_TAIL(&cinfo->ss_list, rgn, next); +} + +static void wd_show_ss_slices(struct ctx_info *cinfo) +{ + struct wd_ss_region *rgn; + int i = 0; + + TAILQ_FOREACH(rgn, cinfo->head, next) { + WD_ERR("slice-%d:size = 0x%lx\n", i, rgn->size); + i++; + } +} + +static void bitmap_set_bit(unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return; + + bitmap[bit_index >> BIT_SHIFT] |= (1 << (bit_index % BYTE_SIZE)); +} + +static void bitmap_clear_bit(unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return; + + bitmap[bit_index >> BIT_SHIFT] &= ~(1 << (bit_index % BYTE_SIZE)); +} + +static bool bitmap_test_bit(const unsigned char *bitmap, unsigned int bit_index) +{ + if (!bitmap) + return false; + + /* bit is 1, it indicates that the block has already been used and is not free */ + if ((bitmap[bit_index >> BIT_SHIFT] >> (bit_index % BYTE_SIZE)) & 0x1) + return false; + + return true; +} + +static void *wd_mmap_qfr(struct ctx_info *cinfo, enum uacce_qfrt qfrt, size_t size) +{ + off_t off; + + off = qfrt * getpagesize(); + + return mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED, cinfo->fd, off); +} + +static void wd_unmap_reserve_mem(void *addr, size_t size) +{ + int ret; + + if (!addr) + return; + + ret = munmap(addr, size); + if (ret) + WD_ERR("wd qfr unmap failed!\n"); +} + +static void *wd_map_reserve_mem(struct wd_blkpool *pool, size_t size) +{ + struct ctx_info *cinfo = pool->cinfo; + struct wd_ss_region *rgn; + unsigned long info; + size_t tmp = size; + unsigned long i = 0; + void *ptr; + int ret = 1; + + if (!cinfo) { + WD_ERR("ctx queue information is NULL!\n"); + return NULL; + } + + /* Make sure memory map granularity size align */ + if (!cinfo->iommu_type) + tmp = ALIGN(tmp, UACCE_GRAN_SIZE); + + ptr = wd_mmap_qfr(cinfo, UACCE_QFRT_SS, tmp); + if (ptr == MAP_FAILED) { + WD_ERR("wd drv mmap fail!(err = %d)\n", errno); + return NULL; + } + + cinfo->ss_va = ptr; + cinfo->ss_mm_size = tmp; + tmp = 0; + while (ret > 0) { + info = i; + 
ret = ioctl(cinfo->fd, UACCE_CMD_GET_SS_DMA, &info); + if (ret < 0) { + wd_show_ss_slices(cinfo); + WD_ERR("get DMA fail!\n"); + goto err_out; + } + + rgn = malloc(sizeof(*rgn)); + if (!rgn) { + WD_ERR("alloc ss region fail!\n"); + goto err_out; + } + memset(rgn, 0, sizeof(*rgn)); + + if (cinfo->iommu_type) + rgn->size = cinfo->ss_mm_size; + else + rgn->size = (info & UACCE_GRAN_NUM_MASK) << + UACCE_GRAN_SHIFT; + rgn->pa = info & (~UACCE_GRAN_NUM_MASK); + rgn->va = ptr + tmp; + tmp += rgn->size; + wd_add_slice(cinfo, rgn); + + i++; + } + + return ptr; + +err_out: + wd_free_slice(cinfo); + wd_unmap_reserve_mem(cinfo->ss_va, cinfo->ss_mm_size); + + return NULL; +} + +static int wd_pool_params_check(struct wd_mempool_setup *setup) +{ + if (!setup->block_num || !setup->block_size || + setup->block_size > MAX_BLOCK_SIZE) { + WD_ERR("Invalid: block_size or block_num(%x, %u)!\n", + setup->block_size, setup->block_num); + return -WD_EINVAL; + } + + /* Check parameters, and align_size must be 2^N */ + if (setup->align_size <= 0x1 || setup->align_size > MAX_ALIGN_SIZE || + (setup->align_size & (setup->align_size - 0x1))) { + WD_ERR("Invalid align_size.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_ctx_info_init(struct wd_ctx_h *ctx, struct wd_blkpool *p) +{ + struct ctx_info *cinfo; + + cinfo = calloc(1, sizeof(struct ctx_info)); + if (!cinfo) { + WD_ERR("failed to alloc ctx info memory.\n"); + return -WD_ENOMEM; + } + + cinfo->fd = ctx->fd; + cinfo->iommu_type = (unsigned int)ctx->dev->flags & UACCE_DEV_IOMMU; + cinfo->head = &cinfo->ss_list; + TAILQ_INIT(&cinfo->ss_list); + (void)memcpy(cinfo->qfrs_offset, ctx->qfrs_offs, + sizeof(cinfo->qfrs_offset)); + p->cinfo = (void *)cinfo; + + return 0; +} + +static int wd_pool_pre_layout(handle_t h_ctx, + struct wd_blkpool *p, + struct wd_mempool_setup *sp) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct ctx_info *cinfo = NULL; + unsigned int asz; + int ret; + + if (!ctx && !sp->ops.alloc) { + WD_ERR("ctx is NULL!\n"); + return -WD_EINVAL; + } + + if (!sp->ops.alloc) { + ret = wd_ctx_info_init(ctx, p); + if (ret) { + WD_ERR("failed to init ctx info.\n"); + return ret; + } + cinfo = p->cinfo; + } + + ret = wd_pool_params_check(sp); + if (ret) { + free(p->cinfo); + p->cinfo = NULL; + return ret; + } + + asz = sp->align_size; + + /* Get actual value by align */ + p->act_hd_sz = ALIGN(sizeof(struct wd_blk_hd), asz); + p->act_blk_sz = ALIGN(sp->block_size, asz); + p->act_mem_sz = (p->act_hd_sz + p->act_blk_sz) * + (unsigned long)sp->block_num + asz; + + /* + * When we use WD reserve memory and the blk_sz is larger than 1M, + * in order to ensure the mem_pool to be success, + * ensure that the allocated memory is an integer multiple of 1M. + */ + if (!sp->ops.alloc && (cinfo && !cinfo->iommu_type)) + p->act_mem_sz = ((p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1)) << 1; + + return WD_SUCCESS; +} + +/** + * wd_iova_map - Map virtual address to physical address + * @cinfo: context information + * @va: virtual address to map + * @sz: size of the mapping (not used in current implementation) + * + * When IOMMU is enabled, the PA is actually an IOVA; userspace still sees it + * as consistent and contiguous with the VA. + * When IOMMU is disabled, the PA refers to the kernel's physical address, which + * must be physically contiguous to be allocated by the kernel. + * Therefore, the PA address can be obtained from the offset of the VA. 
+ * + */ +static void *wd_iova_map(struct ctx_info *cinfo, void *va, size_t sz) +{ + struct wd_ss_region *rgn; + unsigned long offset; + void *dma_addr; + + if (!cinfo || !va) { + WD_ERR("wd iova map: parameter err!\n"); + return NULL; + } + + /* Search through all memory regions to find where va belongs */ + TAILQ_FOREACH(rgn, cinfo->head, next) { + if (rgn->va <= va && va < rgn->va + rgn->size) { + /* Calculate offset within the region */ + offset = (uintptr_t)va - (uintptr_t)rgn->va; + /* Add base physical address of the region */ + dma_addr = (void *)((uintptr_t)rgn->pa + offset); + return dma_addr; + } + } + + WD_ERR("wd iova map: va not found in any region\n"); + return NULL; +} + +/** + * wd_iova_unmap - Unmap physical address (no-op in non-IOMMU mode) + * @cinfo: context information + * @va: virtual address + * @dma: physical address + * @sz: size of the mapping (not used in current implementation) + * + * In non-IOMMU mode, this function does nothing as there's no need to unmap. + * In IOMMU mode, this would typically involve unmapping the DMA address. + */ +static void wd_iova_unmap(struct ctx_info *cinfo, void *va, void *dma, size_t sz) +{ + /* For no-iommu, dma-unmap doing nothing */ +} + +static void wd_pool_uninit(struct wd_blkpool *p) +{ + struct ctx_info *cinfo = p->cinfo; + struct wd_blk_hd *fhd = NULL; + unsigned long block_size; + unsigned int i; + + block_size = (unsigned long)p->act_hd_sz + p->act_blk_sz; + /* Clean up the allocated resources. */ + for (i = 0; i < p->total_blocks; i++) { + /* Release the previously allocated blocks. */ + fhd = &p->blk_array[i]; + wd_iova_unmap(cinfo, fhd->blk, fhd->blk_dma, block_size); + } + + free(p->free_bitmap); + p->free_bitmap = NULL; + free(p->blk_array); + p->blk_array = NULL; +} + +static int wd_pool_init(struct wd_blkpool *p) +{ + struct ctx_info *cinfo = p->cinfo; + __u32 blk_size = p->setup.block_size; + void *dma_start, *dma_end, *va; + struct wd_blk_hd *fhd = NULL; + struct wd_blk_hd *hd = NULL; + unsigned int i, j, act_num; + unsigned long block_size; + unsigned int dma_num = 0; + + p->act_start = (void *)ALIGN((uintptr_t)p->usr_mem_start, + p->setup.align_size); + + /* Calculate the actual number of allocatable blocks */ + block_size = (unsigned long)(p->act_hd_sz + p->act_blk_sz); + if (block_size == 0) { + WD_ERR("Invalid block size with header.\n"); + return -WD_EINVAL; + } + act_num = p->act_mem_sz / block_size; + if (!act_num) { + WD_ERR("Invalid memory size.\n"); + return -WD_EINVAL; + } + + /* Allocate block array */ + p->blk_array = (struct wd_blk_hd *)malloc(act_num * sizeof(struct wd_blk_hd)); + if (!p->blk_array) { + WD_ERR("Failed to allocate block array.\n"); + return -WD_ENOMEM; + } + + /* Allocate bitmap */ + p->total_blocks = act_num; + p->bitmap_size = (act_num + BYTE_SIZE - 1) >> BIT_SHIFT; + p->free_bitmap = (unsigned char *)calloc(1, p->bitmap_size); + if (!p->free_bitmap) { + WD_ERR("Failed to allocate free bitmap.\n"); + goto bitmap_error; + } + + /* Initialize all blocks. */ + for (i = 0; i < act_num; i++) { + /* Calculate the virtual address of the current block. */ + va = (void *)((uintptr_t)p->act_start + block_size * i); + + /* Get the physical address. */ + dma_start = wd_iova_map(cinfo, va, 0); + dma_end = wd_iova_map(cinfo, va + blk_size - 1, 0); + if (!dma_start || !dma_end) { + WD_ERR("wd_iova_map err.\n"); + /* Clean up the allocated resources. */ + goto init_blk_error; + } + + /* Check whether the physical addresses are contiguous. 
*/ + if ((uintptr_t)dma_end - (uintptr_t)dma_start != blk_size - 1) { + /* If OS kernel is not open SMMU, need to check dma address */ + WD_INFO("wd dma address not continuous.\n"); + /* Mark as unavailable, bit value is 1. */ + bitmap_set_bit(p->free_bitmap, i); + continue; + } + + /* Initialize the block. */ + hd = &p->blk_array[i]; + hd->blk_dma = dma_start; + hd->blk = va; + hd->blk_tag = TAG_FREE; + hd->blk_num = 0; + + dma_num++; + } + + /* + * if dma_num <= (1 / 1.15) * user's block_num, we think the pool + * is created with failure. + */ + if (dma_num <= NUM_TIMES(p->setup.block_num)) { + WD_ERR("dma_num = %u, not enough.\n", dma_num); + goto init_blk_error; + } + + p->free_blk_num = dma_num; + p->setup.block_num = dma_num; + + return WD_SUCCESS; + +init_blk_error: + /* Clean up the allocated resources. */ + for (j = 0; j < i; j++) { + /* Release the previously allocated blocks. */ + fhd = &p->blk_array[j]; + wd_iova_unmap(cinfo, fhd->blk, fhd->blk_dma, block_size); + } + free(p->free_bitmap); + +bitmap_error: + free(p->blk_array); + + return -WD_ENOMEM; +} + +static int usr_pool_init(struct wd_blkpool *p) +{ + struct wd_mempool_setup *sp = &p->setup; + __u32 blk_size = sp->block_size; + struct wd_blk_hd *hd = NULL; + __u32 i; + + p->act_start = (void *)ALIGN((uintptr_t)p->usr_mem_start, + sp->align_size); + for (i = 0; i < sp->block_num; i++) { + hd = (void *)((uintptr_t)p->act_start + (p->act_hd_sz + p->act_blk_sz) * i); + hd->blk = (void *)((uintptr_t)hd + p->act_hd_sz); + hd->blk_dma = sp->ops.iova_map(sp->ops.usr, hd->blk, blk_size); + if (!hd->blk_dma) { + WD_ERR("failed to map usr blk.\n"); + return -WD_ENOMEM; + } + hd->blk_tag = TAG_FREE; + } + + p->free_blk_num = sp->block_num; + + return WD_SUCCESS; +} + +static int wd_parse_dev_id(char *dev_name) +{ + char *last_dash = NULL; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. 
*/ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + +static int wd_mempool_init(handle_t h_ctx, struct wd_blkpool *pool, + struct wd_mempool_setup *setup) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct ctx_info *cinfo = pool->cinfo; + void *addr = NULL; + int ret; + + /* Use user's memory, and its ops alloc function */ + if (setup->ops.alloc && setup->ops.free && setup->ops.iova_map) { + addr = setup->ops.alloc(setup->ops.usr, pool->act_mem_sz); + if (!addr) { + WD_ERR("failed to allocate memory in user pool.\n"); + return -WD_EINVAL; + } + + pool->usr_mem_start = addr; + if (usr_pool_init(pool)) { + WD_ERR("failed to initialize user pool.\n"); + setup->ops.free(setup->ops.usr, addr); + return -WD_EINVAL; + } + } else { + /* Use wd to reserve memory */ + addr = wd_map_reserve_mem(pool, pool->act_mem_sz); + if (!addr) { + WD_ERR("wd pool failed to reserve memory.\n"); + return -WD_ENOMEM; + } + + pool->usr_mem_start = addr; + if (wd_pool_init(pool)) { + WD_ERR("failed to initialize wd pool.\n"); + goto err_out; + } + setup->block_num = pool->setup.block_num; + } + + ret = wd_parse_dev_id(ctx->dev_path); + if (ret < 0) { + wd_pool_uninit(pool); + goto err_out; + } + pool->dev_id = ret; + + return WD_SUCCESS; + +err_out: + if (pool->cinfo) { + wd_free_slice(cinfo); + wd_unmap_reserve_mem(cinfo->ss_va, cinfo->ss_mm_size); + pool->cinfo = NULL; + } + return -WD_EINVAL; +} + +void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) +{ + struct wd_blkpool *pool = NULL; + int ret; + + if (!setup || !h_ctx) { + WD_ERR("Input param is NULL!\n"); + return NULL; + } + + ret = wd_is_sva(h_ctx); + if (ret < 0) { + WD_ERR("failed to check device ctx!\n"); + return NULL; + } else if (ret == UACCE_DEV_SVA) { + WD_ERR("the device is SVA mode!\n"); + return NULL; + } + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + ret = pthread_spin_init(&pool->pool_lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + goto err_pool_alloc; + + memcpy(&pool->setup, setup, sizeof(pool->setup)); + + ret = wd_pool_pre_layout(h_ctx, pool, setup); + if (ret) + goto err_pool_layout; + + ret = wd_mempool_init(h_ctx, pool, setup); + if (ret) + goto err_pool_init; + + return pool; + +err_pool_init: + if (pool->cinfo) { + free(pool->cinfo); + pool->cinfo = NULL; + } +err_pool_layout: + pthread_spin_destroy(&pool->pool_lock); +err_pool_alloc: + free(pool); + + return NULL; +} + +void wd_mempool_free(handle_t h_ctx, void *pool) +{ + struct wd_mempool_setup *setup; + struct wd_blkpool *p = pool; + + if (!p || !h_ctx) { + WD_ERR("pool destroy err, pool or ctx is NULL.\n"); + return; + } + + setup = &p->setup; + if (p->free_blk_num != setup->block_num) { + WD_ERR("Can not destroy blk pool, as it's in use.\n"); + return; + } + + if (setup->ops.free) + setup->ops.free(setup->ops.usr, p->usr_mem_start); + + if (p->cinfo) { + /* Free block array memory */ + if (p->blk_array) + free(p->blk_array); + + if (p->free_bitmap) + free(p->free_bitmap); + + wd_free_slice(p->cinfo); + wd_unmap_reserve_mem(p->cinfo->ss_va, p->cinfo->ss_mm_size); + free(p->cinfo); + p->cinfo = NULL; + } + + pthread_spin_destroy(&p->pool_lock); + free(p); +} + +void wd_mem_free(void *pool, void *buf) +{ + struct 
wd_blkpool *p = pool; + struct wd_blk_hd *current_hd; + struct wd_blk_hd *hd; + unsigned int current_idx; + unsigned int blk_idx; + unsigned long offset; + unsigned int i, num; + unsigned long sz; + + if (unlikely(!p || !buf)) { + WD_ERR("free blk parameters err!\n"); + return; + } + + sz = p->act_hd_sz + p->act_blk_sz; + if (!sz) { + WD_ERR("memory pool blk size is zero!\n"); + return; + } + + if ((uintptr_t)buf < (uintptr_t)p->act_start) { + WD_ERR("free block addr is error.\n"); + return; + } + + /* Calculate the block index. */ + offset = (unsigned long)((uintptr_t)buf - (uintptr_t)p->act_start); + blk_idx = offset / sz; + + /* Check if the index is valid. */ + if (blk_idx >= p->total_blocks) { + WD_ERR("Invalid block index<%u>.\n", blk_idx); + return; + } + + /* Get the block header. */ + hd = &p->blk_array[blk_idx]; + num = hd->blk_num; + + pthread_spin_lock(&p->pool_lock); + /* Release all related blocks. */ + for (i = 0; i < num; i++) { + // Recalculate the index (since it is contiguous). + current_idx = blk_idx + i; + current_hd = &p->blk_array[current_idx]; + current_hd->blk_tag = TAG_FREE; + current_hd->blk_num = 0; + bitmap_clear_bit(p->free_bitmap, current_idx); + } + p->free_blk_num += num; + pthread_spin_unlock(&p->pool_lock); +} + +static int wd_find_contiguous_blocks(struct wd_blkpool *p, + unsigned int required_blocks, + unsigned int *start_block) +{ +#define MAX_SKIP_ATTEMPTS 10 + unsigned int consecutive_count = 0; + unsigned int skip_attempts = 0; + struct wd_blk_hd *hd, *tl; + unsigned int i; + + if (required_blocks == 0 || required_blocks > p->total_blocks) + return -WD_EINVAL; + + for (i = 0; i < p->total_blocks; i++) { + if (!bitmap_test_bit(p->free_bitmap, i)) { + consecutive_count = 0; + continue; + } + + if (consecutive_count == 0) + *start_block = i; + consecutive_count++; + + if (consecutive_count < required_blocks) + continue; + + /* Check DMA contiguity only if more than one block is needed */ + if (required_blocks > 1) { + hd = &p->blk_array[*start_block]; + tl = &p->blk_array[*start_block + required_blocks - 1]; + + if (((uintptr_t)tl->blk_dma - (uintptr_t)hd->blk_dma) != + ((uintptr_t)tl->blk - (uintptr_t)hd->blk)) { + /* Not contiguous, skip this start and try again */ + if (++skip_attempts > MAX_SKIP_ATTEMPTS) + return -WD_ENOMEM; + + i = *start_block; // will be incremented by loop + consecutive_count = 0; + continue; + } + } + + /* Found and DMA is contiguous */ + return WD_SUCCESS; + } + + return -WD_ENOMEM; +} + +void *wd_mem_alloc(void *pool, size_t size) +{ + unsigned int required_blocks; + unsigned int start_block = 0; + struct wd_blk_hd *hd = NULL; + struct wd_blkpool *p = pool; + unsigned int j; + int ret; + + if (unlikely(!p || !size)) { + WD_ERR("blk alloc pool is null!\n"); + return NULL; + } + + if (!p->act_blk_sz) { + WD_ERR("blk pool is error!\n"); + return NULL; + } + + /* Calculate the number of blocks required. */ + required_blocks = (size + p->act_blk_sz - 1) / p->act_blk_sz; + if (required_blocks > p->free_blk_num) { + p->alloc_failures++; + WD_ERR("Not enough free blocks.\n"); + return NULL; + } + + pthread_spin_lock(&p->pool_lock); + /* Find contiguous free blocks. 
*/ + ret = wd_find_contiguous_blocks(p, required_blocks, &start_block); + if (ret != 0) { + p->alloc_failures++; + pthread_spin_unlock(&p->pool_lock); + WD_ERR("Failed to find contiguous blocks.\n"); + return NULL; + } + + /* Mark all required blocks as used */ + for (j = start_block; j < start_block + required_blocks; j++) { + p->blk_array[j].blk_tag = TAG_USED; + bitmap_set_bit(p->free_bitmap, j); + } + + p->free_blk_num -= required_blocks; + hd = &p->blk_array[start_block]; + hd->blk_num = required_blocks; + pthread_spin_unlock(&p->pool_lock); + + return hd->blk; +} + +void *wd_mem_map(void *pool, void *buf, size_t sz) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + unsigned long offset; + unsigned long blk_sz; + unsigned long blk_idx; + + if (unlikely(!pool || !buf)) { + WD_ERR("blk map err, pool or buf is NULL!\n"); + return NULL; + } + + if (!sz || (uintptr_t)buf < (uintptr_t)p->act_start) { + WD_ERR("map buf addr is invalid.\n"); + return NULL; + } + /* Calculate the block index. */ + offset = (unsigned long)((uintptr_t)buf - (uintptr_t)p->act_start); + blk_sz = p->act_hd_sz + p->act_blk_sz; + blk_idx = offset / blk_sz; + + /* Check if the index is valid. */ + if (blk_idx >= p->total_blocks) { + WD_ERR("Invalid block index<%lu> in map.\n", blk_idx); + return NULL; + } + + hd = &p->blk_array[blk_idx]; + if (unlikely(hd->blk_tag != TAG_USED || + (uintptr_t)buf < (uintptr_t)hd->blk)) { + WD_ERR("dma map fail!\n"); + return NULL; + } + + return (void *)((uintptr_t)hd->blk_dma + ((uintptr_t)buf - + (uintptr_t)hd->blk)); +} + +void wd_mem_unmap(void *pool, void *buf_dma, void *buf, size_t sz) +{ + /* do nothing at no-iommu mode */ +} + +int wd_get_free_num(void *pool, __u32 *free_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !free_num) { + WD_ERR("get_free_blk_num err, parameter err!\n"); + return -WD_EINVAL; + } + + *free_num = __atomic_load_n(&p->free_blk_num, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +int wd_get_fail_num(void *pool, __u32 *fail_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !fail_num) { + WD_ERR("get_blk_alloc_failure err, pool is NULL!\n"); + return -WD_EINVAL; + } + + *fail_num = __atomic_load_n(&p->alloc_failures, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +__u32 wd_get_bufsize(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("failed to get bufsize, pool is NULL!\n"); + return 0; + } + + return p->act_blk_sz; +} + +__u32 wd_get_dev_id(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("failed to get dev id!\n"); + return 0; + } + + return p->dev_id; +} + diff --git a/wd_util.c b/wd_util.c index e8a2934..d0d83eb 100644 --- a/wd_util.c +++ b/wd_util.c @@ -13,6 +13,8 @@ #include <ctype.h> #include "wd_sched.h" #include "wd_util.h" +#include "wd_alg.h" +#include "wd_bmm.h" #define WD_ASYNC_DEF_POLL_NUM 1 #define WD_ASYNC_DEF_QUEUE_DEPTH 1024 @@ -100,11 +102,6 @@ struct acc_alg_item { const char *algtype; }; -struct wd_ce_ctx { - char *drv_name; - void *priv; -}; - static struct acc_alg_item alg_options[] = { {"zlib", "zlib"}, {"gzip", "gzip"}, @@ -172,6 +169,93 @@ static struct acc_alg_item alg_options[] = { {"", ""} }; +static void *wd_internal_alloc(void *usr, size_t size) +{ + if (size != 0) + return malloc(size); + else + return NULL; +} + +static void wd_internal_free(void *usr, void *va) +{ + if (va != NULL) + free(va); +} + +static __u32 wd_mem_bufsize(void *usr) +{ + /* Malloc memory min size is 1 Byte */ + return 1; +} + +int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type) +{ + int ret; + +
ret = wd_is_sva(h_ctx); + if (ret == UACCE_DEV_SVA || ret == -WD_HW_EACCESS) { + /* + * In software queue scenario, all memory is handled as virtual memory + * and processed in the same way as SVA mode + */ + mm_ops->sva_mode = true; + } else if (!ret) { + mm_ops->sva_mode = false; + } else { + WD_ERR("failed to check ctx!\n"); + return ret; + } + + /* + * Under SVA mode, there is no need to consider the memory type; + * directly proceed with virtual memory handling + */ + if (mm_ops->sva_mode) { + mm_ops->alloc = (void *)wd_internal_alloc; + mm_ops->free = (void *)wd_internal_free; + mm_ops->iova_map = NULL; + mm_ops->iova_unmap = NULL; + mm_ops->get_bufsize = (void *)wd_mem_bufsize; + mm_ops->usr = NULL; + return 0; + } + + switch (mem_type) { + case UADK_MEM_AUTO: + /* + * The memory pool needs to be allocated according to + * the block size when it is first executed in the UADK + */ + mm_ops->usr = NULL; + WD_ERR("automatic under No-SVA mode is not supported!\n"); + return -WD_EINVAL; + case UADK_MEM_USER: + if (!mm_ops->alloc || !mm_ops->free || !mm_ops->iova_map || + !mm_ops->iova_unmap || !mm_ops->usr) { // The user create a memory pool + WD_ERR("failed to check memory ops, some ops function is NULL!\n"); + return -WD_EINVAL; + } + break; + case UADK_MEM_PROXY: + if (!mm_ops->usr) { + WD_ERR("failed to check memory pool!\n"); + return -WD_EINVAL; + } + mm_ops->alloc = (void *)wd_mem_alloc; + mm_ops->free = (void *)wd_mem_free; + mm_ops->iova_map = (void *)wd_mem_map; + mm_ops->iova_unmap = (void *)wd_mem_unmap; + mm_ops->get_bufsize = (void *)wd_get_bufsize; + break; + default: + WD_ERR("failed to check memory type!\n"); + return -WD_EINVAL; + } + + return 0; +} + static void clone_ctx_to_internal(struct wd_ctx *ctx, struct wd_ctx_internal *ctx_in) { @@ -257,6 +341,12 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in, WD_ERR("failed to init ctxs lock!\n"); goto err_out; } + + ret = wd_insert_ctx_list(cfg->ctxs[i].ctx, in->alg_name); + if (ret) { + WD_ERR("failed to add ctx to mem list!\n"); + goto err_out; + } } in->ctxs = ctxs; @@ -318,6 +408,7 @@ void wd_clear_ctx_config(struct wd_ctx_config_internal *in) in->ctxs = NULL; } + wd_remove_ctx_list(); wd_shm_delete(in); } @@ -2485,7 +2576,7 @@ static int wd_init_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *l /* If the ctx set number is 0, the initialization is skipped. */ if (!ctx_set_num) - return 0; + return -WD_ENOPROC; dev = wd_find_dev_by_numa(list, numa_id); if (WD_IS_ERR(dev)) @@ -2573,7 +2664,9 @@ static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bm for (j = 0; j < op_type_num; j++) { ctx_nums = ctx_params->ctx_set_num[j]; ret = wd_init_ctx_set(attrs, list, idx, i, j); - if (ret) + if (ret == -WD_ENOPROC) + continue; + else if (ret) goto free_ctxs; ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j); if (ret) -- 2.33.0
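Usage sketch (illustrative, not part of the patch): a minimal No-SVA round-trip over the wd_bmm.h API added above. wd_get_accel_dev() and wd_request_ctx()/wd_release_ctx() are the pre-existing UADK helpers; the "cipher" algorithm name, the block geometry, and the error handling are assumptions for the example. Leaving setup.ops zeroed selects the UADK reserved-memory path; on an SVA-capable ctx, wd_mempool_alloc() refuses to create the pool.

#include <stdlib.h>
#include "wd.h"
#include "wd_bmm.h"

static int nosva_pool_demo(void)
{
	/* 64 blocks of 4KB each, block start addresses aligned to 64B */
	struct wd_mempool_setup setup = {
		.block_size = 4096,
		.block_num = 64,
		.align_size = 64,
	};
	struct uacce_dev *dev;
	void *pool, *buf, *dma;
	handle_t h_ctx;
	int ret = -WD_EINVAL;

	dev = wd_get_accel_dev("cipher");
	if (!dev)
		return -WD_ENODEV;

	h_ctx = wd_request_ctx(dev);
	free(dev);
	if (!h_ctx)
		return -WD_EINVAL;

	/* Reserves kernel memory via the UACCE_QFRT_SS region and slices it */
	pool = wd_mempool_alloc(h_ctx, &setup);
	if (!pool)
		goto out_ctx;

	buf = wd_mem_alloc(pool, setup.block_size);
	if (!buf)
		goto out_pool;

	/* Translate the block VA into a DMA address for the device */
	dma = wd_mem_map(pool, buf, setup.block_size);
	if (dma) {
		/* ... fill buf and pass buf/dma down the request path ... */
		wd_mem_unmap(pool, dma, buf, setup.block_size);
		ret = 0;
	}

	wd_mem_free(pool, buf);
out_pool:
	wd_mempool_free(h_ctx, pool);
out_ctx:
	wd_release_ctx(h_ctx);
	return ret;
}

Since wd_mem_alloc() hands out runs of whole blocks, a request larger than block_size only succeeds when enough adjacent free blocks are also DMA-contiguous (see wd_find_contiguous_blocks() above).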
From: Longfang Liu <liulongfang@huawei.com> In the previous UADK v2 framework, the entire UADK was bound to SVA mode, and device form and queue attributes were checked against SVA mode requirements. After adding the new No-SVA mode memory pool, these bindings need to be removed so that UADK can still execute business operations normally on an OS running in No-SVA mode. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_alg.h | 25 ++++--------------- libwd.map | 4 +++ wd.c | 4 --- wd_alg.c | 64 +++++++++++++++++------------------------------- wd_util.c | 5 ---- 5 files changed, 31 insertions(+), 71 deletions(-) diff --git a/include/wd_alg.h b/include/wd_alg.h index 2fc350a..5ff73ca 100644 --- a/include/wd_alg.h +++ b/include/wd_alg.h @@ -115,26 +115,6 @@ struct wd_alg_driver { int (*get_extend_ops)(void *ops); }; -inline int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf) -{ - return drv->init(drv, conf); -} - -inline void wd_alg_driver_exit(struct wd_alg_driver *drv) -{ - drv->exit(drv); -} - -inline int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) -{ - return drv->send(drv, ctx, msg); -} - -inline int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) -{ - return drv->recv(drv, ctx, msg); -} - /* * wd_alg_driver_register() - Register a device driver. * @wd_alg_driver: a device driver that supports an algorithm. @@ -194,6 +174,11 @@ bool wd_drv_alg_support(const char *alg_name, void wd_enable_drv(struct wd_alg_driver *drv); void wd_disable_drv(struct wd_alg_driver *drv); +int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf); +void wd_alg_driver_exit(struct wd_alg_driver *drv); +int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg); +int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg); + struct wd_alg_list *wd_get_alg_head(void); #ifdef WD_STATIC_DRV diff --git a/libwd.map b/libwd.map index b1b90b3..90eb5c5 100644 --- a/libwd.map +++ b/libwd.map @@ -49,6 +49,10 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + wd_alg_driver_init; + wd_alg_driver_exit; + wd_alg_driver_send; + wd_alg_driver_recv; wd_find_ctx; wd_get_dev_id; diff --git a/wd.c b/wd.c index 3e867b6..657fbae 100644 --- a/wd.c +++ b/wd.c @@ -238,10 +238,6 @@ static int get_dev_info(struct uacce_dev *dev) ret = get_int_attr(dev, "flags", &dev->flags); if (ret < 0) return ret; - else if (!((unsigned int)dev->flags & UACCE_DEV_SVA)) { - WD_ERR("skip none sva uacce device!\n"); - return -WD_ENODEV; - } ret = get_int_attr(dev, "region_mmio_size", &value); if (ret < 0) diff --git a/wd_alg.c b/wd_alg.c index 08f0e2e..9c7c0fd 100644 --- a/wd_alg.c +++ b/wd_alg.c @@ -24,46 +24,6 @@ static struct wd_alg_list *alg_list_tail = &alg_list_head; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -static bool wd_check_dev_sva(const char *dev_name) -{ - char dev_path[PATH_MAX] = {'\0'}; - char buf[DEV_SVA_SIZE] = {'\0'}; - unsigned int val; - ssize_t ret; - int fd; - - ret = snprintf(dev_path, PATH_STR_SIZE, "%s/%s/%s", SYS_CLASS_DIR, - dev_name, SVA_FILE_NAME); - if (ret < 0) { - WD_ERR("failed to snprintf, device name: %s!\n", dev_name); - return false; - } - - /** - * The opened file is the specified device driver file. - * no need for realpath processing.
- */ - fd = open(dev_path, O_RDONLY, 0); - if (fd < 0) { - WD_ERR("failed to open %s(%d)!\n", dev_path, -errno); - return false; - } - - ret = read(fd, buf, DEV_SVA_SIZE - 1); - if (ret <= 0) { - WD_ERR("failed to read anything at %s!\n", dev_path); - close(fd); - return false; - } - close(fd); - - val = strtol(buf, NULL, STR_DECIMAL); - if (val & UACCE_DEV_SVA) - return true; - - return false; -} - static bool wd_check_accel_dev(const char *dev_name) { struct dirent *dev_dir; @@ -80,8 +40,7 @@ static bool wd_check_accel_dev(const char *dev_name) !strncmp(dev_dir->d_name, "..", LINUX_PRTDIR_SIZE)) continue; - if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name)) && - wd_check_dev_sva(dev_dir->d_name)) { + if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name))) { closedir(wd_class); return true; } @@ -413,3 +372,24 @@ void wd_release_drv(struct wd_alg_driver *drv) select_node->refcnt--; pthread_mutex_unlock(&mutex); } + +int wd_alg_driver_init(struct wd_alg_driver *drv, void *conf) +{ + return drv->init(drv, conf); +} + +void wd_alg_driver_exit(struct wd_alg_driver *drv) +{ + drv->exit(drv); +} + +int wd_alg_driver_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) +{ + return drv->send(drv, ctx, msg); +} + +int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) +{ + return drv->recv(drv, ctx, msg); +} + diff --git a/wd_util.c b/wd_util.c index d0d83eb..beb4131 100644 --- a/wd_util.c +++ b/wd_util.c @@ -1984,11 +1984,6 @@ int wd_init_param_check(struct wd_ctx_config *config, struct wd_sched *sched) return -WD_EINVAL; } - if (!wd_is_sva(config->ctxs[0].ctx)) { - WD_ERR("invalid: the mode is non sva, please check system!\n"); - return -WD_EINVAL; - } - return 0; } -- 2.33.0
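Sketch of the exported wrappers in use (illustrative, not part of the patch): now that wd_alg_driver_init/send/recv/exit are real libwd symbols rather than header inlines, a caller outside the algorithm layer can drive a driver directly. wd_request_drv()/wd_release_drv() are the existing driver lookup helpers; the "zlib" name and the conf/msg contents are placeholders whose layout is driver-specific.

#include "wd.h"
#include "wd_alg.h"

static int drv_roundtrip(handle_t h_ctx, void *conf, void *msg)
{
	struct wd_alg_driver *drv;
	int ret;

	/* Look up a registered driver for the algorithm */
	drv = wd_request_drv("zlib", false);
	if (!drv)
		return -WD_ENODEV;

	ret = wd_alg_driver_init(drv, conf);
	if (ret)
		goto out_put;

	ret = wd_alg_driver_send(drv, h_ctx, msg);
	if (!ret) {
		/* Sync-style completion: poll until the device answers */
		do {
			ret = wd_alg_driver_recv(drv, h_ctx, msg);
		} while (ret == -WD_EAGAIN);
	}

	wd_alg_driver_exit(drv);
out_put:
	wd_release_drv(drv);
	return ret;
}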
From: Longfang Liu <liulongfang@huawei.com> In certain special scenarios (e.g., in No-SVA mode), the queues used by the scheduler must come from a specific device; queues from other devices cannot be used to perform business operations. Therefore, a specialized scheduler needs to be added to ensure that subsequent business operations are executed only on the designated device. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_internal.h | 2 +- include/wd_sched.h | 3 + wd_sched.c | 195 ++++++++++++++++++++++++++++++++++++------ wd_util.c | 43 +++++++++- 4 files changed, 214 insertions(+), 29 deletions(-) diff --git a/include/wd_internal.h b/include/wd_internal.h index cd90ebf..769192d 100644 --- a/include/wd_internal.h +++ b/include/wd_internal.h @@ -13,7 +13,7 @@ #ifdef __cplusplus extern "C" { #endif - +#define NOSVA_DEVICE_MAX 16 #define DECIMAL_NUMBER 10 #define MAX_FD_NUM 65535 diff --git a/include/wd_sched.h b/include/wd_sched.h index be541c6..949396e 100644 --- a/include/wd_sched.h +++ b/include/wd_sched.h @@ -23,6 +23,8 @@ enum sched_policy_type { SCHED_POLICY_NONE, /* requests will need a fixed ctx */ SCHED_POLICY_SINGLE, + /* requests will be sent to ctxs and dev_id */ + SCHED_POLICY_DEV, SCHED_POLICY_BUTT, }; @@ -32,6 +34,7 @@ struct sched_params { __u8 mode; __u32 begin; __u32 end; + __u32 dev_id; }; typedef int (*user_poll_func)(__u32 pos, __u32 expect, __u32 *count); diff --git a/wd_sched.c b/wd_sched.c index 204ed23..46a8c00 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -33,6 +33,7 @@ struct sched_key { __u8 mode; __u32 sync_ctxid; __u32 async_ctxid; + __u32 dev_id; }; /* @@ -76,7 +77,8 @@ struct wd_sched_info { struct wd_sched_ctx { __u32 policy; __u32 type_num; - __u16 numa_num; + __u16 numa_num; + __u16 dev_num; user_poll_func poll_func; int numa_map[NUMA_NUM_NODES]; struct wd_sched_info sched_info[0]; }; @@ -309,7 +311,7 @@ static int session_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 * struct wd_sched_info *sched_info; __u32 loop_time = 0; __u32 last_count = 0; - __u16 i; + __u16 i, region_num; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -323,14 +325,18 @@ } sched_info = sched_ctx->sched_info; + if (sched_ctx->policy == SCHED_POLICY_DEV) + region_num = sched_ctx->dev_num; + else + region_num = sched_ctx->numa_num; /* - * Try different numa's ctx if we can't receive any + * Try different region's ctx if we can't receive any * package last time, it is more efficient. In most * bad situation, poll ends after MAX_POLL_TIMES loop. */ while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < sched_ctx->numa_num;) { + for (i = 0; i < region_num;) { /* If current numa is not valid, find next.
*/ if (!sched_info[i].valid) { i++; continue; } @@ -445,6 +451,115 @@ static int sched_single_poll_policy(handle_t h_sched_ctx, return 0; } +static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +{ + if (key->dev_id >= sched_ctx->dev_num || key->mode >= SCHED_MODE_BUTT || + key->type >= sched_ctx->type_num) { + WD_ERR("invalid: sched key's dev: %u, mode: %u, type: %u!\n", + key->dev_id, key->mode, key->type); + return false; + } + + return true; +} + +/* + * sched_dev_get_region - Get ctx region from ctx_map by the wd comp arg + */ +static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ctx, + const struct sched_key *key) +{ + struct wd_sched_info *sched_info; + + sched_info = sched_ctx->sched_info; + if (key->dev_id < sched_ctx->dev_num && + sched_info[key->dev_id].ctx_region[key->mode][key->type].valid) + return &sched_info[key->dev_id].ctx_region[key->mode][key->type]; + + /* + * If the scheduling domain of dev_id does not exist, + * tasks cannot be executed using queues from other devices; + * otherwise, an SMMU error will occur. + */ + return NULL; +} + +/* + * session_dev_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. + * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. + * @sched_key: The key of schedule region. + * @sched_mode: The sched async/sync mode. + * + * The user must init the schedule info through wd_sched_rr_instance + */ +static __u32 session_dev_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, + const int sched_mode) +{ + struct sched_ctx_region *region = NULL; + bool ret; + + key->mode = sched_mode; + ret = sched_dev_key_valid(sched_ctx, key); + if (!ret) + return INVALID_POS; + + region = sched_dev_get_region(sched_ctx, key); + if (!region) + return INVALID_POS; + + return sched_get_next_pos_rr(region, NULL); +} + +static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + int cpu = sched_getcpu(); + int node = numa_node_of_cpu(cpu); + struct sched_key *skey; + + if (node < 0) { + WD_ERR("invalid: failed to get numa node!\n"); + return (handle_t)(-WD_EINVAL); + } + + if (!sched_ctx) { + WD_ERR("invalid: sched ctx is NULL!\n"); + return (handle_t)(-WD_EINVAL); + } + + skey = malloc(sizeof(struct sched_key)); + if (!skey) { + WD_ERR("failed to alloc memory for session sched key!\n"); + return (handle_t)(-WD_ENOMEM); + } + + if (!param) { + WD_DEBUG("no-sva session didn't set scheduler parameters!\n"); + return (handle_t)(-WD_EINVAL); + } + + skey->type = param->type; + skey->dev_id = param->dev_id; + if (skey->dev_id > NOSVA_DEVICE_MAX) { + WD_ERR("failed to get valid sched device region!\n"); + goto out; + } + + skey->sync_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); + skey->async_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); + if (skey->sync_ctxid == INVALID_POS && skey->async_ctxid == INVALID_POS) { + WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); + goto out; + } + + return (handle_t)skey; + +out: + free(skey); + return (handle_t)(-WD_EINVAL); +} + static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { { .name = "RR scheduler", @@ -464,6 +579,12 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .sched_init = sched_single_init, .pick_next_ctx = sched_single_pick_next_ctx, .poll_policy = sched_single_poll_policy, + }, {
+ .name = "Device RR scheduler", + .sched_policy = SCHED_POLICY_DEV, + .sched_init = session_dev_sched_init, + .pick_next_ctx = session_sched_pick_next_ctx, + .poll_policy = session_sched_poll_policy, } }; @@ -505,6 +626,7 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para struct wd_sched_info *sched_info = NULL; struct wd_sched_ctx *sched_ctx = NULL; __u8 type, mode; + __u32 dev_id; int numa_id; if (!sched || !sched->h_sched_ctx || !param) { @@ -518,16 +640,24 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } numa_id = param->numa_id; + dev_id = param->dev_id; type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; - if (numa_id >= sched_ctx->numa_num || numa_id < 0) { + if (sched_ctx->numa_num > 0 && (numa_id >= sched_ctx->numa_num || + numa_id < 0)) { WD_ERR("invalid: sched_ctx's numa_id is %d, numa_num is %u!\n", numa_id, sched_ctx->numa_num); return -WD_EINVAL; } + if (sched_ctx->dev_num > 0 && dev_id >= sched_ctx->dev_num) { + WD_ERR("invalid: sched_ctx's dev id is %u, device num is %u!\n", + dev_id, sched_ctx->dev_num); + return -WD_EINVAL; + } + if (type >= sched_ctx->type_num) { WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", type, sched_ctx->type_num); @@ -540,24 +670,26 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - sched_info = sched_ctx->sched_info; + if (sched_ctx->policy == SCHED_POLICY_DEV) + sched_info = &sched_ctx->sched_info[dev_id]; + else + sched_info = &sched_ctx->sched_info[numa_id]; - if (!sched_info[numa_id].ctx_region[mode]) { + if (!sched_info->ctx_region[mode]) { WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", numa_id, mode); return -WD_EINVAL; } - sched_info[numa_id].ctx_region[mode][type].begin = param->begin; - sched_info[numa_id].ctx_region[mode][type].end = param->end; - sched_info[numa_id].ctx_region[mode][type].last = param->begin; - sched_info[numa_id].ctx_region[mode][type].valid = true; - sched_info[numa_id].valid = true; + sched_info->ctx_region[mode][type].begin = param->begin; + sched_info->ctx_region[mode][type].end = param->end; + sched_info->ctx_region[mode][type].last = param->begin; + sched_info->ctx_region[mode][type].valid = true; + sched_info->valid = true; - wd_sched_map_cpus_to_dev(sched_ctx); - - pthread_mutex_init(&sched_info[numa_id].ctx_region[mode][type].lock, - NULL); + if (sched_ctx->policy != SCHED_POLICY_DEV) + wd_sched_map_cpus_to_dev(sched_ctx); + pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); return 0; } @@ -566,7 +698,7 @@ void wd_sched_rr_release(struct wd_sched *sched) { struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; - int i, j; + int i, j, region_num; if (!sched) return; @@ -575,11 +707,17 @@ void wd_sched_rr_release(struct wd_sched *sched) if (!sched_ctx) goto ctx_out; + /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + if (sched_ctx->policy == SCHED_POLICY_DEV) + region_num = sched_ctx->dev_num; + else + region_num = sched_ctx->numa_num; + sched_info = sched_ctx->sched_info; if (!sched_info) goto info_out; - for (i = 0; i < sched_ctx->numa_num; i++) { + for (i = 0; i < region_num; i++) { for (j = 0; j < SCHED_MODE_BUTT; j++) { if (sched_info[i].ctx_region[j]) { free(sched_info[i].ctx_region[j]); @@ -620,10 +758,7 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int i, 
j; - - if (numa_num_check(numa_num)) - return NULL; + int i, j, region_num; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -643,7 +778,19 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; } - sched_ctx->numa_num = numa_num; + + /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + if (sched_type == SCHED_POLICY_DEV) { + sched_ctx->numa_num = 0; + sched_ctx->dev_num = numa_num; + region_num = sched_ctx->dev_num; + } else { + sched_ctx->numa_num = numa_num; + sched_ctx->dev_num = 0; + region_num = sched_ctx->numa_num; + if (numa_num_check(sched_ctx->numa_num)) + goto err_out; + } sched->h_sched_ctx = (handle_t)sched_ctx; if (sched_type == SCHED_POLICY_NONE || @@ -651,7 +798,7 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, goto simple_ok; sched_info = sched_ctx->sched_info; - for (i = 0; i < numa_num; i++) { + for (i = 0; i < region_num; i++) { for (j = 0; j < SCHED_MODE_BUTT; j++) { sched_info[i].ctx_region[j] = calloc(1, sizeof(struct sched_ctx_region) * type_num); diff --git a/wd_util.c b/wd_util.c index beb4131..b96e125 100644 --- a/wd_util.c +++ b/wd_util.c @@ -256,6 +256,31 @@ int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type) return 0; } +static int wd_parse_dev_id(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + char *dev_path = ctx->dev_path; + char *last_str = NULL; + char *endptr; + int dev_id; + + if (!dev_path) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_str = strrchr(dev_path, '-'); + if (!last_str || *(last_str + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_str + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + static void clone_ctx_to_internal(struct wd_ctx *ctx, struct wd_ctx_internal *ctx_in) { @@ -2619,15 +2644,21 @@ static void wd_release_ctx_set(struct wd_ctx_config *ctx_config) } } -static int wd_instance_sched_set(struct wd_sched *sched, struct wd_ctx_nums ctx_nums, +static int wd_instance_sched_set(struct wd_init_attrs *attrs, struct wd_ctx_nums ctx_nums, int idx, int numa_id, int op_type) { + struct wd_sched *sched = attrs->sched; struct sched_params sparams; - int i, end, ret = 0; + int i, end, dev_id, ret = 0; + + dev_id = wd_parse_dev_id(attrs->ctx_config->ctxs[idx].ctx); + if (dev_id < 0) + return -WD_EINVAL; for (i = 0; i < CTX_MODE_MAX; i++) { sparams.numa_id = numa_id; sparams.type = op_type; + sparams.dev_id = dev_id; sparams.mode = i; sparams.begin = idx + ctx_nums.sync_ctx_num * i; end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i; @@ -2663,7 +2694,7 @@ static int wd_init_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bm continue; else if (ret) goto free_ctxs; - ret = wd_instance_sched_set(attrs->sched, ctx_nums, idx, i, j); + ret = wd_instance_sched_set(attrs, ctx_nums, idx, i, j); if (ret) goto free_ctxs; idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num); @@ -2931,7 +2962,11 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) } attrs->ctx_config = ctx_config; - alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, + if (sched_type == SCHED_POLICY_DEV) + alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, + NOSVA_DEVICE_MAX, alg_poll_func); + else + alg_sched = 
wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, numa_max_node() + 1, alg_poll_func); if (!alg_sched) { WD_ERR("fail to instance scheduler\n"); -- 2.33.0
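The wd_parse_dev_id() helper above recovers the device ID from the ctx's character-device path (for example "hisi_sec2-4"), taking the digits after the last '-'. Below is a minimal standalone sketch of that parsing rule; the example paths and the main() harness are illustrative, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse the trailing "-<id>" of a uacce device name such as "hisi_sec2-4".
 * Returns the ID, or -1 on malformed input (mirroring -WD_EINVAL above).
 */
static int parse_dev_id(const char *dev_path)
{
        const char *last_dash;
        char *endptr;
        long dev_id;

        if (!dev_path)
                return -1;

        /* Find the last '-' and require digits after it. */
        last_dash = strrchr(dev_path, '-');
        if (!last_dash || last_dash[1] == '\0')
                return -1;

        dev_id = strtol(last_dash + 1, &endptr, 10);
        if (*endptr != '\0' || dev_id < 0)
                return -1;

        return (int)dev_id;
}

int main(void)
{
        printf("%d\n", parse_dev_id("hisi_sec2-4"));  /* prints 4 */
        printf("%d\n", parse_dev_id("hisi_sec2-"));   /* prints -1 */
        return 0;
}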
From: Longfang Liu <liulongfang@huawei.com> In the uadk device scheduling algorithm, when the device ID numbering scheme differs from the device count numbering scheme, all accelerator devices are uniformly numbered when passed through to user space. This can result in device IDs being significantly larger than the actual number of devices. Therefore, a mapping mechanism is required to handle the relationship between the number of devices within a process and their corresponding device IDs, thereby resolving this issue. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- include/wd_internal.h | 3 +- wd_sched.c | 149 +++++++++++++++++++++++++++++++----------- wd_util.c | 2 +- 3 files changed, 115 insertions(+), 39 deletions(-) diff --git a/include/wd_internal.h b/include/wd_internal.h index 769192d..d899555 100644 --- a/include/wd_internal.h +++ b/include/wd_internal.h @@ -13,7 +13,8 @@ #ifdef __cplusplus extern "C" { #endif -#define NOSVA_DEVICE_MAX 16 + +#define DEVICE_REGION_MAX 16 #define DECIMAL_NUMBER 10 #define MAX_FD_NUM 65535 diff --git a/wd_sched.c b/wd_sched.c index 46a8c00..ec1e7b6 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -65,6 +65,10 @@ struct wd_sched_info { bool valid; }; +struct dev_region_map { + __u32 dev_id; + __u32 region_id; +}; /* * wd_sched_ctx - define the context of the scheduler. * @policy: define the policy of the scheduler. @@ -81,6 +85,7 @@ struct wd_sched_ctx { __u16 dev_num; user_poll_func poll_func; int numa_map[NUMA_NUM_NODES]; + struct dev_region_map dev_id_map[DEVICE_REGION_MAX]; struct wd_sched_info sched_info[0]; }; @@ -453,13 +458,27 @@ static int sched_single_poll_policy(handle_t h_sched_ctx, static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) { - if (key->dev_id >= sched_ctx->dev_num || key->mode >= SCHED_MODE_BUTT || - key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's dev: %u, mode: %u, type: %u!\n", + bool found = false; + int i; + + if (key->mode >= SCHED_MODE_BUTT || key->type >= sched_ctx->type_num) { + WD_ERR("invalid: sched key's device id: %u, mode: %u, type: %u!\n", key->dev_id, key->mode, key->type); return false; } + for (i = 0; i < sched_ctx->dev_num; i++) { + if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { + found = true; + break; + } + } + + if (!found) { + WD_ERR("invalid: dev_id %u is not registered!\n", key->dev_id); + return false; + } + return true; } @@ -470,11 +489,16 @@ static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ const struct sched_key *key) { struct wd_sched_info *sched_info; - - sched_info = sched_ctx->sched_info; - if (key->dev_id < sched_ctx->dev_num && - sched_info[key->dev_id].ctx_region[key->mode][key->type].valid) - return &sched_info[key->dev_id].ctx_region[key->mode][key->type]; + int i, region_id; + + for (i = 0; i < sched_ctx->dev_num; i++) { + if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { + region_id = sched_ctx->dev_id_map[i].region_id; + sched_info = &sched_ctx->sched_info[region_id]; + if (sched_info->ctx_region[key->mode][key->type].valid) + return &sched_info->ctx_region[key->mode][key->type]; + } + } /* * If the scheduling domain of dev_id does not exist, @@ -528,23 +552,19 @@ static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) return (handle_t)(-WD_EINVAL); } + if (!param) { + WD_DEBUG("no-sva session don't set scheduler parameters!\n"); + return (handle_t)(-WD_EINVAL); + } + skey = malloc(sizeof(struct sched_key)); if 
(!skey) { WD_ERR("failed to alloc memory for session sched key!\n"); return (handle_t)(-WD_ENOMEM); } - if (!param) { - WD_DEBUG("no-sva session don't set scheduler parameters!\n"); - return (handle_t)(-WD_EINVAL); - } - skey->type = param->type; skey->dev_id = param->dev_id; - if (skey->dev_id > NOSVA_DEVICE_MAX) { - WD_ERR("failed to get valid sched device region!\n"); - goto out; - } skey->sync_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); skey->async_ctxid = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); @@ -621,12 +641,68 @@ static void wd_sched_map_cpus_to_dev(struct wd_sched_ctx *sched_ctx) } } +static int wd_instance_dev_region(struct wd_sched_ctx *sched_ctx, + struct sched_params *param) +{ + struct wd_sched_info *sched_info; + __u32 region_idx = INVALID_POS; + __u8 type, mode; + __u32 dev_id; + int i; + + dev_id = param->dev_id; + type = param->type; + mode = param->mode; + + /* Check whether dev_id has already been registered. */ + for (i = 0; i < sched_ctx->dev_num; i++) { + if (sched_ctx->dev_id_map[i].dev_id == dev_id) { + region_idx = sched_ctx->dev_id_map[i].region_id; + break; + } + } + + /* If not registered, allocate a new region. */ + if (region_idx == INVALID_POS) { + if (sched_ctx->dev_num >= DEVICE_REGION_MAX) { + WD_ERR("too many devices registered!\n"); + return -WD_EINVAL; + } + + region_idx = sched_ctx->dev_num; + sched_ctx->dev_id_map[region_idx].dev_id = dev_id; + sched_ctx->dev_id_map[region_idx].region_id = region_idx; + sched_ctx->dev_num++; + + sched_info = &sched_ctx->sched_info[region_idx]; + } else { + sched_info = &sched_ctx->sched_info[region_idx]; + } + + /* Check whether the mode and type have already been registered. */ + if (sched_info->ctx_region[mode][type].valid) { + WD_INFO("device %u mode %u type %u already registered\n", + dev_id, mode, type); + return WD_SUCCESS; + } + + /* Initialize the scheduling region for this mode and type */ + sched_info->ctx_region[mode][type].begin = param->begin; + sched_info->ctx_region[mode][type].end = param->end; + sched_info->ctx_region[mode][type].last = param->begin; + sched_info->ctx_region[mode][type].valid = true; + sched_info->valid = true; + + pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); + + return WD_SUCCESS; +} + int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *param) { struct wd_sched_info *sched_info = NULL; struct wd_sched_ctx *sched_ctx = NULL; __u8 type, mode; - __u32 dev_id; int numa_id; if (!sched || !sched->h_sched_ctx || !param) { @@ -640,7 +716,6 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } numa_id = param->numa_id; - dev_id = param->dev_id; type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; @@ -652,12 +727,6 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - if (sched_ctx->dev_num > 0 && dev_id >= sched_ctx->dev_num) { - WD_ERR("invalid: sched_ctx's dev id is %u, device num is %u!\n", - dev_id, sched_ctx->dev_num); - return -WD_EINVAL; - } - if (type >= sched_ctx->type_num) { WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", type, sched_ctx->type_num); @@ -671,10 +740,9 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para } if (sched_ctx->policy == SCHED_POLICY_DEV) - sched_info = &sched_ctx->sched_info[dev_id]; - else - sched_info = &sched_ctx->sched_info[numa_id]; + return wd_instance_dev_region(sched_ctx, param); + 
sched_info = &sched_ctx->sched_info[numa_id]; if (!sched_info->ctx_region[mode]) { WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", numa_id, mode); @@ -687,11 +755,10 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para sched_info->ctx_region[mode][type].valid = true; sched_info->valid = true; - if (sched_ctx->policy != SCHED_POLICY_DEV) - wd_sched_map_cpus_to_dev(sched_ctx); + wd_sched_map_cpus_to_dev(sched_ctx); pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); - return 0; + return WD_SUCCESS; } void wd_sched_rr_release(struct wd_sched *sched) @@ -709,7 +776,7 @@ void wd_sched_rr_release(struct wd_sched *sched) /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ if (sched_ctx->policy == SCHED_POLICY_DEV) - region_num = sched_ctx->dev_num; + region_num = DEVICE_REGION_MAX; else region_num = sched_ctx->numa_num; @@ -758,7 +825,8 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, struct wd_sched_info *sched_info; struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int i, j, region_num; + int region_num; + int i, j; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -772,8 +840,13 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, return NULL; } + if (sched_type == SCHED_POLICY_DEV) + region_num = DEVICE_REGION_MAX; + else + region_num = numa_num; + sched_ctx = calloc(1, sizeof(struct wd_sched_ctx) + - sizeof(struct wd_sched_info) * numa_num); + sizeof(struct wd_sched_info) * region_num); if (!sched_ctx) { WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; @@ -782,12 +855,14 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ if (sched_type == SCHED_POLICY_DEV) { sched_ctx->numa_num = 0; - sched_ctx->dev_num = numa_num; - region_num = sched_ctx->dev_num; + sched_ctx->dev_num = 0; + for (i = 0; i < DEVICE_REGION_MAX; i++) { + sched_ctx->dev_id_map[i].dev_id = INVALID_POS; + sched_ctx->dev_id_map[i].region_id = INVALID_POS; + } } else { sched_ctx->numa_num = numa_num; sched_ctx->dev_num = 0; - region_num = sched_ctx->numa_num; if (numa_num_check(sched_ctx->numa_num)) goto err_out; } diff --git a/wd_util.c b/wd_util.c index b96e125..d0d62a1 100644 --- a/wd_util.c +++ b/wd_util.c @@ -2964,7 +2964,7 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) if (sched_type == SCHED_POLICY_DEV) alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, - NOSVA_DEVICE_MAX, alg_poll_func); + DEVICE_REGION_MAX, alg_poll_func); else alg_sched = wd_sched_rr_alloc(sched_type, attrs->ctx_params->op_type_num, numa_max_node() + 1, alg_poll_func); -- 2.33.0
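With this change the kernel-visible device IDs may be sparse (e.g. 0, 7, 23) while the scheduler keeps at most DEVICE_REGION_MAX dense regions, and dev_id_map translates one into the other. A reduced sketch of the lookup-or-register step follows, using simplified types rather than the real struct wd_sched_ctx:

#define REGION_MAX 16   /* stands in for DEVICE_REGION_MAX */

struct id_map {
        unsigned int dev_id;
        unsigned int region_id;
};

struct mini_sched {
        unsigned int dev_num;           /* regions currently in use */
        struct id_map map[REGION_MAX];
};

/*
 * Return the dense region index for a (possibly sparse) dev_id,
 * registering it on first use; -1 once the table is full.
 */
static int region_for_dev(struct mini_sched *s, unsigned int dev_id)
{
        unsigned int i;

        for (i = 0; i < s->dev_num; i++)
                if (s->map[i].dev_id == dev_id)
                        return (int)s->map[i].region_id;

        if (s->dev_num >= REGION_MAX)
                return -1;

        s->map[s->dev_num].dev_id = dev_id;
        s->map[s->dev_num].region_id = s->dev_num;
        return (int)s->dev_num++;
}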
From: Longfang Liu <liulongfang@huawei.com> In the original uadk functionality, under SVA mode with huge page memory enabled, a set of automatic huge page memory pool allocation mechanisms is provided. Under No-SVA mode, a separate mechanism using kernel-reserved memory pools is available. Without changing the external interface, this patch merges the huge page memory pool functionality into the reserved memory pool system. This ensures that the unified reserved memory pool interface can be used consistently in both SVA and No-SVA modes. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- wd_bmm.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 142 insertions(+), 25 deletions(-) diff --git a/wd_bmm.c b/wd_bmm.c index 21c46ca..12c3bcf 100644 --- a/wd_bmm.c +++ b/wd_bmm.c @@ -78,6 +78,11 @@ struct wd_blkpool { unsigned long act_mem_sz; unsigned int dev_id; struct wd_mempool_setup setup; + + /* SVA mode for Hugepage */ + bool sva_mode; + handle_t hp_mempool; + handle_t hp_blkpool; }; struct mem_ctx_node { @@ -253,6 +258,123 @@ static bool bitmap_test_bit(const unsigned char *bitmap, unsigned int bit_index) return true; } +static int wd_parse_dev_id(char *dev_name) +{ + char *last_dash; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the following number */ + dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); + /* Check whether it is truly all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + +/*----------------------------------SVA Hugepage memory pool---------------------------------*/ +static void *wd_hugepage_pool_create(handle_t h_ctx, struct wd_mempool_setup *setup) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + struct wd_blkpool *pool = NULL; + size_t total_size; + int numa_id, ret; + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + + pool->sva_mode = true; + memcpy(&pool->setup, setup, sizeof(pool->setup)); + + total_size = setup->block_size * setup->block_num; + numa_id = ctx->dev->numa_id; + + ret = wd_parse_dev_id(ctx->dev_path); + if (ret < 0) { + WD_ERR("failed to parse device id.\n"); + goto error; + } + pool->dev_id = ret; + + /* Create hugepage memory pool */ + pool->hp_mempool = wd_mempool_create(total_size, numa_id); + if (WD_IS_ERR(pool->hp_mempool)) { + WD_ERR("failed to create hugepage mempool.\n"); + goto error; + } + + /* Create memory blocks */ + pool->hp_blkpool = wd_blockpool_create(pool->hp_mempool, + setup->block_size, + setup->block_num); + if (WD_IS_ERR(pool->hp_blkpool)) { + WD_ERR("failed to create hugepage blockpool.\n"); + wd_mempool_destroy(pool->hp_mempool); + goto error; + } + + pool->free_blk_num = setup->block_num; + pool->act_blk_sz = setup->block_size; + + return pool; +error: + free(pool); + return NULL; +} + +static void wd_hugepage_pool_destroy(struct wd_blkpool *p) +{ + if (p->hp_blkpool) { + wd_blockpool_destroy(p->hp_blkpool); + p->hp_blkpool = 0; + } + + if (p->hp_mempool) { + wd_mempool_destroy(p->hp_mempool); + p->hp_mempool = 0; + } + + free(p); +} + +static void *wd_hugepage_blk_alloc(struct wd_blkpool *p, size_t size) +{ + if (size > p->act_blk_sz) { + WD_ERR("request size %zu > block size %u\n", size, p->act_blk_sz); + return NULL; + } + + void *addr = 
wd_block_alloc(p->hp_blkpool); + if (!addr) { + p->alloc_failures++; + WD_ERR("failed to alloc block from hugepage pool.\n"); + return NULL; + } + + __atomic_fetch_sub(&p->free_blk_num, 1, __ATOMIC_RELAXED); + return addr; +} + +static void wd_hugepage_blk_free(struct wd_blkpool *p, void *buf) +{ + /* The function call ensures that buf is not null */ + wd_block_free(p->hp_blkpool, buf); + __atomic_fetch_add(&p->free_blk_num, 1, __ATOMIC_RELAXED); +} + +/*----------------------------------No-SVA kernel memory pool--------------------------------*/ static void *wd_mmap_qfr(struct ctx_info *cinfo, enum uacce_qfrt qfrt, size_t size) { off_t off; @@ -634,29 +756,6 @@ static int usr_pool_init(struct wd_blkpool *p) return WD_SUCCESS; } -static int wd_parse_dev_id(char *dev_name) -{ - char *last_dash = NULL; - char *endptr; - int dev_id; - - if (!dev_name) - return -WD_EINVAL; - - /* Find the last '-' in the string. */ - last_dash = strrchr(dev_name, '-'); - if (!last_dash || *(last_dash + 1) == '\0') - return -WD_EINVAL; - - /* Parse the following number */ - dev_id = strtol(last_dash + 1, &endptr, DECIMAL_NUMBER); - /* Check whether it is truly all digits */ - if (*endptr != '\0' || dev_id < 0) - return -WD_EINVAL; - - return dev_id; -} - static int wd_mempool_init(handle_t h_ctx, struct wd_blkpool *pool, struct wd_mempool_setup *setup) { @@ -728,8 +827,8 @@ void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) WD_ERR("failed to check device ctx!\n"); return NULL; } else if (ret == UACCE_DEV_SVA) { - WD_ERR("the device is SVA mode!\n"); - return NULL; + WD_INFO("the device is SVA mode!\n"); + return wd_hugepage_pool_create(h_ctx, setup); } pool = calloc(1, sizeof(*pool)); @@ -742,6 +841,7 @@ void *wd_mempool_alloc(handle_t h_ctx, struct wd_mempool_setup *setup) goto err_pool_alloc; memcpy(&pool->setup, setup, sizeof(pool->setup)); + pool->sva_mode = false; ret = wd_pool_pre_layout(h_ctx, pool, setup); if (ret) @@ -776,6 +876,11 @@ void wd_mempool_free(handle_t h_ctx, void *pool) return; } + if (p->sva_mode) { + wd_hugepage_pool_destroy(p); + return; + } + setup = &p->setup; if (p->free_blk_num != setup->block_num) { WD_ERR("Can not destroy blk pool, as it's in use.\n"); @@ -819,6 +924,11 @@ void wd_mem_free(void *pool, void *buf) return; } + if (p->sva_mode) { + wd_hugepage_blk_free(p, buf); + return; + } + sz = p->act_hd_sz + p->act_blk_sz; if (!sz) { WD_ERR("memory pool blk size is zero!\n"); @@ -922,6 +1032,9 @@ void *wd_mem_alloc(void *pool, size_t size) return NULL; } + if (p->sva_mode) + return wd_hugepage_blk_alloc(p, size); + if (!p->act_blk_sz) { WD_ERR("blk pool is error!\n"); return NULL; @@ -972,6 +1085,10 @@ void *wd_mem_map(void *pool, void *buf, size_t sz) return NULL; } + /* VA == IOVA in SVA mode */ + if (p->sva_mode) + return buf; + if (!sz || (uintptr_t)buf < (uintptr_t)p->act_start) { WD_ERR("map buf addr is error.\n"); return NULL; -- 2.33.0
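After the merge a caller no longer branches on the bus mode: wd_mempool_alloc() returns a hugepage-backed pool on SVA devices and a kernel-reserved pool otherwise, and the same wd_mem_alloc()/wd_mem_map()/wd_mem_free() calls work against either backend. A hedged usage sketch; h_ctx is assumed to be a ctx already requested from a uacce device, the block sizes are illustrative, and wd_mempool_setup may carry more fields than shown:

#include "wd.h"
#include "wd_bmm.h"

static int demo_unified_pool(handle_t h_ctx)
{
        struct wd_mempool_setup setup = {
                .block_size = 4096,     /* illustrative values */
                .block_num = 128,
        };
        void *pool, *buf, *dma;

        /* Hugepage pool on an SVA device, kernel-reserved pool otherwise. */
        pool = wd_mempool_alloc(h_ctx, &setup);
        if (!pool)
                return -1;

        buf = wd_mem_alloc(pool, setup.block_size);     /* same call, either backend */
        if (!buf) {
                wd_mempool_free(h_ctx, pool);
                return -1;
        }

        dma = wd_mem_map(pool, buf, setup.block_size);  /* == buf under SVA, IOVA under No-SVA */
        /* ... pass buf (CPU view) and dma (device view) to the driver ... */
        (void)dma;

        wd_mem_free(pool, buf);
        wd_mempool_free(h_ctx, pool);
        return 0;
}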
From: Wenkai Lin <linwenkai6@hisilicon.com> Initialize the algorithm names supported by each module. During subsequent initialization, this will be used to determine whether the algorithm executed by the user is supported. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> --- wd_aead.c | 1 + wd_agg.c | 1 + wd_cipher.c | 1 + wd_comp.c | 1 + wd_dh.c | 1 + wd_digest.c | 1 + wd_ecc.c | 1 + wd_join_gather.c | 1 + wd_rsa.c | 1 + wd_udma.c | 1 + 10 files changed, 10 insertions(+) diff --git a/wd_aead.c b/wd_aead.c index b10890f..373b6fe 100644 --- a/wd_aead.c +++ b/wd_aead.c @@ -446,6 +446,7 @@ static int wd_aead_init_nolock(struct wd_ctx_config *config, struct wd_sched *sc if (ret < 0) return ret; + wd_aead_setting.config.alg_name = "aead"; ret = wd_init_ctx_config(&wd_aead_setting.config, config); if (ret) return ret; diff --git a/wd_agg.c b/wd_agg.c index 85fb7f6..8c54d10 100644 --- a/wd_agg.c +++ b/wd_agg.c @@ -580,6 +580,7 @@ static int wd_agg_alg_init(struct wd_ctx_config *config, struct wd_sched *sched) if (ret < 0) return ret; + wd_agg_setting.config.alg_name = "hashagg"; ret = wd_init_ctx_config(&wd_agg_setting.config, config); if (ret < 0) return ret; diff --git a/wd_cipher.c b/wd_cipher.c index ef54dc8..92ca07b 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -328,6 +328,7 @@ static int wd_cipher_common_init(struct wd_ctx_config *config, if (ret < 0) return ret; + wd_cipher_setting.config.alg_name = "cipher"; ret = wd_init_ctx_config(&wd_cipher_setting.config, config); if (ret < 0) return ret; diff --git a/wd_comp.c b/wd_comp.c index ac0470f..435f5a8 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -154,6 +154,7 @@ static int wd_comp_init_nolock(struct wd_ctx_config *config, struct wd_sched *sc if (ret < 0) return ret; + wd_comp_setting.config.alg_name = "zlib gzip deflate lz77_zstd lz4 lz77_only"; ret = wd_init_ctx_config(&wd_comp_setting.config, config); if (ret < 0) return ret; diff --git a/wd_dh.c b/wd_dh.c index 2c88ce2..221322f 100644 --- a/wd_dh.c +++ b/wd_dh.c @@ -125,6 +125,7 @@ static int wd_dh_common_init(struct wd_ctx_config *config, struct wd_sched *sche if (ret < 0) return ret; + wd_dh_setting.config.alg_name = "dh"; ret = wd_init_ctx_config(&wd_dh_setting.config, config); if (ret) return ret; diff --git a/wd_digest.c b/wd_digest.c index cb0a111..2d31176 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -260,6 +260,7 @@ static int wd_digest_init_nolock(struct wd_ctx_config *config, if (ret < 0) return ret; + wd_digest_setting.config.alg_name = "digest"; ret = wd_init_ctx_config(&wd_digest_setting.config, config); if (ret < 0) return ret; diff --git a/wd_ecc.c b/wd_ecc.c index 2a1228d..b1971b9 100644 --- a/wd_ecc.c +++ b/wd_ecc.c @@ -192,6 +192,7 @@ static int wd_ecc_common_init(struct wd_ctx_config *config, struct wd_sched *sch if (ret < 0) return ret; + wd_ecc_setting.config.alg_name = "sm2 x448 x25519 ecdsa ecdh"; ret = wd_init_ctx_config(&wd_ecc_setting.config, config); if (ret < 0) return ret; diff --git a/wd_join_gather.c b/wd_join_gather.c index a845e03..915c1b8 100644 --- a/wd_join_gather.c +++ b/wd_join_gather.c @@ -684,6 +684,7 @@ static int wd_join_gather_alg_init(struct wd_ctx_config *config, struct wd_sched if (ret < 0) return ret; + wd_join_gather_setting.config.alg_name = "hashjoin gather"; ret = wd_init_ctx_config(&wd_join_gather_setting.config, config); if (ret < 0) return ret; diff --git a/wd_rsa.c b/wd_rsa.c index 4cb72fc..cf9239c 100644 --- a/wd_rsa.c +++ b/wd_rsa.c @@ -166,6 +166,7 @@ static int 
wd_rsa_common_init(struct wd_ctx_config *config, struct wd_sched *sch if (ret < 0) return ret; + wd_rsa_setting.config.alg_name = "rsa"; ret = wd_init_ctx_config(&wd_rsa_setting.config, config); if (ret < 0) return ret; diff --git a/wd_udma.c b/wd_udma.c index 5f47291..eebe495 100644 --- a/wd_udma.c +++ b/wd_udma.c @@ -390,6 +390,7 @@ static int wd_udma_alg_init(struct wd_ctx_config *config, struct wd_sched *sched if (ret < 0) return ret; + wd_udma_setting.config.alg_name = "udma"; ret = wd_init_ctx_config(&wd_udma_setting.config, config); if (ret < 0) return ret; -- 2.33.0
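Each alg_name set above is a space-separated token list, e.g. "zlib gzip deflate lz77_zstd lz4 lz77_only". The support check mentioned in the commit message is not part of this patch; a plausible whole-token match is sketched below (an assumption, not the library's actual routine), since a plain strstr() would wrongly accept "lz77" against "lz77_zstd":

#include <stdbool.h>
#include <string.h>

/* True if alg appears as a whole space-separated token in list. */
static bool alg_supported(const char *list, const char *alg)
{
        size_t len = strlen(alg);
        const char *p = list;

        if (!len)
                return false;

        while ((p = strstr(p, alg)) != NULL) {
                bool head = (p == list) || (p[-1] == ' ');
                bool tail = (p[len] == '\0') || (p[len] == ' ');

                if (head && tail)
                        return true;
                p++;    /* keep scanning past a partial match */
        }
        return false;
}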
From: Chenghai Huang <huangchenghai2@huawei.com> Address memory management issues in failure paths, including double-free and use-after-free problems. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_wd_benchmark.c | 36 ++++++++++++++------------ 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index ab9a894..c12edba 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -292,7 +292,7 @@ static int init_zip_wd_queue(struct acc_option *options) u32 outsize; u32 insize; u8 op_type; - int i, j; + int i, j, k; int ret = 0; op_type = options->optype % WCRYPTO_DIR_MAX; @@ -356,22 +356,22 @@ static int init_zip_wd_queue(struct acc_option *options) goto bds_error; bds = g_thread_queue.bd_res[j].bds; - for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { - bds[i].src = wd_alloc_blk(pool); - if (!bds[i].src) { + for (k = 0; k < MAX_POOL_LENTH_COMP; k++) { + bds[k].src = wd_alloc_blk(pool); + if (!bds[k].src) { ret = -ENOMEM; goto blk_error2; } - bds[i].src_len = insize; + bds[k].src_len = insize; - bds[i].dst = wd_alloc_blk(pool); - if (!bds[i].dst) { + bds[k].dst = wd_alloc_blk(pool); + if (!bds[k].dst) { ret = -ENOMEM; goto blk_error3; } - bds[i].dst_len = outsize; + bds[k].dst_len = outsize; - get_rand_data(bds[i].src, insize * COMPRESSION_RATIO_FACTOR); + get_rand_data(bds[k].src, insize * COMPRESSION_RATIO_FACTOR); } } @@ -379,11 +379,11 @@ static int init_zip_wd_queue(struct acc_option *options) return 0; blk_error3: - wd_free_blk(pool, bds[i].src); + wd_free_blk(pool, bds[k].src); blk_error2: - for (i--; i >= 0; i--) { - wd_free_blk(pool, bds[i].src); - wd_free_blk(pool, bds[i].dst); + for (k--; k >= 0; k--) { + wd_free_blk(pool, bds[k].src); + wd_free_blk(pool, bds[k].dst); } bds_error: wd_blkpool_destroy(g_thread_queue.bd_res[j].pool); @@ -391,9 +391,9 @@ pool_err: for (j--; j >= 0; j--) { pool = g_thread_queue.bd_res[j].pool; bds = g_thread_queue.bd_res[j].bds; - for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { - wd_free_blk(pool, bds[i].src); - wd_free_blk(pool, bds[i].dst); + for (k = 0; k < MAX_POOL_LENTH_COMP; k++) { + wd_free_blk(pool, bds[k].src); + wd_free_blk(pool, bds[k].dst); } free(bds); wd_blkpool_destroy(pool); @@ -404,6 +404,7 @@ queue_out: free(g_thread_queue.bd_res[i].queue); } free(g_thread_queue.bd_res); + g_thread_queue.bd_res = NULL; return ret; } @@ -413,6 +414,9 @@ static void uninit_zip_wd_queue(void) void *pool = NULL; int j, i; + if (!g_thread_queue.bd_res) + return; + for (j = 0; j < g_thread_num; j++) { pool = g_thread_queue.bd_res[j].pool; bds = g_thread_queue.bd_res[j].bds; -- 2.33.0
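The pattern behind the fix above: an error path that frees a global must also clear the pointer, so a later teardown call degrades to a harmless no-op instead of a double free. In miniature, with illustrative names rather than the benchmark's real ones:

#include <stdlib.h>

static int *g_res;

static int init(void)
{
        g_res = calloc(16, sizeof(*g_res));
        if (!g_res)
                return -1;

        /* ... a later setup step fails ... */
        free(g_res);
        g_res = NULL;   /* without this, uninit() would free it again */
        return -1;
}

static void uninit(void)
{
        if (!g_res)     /* init failed or never ran: nothing to do */
                return;

        free(g_res);
        g_res = NULL;
}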
From: Wenkai Lin <linwenkai6@hisilicon.com> When using SGL memory with uadk, we need to convert the user SGL into a hardware SGL. For performance reasons, we create an SGL pool for each queue, from which SGLs are requested when performing tasks. In the no-sva scenario, to obtain the mapping between virtual and physical addresses, SGL memory needs to be allocated from the memory pool. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- drv/hisi_comp.c | 4 +- drv/hisi_qm_udrv.c | 140 +++++++++++++++++++++++++++++++++++---------- drv/hisi_qm_udrv.h | 6 +- drv/hisi_sec.c | 6 +- 4 files changed, 118 insertions(+), 38 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 001150e..87d2103 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -369,7 +369,7 @@ static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; } @@ -1426,7 +1426,7 @@ static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool to free hw sgl!\n"); return; diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 70a6698..9251b4c 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -76,8 +76,7 @@ struct hisi_sge { void *page_ctrl; __le32 len; __le32 pad; - __le32 pad0; - __le32 pad1; + uintptr_t vbuff; }; /* use default hw sgl head size 64B, in little-endian */ @@ -91,7 +90,8 @@ struct hisi_sgl { /* the sge num in this sgl */ __le16 entry_length_in_sgl; __le16 pad0; - __le64 pad1[5]; + __le64 pad1[4]; + struct hisi_sgl *next; /* valid sge buffs total size */ __le64 entry_size_in_sgl; struct hisi_sge sge_entries[]; @@ -107,6 +107,7 @@ struct hisi_sgl_pool { __u32 top; __u32 sge_num; __u32 sgl_num; + struct wd_mm_ops *mm_ops; pthread_spinlock_t lock; }; @@ -357,8 +358,16 @@ static int hisi_qm_setup_info(struct hisi_qp *qp, struct hisi_qm_priv *config) goto err_destroy_lock; } + ret = pthread_spin_init(&q_info->sgl_lock, PTHREAD_PROCESS_SHARED); + if (ret) { + WD_DEV_ERR(qp->h_ctx, "failed to init qinfo sgl_lock!\n"); + goto err_destroy_sd_lock; + } + return 0; +err_destroy_sd_lock: + pthread_spin_destroy(&q_info->sd_lock); err_destroy_lock: pthread_spin_destroy(&q_info->rv_lock); err_out: @@ -370,6 +379,7 @@ static void hisi_qm_clear_info(struct hisi_qp *qp) { struct hisi_qm_queue_info *q_info = &qp->q_info; + pthread_spin_destroy(&q_info->sgl_lock); pthread_spin_destroy(&q_info->sd_lock); pthread_spin_destroy(&q_info->rv_lock); hisi_qm_unset_region(qp->h_ctx, q_info); @@ -413,7 +423,7 @@ handle_t hisi_qm_alloc_qp(struct hisi_qm_priv *config, handle_t ctx) goto out_qp; qp->h_sgl_pool = hisi_qm_create_sglpool(HISI_SGL_NUM_IN_BD, - HISI_SGE_NUM_IN_SGL); + HISI_SGE_NUM_IN_SGL, NULL); if (!qp->h_sgl_pool) goto free_info; @@ -451,6 +461,8 @@ void hisi_qm_free_qp(handle_t h_qp) wd_release_ctx_force(qp->h_ctx); hisi_qm_destroy_sglpool(qp->h_sgl_pool); + if (qp->h_nosva_sgl_pool) + hisi_qm_destroy_sglpool(qp->h_nosva_sgl_pool); hisi_qm_clear_info(qp); @@ -630,32 +642,46 @@ void hisi_set_msg_id(handle_t h_qp, __u32 *tag) } } -static void *hisi_qm_create_sgl(__u32 sge_num) +static void *hisi_qm_create_sgl(__u32 sge_num, struct wd_mm_ops *mm_ops) { void *sgl; int size; size = sizeof(struct 
hisi_sgl) + sge_num * (sizeof(struct hisi_sge)) + HISI_SGL_ALIGE; - sgl = calloc(1, size); - if (!sgl) { - WD_ERR("failed to create sgl!\n"); - return NULL; - } + if (mm_ops) + sgl = mm_ops->alloc(mm_ops->usr, size); + else + sgl = calloc(1, size); + + if (!sgl) + WD_ERR("failed to alloc memory for the hisi qm sgl!\n"); return sgl; } -static struct hisi_sgl *hisi_qm_align_sgl(const void *sgl, __u32 sge_num) +static struct hisi_sgl *hisi_qm_align_sgl(const void *sgl, __u32 sge_num, + struct wd_mm_ops *mm_ops) { struct hisi_sgl *sgl_align; + uintptr_t iova, iova_align; /* Hardware require the address must be 64 bytes aligned */ - sgl_align = (struct hisi_sgl *)ADDR_ALIGN_64(sgl); + if (mm_ops) { + iova = (uintptr_t)mm_ops->iova_map(mm_ops->usr, (struct hisi_sgl *)sgl, + sizeof(struct hisi_sgl)); + if (!iova) + return NULL; + iova_align = ADDR_ALIGN_64(iova); + sgl_align = (struct hisi_sgl *)((uintptr_t)sgl + iova_align - iova); + } else { + sgl_align = (struct hisi_sgl *)ADDR_ALIGN_64(sgl); + } sgl_align->entry_sum_in_chain = sge_num; sgl_align->entry_sum_in_sgl = 0; sgl_align->entry_length_in_sgl = sge_num; sgl_align->next_dma = 0; + sgl_align->next = 0; return sgl_align; } @@ -665,18 +691,27 @@ static void hisi_qm_free_sglpool(struct hisi_sgl_pool *pool) __u32 i; if (pool->sgl) { - for (i = 0; i < pool->sgl_num; i++) - free(pool->sgl[i]); + if (pool->mm_ops && !pool->mm_ops->sva_mode) { + for (i = 0; i < pool->sgl_num; i++) + pool->mm_ops->free(pool->mm_ops->usr, pool->sgl[i]); + } else { + for (i = 0; i < pool->sgl_num; i++) + free(pool->sgl[i]); + } free(pool->sgl); } if (pool->sgl_align) free(pool->sgl_align); + + if (pool->mm_ops) + free(pool->mm_ops); + free(pool); } -handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) +handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num, struct wd_mm_ops *mm_ops) { struct hisi_sgl_pool *sgl_pool; int ret; @@ -694,6 +729,16 @@ handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) return 0; } + if (mm_ops && !mm_ops->sva_mode && mm_ops->alloc && mm_ops->free && + mm_ops->iova_map && mm_ops->usr) { + sgl_pool->mm_ops = malloc(sizeof(struct wd_mm_ops)); + if (!sgl_pool->mm_ops) { + WD_ERR("failed to alloc memory for sglpool mm_ops!\n"); + goto err_out; + } + memcpy(sgl_pool->mm_ops, mm_ops, sizeof(struct wd_mm_ops)); + } + sgl_pool->sgl = calloc(sgl_num, sizeof(void *)); if (!sgl_pool->sgl) { WD_ERR("failed to alloc memory for sgl!\n"); @@ -708,14 +753,14 @@ handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num) /* base the sgl_num create the sgl chain */ for (i = 0; i < sgl_num; i++) { - sgl_pool->sgl[i] = hisi_qm_create_sgl(sge_num); + sgl_pool->sgl[i] = hisi_qm_create_sgl(sge_num, sgl_pool->mm_ops); if (!sgl_pool->sgl[i]) { sgl_pool->sgl_num = i; goto err_out; } sgl_pool->sgl_align[i] = hisi_qm_align_sgl(sgl_pool->sgl[i], - sge_num); + sge_num, sgl_pool->mm_ops); } sgl_pool->sgl_num = sgl_num; @@ -756,8 +801,8 @@ static struct hisi_sgl *hisi_qm_sgl_pop(struct hisi_sgl_pool *pool) pthread_spin_lock(&pool->lock); if (pool->top == 0) { - WD_ERR("invalid: the sgl pool is empty!\n"); pthread_spin_unlock(&pool->lock); + WD_ERR("invalid: the sgl pool is empty!\n"); return NULL; } @@ -771,12 +816,13 @@ static int hisi_qm_sgl_push(struct hisi_sgl_pool *pool, struct hisi_sgl *hw_sgl) { pthread_spin_lock(&pool->lock); if (pool->top >= pool->depth) { - WD_ERR("invalid: the sgl pool is full!\n"); pthread_spin_unlock(&pool->lock); + WD_ERR("invalid: the sgl pool is full!\n"); return -WD_EINVAL; } hw_sgl->next_dma = 0; + 
hw_sgl->next = 0; hw_sgl->entry_sum_in_sgl = 0; hw_sgl->entry_sum_in_chain = pool->sge_num; hw_sgl->entry_length_in_sgl = pool->sge_num; @@ -800,7 +846,7 @@ void hisi_qm_put_hw_sgl(handle_t sgl_pool, void *hw_sgl) return; while (cur) { - next = (struct hisi_sgl *)cur->next_dma; + next = (struct hisi_sgl *)cur->next; ret = hisi_qm_sgl_push(pool, cur); if (ret) break; @@ -832,7 +878,7 @@ static void hisi_qm_dump_sgl(void *sgl) WD_DEBUG("[sgl-%d]->sge_entries[%d].len: %u\n", k, i, tmp->sge_entries[i].len); - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; k++; if (!tmp) { @@ -847,6 +893,7 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) struct hisi_sgl_pool *pool = (struct hisi_sgl_pool *)sgl_pool; struct wd_datalist *tmp = sgl; struct hisi_sgl *head, *next, *cur; + struct wd_mm_ops *mm_ops; __u32 i = 0; if (!pool || !sgl) { @@ -854,6 +901,12 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) return NULL; } + if (pool->mm_ops && !pool->mm_ops->iova_map) { + WD_ERR("invalid: mm_ops iova_map function is NULL!\n"); + return NULL; + } + + mm_ops = pool->mm_ops; head = hisi_qm_sgl_pop(pool); if (!head) return NULL; @@ -872,7 +925,18 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) goto err_out; } - cur->sge_entries[i].buff = (uintptr_t)tmp->data; + if (mm_ops) + cur->sge_entries[i].buff = (uintptr_t)mm_ops->iova_map(mm_ops->usr, + tmp->data, tmp->len); + else + cur->sge_entries[i].buff = (uintptr_t)tmp->data; + + if (!cur->sge_entries[i].buff) { + WD_ERR("invalid: the iova map addr of sge is NULL!\n"); + goto err_out; + } + + cur->sge_entries[i].vbuff = (uintptr_t)tmp->data; cur->sge_entries[i].len = tmp->len; cur->entry_sum_in_sgl++; cur->entry_size_in_sgl += tmp->len; @@ -890,7 +954,12 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) WD_ERR("invalid: the sgl pool is not enough!\n"); goto err_out; } - cur->next_dma = (uintptr_t)next; + if (mm_ops) + cur->next_dma = (uintptr_t)mm_ops->iova_map(mm_ops->usr, + next, sizeof(*next)); + else + cur->next_dma = (uintptr_t)next; + cur->next = next; cur = next; head->entry_sum_in_chain += pool->sge_num; /* In the new sgl chain, the subscript must be reset */ @@ -912,10 +981,19 @@ err_out: return NULL; } -handle_t hisi_qm_get_sglpool(handle_t h_qp) +handle_t hisi_qm_get_sglpool(handle_t h_qp, struct wd_mm_ops *mm_ops) { struct hisi_qp *qp = (struct hisi_qp *)h_qp; + if (mm_ops && !mm_ops->sva_mode) { + pthread_spin_lock(&qp->q_info.sgl_lock); + if (!qp->h_nosva_sgl_pool) + qp->h_nosva_sgl_pool = hisi_qm_create_sglpool(HISI_SGL_NUM_IN_BD, + HISI_SGE_NUM_IN_SGL, mm_ops); + pthread_spin_unlock(&qp->q_info.sgl_lock); + return qp->h_nosva_sgl_pool; + } + return qp->h_sgl_pool; } @@ -927,7 +1005,7 @@ static void hisi_qm_sgl_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, __u32 offset; void *src; - src = (void *)tmp->sge_entries[begin_sge].buff + sge_offset; + src = (void *)tmp->sge_entries[begin_sge].vbuff + sge_offset; offset = tmp->sge_entries[begin_sge].len - sge_offset; /* the first one is enough for copy size, copy and return */ if (offset >= size) { @@ -939,7 +1017,7 @@ static void hisi_qm_sgl_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, while (tmp) { for (; i < tmp->entry_sum_in_sgl; i++) { - src = (void *)tmp->sge_entries[i].buff; + src = (void *)tmp->sge_entries[i].vbuff; if (offset + tmp->sge_entries[i].len >= size) { memcpy(pbuff + offset, src, size - offset); return; @@ -949,7 +1027,7 @@ static void hisi_qm_sgl_copy_inner(void 
*pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; } - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -964,14 +1042,14 @@ static void hisi_qm_pbuff_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, void *dst; if (tmp->sge_entries[begin_sge].len - sge_offset >= size) { - dst = (void *)tmp->sge_entries[begin_sge].buff + sge_offset; + dst = (void *)tmp->sge_entries[begin_sge].vbuff + sge_offset; memcpy(dst, pbuff, size); return; } while (tmp) { for (; i < tmp->entry_sum_in_sgl; i++) { - dst = (void *)tmp->sge_entries[i].buff; + dst = (void *)tmp->sge_entries[i].vbuff; if (offset + tmp->sge_entries[i].len >= size) { memcpy(dst, pbuff + offset, size - offset); return; @@ -981,7 +1059,7 @@ static void hisi_qm_pbuff_copy_inner(void *pbuff, struct hisi_sgl *hw_sgl, offset += tmp->sge_entries[i].len; } - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; i = 0; } } @@ -1000,7 +1078,7 @@ void hisi_qm_sgl_copy(void *pbuff, void *hw_sgl, __u32 offset, __u32 size, while (len + tmp->entry_size_in_sgl <= offset) { len += tmp->entry_size_in_sgl; - tmp = (struct hisi_sgl *)tmp->next_dma; + tmp = (struct hisi_sgl *)tmp->next; if (!tmp) return; } diff --git a/drv/hisi_qm_udrv.h b/drv/hisi_qm_udrv.h index e787193..f066881 100644 --- a/drv/hisi_qm_udrv.h +++ b/drv/hisi_qm_udrv.h @@ -81,6 +81,7 @@ struct hisi_qm_queue_info { pthread_spinlock_t rv_lock; unsigned long region_size[UACCE_QFRT_MAX]; bool epoll_en; + pthread_spinlock_t sgl_lock; }; struct hisi_qp { @@ -89,6 +90,7 @@ struct hisi_qp { handle_t h_ctx; /* Private area for driver use, point to queue specifial data */ void *priv; + handle_t h_nosva_sgl_pool; }; /* Capabilities */ @@ -148,7 +150,7 @@ void hisi_set_msg_id(handle_t h_qp, __u32 *tag); * * Fixed me: the sge buff's size now is Fixed. */ -handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num); +handle_t hisi_qm_create_sglpool(__u32 sgl_num, __u32 sge_num, struct wd_mm_ops *mm_ops); /** * hisi_qm_destroy_sglpool - Destroy sgl pool in qm. @@ -176,7 +178,7 @@ void hisi_qm_put_hw_sgl(handle_t sgl_pool, void *hw_sgl); * hisi_qm_get_sglpool - Get the qp's hw sgl pool handle * @h_qp: Handle of the qp. */ -handle_t hisi_qm_get_sglpool(handle_t h_qp); +handle_t hisi_qm_get_sglpool(handle_t h_qp, struct wd_mm_ops *mm_ops); /** * hisi_qm_sgl_copy: Buffer copying from hw sgl to pbuff or pbuff to sgl diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 1e95ce5..246edf4 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -1028,7 +1028,7 @@ static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) return; @@ -1045,7 +1045,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) { WD_ERR("failed to get sglpool for hw_v2!\n"); return -WD_EINVAL; @@ -1085,7 +1085,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); if (!h_sgl_pool) { WD_ERR("failed to get sglpool for hw_v3!\n"); return -WD_EINVAL; -- 2.33.0
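After this patch each hardware SGL carries two links: next_dma holds the IOVA the device follows, while the new next field keeps the virtual address so the CPU can still walk the chain when VA != IOVA (the No-SVA case). A reduced sketch of the dual-link idea, with types simplified from the driver's struct hisi_sgl and struct hisi_sge:

struct sge {
        unsigned long long buff;        /* IOVA: what the device dereferences */
        unsigned long long vbuff;       /* VA: what the CPU dereferences */
        unsigned int len;
};

struct sgl {
        unsigned long long next_dma;    /* IOVA of the next sgl, for hardware */
        struct sgl *next;               /* VA of the next sgl, for the CPU */
        unsigned int n;                 /* valid entries in sge[] */
        struct sge sge[4];
};

/*
 * A CPU-side walk must chase ->next, never ->next_dma: in No-SVA
 * mode the IOVA is not a dereferenceable pointer.
 */
static unsigned long sgl_total_len(const struct sgl *s)
{
        unsigned long total = 0;
        unsigned int i;

        for (; s; s = s->next)
                for (i = 0; i < s->n; i++)
                        total += s->sge[i].len;

        return total;
}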
From: Wenkai Lin <linwenkai6@hisilicon.com> The h_qp may be NULL, so check the pointer before using it. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_common.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drv/hisi_dae_common.c b/drv/hisi_dae_common.c index 43b53e0..5cfc105 100644 --- a/drv/hisi_dae_common.c +++ b/drv/hisi_dae_common.c @@ -357,8 +357,10 @@ free_h_qp: out: for (j = 0; j < i; j++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); + if (h_qp) { + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } } free(priv); return ret; @@ -378,8 +380,10 @@ void dae_exit(struct wd_alg_driver *drv) config = &priv->config; for (i = 0; i < config->ctx_num; i++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); + if (h_qp) { + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } } free(priv); -- 2.33.0
From: Wenkai Lin <linwenkai6@hisilicon.com> Modify GATHER_ROW_BATCH_EMPTY_SIZE from 2 to 4 for hardware memory alignment. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index dcbeed4..f3eeb95 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -29,7 +29,7 @@ #define HASH_TABLE_MAX_INDEX_NUM 15 #define HASH_TABLE_INDEX_SIZE 12 #define HASH_TABLE_EMPTY_SIZE 4 -#define GATHER_ROW_BATCH_EMPTY_SIZE 2 +#define GATHER_ROW_BATCH_EMPTY_SIZE 4 /* DAE hardware protocol data */ enum dae_join_stage { -- 2.33.0
From: Wenkai Lin <linwenkai6@hisilicon.com> The batch number is needed for the hardware to get all addresses from the row batches. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index f3eeb95..9b9c07d 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -138,6 +138,7 @@ static void fill_join_gather_misc_field(struct wd_join_gather_msg *msg, sqe->task_type_ext = DAE_GATHER_COMPLETE; sqe->multi_batch_en = msg->multi_batch_en; sqe->data_row_num = msg->req.output_row_num; + sqe->batch_num = msg->req.gather_req.row_batchs.batch_num; if (msg->index_type == WD_BATCH_ADDR_INDEX) sqe->index_batch_type = DAE_GATHER_BATCH_ADDR_INDEX; else -- 2.33.0
From: Zhushuai Yin <yinzhushuai@huawei.com> The SEC v2 interface needs to support the no-sva business model within the new evolution framework. This ensures that users switching to the v2 interface can utilize both the sva mode and the no-sva mode. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_sec.c | 1081 ++++++++++++++++++++++++++++++----- include/drv/wd_aead_drv.h | 23 +- include/drv/wd_cipher_drv.h | 2 + include/drv/wd_digest_drv.h | 2 + include/wd_aead.h | 2 + include/wd_cipher.h | 2 + include/wd_digest.h | 2 + wd_aead.c | 179 +++++- wd_cipher.c | 40 +- wd_digest.c | 42 +- 10 files changed, 1207 insertions(+), 168 deletions(-) diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 246edf4..0074952 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -91,6 +91,7 @@ #define AUTH_ALG_OFFSET 11 #define WD_CIPHER_THEN_DIGEST 0x0 #define WD_DIGEST_THEN_CIPHER 0x1 +#define AEAD_AIV_OFFSET 0x6 #define SEC_CTX_Q_NUM_DEF 1 @@ -602,6 +603,157 @@ static int hisi_sec_get_usage(void *param) return 0; } +static int eops_param_check(struct wd_alg_driver *drv, struct wd_mm_ops *mm_ops) +{ + if (!drv || !drv->priv) { + WD_ERR("invalid: aead drv or priv is NULL!\n"); + return -WD_EINVAL; + } + + if (!mm_ops) { + WD_ERR("invalid: mm_ops is NULL!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int aead_sess_eops_init(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, void **params) +{ + struct wd_aead_aiv_addr *aiv_addr; + struct hisi_sec_ctx *sec_ctx; + struct hisi_qp *qp; + __u16 sq_depth; + int ret; + + ret = eops_param_check(drv, mm_ops); + if (ret) + return ret; + + if (!params) { + WD_ERR("invalid: extend ops init params address is NULL!\n"); + return -WD_EINVAL; + } + + if (*params) { + WD_ERR("invalid: extend ops init params repeatedly!\n"); + return -WD_EINVAL; + } + + aiv_addr = calloc(1, sizeof(struct wd_aead_aiv_addr)); + if (!aiv_addr) { + WD_ERR("aead failed to alloc aiv_addr memory!\n"); + return -WD_ENOMEM; + } + + sec_ctx = (struct hisi_sec_ctx *)drv->priv; + qp = (struct hisi_qp *)wd_ctx_get_priv(sec_ctx->config.ctxs[0].ctx); + sq_depth = qp->q_info.sq_depth; + aiv_addr->aiv = mm_ops->alloc(mm_ops->usr, (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!aiv_addr->aiv) { + WD_ERR("aead failed to alloc aiv memory!\n"); + goto aiv_err; + } + memset(aiv_addr->aiv, 0, (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!mm_ops->sva_mode) { + aiv_addr->aiv_nosva = mm_ops->iova_map(mm_ops->usr, aiv_addr->aiv, + (__u32)sq_depth << AEAD_AIV_OFFSET); + if (!aiv_addr->aiv_nosva) + goto aiv_nosva_err; + } + + aiv_addr->aiv_status = calloc(1, sq_depth); + if (!aiv_addr->aiv_status) { + WD_ERR("aead failed to alloc aiv_status memory!\n"); + goto aiv_status_err; + } + + *params = aiv_addr; + + return WD_SUCCESS; + +aiv_status_err: + if (!mm_ops->sva_mode) + mm_ops->iova_unmap(mm_ops->usr, aiv_addr->aiv, (void *)aiv_addr->aiv_nosva, + (__u32)sq_depth << AEAD_AIV_OFFSET); +aiv_nosva_err: + mm_ops->free(mm_ops->usr, aiv_addr->aiv); +aiv_err: + free(aiv_addr); + return -WD_ENOMEM; +} + +static void aead_sess_eops_uninit(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, void *params) +{ + struct wd_aead_aiv_addr *aiv_addr; + struct hisi_sec_ctx *sec_ctx; + struct hisi_qp *qp; + __u16 sq_depth; + int ret; + + ret = eops_param_check(drv, mm_ops); + if (ret) + return; + + if (!params) { + WD_ERR("invalid: extend ops uninit params address is NULL!\n"); + return; + } + + sec_ctx = (struct hisi_sec_ctx *)drv->priv; + qp = (struct hisi_qp 
*)wd_ctx_get_priv(sec_ctx->config.ctxs[0].ctx); + sq_depth = qp->q_info.sq_depth; + + aiv_addr = (struct wd_aead_aiv_addr *)params; + if (!mm_ops->sva_mode) + mm_ops->iova_unmap(mm_ops->usr, aiv_addr->aiv, (void *)aiv_addr->aiv_nosva, + (__u32)sq_depth << AEAD_AIV_OFFSET); + mm_ops->free(mm_ops->usr, aiv_addr->aiv); + free(aiv_addr->aiv_status); + free(params); +} + +static int aead_get_aiv_addr(struct hisi_qp *qp, struct wd_aead_msg *msg) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + __u16 sq_depth = qp->q_info.sq_depth; + int i; + + for (i = 0; i < sq_depth; i++) { + if (!__atomic_test_and_set(&aiv_addr->aiv_status[i], __ATOMIC_ACQUIRE)) { + msg->aiv = aiv_addr->aiv + i * AIV_STREAM_LEN; + return i; + } + } + + return -WD_EBUSY; +} + +static void aead_free_aiv_addr(struct wd_aead_msg *msg) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + __u32 aiv_idx; + + aiv_idx = (msg->aiv - aiv_addr->aiv) >> AEAD_AIV_OFFSET; + __atomic_clear(&aiv_addr->aiv_status[aiv_idx], __ATOMIC_RELEASE); +} + +static int sec_aead_get_extend_ops(void *ops) +{ + struct wd_aead_extend_ops *aead_ops = (struct wd_aead_extend_ops *)ops; + + if (!aead_ops) + return -WD_EINVAL; + + aead_ops->params = NULL; + aead_ops->eops_aiv_init = aead_sess_eops_init; + aead_ops->eops_aiv_uninit = aead_sess_eops_uninit; + + return WD_SUCCESS; +} + #define GEN_SEC_ALG_DRIVER(sec_alg_name, alg_type) \ {\ .drv_name = "hisi_sec2",\ @@ -616,6 +768,7 @@ static int hisi_sec_get_usage(void *param) .send = alg_type##_send,\ .recv = alg_type##_recv,\ .get_usage = hisi_sec_get_usage,\ + .get_extend_ops = sec_aead_get_extend_ops,\ } static struct wd_alg_driver cipher_alg_driver[] = { @@ -914,13 +1067,83 @@ static int fill_cipher_bd2_mode(struct wd_cipher_msg *msg, return 0; } -static void fill_cipher_bd2_addr(struct wd_cipher_msg *msg, - struct hisi_sec_sqe *sqe) +static void destroy_cipher_bd2_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe *sqe) { - sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("cipher failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.data_dst_addr, + msg->out_bytes); + + if (sqe->type2.c_key_addr) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->type2.c_key_addr, + msg->key_bytes); + + if (sqe->type2.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->type2.c_ivin_addr, + msg->iv_bytes); +} + +static int fill_cipher_bd2_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("cipher failed to check memory type!\n"); + return -WD_EINVAL; + } + 
+ /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_cipher_bd2_addr(msg, sqe); + return -WD_ENOMEM; } static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, @@ -945,8 +1168,6 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->type2.data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -954,6 +1175,9 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -964,6 +1188,8 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, else update_iv_sgl(temp_msg); + destroy_cipher_bd2_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "cipher"); } @@ -1024,11 +1250,12 @@ static int cipher_len_check(struct wd_cipher_msg *msg) return 0; } -static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) +static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out, + struct wd_mm_ops *mm_ops) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); if (!h_sgl_pool) return; @@ -1038,19 +1265,12 @@ static void hisi_sec_put_sgl(handle_t h_qp, __u8 alg_type, void *in, void *out) hisi_qm_put_hw_sgl(h_sgl_pool, out); } -static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, - struct hisi_sec_sqe *sqe, __u8 type) +static int hisi_sec_fill_sgl(handle_t h_sgl_pool, __u8 **in, __u8 **out, + struct hisi_sec_sqe *sqe, __u8 type) { - handle_t h_sgl_pool; void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (!h_sgl_pool) { - WD_ERR("failed to get sglpool for hw_v2!\n"); - return -WD_EINVAL; - } - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v2!\n"); @@ -1078,19 +1298,12 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, return 0; } -static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, +static int hisi_sec_fill_sgl_v3(handle_t h_sgl_pool, __u8 **in, __u8 **out, struct hisi_sec_sqe3 *sqe, __u8 type) { - handle_t h_sgl_pool; void *hw_sgl_in; void *hw_sgl_out; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (!h_sgl_pool) { - WD_ERR("failed to get sglpool for hw_v3!\n"); - return -WD_EINVAL; 
- } - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v3!\n"); @@ -1165,6 +1378,7 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_cipher_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1179,7 +1393,13 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w return ret; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("cipher failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1188,7 +1408,11 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.clen_ivhlen |= (__u32)msg->in_bytes; sqe.type2.tag = (__u16)msg->tag; - fill_cipher_bd2_addr(msg, &sqe); + ret = fill_cipher_bd2_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("cipher map memory is err(%d)!\n", ret); + return ret; + } ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { @@ -1197,8 +1421,8 @@ static int hisi_sec_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *w if (msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, - msg->out); - + msg->out, msg->mm_ops); + destroy_cipher_bd2_addr(msg, &sqe); return ret; } @@ -1225,7 +1449,7 @@ static int hisi_sec_cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *w if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1316,13 +1540,83 @@ static int fill_cipher_bd3_mode(struct wd_cipher_msg *msg, return 0; } -static void fill_cipher_bd3_addr(struct wd_cipher_msg *msg, - struct hisi_sec_sqe3 *sqe) +static void destroy_cipher_bd3_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) { - sqe->data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("cipher failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->data_dst_addr, + msg->out_bytes); + + if (sqe->c_key_addr) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->c_key_addr, + msg->key_bytes); + + if (sqe->no_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, + (void *)(uintptr_t)sqe->no_scene.c_ivin_addr, msg->iv_bytes); +} + +static int fill_cipher_bd3_addr(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->c_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + 
WD_ERR("cipher failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->c_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_cipher_bd3_addr(msg, sqe); + return -WD_ENOMEM; } static int fill_cipher_bd3(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -1361,8 +1655,12 @@ static int fill_cipher_bd3(struct wd_cipher_msg *msg, struct hisi_sec_sqe3 *sqe) return 0; } -static void fill_sec_prefetch(__u8 data_fmt, __u32 len, __u16 hw_type, struct hisi_sec_sqe3 *sqe) +static void fill_sec_prefetch(__u8 data_fmt, __u32 len, __u16 hw_type, struct hisi_sec_sqe3 *sqe, + bool sva_mode) { + if (!sva_mode) + return; + if (hw_type >= HISI_QM_API_VER5_BASE || (data_fmt == WD_FLAT_BUF && len <= SEC_SVA_PREFETCH_MAX_LEN)) sqe->auth_mac_key |= (__u32)SEC_ENABLE_SVA_PREFETCH << SEC_SVA_PREFETCH_OFFSET; @@ -1374,6 +1672,7 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_cipher_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1387,10 +1686,16 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void if (ret) return ret; - fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe); + fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe, + msg->mm_ops->sva_mode); if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("cipher failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1399,7 +1704,11 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void hisi_set_msg_id(h_qp, &msg->tag); sqe.c_len_ivin = (__u32)msg->in_bytes; sqe.tag = (__u64)(uintptr_t)msg->tag; - fill_cipher_bd3_addr(msg, &sqe); + ret = fill_cipher_bd3_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("cipher map memory is err(%d)!\n", ret); + return ret; + } ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { @@ -1408,8 +1717,8 @@ static int hisi_sec_cipher_send_v3(struct wd_alg_driver *drv, handle_t ctx, void if (msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, - msg->out); - + msg->out, msg->mm_ops); + destroy_cipher_bd3_addr(msg, &sqe); return ret; } @@ -1438,8 +1747,6 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_CIPHER; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->data_dst_addr; temp_msg = wd_cipher_get_msg(qp->q_info.idx, tag); 
if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1447,6 +1754,9 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; @@ -1457,6 +1767,8 @@ static void parse_cipher_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, else update_iv_sgl(temp_msg); + destroy_cipher_bd3_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "cipher"); } @@ -1481,7 +1793,7 @@ static int hisi_sec_cipher_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1519,7 +1831,6 @@ static int fill_digest_bd2_alg(struct wd_digest_msg *msg, return -WD_EINVAL; } sqe->type2.mac_key_alg |= (__u32)BYTES_TO_WORDS(msg->key_bytes) << MAC_LEN_OFFSET; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->key; sqe->type2.mac_key_alg |= g_hmac_a_alg[msg->alg] << AUTH_ALG_OFFSET; @@ -1586,6 +1897,83 @@ static int fill_digest_long_hash(handle_t h_qp, struct wd_digest_msg *msg, return 0; } +static void destroy_digest_bd2_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("digest failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.mac_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.mac_addr, + msg->out_bytes); + + if (sqe->type2.a_key_addr && msg->mode == WD_DIGEST_HMAC) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->key_bytes); +} + +static int fill_digest_bd2_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (mm_ops->sva_mode) { + /* avoid HW accessing address 0 when the pointer is NULL */ + if (msg->in) + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + else + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->mode == WD_DIGEST_HMAC) + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->key; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("digest failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + if (msg->in_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + } + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.mac_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->key_bytes != 0 && msg->mode == WD_DIGEST_HMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_digest_bd2_addr(msg, sqe); + return -WD_ENOMEM; +} + static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, struct wd_digest_msg 
*recv_msg) { @@ -1606,7 +1994,6 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -1614,11 +2001,15 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } + destroy_digest_bd2_addr(temp_msg, sqe); + if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "digest"); } @@ -1719,6 +2110,7 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_digest_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; __u8 scene; __u8 de; @@ -1743,7 +2135,12 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w de = DATA_DST_ADDR_DISABLE << SEC_DE_OFFSET; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("digest failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; @@ -1751,16 +2148,19 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w sqe.sds_sa_type |= (__u8)(de | scene); sqe.type2.alen_ivllen |= (__u32)msg->in_bytes; - sqe.type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.type2.mac_addr = (__u64)(uintptr_t)msg->out; + ret = fill_digest_bd2_addr(msg, &sqe); + if (ret) { + WD_ERR("digest map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_digest_bd2_alg(msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; ret = fill_digest_long_hash(h_qp, msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.tag = (__u16)msg->tag; @@ -1769,15 +2169,16 @@ static int hisi_sec_digest_send(struct wd_alg_driver *drv, handle_t ctx, void *w if (ret != -WD_EBUSY) WD_ERR("digest send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_digest_bd2_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -1801,7 +2202,7 @@ static int hisi_sec_digest_recv(struct wd_alg_driver *drv, handle_t ctx, void *w if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -1831,7 +2232,7 @@ static int hmac_key_len_check(struct wd_digest_msg *msg) } static int fill_digest_bd3_alg(struct wd_digest_msg *msg, - struct hisi_sec_sqe3 *sqe) + struct hisi_sec_sqe3 *sqe) { int ret; @@ -1866,13 +2267,11 @@ static int fill_digest_bd3_alg(struct wd_digest_msg *msg, return ret; sqe->auth_mac_key |= (__u32)BYTES_TO_WORDS(msg->key_bytes) << SEC_AKEY_OFFSET_V3; - sqe->a_key_addr = (__u64)(uintptr_t)msg->key; sqe->auth_mac_key |= g_hmac_a_alg[msg->alg] << SEC_AUTH_ALG_OFFSET_V3; if (msg->alg == WD_DIGEST_AES_GMAC) { 
sqe->auth_mac_key |= AI_GEN_IVIN_ADDR << SEC_AI_GEN_OFFSET_V3; - sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->iv; } } else { WD_ERR("failed to check digest mode, mode = %u\n", msg->mode); @@ -1956,12 +2355,104 @@ static void fill_digest_v3_scene(struct hisi_sec_sqe3 *sqe, sqe->bd_param |= (__u16)(de | scene); } +static void destroy_digest_bd3_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("digest failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->mac_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->mac_addr, + msg->out_bytes); + + if (sqe->a_key_addr && msg->mode == WD_DIGEST_HMAC) + mm_ops->iova_unmap(mempool, msg->key, (void *)(uintptr_t)sqe->a_key_addr, + msg->key_bytes); + + if (sqe->auth_ivin.a_ivin_addr && msg->mode == WD_DIGEST_HMAC && + msg->alg == WD_DIGEST_AES_GMAC) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->auth_ivin.a_ivin_addr, + MAX_IV_SIZE); +} + +static int fill_digest_bd3_addr(struct wd_digest_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + if (msg->mm_ops->sva_mode) { + /* avoid HW accessing address 0 when the pointer is NULL */ + if (msg->in) + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + else + sqe->data_src_addr = (__u64)(uintptr_t)msg->out; + sqe->mac_addr = (__u64)(uintptr_t)msg->out; + if (msg->mode == WD_DIGEST_HMAC) + sqe->a_key_addr = (__u64)(uintptr_t)msg->key; + if (msg->mode == WD_DIGEST_HMAC && msg->alg == WD_DIGEST_AES_GMAC) + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->iv; + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("digest failed to check memory type!\n"); + return -WD_EINVAL; + } + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + if (msg->in_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + } + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->mac_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->iv && msg->mode == WD_DIGEST_HMAC && + msg->alg == WD_DIGEST_AES_GMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, MAX_IV_SIZE); + if (!phy_addr) + goto map_err; + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->key_bytes != 0 && msg->mode == WD_DIGEST_HMAC) { + phy_addr = mm_ops->iova_map(mempool, msg->key, msg->key_bytes); + if (!phy_addr) + goto map_err; + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + return 0; + +map_err: + destroy_digest_bd3_addr(msg, sqe); + return -WD_ENOMEM; +} + static int hisi_sec_digest_send_v3(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_digest_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -1980,43 +2471,53 @@ static int hisi_sec_digest_send_v3(struct wd_alg_driver *drv, handle_t ctx, void sqe.auth_mac_key = AUTH_HMAC_CALCULATE; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = 
hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("digest failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } sqe.a_len_key = (__u32)msg->in_bytes; - sqe.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe.mac_addr = (__u64)(uintptr_t)msg->out; + ret = fill_digest_bd3_addr(msg, &sqe); + if (ret < 0) { + WD_ERR("digest map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_digest_bd3_alg(msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; ret = fill_digest_long_hash3(h_qp, msg, &sqe); if (ret) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.tag = (__u64)(uintptr_t)msg->tag; - fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe); + fill_sec_prefetch(msg->data_fmt, msg->in_bytes, qp->q_info.hw_type, &sqe, + msg->mm_ops->sva_mode); ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (ret < 0) { if (ret != -WD_EBUSY) WD_ERR("digest send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_digest_bd3_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -2040,7 +2541,6 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2048,10 +2548,13 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } + destroy_digest_bd3_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "digest"); @@ -2077,7 +2580,7 @@ static int hisi_sec_digest_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -2253,22 +2756,6 @@ static void set_aead_auth_iv(struct wd_aead_msg *msg) } } -static void fill_aead_bd2_addr(struct wd_aead_msg *msg, - struct hisi_sec_sqe *sqe) -{ - sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; - sqe->type2.mac_addr = (__u64)(uintptr_t)msg->mac; - sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->akey; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - - /* CCM/GCM should init a_iv */ - set_aead_auth_iv(msg); - - sqe->type2.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; -} - static int aead_len_check(struct wd_aead_msg *msg, enum sec_bd_type type) { if (msg->msg_state == AEAD_MSG_MIDDLE) { @@ -2291,6 +2778,11 @@ static int aead_len_check(struct wd_aead_msg *msg, enum sec_bd_type type) return -WD_EINVAL; } + if (unlikely(msg->in_bytes == 0 && msg->assoc_bytes == 0)) { + WD_ERR("aead input data length is 0\n"); + return -WD_EINVAL; + } + if (unlikely(msg->cmode == WD_CIPHER_CCM && msg->assoc_bytes > MAX_CCM_AAD_LEN)) { WD_ERR("aead ccm aad length is too long, size = 
%u\n", @@ -2323,14 +2815,14 @@ static void gcm_auth_ivin(struct wd_aead_msg *msg) __u32 final_counter = GCM_FINAL_COUNTER; /* auth_ivin = {cipher_ivin(16B), null(16B), auth_mac(16B), null(16B)} */ - memset(msg->aiv_stream, 0, AIV_STREAM_LEN); + memset(msg->aiv, 0, AIV_STREAM_LEN); - memcpy(msg->aiv_stream, msg->iv, GCM_IV_SIZE); + memcpy(msg->aiv, msg->iv, GCM_IV_SIZE); /* The last 4 bytes of c_ivin are counters */ - memcpy(msg->aiv_stream + GCM_IV_SIZE, &final_counter, GCM_FINAL_COUNTER_LEN); + memcpy(msg->aiv + GCM_IV_SIZE, &final_counter, GCM_FINAL_COUNTER_LEN); /* Fill auth_ivin with the mac of last MIDDLE BD */ - memcpy(msg->aiv_stream + GCM_STREAM_MAC_OFFSET, msg->mac, GCM_FULL_MAC_LEN); + memcpy(msg->aiv + GCM_STREAM_MAC_OFFSET, msg->mac, GCM_FULL_MAC_LEN); /* Use the user's origin mac for decrypt icv check */ if (msg->op_type == WD_CIPHER_DECRYPTION_DIGEST) @@ -2353,8 +2845,6 @@ static void fill_gcm_first_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe sqe->type2.c_alg = 0; sqe->type2.auth_src_offset = 0; sqe->type2.alen_ivllen = msg->assoc_bytes; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_middle_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe) @@ -2367,8 +2857,6 @@ static void fill_gcm_middle_bd2(struct wd_aead_msg *msg, struct hisi_sec_sqe *sq fill_gcm_akey_len(msg, sqe, BD_TYPE2); sqe->type2.alen_ivllen = 0; sqe->type2.a_ivin_addr = sqe->type2.mac_addr; - sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void get_galois_vector_s(struct wd_aead_msg *msg, __u8 *s) @@ -2385,7 +2873,7 @@ static void get_galois_vector_s(struct wd_aead_msg *msg, __u8 *s) /* Based the little-endian operation */ for (i = 0; i < GCM_BLOCK_SIZE; i++) - s[i] = a_c[i] ^ msg->aiv_stream[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; + s[i] = a_c[i] ^ msg->aiv[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; } static int gcm_do_soft_mac(struct wd_aead_msg *msg) @@ -2424,9 +2912,9 @@ static int gcm_do_soft_mac(struct wd_aead_msg *msg) */ for (i = 0; i < GCM_BLOCK_SIZE; i++) G[i] = data[GCM_BLOCK_OFFSET - i] ^ - msg->aiv_stream[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; + msg->aiv[(__u8)(GCM_AUTH_MAC_OFFSET - i)]; - galois_compute(G, H, msg->aiv_stream + GCM_STREAM_MAC_OFFSET, GCM_BLOCK_SIZE); + galois_compute(G, H, msg->aiv + GCM_STREAM_MAC_OFFSET, GCM_BLOCK_SIZE); len -= block; offset += block; } @@ -2436,7 +2924,7 @@ static int gcm_do_soft_mac(struct wd_aead_msg *msg) galois_compute(S, H, g, GCM_BLOCK_SIZE); /* Encrypt ctr0 based on AES_ECB */ - aes_encrypt(msg->ckey, msg->ckey_bytes, msg->aiv_stream, ctr_r); + aes_encrypt(msg->ckey, msg->ckey_bytes, msg->aiv, ctr_r); /* Get the GMAC tag final */ for (i = 0; i < GCM_BLOCK_SIZE; i++) @@ -2556,11 +3044,159 @@ static int aead_msg_state_check(struct wd_aead_msg *msg) return 0; } +static void destroy_aead_bd2_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + __u64 dma_addr; + void *mempool; + + aead_free_aiv_addr(msg); + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("aead failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->type2.data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->type2.data_src_addr, + msg->in_bytes); + + if (sqe->type2.data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->type2.data_dst_addr, + msg->out_bytes); + + if 
(sqe->type2.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->type2.c_ivin_addr, + msg->iv_bytes); + + if (sqe->type2.a_key_addr) { + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + mm_ops->iova_unmap(mempool, msg->ckey, + (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->ckey_bytes); + else + mm_ops->iova_unmap(mempool, msg->akey, + (void *)(uintptr_t)sqe->type2.a_key_addr, + msg->akey_bytes); + } + + if (sqe->type2.c_key_addr && !((msg->msg_state == AEAD_MSG_FIRST || + msg->msg_state == AEAD_MSG_MIDDLE) && msg->cmode == WD_CIPHER_GCM)) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->type2.c_key_addr, + msg->ckey_bytes); + + if (sqe->type2.mac_addr) + mm_ops->iova_unmap(mempool, msg->mac, (void *)(uintptr_t)sqe->type2.mac_addr, + msg->auth_bytes); +} + +static int aead_mem_nosva_map(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe, int idx) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool, *phy_addr; + + /* No-SVA mode and Memory is USER mode or PROXY mode */ + mempool = mm_ops->usr; + + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes + msg->assoc_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->type2.data_src_addr = (__u64)(uintptr_t)phy_addr; + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)phy_addr; + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->akey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->akey, msg->akey_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->ckey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->ckey, msg->ckey_bytes); + if (!phy_addr) + goto map_err; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + sqe->type2.a_key_addr = (__u64)(uintptr_t)phy_addr; + else + sqe->type2.c_key_addr = (__u64)(uintptr_t)phy_addr; + } + if (msg->auth_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->mac, msg->auth_bytes); + if (!phy_addr) + goto map_err; + sqe->type2.mac_addr = (__u64)(uintptr_t)phy_addr; + } + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + phy_addr = aiv_addr->aiv_nosva + (idx << AEAD_AIV_OFFSET); + sqe->type2.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + + return 0; + +map_err: + destroy_aead_bd2_addr(msg, sqe); + return -WD_ENOMEM; +} + +static int fill_aead_bd2_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe *sqe, + struct hisi_qp *qp) +{ + int idx; + + idx = aead_get_aiv_addr(qp, msg); + if (idx < 0) + return idx; + + /* sva mode */ + if (msg->mm_ops->sva_mode) { + sqe->type2.data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->type2.data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->type2.mac_addr = (__u64)(uintptr_t)msg->mac; + sqe->type2.c_key_addr = (__u64)(uintptr_t)msg->ckey; + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->akey; + sqe->type2.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + sqe->type2.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; + + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE) + && msg->cmode == WD_CIPHER_GCM) + sqe->type2.a_key_addr = (__u64)(uintptr_t)msg->ckey; + + return 
0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("aead failed to check memory type!\n"); + aead_free_aiv_addr(msg); + return -WD_EINVAL; + } + + /* aiv addr is freed in destroy addr interface */ + return aead_mem_nosva_map(msg, sqe, idx); +} + static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_aead_msg *msg = wd_msg; struct hisi_sec_sqe sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -2583,17 +3219,27 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ return ret; if (msg->data_fmt == WD_SGL_BUF) { - ret = hisi_sec_fill_sgl(h_qp, &msg->in, &msg->out, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("aead failed to get sglpool for hw_v2!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } - fill_aead_bd2_addr(msg, &sqe); + ret = fill_aead_bd2_addr(msg, &sqe, qp); + if (ret < 0) { + if (ret != -WD_EBUSY) + WD_ERR("aead map memory is err(%d)!\n", ret); + goto put_sgl; + } ret = fill_stream_bd2(msg, &sqe); if (unlikely(ret)) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.type2.tag = (__u16)msg->tag; @@ -2603,15 +3249,16 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ if (ret != -WD_EBUSY) WD_ERR("aead send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_aead_bd2_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -2652,8 +3299,6 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); - recv_msg->in = (__u8 *)(uintptr_t)sqe->type2.data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->type2.data_dst_addr; temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2661,12 +3306,16 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } update_stream_counter(temp_msg); + destroy_aead_bd2_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "aead"); @@ -2692,7 +3341,7 @@ static int hisi_sec_aead_recv(struct wd_alg_driver *drv, handle_t ctx, void *wd_ if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, recv_msg->in, - recv_msg->out); + recv_msg->out, recv_msg->mm_ops); return 0; } @@ -2789,23 +3438,6 @@ static int fill_aead_bd3_mode(struct wd_aead_msg *msg, return 0; } -static void fill_aead_bd3_addr(struct wd_aead_msg *msg, - struct hisi_sec_sqe3 *sqe) -{ - sqe->data_src_addr = (__u64)(uintptr_t)msg->in; - sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; - - sqe->mac_addr = (__u64)(uintptr_t)msg->mac; - sqe->c_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->a_key_addr = (__u64)(uintptr_t)msg->akey; - sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - - /* CCM/GCM should init a_iv */ - set_aead_auth_iv(msg); - - 
sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; -} - static void fill_gcm_first_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) { sqe->auth_mac_key |= AI_GEN_INNER << SEC_AI_GEN_OFFSET_V3; @@ -2822,8 +3454,6 @@ static void fill_gcm_first_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sq sqe->c_mode_alg &= ~(0x7 << SEC_CALG_OFFSET_V3); sqe->auth_src_offset = 0; sqe->a_len_key = msg->assoc_bytes; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_middle_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2837,8 +3467,6 @@ static void fill_gcm_middle_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *s fill_gcm_akey_len(msg, sqe, BD_TYPE3); sqe->a_len_key = 0; sqe->auth_ivin.a_ivin_addr = sqe->mac_addr; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; } static void fill_gcm_final_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2853,9 +3481,6 @@ static void fill_gcm_final_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sq sqe->a_len_key = 0; sqe->stream_scene.long_a_data_len = msg->assoc_bytes; sqe->stream_scene.long_a_data_len |= msg->long_data_len << LONG_AUTH_DATA_OFFSET; - sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; - sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; - sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv_stream; } static int fill_stream_bd3(handle_t h_qp, struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) @@ -2939,12 +3564,167 @@ static int fill_aead_bd3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) return 0; } +static void destroy_aead_bd3_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe) +{ + struct wd_mm_ops *mm_ops = msg->mm_ops; + void *mempool; + + aead_free_aiv_addr(msg); + /* SVA mode and skip */ + if (!mm_ops || mm_ops->sva_mode) + return; + + if (!mm_ops->usr) { + WD_ERR("aead failed to check memory pool!\n"); + return; + } + + mempool = mm_ops->usr; + if (sqe->data_src_addr) + mm_ops->iova_unmap(mempool, msg->in, (void *)(uintptr_t)sqe->data_src_addr, + msg->in_bytes); + + if (sqe->data_dst_addr) + mm_ops->iova_unmap(mempool, msg->out, (void *)(uintptr_t)sqe->data_dst_addr, + msg->out_bytes); + + if (sqe->no_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, (void *)(uintptr_t)sqe->no_scene.c_ivin_addr, + msg->iv_bytes); + else if (sqe->stream_scene.c_ivin_addr) + mm_ops->iova_unmap(mempool, msg->iv, + (void *)(uintptr_t)sqe->stream_scene.c_ivin_addr, + msg->iv_bytes); + + if (sqe->a_key_addr) { + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->a_key_addr, + msg->ckey_bytes); + else + mm_ops->iova_unmap(mempool, msg->akey, (void *)(uintptr_t)sqe->a_key_addr, + msg->akey_bytes); + } + + if (sqe->c_key_addr && !((msg->msg_state == AEAD_MSG_FIRST || + msg->msg_state == AEAD_MSG_MIDDLE || msg->msg_state == AEAD_MSG_END) && + msg->cmode == WD_CIPHER_GCM)) + mm_ops->iova_unmap(mempool, msg->ckey, (void *)(uintptr_t)sqe->c_key_addr, + msg->ckey_bytes); + + if (sqe->mac_addr) + mm_ops->iova_unmap(mempool, msg->mac, (void *)(uintptr_t)sqe->mac_addr, + msg->auth_bytes); +} + +static int aead_mem_nosva_map_v3(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe, int idx) +{ + struct wd_aead_aiv_addr *aiv_addr = (struct wd_aead_aiv_addr *)msg->drv_cfg; + struct wd_mm_ops *mm_ops = msg->mm_ops; + 
void *mempool = mm_ops->usr; + void *phy_addr; + + phy_addr = mm_ops->iova_map(mempool, msg->in, msg->in_bytes + msg->assoc_bytes); + if (!phy_addr) + return -WD_ENOMEM; + sqe->data_src_addr = (__u64)(uintptr_t)phy_addr; + + phy_addr = mm_ops->iova_map(mempool, msg->out, msg->out_bytes); + if (!phy_addr) + goto map_err; + sqe->data_dst_addr = (__u64)(uintptr_t)phy_addr; + + if (msg->iv_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->iv, msg->iv_bytes); + if (!phy_addr) + goto map_err; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->akey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->akey, msg->akey_bytes); + if (!phy_addr) + goto map_err; + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->ckey_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->ckey, msg->ckey_bytes); + if (!phy_addr) + goto map_err; + sqe->c_key_addr = (__u64)(uintptr_t)phy_addr; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) + sqe->a_key_addr = (__u64)(uintptr_t)phy_addr; + } + + if (msg->auth_bytes) { + phy_addr = mm_ops->iova_map(mempool, msg->mac, msg->auth_bytes); + if (!phy_addr) + goto map_err; + sqe->mac_addr = (__u64)(uintptr_t)phy_addr; + } + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + phy_addr = aiv_addr->aiv_nosva + (idx << AEAD_AIV_OFFSET); + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)phy_addr; + + return 0; + +map_err: + destroy_aead_bd3_addr(msg, sqe); + return -WD_ENOMEM; +} + +static int fill_aead_bd3_addr(struct wd_aead_msg *msg, struct hisi_sec_sqe3 *sqe, + struct hisi_qp *qp) +{ + int idx; + + idx = aead_get_aiv_addr(qp, msg); + if (idx < 0) + return idx; + + /* sva mode */ + if (msg->mm_ops->sva_mode) { + sqe->data_src_addr = (__u64)(uintptr_t)msg->in; + sqe->data_dst_addr = (__u64)(uintptr_t)msg->out; + sqe->no_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->c_key_addr = (__u64)(uintptr_t)msg->ckey; + sqe->a_key_addr = (__u64)(uintptr_t)msg->akey; + sqe->mac_addr = (__u64)(uintptr_t)msg->mac; + + /* CCM/GCM should init a_iv */ + set_aead_auth_iv(msg); + sqe->auth_ivin.a_ivin_addr = (__u64)(uintptr_t)msg->aiv; + if ((msg->msg_state == AEAD_MSG_FIRST || msg->msg_state == AEAD_MSG_MIDDLE || + msg->msg_state == AEAD_MSG_END) && msg->cmode == WD_CIPHER_GCM) { + sqe->stream_scene.c_ivin_addr = (__u64)(uintptr_t)msg->iv; + sqe->a_key_addr = (__u64)(uintptr_t)msg->ckey; + } + return 0; + } + if (msg->mm_type > UADK_MEM_PROXY) { + WD_ERR("aead failed to check memory type!\n"); + aead_free_aiv_addr(msg); + return -WD_EINVAL; + } + + /* aiv addr is freed in destroy addr interface */ + return aead_mem_nosva_map_v3(msg, sqe, idx); +} + static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct wd_aead_msg *msg = wd_msg; struct hisi_sec_sqe3 sqe; + handle_t h_sgl_pool; __u16 count = 0; int ret; @@ -2967,19 +3747,30 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * return ret; fill_sec_prefetch(msg->data_fmt, msg->in_bytes + msg->assoc_bytes, - qp->q_info.hw_type, &sqe); + qp->q_info.hw_type, &sqe, msg->mm_ops->sva_mode); if (msg->data_fmt == WD_SGL_BUF) { - ret = 
hisi_sec_fill_sgl_v3(h_qp, &msg->in, &msg->out, &sqe, + h_sgl_pool = hisi_qm_get_sglpool(h_qp, msg->mm_ops); + if (!h_sgl_pool) { + WD_ERR("aead failed to get sglpool for hw_v3!\n"); + return -WD_EINVAL; + } + ret = hisi_sec_fill_sgl_v3(h_sgl_pool, &msg->in, &msg->out, &sqe, msg->alg_type); if (ret) return ret; } - fill_aead_bd3_addr(msg, &sqe); + ret = fill_aead_bd3_addr(msg, &sqe, qp); + if (ret < 0) { + if (ret != -WD_EBUSY) + WD_ERR("aead map memory is err(%d)!\n", ret); + goto put_sgl; + } + ret = fill_stream_bd3(h_qp, msg, &sqe); if (unlikely(ret)) - goto put_sgl; + goto destroy_addr; hisi_set_msg_id(h_qp, &msg->tag); sqe.tag = msg->tag; @@ -2988,15 +3779,16 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * if (ret != -WD_EBUSY) WD_ERR("aead send sqe is err(%d)!\n", ret); - goto put_sgl; + goto destroy_addr; } return 0; +destroy_addr: + destroy_aead_bd3_addr(msg, &sqe); put_sgl: if (msg->data_fmt == WD_SGL_BUF) - hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); - + hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out, msg->mm_ops); return ret; } @@ -3023,8 +3815,6 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); - recv_msg->in = (__u8 *)(uintptr_t)sqe->data_src_addr; - recv_msg->out = (__u8 *)(uintptr_t)sqe->data_dst_addr; temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -3032,12 +3822,16 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } + recv_msg->in = temp_msg->in; + recv_msg->out = temp_msg->out; + recv_msg->mm_ops = temp_msg->mm_ops; } else { /* The synchronization mode uses the same message */ temp_msg = recv_msg; } update_stream_counter(temp_msg); + destroy_aead_bd3_addr(temp_msg, sqe); if (unlikely(recv_msg->result != WD_SUCCESS)) dump_sec_msg(temp_msg, "aead"); @@ -3063,7 +3857,7 @@ static int hisi_sec_aead_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void * if (recv_msg->data_fmt == WD_SGL_BUF) hisi_sec_put_sgl(h_qp, recv_msg->alg_type, - recv_msg->in, recv_msg->out); + recv_msg->in, recv_msg->out, recv_msg->mm_ops); return 0; } @@ -3128,6 +3922,7 @@ static void hisi_sec_exit(struct wd_alg_driver *drv) priv = (struct hisi_sec_ctx *)drv->priv; config = &priv->config; + for (i = 0; i < config->ctx_num; i++) { h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); hisi_qm_free_qp(h_qp); diff --git a/include/drv/wd_aead_drv.h b/include/drv/wd_aead_drv.h index a9c0e7c..2c53217 100644 --- a/include/drv/wd_aead_drv.h +++ b/include/drv/wd_aead_drv.h @@ -54,9 +54,7 @@ struct wd_aead_msg { /* input iv pointer */ __u8 *iv; /* input auth iv pointer */ - __u8 aiv[MAX_IV_SIZE]; - /* input auth iv pointer for stream mode */ - __u8 aiv_stream[AIV_STREAM_LEN]; + __u8 *aiv; /* input data pointer */ __u8 *in; /* output data pointer */ @@ -68,6 +66,25 @@ struct wd_aead_msg { /* total of data for stream mode */ __u64 long_data_len; enum wd_aead_msg_state msg_state; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; + void *drv_cfg; /* internal driver configuration */ +}; + +struct wd_aead_aiv_addr { + __u8 *aiv; + __u8 *aiv_status; + __u8 *aiv_nosva; +}; + +struct wd_aead_extend_ops { + void *params; + int (*eops_aiv_init)(struct wd_alg_driver *drv, + struct wd_mm_ops *mm_ops, + void **params); + void (*eops_aiv_uninit)(struct wd_alg_driver *drv, + struct 
wd_mm_ops *mm_ops, + void *params); }; struct wd_aead_msg *wd_aead_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_cipher_drv.h b/include/drv/wd_cipher_drv.h index c6d8ddf..c0be0c3 100644 --- a/include/drv/wd_cipher_drv.h +++ b/include/drv/wd_cipher_drv.h @@ -48,6 +48,8 @@ struct wd_cipher_msg { __u8 *in; /* output data pointer */ __u8 *out; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; }; struct wd_cipher_msg *wd_cipher_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h index a55ef5b..12398f2 100644 --- a/include/drv/wd_digest_drv.h +++ b/include/drv/wd_digest_drv.h @@ -59,6 +59,8 @@ struct wd_digest_msg { __u8 *partial_block; /* total of data for stream mode */ __u64 long_data_len; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; }; static inline enum hash_block_type get_hash_block_type(struct wd_digest_msg *msg) diff --git a/include/wd_aead.h b/include/wd_aead.h index 01f6980..4b5095f 100644 --- a/include/wd_aead.h +++ b/include/wd_aead.h @@ -41,6 +41,8 @@ struct wd_aead_sess_setup { enum wd_digest_type dalg; enum wd_digest_mode dmode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; /** diff --git a/include/wd_cipher.h b/include/wd_cipher.h index d54f7fe..1d82eac 100644 --- a/include/wd_cipher.h +++ b/include/wd_cipher.h @@ -73,6 +73,8 @@ struct wd_cipher_sess_setup { enum wd_cipher_alg alg; enum wd_cipher_mode mode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_cipher_req; diff --git a/include/wd_digest.h b/include/wd_digest.h index 6ce31f2..42a95db 100644 --- a/include/wd_digest.h +++ b/include/wd_digest.h @@ -100,6 +100,8 @@ struct wd_digest_sess_setup { enum wd_digest_type alg; enum wd_digest_mode mode; void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; typedef void *wd_digest_cb_t(void *cb_param); diff --git a/wd_aead.c b/wd_aead.c index 373b6fe..8467409 100644 --- a/wd_aead.c +++ b/wd_aead.c @@ -44,19 +44,22 @@ struct wd_aead_sess { enum wd_cipher_mode cmode; enum wd_digest_type dalg; enum wd_digest_mode dmode; - unsigned char ckey[MAX_CIPHER_KEY_SIZE]; - unsigned char akey[MAX_HMAC_KEY_SIZE]; + unsigned char *ckey; + unsigned char *akey; /* Mac data pointer for decrypto as stream mode */ - unsigned char mac_bak[WD_AEAD_CCM_GCM_MAX]; + unsigned char *mac_bak; __u16 ckey_bytes; __u16 akey_bytes; __u16 auth_bytes; void *priv; void *sched_key; /* Stored the counter for gcm stream mode */ - __u8 iv[MAX_IV_SIZE]; + __u8 *iv; /* Total of data for stream mode */ __u64 long_data_len; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; + struct wd_aead_extend_ops eops; }; struct wd_env_config wd_aead_env_config; @@ -302,26 +305,26 @@ int wd_aead_get_maxauthsize(handle_t h_sess) return g_aead_mac_len[sess->dalg]; } -handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) +static struct wd_aead_sess *check_and_init_sess(struct wd_aead_sess_setup *setup) { - struct wd_aead_sess *sess = NULL; + struct wd_aead_sess *sess; bool ret; if (unlikely(!setup)) { WD_ERR("failed to check session input parameter!\n"); - return (handle_t)0; + return NULL; } if (setup->calg >= WD_CIPHER_ALG_TYPE_MAX || - setup->cmode >= WD_CIPHER_MODE_TYPE_MAX) { + setup->cmode >= WD_CIPHER_MODE_TYPE_MAX) { WD_ERR("failed to check algorithm setup!\n"); - return (handle_t)0; + return NULL; } sess = malloc(sizeof(struct wd_aead_sess)); if (!sess) { WD_ERR("failed to alloc session memory!\n"); - return (handle_t)0; + return NULL; } memset(sess, 
0, sizeof(struct wd_aead_sess)); @@ -330,24 +333,160 @@ handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) sess->cmode = setup->cmode; sess->dalg = setup->dalg; sess->dmode = setup->dmode; + ret = wd_drv_alg_support(sess->alg_name, wd_aead_setting.driver); if (!ret) { WD_ERR("failed to support this algorithm: %s!\n", sess->alg_name); - goto err_sess; + free(sess); + return NULL; + } + + return sess; +} + +static int aead_setup_memory_and_buffers(struct wd_aead_sess *sess, + struct wd_aead_sess_setup *setup) +{ + wd_alloc aead_alloc_func; + wd_free aead_free_func; + void *mempool; + int ret; + + ret = wd_mem_ops_init(wd_aead_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + return -WD_EINVAL; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + aead_alloc_func = sess->mm_ops.alloc; + aead_free_func = sess->mm_ops.free; + mempool = sess->mm_ops.usr; + + sess->mac_bak = aead_alloc_func(mempool, WD_AEAD_CCM_GCM_MAX); + if (!sess->mac_bak) { + WD_ERR("aead failed to calloc mac_bak memory!\n"); + return -WD_ENOMEM; + } + memset(sess->mac_bak, 0, WD_AEAD_CCM_GCM_MAX); + + sess->iv = aead_alloc_func(mempool, MAX_IV_SIZE); + if (!sess->iv) { + WD_ERR("failed to alloc iv memory!\n"); + goto iv_err; + } + memset(sess->iv, 0, MAX_IV_SIZE); + + sess->ckey = aead_alloc_func(mempool, MAX_CIPHER_KEY_SIZE); + if (!sess->ckey) { + WD_ERR("failed to alloc ckey memory!\n"); + goto ckey_err; + } + memset(sess->ckey, 0, MAX_CIPHER_KEY_SIZE); + + sess->akey = aead_alloc_func(mempool, MAX_HMAC_KEY_SIZE); + if (!sess->akey) { + WD_ERR("failed to alloc akey memory!\n"); + goto akey_err; + } + memset(sess->akey, 0, MAX_HMAC_KEY_SIZE); + + return 0; + +akey_err: + aead_free_func(mempool, sess->ckey); +ckey_err: + aead_free_func(mempool, sess->iv); +iv_err: + aead_free_func(mempool, sess->mac_bak); + + return -WD_ENOMEM; +} + +static void cleanup_session(struct wd_aead_sess *sess) +{ + sess->mm_ops.free(sess->mm_ops.usr, sess->mac_bak); + sess->mm_ops.free(sess->mm_ops.usr, sess->iv); + sess->mm_ops.free(sess->mm_ops.usr, sess->ckey); + sess->mm_ops.free(sess->mm_ops.usr, sess->akey); + + if (sess) + free(sess); +} + +static int wd_aead_sess_eops_init(struct wd_aead_sess *sess) +{ + int ret; + + if (sess->eops.eops_aiv_init) { + if (!sess->eops.eops_aiv_uninit) { + WD_ERR("failed to get aead extend ops free in session!\n"); + return -WD_EINVAL; + } + ret = sess->eops.eops_aiv_init(wd_aead_setting.driver, &sess->mm_ops, + &sess->eops.params); + if (ret) { + WD_ERR("failed to init aead extend ops params in session!\n"); + return ret; + } + } + + return WD_SUCCESS; +} + +static void wd_aead_sess_eops_uninit(struct wd_aead_sess *sess) +{ + if (sess->eops.eops_aiv_uninit) { + sess->eops.eops_aiv_uninit(wd_aead_setting.driver, &sess->mm_ops, + sess->eops.params); + sess->eops.params = NULL; + } +} + +handle_t wd_aead_alloc_sess(struct wd_aead_sess_setup *setup) +{ + struct wd_aead_sess *sess; + int ret; + + sess = check_and_init_sess(setup); + if (!sess) + return (handle_t)0; + + if (aead_setup_memory_and_buffers(sess, setup)) { + free(sess); + return (handle_t)0; + } + + if (wd_aead_setting.driver->get_extend_ops) { + ret = wd_aead_setting.driver->get_extend_ops(&sess->eops); + if (ret) { + WD_ERR("failed to get aead sess extend ops!\n"); + goto sess_err; + } + } + + ret = wd_aead_sess_eops_init(sess); + if (ret) { + WD_ERR("failed to init aead sess extend eops!\n"); + goto sess_err; } 
- /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_aead_setting.sched.sched_init( - wd_aead_setting.sched.h_sched_ctx, setup->sched_param); + wd_aead_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto err_sess; + goto sched_key_err; } return (handle_t)sess; -err_sess: - free(sess); +sched_key_err: + wd_aead_sess_eops_uninit(sess); +sess_err: + cleanup_session(sess); return (handle_t)0; } @@ -365,7 +504,8 @@ void wd_aead_free_sess(handle_t h_sess) if (sess->sched_key) free(sess->sched_key); - free(sess); + wd_aead_sess_eops_uninit(sess); + cleanup_session(sess); } static int wd_aead_param_check(struct wd_aead_sess *sess, @@ -717,8 +857,11 @@ static void fill_request_msg(struct wd_aead_msg *msg, struct wd_aead_req *req, msg->mac = req->mac; msg->auth_bytes = sess->auth_bytes; msg->data_fmt = req->data_fmt; - msg->msg_state = req->msg_state; + + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; + msg->drv_cfg = sess->eops.params; fill_stream_msg(msg, req, sess); } diff --git a/wd_cipher.c b/wd_cipher.c index 92ca07b..58656dc 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -63,9 +63,11 @@ struct wd_cipher_sess { enum wd_cipher_mode mode; wd_dev_mask_t *dev_mask; void *priv; - unsigned char key[MAX_CIPHER_KEY_SIZE]; + unsigned char *key; __u32 key_bytes; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_env_config wd_cipher_env_config; @@ -250,6 +252,31 @@ int wd_cipher_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) return 0; } +static int cipher_setup_memory_and_buffers(struct wd_cipher_sess *sess, + struct wd_cipher_sess_setup *setup) +{ + int ret; + + ret = wd_mem_ops_init(wd_cipher_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("cipher failed to init memory ops!\n"); + return ret; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + sess->key = sess->mm_ops.alloc(sess->mm_ops.usr, MAX_CIPHER_KEY_SIZE); + if (!sess->key) { + WD_ERR("cipher failed to alloc key memory!\n"); + return -WD_ENOMEM; + } + memset(sess->key, 0, MAX_CIPHER_KEY_SIZE); + + return 0; +} + handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) { struct wd_cipher_sess *sess = NULL; @@ -282,16 +309,22 @@ handle_t wd_cipher_alloc_sess(struct wd_cipher_sess_setup *setup) sess->alg = setup->alg; sess->mode = setup->mode; + /* Memory type set */ + if (cipher_setup_memory_and_buffers(sess, setup)) + goto free_sess; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_cipher_setting.sched.sched_init( wd_cipher_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto free_sess; + goto free_key; } return (handle_t)sess; +free_key: + sess->mm_ops.free(sess->mm_ops.usr, sess->key); free_sess: free(sess); return (handle_t)0; @@ -307,6 +340,7 @@ void wd_cipher_free_sess(handle_t h_sess) } wd_memset_zero(sess->key, sess->key_bytes); + sess->mm_ops.free(sess->mm_ops.usr, sess->key); if (sess->sched_key) free(sess->sched_key); @@ -545,6 +579,8 @@ static void fill_request_msg(struct wd_cipher_msg *msg, msg->iv = req->iv; msg->iv_bytes = req->iv_bytes; msg->data_fmt = req->data_fmt; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; } static int cipher_iv_len_check(struct wd_cipher_req *req, diff --git a/wd_digest.c 
b/wd_digest.c index 2d31176..0b37f8b 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -64,10 +64,12 @@ struct wd_digest_sess { enum wd_digest_type alg; enum wd_digest_mode mode; void *priv; - unsigned char key[MAX_HMAC_KEY_SIZE]; + unsigned char *key; __u32 key_bytes; void *sched_key; struct wd_digest_stream_data stream_data; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_env_config wd_digest_env_config; @@ -187,6 +189,31 @@ int wd_digest_set_key(handle_t h_sess, const __u8 *key, __u32 key_len) return 0; } +static int digest_setup_memory_and_buffers(struct wd_digest_sess *sess, + struct wd_digest_sess_setup *setup) +{ + int ret; + + ret = wd_mem_ops_init(wd_digest_setting.config.ctxs[0].ctx, + &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + return ret; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + + sess->key = sess->mm_ops.alloc(sess->mm_ops.usr, MAX_HMAC_KEY_SIZE); + if (!sess->key) { + WD_ERR("digest failed to alloc key memory!\n"); + return -WD_ENOMEM; + } + memset(sess->key, 0, MAX_HMAC_KEY_SIZE); + + return 0; +} + handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) { struct wd_digest_sess *sess = NULL; @@ -215,16 +242,23 @@ handle_t wd_digest_alloc_sess(struct wd_digest_sess_setup *setup) WD_ERR("failed to support this algorithm: %s!\n", sess->alg_name); goto err_sess; } + + /* Memory type set */ + if (digest_setup_memory_and_buffers(sess, setup)) + goto err_sess; + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_digest_setting.sched.sched_init( wd_digest_setting.sched.h_sched_ctx, setup->sched_param); if (WD_IS_ERR(sess->sched_key)) { WD_ERR("failed to init session schedule key!\n"); - goto err_sess; + goto err_key; } return (handle_t)sess; +err_key: + sess->mm_ops.free(sess->mm_ops.usr, sess->key); err_sess: free(sess); return (handle_t)0; @@ -240,6 +274,7 @@ void wd_digest_free_sess(handle_t h_sess) } wd_memset_zero(sess->key, sess->key_bytes); + sess->mm_ops.free(sess->mm_ops.usr, sess->key); if (sess->sched_key) free(sess->sched_key); free(sess); @@ -603,6 +638,9 @@ static void fill_request_msg(struct wd_digest_msg *msg, msg->partial_block = sess->stream_data.partial_block; msg->partial_bytes = sess->stream_data.partial_bytes; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; + /* Use iv_bytes to store the stream message state */ msg->iv_bytes = sess->stream_data.msg_state; } -- 2.33.0
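Before the tooling patches below, a minimal sketch of the fill/destroy address pattern the SEC driver adopts above may be useful. It is a sketch only: the wd_mm_ops callback shapes are inferred from their call sites in this patch, and the demo_* names are illustrative rather than UADK API.

/*
 * Minimal sketch, assuming wd_mm_ops callback shapes inferred from
 * their call sites above; demo_* names are illustrative only.
 */
#include <stdint.h>
#include <stddef.h>

struct demo_mm_ops {
        void *(*iova_map)(void *usr, void *va, size_t size);
        void (*iova_unmap)(void *usr, void *va, void *dma, size_t size);
        void *usr;       /* memory pool handle */
        int sva_mode;    /* non-zero: device shares the CPU page tables */
};

/* SVA mode: the virtual address goes into the SQE unchanged.
 * No-SVA mode: translate through the pool and record the DMA address. */
static int demo_fill_hw_addr(struct demo_mm_ops *ops, void *buf,
                             size_t len, uint64_t *hw_addr)
{
        void *dma;

        if (ops->sva_mode) {
                *hw_addr = (uint64_t)(uintptr_t)buf;
                return 0;
        }

        dma = ops->iova_map(ops->usr, buf, len);
        if (!dma)
                return -1;      /* caller unwinds buffers mapped so far */

        *hw_addr = (uint64_t)(uintptr_t)dma;
        return 0;
}

/* Mirror of the fill step: only No-SVA mappings need tearing down,
 * which is why the destroy helpers above key off the SQE fields. */
static void demo_destroy_hw_addr(struct demo_mm_ops *ops, void *buf,
                                 size_t len, uint64_t hw_addr)
{
        if (ops->sva_mode || !hw_addr)
                return;

        ops->iova_unmap(ops->usr, buf, (void *)(uintptr_t)hw_addr, len);
}

Keeping both sides symmetric is what lets the recv path free No-SVA mappings using the DMA addresses still held in the completed SQE.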
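The session changes follow the same motive: inline key/IV arrays become pool allocations so that a No-SVA device can DMA them directly. Below is a hedged sketch of the allocate-or-unwind pattern, with alloc/free signatures assumed from their uses above.

#include <stddef.h>
#include <string.h>

typedef void *(*demo_alloc_fn)(void *usr, size_t size);
typedef void (*demo_free_fn)(void *usr, void *va);

struct demo_sess_bufs {
        unsigned char *key;
        unsigned char *iv;
};

/* Allocate every session buffer from the pool, unwinding in reverse
 * order on failure, as the aead/cipher/digest setup helpers above do. */
static int demo_sess_bufs_init(struct demo_sess_bufs *bufs,
                               demo_alloc_fn alloc_fn, demo_free_fn free_fn,
                               void *pool, size_t key_sz, size_t iv_sz)
{
        bufs->key = alloc_fn(pool, key_sz);
        if (!bufs->key)
                return -1;
        memset(bufs->key, 0, key_sz);

        bufs->iv = alloc_fn(pool, iv_sz);
        if (!bufs->iv)
                goto iv_err;
        memset(bufs->iv, 0, iv_sz);

        return 0;

iv_err:
        free_fn(pool, bufs->key);
        return -1;
}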
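One more sketch for the GCM stream path: gcm_auth_ivin() above rebuilds a 64-byte auth IV laid out as {c_ivin(16B) = iv(12B)||counter(4B), pad(16B), mac(16B), pad(16B)}. The demo_* constants below mirror the offsets used there and are assumptions for illustration; like the patch, this relies on a little-endian counter.

#include <string.h>

#define DEMO_GCM_IV_SIZE        12
#define DEMO_GCM_CTR_LEN        4
#define DEMO_STREAM_MAC_OFFSET  32
#define DEMO_AIV_STREAM_LEN     64
#define DEMO_GCM_MAC_LEN        16

/* Rebuild the stream-mode auth IV from the cipher IV, the final
 * counter and the MAC carried over from the last MIDDLE BD. */
static void demo_build_stream_aiv(unsigned char *aiv, const unsigned char *iv,
                                  const unsigned char *last_mac,
                                  unsigned int final_counter)
{
        memset(aiv, 0, DEMO_AIV_STREAM_LEN);
        memcpy(aiv, iv, DEMO_GCM_IV_SIZE);
        memcpy(aiv + DEMO_GCM_IV_SIZE, &final_counter, DEMO_GCM_CTR_LEN);
        memcpy(aiv + DEMO_STREAM_MAC_OFFSET, last_mac, DEMO_GCM_MAC_LEN);
}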
From: Zhushuai Yin <yinzhushuai@huawei.com> Provide a public tool interface that obtains the device ID from the device name. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/uadk_benchmark.c | 33 ++++++++++++++++++++++++++++ uadk_tool/benchmark/uadk_benchmark.h | 8 +++++++ 2 files changed, 41 insertions(+) diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index d2826ad..24737c5 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -200,6 +200,29 @@ void set_run_state(int state) g_run_state = state; } +int uadk_parse_dev_id(char *dev_name) +{ + char *last_dash = NULL; + char *endptr; + int dev_id; + + if (!dev_name) + return -WD_EINVAL; + + /* Find the last '-' in the string. */ + last_dash = strrchr(dev_name, '-'); + if (!last_dash || *(last_dash + 1) == '\0') + return -WD_EINVAL; + + /* Parse the number that follows it */ + dev_id = strtol(last_dash + 1, &endptr, 10); + /* Check that the suffix is all digits */ + if (*endptr != '\0' || dev_id < 0) + return -WD_EINVAL; + + return dev_id; +} + static int get_alg_type(const char *alg_name) { int alg = ALG_MAX; @@ -717,6 +740,7 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"complevel", required_argument, 0, 16}, {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, + {"memory", required_argument, 0, 19}, {0, 0, 0, 0} }; @@ -788,6 +812,9 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) } strcpy(option->device, optarg); break; + case 19: + option->mem_type = strtol(optarg, NULL, 0); + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); @@ -864,6 +891,12 @@ int acc_option_convert(struct acc_option *option) goto param_err; } + /* Check that the requested memory type is within the supported range */ + if (option->mem_type > UADK_PROXY) { + ACC_TST_PRT("uadk benchmark memory type set error!\n"); + goto param_err; + } + return 0; param_err: diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index b03db6e..81ace1b 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -81,6 +81,13 @@ struct acc_option { bool latency; u32 sched_type; int task_type; + int mem_type; }; + +enum uadk_mem_mode { + UADK_AUTO, // SVA or No-SVA + UADK_MANUAL, // No-SVA, user-managed memory + UADK_PROXY, // No-SVA, proxied by the UADK API }; enum acc_type { @@ -224,6 +231,7 @@ extern void cal_avg_latency(u32 count); extern int get_alg_name(int alg, char *alg_name); extern void segmentfault_handler(int sig); +int uadk_parse_dev_id(char *dev_name); int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); int acc_default_case(struct acc_option *option); int acc_option_convert(struct acc_option *option); -- 2.33.0
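A short usage sketch for the helper added above; the device name "hisi_sec2-4" is illustrative, and the prototype is assumed to come from the benchmark header.

#include <stdio.h>

int uadk_parse_dev_id(char *dev_name);  /* from uadk_benchmark.h */

int main(void)
{
        char name[] = "hisi_sec2-4";    /* numeric suffix after the last '-' */
        int id = uadk_parse_dev_id(name);

        if (id < 0)
                printf("invalid device name\n");
        else
                printf("dev id = %d\n", id);    /* prints "dev id = 4" */

        return 0;
}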
From: Zhushuai Yin <yinzhushuai@huawei.com> The newly added tool interfaces support unified verification of the SEC SVA and No-SVA code paths. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 527 ++++++++++++++++++++++- 1 file changed, 518 insertions(+), 9 deletions(-) diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index cbd2a99..141b161 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -8,6 +8,7 @@ #include "include/wd_digest.h" #include "include/wd_aead.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" #define SEC_TST_PRT printf #define MAX_IVK_LENTH 64 @@ -17,6 +18,8 @@ #define SEC_MAX_MAC_LEN 64 #define SEC_SAVE_FILE_LEN 64 #define SEC_PERF_AUTH_SIZE 16 +#define SQE_SIZE 128 +#define SEC_OP_TYPE_MAX 2 char aead_key[] = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"; @@ -28,7 +31,7 @@ char g_save_mac[SEC_MAX_MAC_LEN]; struct uadk_bd { u8 *src; u8 *dst; - u8 mac[SEC_MAX_MAC_LEN]; + u8 *mac; }; struct bd_pool { @@ -40,6 +43,7 @@ struct thread_pool { u8 **iv; u8 **key; u8 **hash; + void *rsv_pool; } g_uadk_pool; typedef struct uadk_thread_res { @@ -54,6 +58,7 @@ typedef struct uadk_thread_res { u32 dalg; u32 dmode; u32 d_outbytes; + int mm_type; } thread_data; static struct wd_ctx_config g_ctx_cfg; @@ -66,6 +71,7 @@ static unsigned int g_alg; static unsigned int g_algtype; static unsigned int g_optype; static unsigned int g_maclen; +static unsigned int g_dev_id; struct aead_alg_info { int index; @@ -635,6 +641,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -677,6 +684,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -718,15 +726,31 @@ static int init_ctx_config(struct acc_option *options) goto free_ctxs; } + switch(subtype) { case CIPHER_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_cipher_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_cipher_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_cipher_poll_ctx); break; case AEAD_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_aead_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_aead_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_aead_poll_ctx); break; case DIGEST_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_digest_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, + max_node, wd_digest_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, + max_node, wd_digest_poll_ctx); break; default: SEC_TST_PRT("failed to parse alg subtype!\n"); @@ -743,6 +767,7 @@ static int init_ctx_config(struct acc_option *options) param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param);
if (ret) { SEC_TST_PRT("failed to fill sched data!\n"); @@ -867,7 +892,10 @@ static int init_ctx_config2(struct acc_option *options) /* init */ switch(subtype) { case CIPHER_TYPE: - ret = wd_cipher_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_cipher_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_cipher_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) SEC_TST_PRT("failed to do cipher init2!\n"); break; @@ -877,12 +905,18 @@ SEC_TST_PRT("failed to do cipher intruction init2!\n"); break; case AEAD_TYPE: - ret = wd_aead_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_aead_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_aead_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) SEC_TST_PRT("failed to do aead init2!\n"); break; case DIGEST_TYPE: - ret = wd_digest_init2_(alg_name, options->sched_type, options->task_type, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_digest_init2_(alg_name, SCHED_POLICY_RR, options->task_type, &cparams); + else + ret = wd_digest_init2_(alg_name, SCHED_POLICY_DEV, options->task_type, &cparams); if (ret) SEC_TST_PRT("failed to do digest init2!\n"); break; @@ -1071,6 +1105,10 @@ static int init_uadk_bd_pool(void) memset(g_uadk_pool.pool[i].bds[j].dst, 0, step); if (!g_uadk_pool.pool[i].bds[j].dst) goto malloc_error3; + g_uadk_pool.pool[i].bds[j].mac = malloc(SEC_MAX_MAC_LEN); + if (!g_uadk_pool.pool[i].bds[j].mac) + goto malloc_error4; + memset(g_uadk_pool.pool[i].bds[j].mac, 0, SEC_MAX_MAC_LEN); if (g_alg != AEAD_TYPE) { get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); @@ -1093,18 +1131,22 @@ return 0; +malloc_error4: + free(g_uadk_pool.pool[i].bds[j].dst); malloc_error3: free(g_uadk_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH; j++) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } free(g_uadk_pool.pool[i].bds); g_uadk_pool.pool[i].bds = NULL; @@ -1133,6 +1175,7 @@ static void free_uadk_bd_pool(void) for (j = 0; j < MAX_POOL_LENTH; j++) { free(g_uadk_pool.pool[i].bds[j].src); free(g_uadk_pool.pool[i].bds[j].dst); + free(g_uadk_pool.pool[i].bds[j].mac); } } free(g_uadk_pool.pool[i].bds); @@ -1144,6 +1187,322 @@ static void free_uadk_bd_pool(void) free_ivkey_source(); } +static void free_rsv_ivkey(void *pool) +{ + int i; + + /* release the hash block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.hash[i]) + wd_mem_free(pool, g_uadk_pool.hash[i]); + } + + /* release the key block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.key[i]) + wd_mem_free(pool, g_uadk_pool.key[i]); + } + + /* release the IV block for each thread */ + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.iv[i]) + wd_mem_free(pool, g_uadk_pool.iv[i]); + } + + /* release the pointer arrays */ + free(g_uadk_pool.hash); + free(g_uadk_pool.key); + free(g_uadk_pool.iv); + + /* the memory pool is managed externally */ +} + +static int init_rsv_ivkey(handle_t h_ctx, void *pool) +{ + int i, j, m, idx; + + /* Check if the incoming memory pool is valid */ + if (!pool) {
SEC_TST_PRT("Invalid pool parameter\n"); + return -EINVAL; + } + + /* + * Allocate a pointer array + * (still using malloc, as the structure is small) + */ + g_uadk_pool.iv = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.iv) { + SEC_TST_PRT("Failed to alloc IV pointers\n"); + goto error; + } + memset(g_uadk_pool.iv, 0, g_thread_num * sizeof(char *)); + + g_uadk_pool.key = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.key) { + SEC_TST_PRT("Failed to alloc Key pointers\n"); + goto free_iv_pointers; + } + memset(g_uadk_pool.key, 0, g_thread_num * sizeof(char *)); + + g_uadk_pool.hash = malloc(g_thread_num * sizeof(char *)); + if (!g_uadk_pool.hash) { + SEC_TST_PRT("Failed to alloc Hash pointers\n"); + goto free_key_pointers; + } + memset(g_uadk_pool.hash, 0, g_thread_num * sizeof(char *)); + + /* Allocate IV blocks for each thread */ + for (i = 0; i < g_thread_num; i++) { + g_uadk_pool.iv[i] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.iv[i]) { + SEC_TST_PRT("Failed to alloc IV block\n"); + goto free_iv_blocks; + } + memset(g_uadk_pool.iv[i], 0, MAX_IVK_LENTH); + } + + /* Allocate KEY blocks for each thread */ + for (j = 0; j < g_thread_num; j++) { + g_uadk_pool.key[j] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.key[j]) { + SEC_TST_PRT("Failed to alloc Key block\n"); + goto free_key_blocks; + } + memcpy(g_uadk_pool.key[j], aead_key, SEC_PERF_KEY_LEN); + } + + /* Allocate HASH blocks for each thread */ + for (m = 0; m < g_thread_num; m++) { + g_uadk_pool.hash[m] = wd_mem_alloc(pool, g_pktlen); + if (!g_uadk_pool.hash[m]) { + SEC_TST_PRT("Failed to alloc Hash block\n"); + goto free_hash_blocks; + } + memcpy(g_uadk_pool.hash[m], aead_key, SEC_PERF_KEY_LEN); + } + + return 0; + +free_hash_blocks: + for (idx = m - 1; idx >= 0; idx--) { + if (g_uadk_pool.hash[idx]) + wd_mem_free(pool, g_uadk_pool.hash[idx]); + } +free_key_blocks: + for (idx = j - 1; idx >= 0; idx--) { + if (g_uadk_pool.key[idx]) + wd_mem_free(pool, g_uadk_pool.key[idx]); + } +free_iv_blocks: + for (idx = i - 1; idx >= 0; idx--) { + if (g_uadk_pool.iv[idx]) + wd_mem_free(pool, g_uadk_pool.iv[idx]); + } + free(g_uadk_pool.hash); +free_key_pointers: + free(g_uadk_pool.key); +free_iv_pointers: + free(g_uadk_pool.iv); +error: + return -ENOMEM; +} + +static int init_uadk_rsv_pool(struct acc_option *option) +{ + struct wd_mempool_setup pool_setup; + char *alg = option->algclass; + unsigned long step, len; + /* + * Assuming that h_ctx exists globally, + * it needs to be obtained based on actual conditions. + */ + handle_t h_ctx; + int i, j; + int ret; + + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + SEC_TST_PRT("Failed to find a ctx for alg:%s\n", option->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + if (g_alg != AEAD_TYPE) + step = sizeof(char) * g_pktlen; + else + step = sizeof(char) * g_pktlen * 2; + + /* Create a memory pool for managing memory blocks of src and dst */ + pool_setup.block_size = step; + /* + * Each thread requires two blocks (src and dst) for each uadk_bd, + * along with the IV, KEY, and MAC sections, + * thus necessitating five times the data. 
+ */ + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * 8; + pool_setup.align_size = SQE_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + + g_uadk_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!g_uadk_pool.rsv_pool) { + SEC_TST_PRT("Failed to create block pool\n"); + return -ENOMEM; + } + + /* Initialize the memory for iv and key */ + ret = init_rsv_ivkey(h_ctx, g_uadk_pool.rsv_pool); + if (ret) { + SEC_TST_PRT("init uadk ivkey resource failed!\n"); + goto free_pool; + } + + if (g_alg != AEAD_TYPE) + len = sizeof(char) * g_pktlen; + else + len = sizeof(char) * g_pktlen * 2; + /* + * Allocate thread pool structure + * (still using malloc, as the structure is small) + */ + g_uadk_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); + if (!g_uadk_pool.pool) { + SEC_TST_PRT("init uadk pool alloc thread failed!\n"); + goto free_ivkey; + } + + for (i = 0; i < g_thread_num; i++) { + g_uadk_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH * sizeof(struct uadk_bd)); + if (!g_uadk_pool.pool[i].bds) { + SEC_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + + for (j = 0; j < MAX_POOL_LENTH; j++) { + /* Allocate memory blocks for src and dst from the memory pool */ + g_uadk_pool.pool[i].bds[j].src = wd_mem_alloc(g_uadk_pool.rsv_pool, len); + if (!g_uadk_pool.pool[i].bds[j].src) { + SEC_TST_PRT("Failed to alloc src block\n"); + goto malloc_error3; + } + + g_uadk_pool.pool[i].bds[j].dst = wd_mem_alloc(g_uadk_pool.rsv_pool, len); + if (!g_uadk_pool.pool[i].bds[j].dst) { + SEC_TST_PRT("Failed to alloc dst block\n"); + goto malloc_error3; + } + + g_uadk_pool.pool[i].bds[j].mac = wd_mem_alloc(g_uadk_pool.rsv_pool, SEC_MAX_MAC_LEN); + if (!g_uadk_pool.pool[i].bds[j].mac) { + SEC_TST_PRT("Failed to alloc mac block\n"); + goto malloc_error3; + } + + memset(g_uadk_pool.pool[i].bds[j].src, 0, len); + memset(g_uadk_pool.pool[i].bds[j].dst, 0, len); + memset(g_uadk_pool.pool[i].bds[j].mac, 0, step); + + if (g_alg != AEAD_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); + if (g_prefetch) + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); + } else { + if (!g_optype) + get_aead_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); + else { + read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); + memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } + } + } + } + SEC_TST_PRT("Init uadk rsv block pool OK.\n"); + + return 0; + +malloc_error3: + /* Release the allocated src and dst blocks */ + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + goto malloc_error2; + +malloc_error2: + for (j--; j >= 0; j--) { + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } +malloc_error1: + for (i--; i >= 0; i--) { + for (j = 0; j < MAX_POOL_LENTH; j++) { + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); 
+ if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } + free(g_uadk_pool.pool[i].bds); + g_uadk_pool.pool[i].bds = NULL; + } + free(g_uadk_pool.pool); + g_uadk_pool.pool = NULL; + +free_ivkey: + free_rsv_ivkey(g_uadk_pool.rsv_pool); + +free_pool: + wd_mempool_free(h_ctx, g_uadk_pool.rsv_pool); + g_uadk_pool.rsv_pool = NULL; + + SEC_TST_PRT("init uadk bd pool alloc failed!\n"); + return -ENOMEM; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + int i, j; + + /* save aad + ctext + mac */ + if (g_alg == AEAD_TYPE && !g_optype) + save_aead_dst_data(g_uadk_pool.pool[0].bds[0].dst, + g_pktlen + SEC_AEAD_LEN); + + for (i = 0; i < g_thread_num; i++) { + if (g_uadk_pool.pool[i].bds) { + for (j = 0; j < MAX_POOL_LENTH; j++) { + /* Use wd_mem_free to release the block */ + if (g_uadk_pool.pool[i].bds[j].src) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].src); + if (g_uadk_pool.pool[i].bds[j].dst) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].dst); + if (g_uadk_pool.pool[i].bds[j].mac) + wd_mem_free(g_uadk_pool.rsv_pool, g_uadk_pool.pool[i].bds[j].mac); + } + } + free(g_uadk_pool.pool[i].bds); + g_uadk_pool.pool[i].bds = NULL; + } + free(g_uadk_pool.pool); + g_uadk_pool.pool = NULL; + + /* Destroy IV and key */ + free_rsv_ivkey(g_uadk_pool.rsv_pool); + + /* Destroy memory pool */ + if (g_uadk_pool.rsv_pool) + wd_mempool_free(h_ctx, g_uadk_pool.rsv_pool); + g_uadk_pool.rsv_pool = NULL; +} /*-------------------------------uadk benchmark main code-------------------------------------*/ static void *sec_uadk_poll(void *data) @@ -1245,6 +1604,7 @@ static void *sec_uadk_cipher_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_cipher_sess_setup cipher_setup = {0}; + struct sched_params sc_param = {0}; struct wd_cipher_req creq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1265,6 +1625,20 @@ static void *sec_uadk_cipher_async(void *arg) cipher_setup.alg = pdata->alg; cipher_setup.mode = pdata->mode; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + cipher_setup.sched_param = (void *)&sc_param; + + cipher_setup.mm_type = pdata->mm_type; + cipher_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + cipher_setup.mm_ops.alloc = (void *)wd_mem_alloc; + cipher_setup.mm_ops.free = (void *)wd_mem_free; + cipher_setup.mm_ops.iova_map = (void *)wd_mem_map; + cipher_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + cipher_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_cipher_alloc_sess(&cipher_setup); if (!h_sess) return NULL; @@ -1305,6 +1679,22 @@ static void *sec_uadk_cipher_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. 
*/ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_cipher_free_sess(h_sess); add_send_complete(); @@ -1316,6 +1706,7 @@ static void *sec_uadk_aead_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_aead_sess_setup aead_setup = {0}; + struct sched_params sc_param = {0}; u8 *priv_iv, *priv_key, *priv_hash; u32 auth_size = SEC_PERF_AUTH_SIZE; struct wd_aead_req areq = {0}; @@ -1338,6 +1729,21 @@ static void *sec_uadk_aead_async(void *arg) aead_setup.calg = pdata->alg; aead_setup.cmode = pdata->mode; + aead_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + aead_setup.sched_param = (void *)&sc_param; + + aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; + aead_setup.mm_ops.free = (void *)wd_mem_free; + aead_setup.mm_ops.iova_map = (void *)wd_mem_map; + aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; + if (pdata->is_union) { aead_setup.dalg = pdata->dalg; aead_setup.dmode = pdata->dmode; @@ -1412,6 +1818,22 @@ static void *sec_uadk_aead_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. */ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_aead_free_sess(h_sess); add_send_complete(); @@ -1423,6 +1845,7 @@ static void *sec_uadk_digest_async(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_digest_sess_setup digest_setup = {0}; + struct sched_params sc_param = {0}; struct wd_digest_req dreq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1443,6 +1866,21 @@ static void *sec_uadk_digest_async(void *arg) digest_setup.alg = pdata->alg; digest_setup.mode = pdata->mode; // digest mode is optype + digest_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + digest_setup.sched_param = (void *)&sc_param; + + digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; + digest_setup.mm_ops.free = (void *)wd_mem_free; + digest_setup.mm_ops.iova_map = (void *)wd_mem_map; + digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; + h_sess = wd_digest_alloc_sess(&digest_setup); if (!h_sess) return NULL; @@ -1482,6 +1920,22 @@ static void *sec_uadk_digest_async(void *arg) } count++; } + + /* Release memory after all tasks are complete. */ + if (count) { + i = 0; + while (get_recv_time() != g_ctxnum) { + if (i++ >= MAX_TRY_CNT) { + SEC_TST_PRT("failed to wait poll thread finish!\n"); + break; + } + + usleep(SEND_USLEEP); + } + } + /* Wait for the device to complete the tasks. 
*/ + usleep(SEND_USLEEP * MAX_TRY_CNT); + wd_digest_free_sess(h_sess); add_send_complete(); @@ -1493,6 +1947,7 @@ static void *sec_uadk_cipher_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_cipher_sess_setup cipher_setup = {0}; + struct sched_params sc_param = {0}; struct wd_cipher_req creq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1512,6 +1967,20 @@ static void *sec_uadk_cipher_sync(void *arg) cipher_setup.alg = pdata->alg; cipher_setup.mode = pdata->mode; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + cipher_setup.sched_param = (void *)&sc_param; + + cipher_setup.mm_type = pdata->mm_type; + cipher_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + cipher_setup.mm_ops.alloc = (void *)wd_mem_alloc; + cipher_setup.mm_ops.free = (void *)wd_mem_free; + cipher_setup.mm_ops.iova_map = (void *)wd_mem_map; + cipher_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + cipher_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_cipher_alloc_sess(&cipher_setup); if (!h_sess) return NULL; @@ -1554,6 +2023,7 @@ static void *sec_uadk_aead_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_aead_sess_setup aead_setup = {0}; + struct sched_params sc_param = {0}; u8 *priv_iv, *priv_key, *priv_hash; u32 auth_size = SEC_PERF_AUTH_SIZE; struct wd_aead_req areq = {0}; @@ -1576,6 +2046,20 @@ static void *sec_uadk_aead_sync(void *arg) aead_setup.calg = pdata->alg; aead_setup.cmode = pdata->mode; + aead_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + aead_setup.sched_param = (void *)&sc_param; + + aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; + aead_setup.mm_ops.free = (void *)wd_mem_free; + aead_setup.mm_ops.iova_map = (void *)wd_mem_map; + aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; if (pdata->is_union) { aead_setup.dalg = pdata->dalg; aead_setup.dmode = pdata->dmode; @@ -1644,6 +2128,7 @@ static void *sec_uadk_digest_sync(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_digest_sess_setup digest_setup = {0}; + struct sched_params sc_param = {0}; struct wd_digest_req dreq; struct bd_pool *uadk_pool; u8 *priv_iv, *priv_key; @@ -1663,6 +2148,20 @@ static void *sec_uadk_digest_sync(void *arg) digest_setup.alg = pdata->alg; digest_setup.mode = pdata->mode; // digest mode is optype + digest_setup.mm_type = pdata->mm_type; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; // sync mode + if (g_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); + digest_setup.sched_param = (void *)&sc_param; + + digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; + digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; + digest_setup.mm_ops.free = (void *)wd_mem_free; + digest_setup.mm_ops.iova_map = (void *)wd_mem_map; + digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_digest_alloc_sess(&digest_setup); if (!h_sess) return NULL; @@ -1741,6 +2240,7 @@ int sec_uadk_sync_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].d_outbytes = threads_option.d_outbytes; + threads_args[i].mm_type = options->mem_type; ret 
= pthread_create(&tdid[i], NULL, uadk_sec_sync_run, &threads_args[i]); if (ret) { SEC_TST_PRT("Create sync thread fail!\n"); @@ -1813,6 +2313,7 @@ int sec_uadk_async_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].d_outbytes = threads_option.d_outbytes; + threads_args[i].mm_type = options->mem_type; ret = pthread_create(&tdid[i], NULL, uadk_sec_async_run, &threads_args[i]); if (ret) { SEC_TST_PRT("Create async thread fail!\n"); @@ -1875,7 +2376,11 @@ int sec_uadk_benchmark(struct acc_option *options) if (ret) return ret; - ret = init_uadk_bd_pool(); + if (options->mem_type == UADK_AUTO) // SVA memory + ret = init_uadk_bd_pool(); + else + ret = init_uadk_rsv_pool(options); // In the test scenario, the user uses the uadk interface to apply for memory + if (ret) return ret; @@ -1889,7 +2394,11 @@ int sec_uadk_benchmark(struct acc_option *options) if (ret) return ret; - free_uadk_bd_pool(); + if (options->mem_type == UADK_AUTO) + free_uadk_bd_pool(); + else + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_ctx_config2(options->subtype); else -- 2.33.0
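For readers tracking the benchmark patch above: in the No-SVA case one reserved-memory pool is created per run, and every src/dst/mac block is carved out of it. A minimal sketch of that lifecycle, using only the wd_bmm.h calls the patch itself exercises (the helper name and the omitted error unwinding are illustrative, not part of the patch):

#include "include/wd_bmm.h"

/* Condensed reserved-pool setup, mirroring init_uadk_rsv_pool() above. */
static void *rsv_pool_create(struct acc_option *option, handle_t *h_ctx)
{
        struct wd_mempool_setup pool_setup = {0};

        /* Bind the pool to a queue reserved for this algorithm class. */
        *h_ctx = wd_find_ctx(option->algclass);
        if (!*h_ctx)
                return NULL;

        pool_setup.block_size = g_pktlen * 2;   /* worst case: AEAD src/dst */
        pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * 8;
        pool_setup.align_size = SQE_SIZE;

        return wd_mempool_alloc(*h_ctx, &pool_setup);
}

Blocks then come from wd_mem_alloc(pool, len) and go back through wd_mem_free(pool, blk); wd_mempool_free(h_ctx, pool) destroys the pool once every block has been returned, mirroring free_uadk_rsv_pool().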
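The six mm_ops assignments are repeated verbatim in each of the cipher/aead/digest sync and async threads above; they could be folded into one helper. A sketch (the helper name is hypothetical, the assignments are exactly those from the patch):

/* Hypothetical helper deduplicating the mm_ops setup from the patch. */
static void fill_rsv_mm_ops(struct wd_mm_ops *mm_ops, void *rsv_pool)
{
        mm_ops->usr = rsv_pool;
        mm_ops->alloc = (void *)wd_mem_alloc;
        mm_ops->free = (void *)wd_mem_free;
        mm_ops->iova_map = (void *)wd_mem_map;
        mm_ops->iova_unmap = (void *)wd_mem_unmap;
        mm_ops->get_bufsize = (void *)wd_get_bufsize;
}

Each session setup would then reduce to fill_rsv_mm_ops(&setup.mm_ops, g_uadk_pool.rsv_pool) plus the mm_type and sched_param assignments, with sc_param.dev_id still taken from wd_get_dev_id(g_uadk_pool.rsv_pool).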
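The hpre patch that follows routes every hardware address through a small per-request mapping cache, so that any failure after the first iova_map() can be unwound before the request is dropped. Reduced to its core, the send-side pattern looks like this (a sketch against the helpers the patch adds, not a verbatim excerpt):

/* Sketch of the per-request mapping cache used by the rsa/dh/ecc send paths. */
static int hpre_send_sketch(struct wd_mm_ops *mm_ops, handle_t h_qp,
                            struct hisi_hpre_sqe *sqe,
                            void *in, size_t ilen, void *out, size_t olen)
{
        struct map_info_cache cache = {0};
        int ret = -WD_ENOMEM;
        __u16 send_cnt = 0;
        uintptr_t dma;

        /* SVA: DMA address == VA; No-SVA: iova_map() and remember the pair. */
        dma = select_addr_by_sva_mode(mm_ops, in, ilen, &cache);
        if (!dma)
                goto unmap;
        fill_hw_msg_addr(HW_MSG_IN, sqe, dma);

        dma = select_addr_by_sva_mode(mm_ops, out, olen, &cache);
        if (!dma)
                goto unmap;
        fill_hw_msg_addr(HW_MSG_OUT, sqe, dma);

        ret = hisi_qm_send(h_qp, sqe, 1, &send_cnt);
        if (!ret)
                return WD_SUCCESS;

unmap:
        /* Undo whatever was cached so far; a no-op in SVA mode. */
        unmap_addr_in_cache(mm_ops, &cache);
        return ret;
}

On the receive side no such bookkeeping is needed: the completed sqe carries the message pointer in its tag fields, and unsetup_hw_msg_addr() recovers and unmaps each DMA address from the sqe itself.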
From: lizhi <lizhi206@huawei.com> The hpre v2 interface must support the No-SVA mode within the new framework, so that users who switch to the v2 interface can run in both SVA and No-SVA modes. Signed-off-by: lizhi <lizhi206@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_hpre.c | 650 ++++++++++++++++++++++++++++----------- drv/hisi_qm_udrv.c | 15 +- include/drv/wd_dh_drv.h | 3 + include/drv/wd_ecc_drv.h | 3 + include/drv/wd_rsa_drv.h | 3 + include/wd_dh.h | 2 + include/wd_ecc.h | 2 + include/wd_rsa.h | 2 + wd_dh.c | 22 +- wd_ecc.c | 53 ++-- wd_rsa.c | 56 +++- 11 files changed, 578 insertions(+), 233 deletions(-) diff --git a/drv/hisi_hpre.c b/drv/hisi_hpre.c index 7ce842b..8a6d71c 100644 --- a/drv/hisi_hpre.c +++ b/drv/hisi_hpre.c @@ -34,14 +34,17 @@ #define SM2_PONIT_SIZE 64 #define MAX_HASH_LENS BITS_TO_BYTES(521) #define HW_PLAINTEXT_BYTES_MAX BITS_TO_BYTES(4096) -#define HPRE_CTX_Q_NUM_DEF 1 +#define HPRE_CTX_Q_NUM_DEF 1 +#define MAP_PAIR_NUM_MAX 6 #define CRT_PARAMS_SZ(key_size) ((5 * (key_size)) >> 1) #define CRT_GEN_PARAMS_SZ(key_size) ((7 * (key_size)) >> 1) #define GEN_PARAMS_SZ(key_size) ((key_size) << 1) #define CRT_PARAM_SZ(key_size) ((key_size) >> 1) -#define WD_TRANS_FAIL 0 +#define GEN_PARAMS_SZ_UL(key_size) ((unsigned long)(key_size) << 1) +#define DMA_ADDR(hi, lo) ((__u64)(((__u64)(hi) << 32) | (__u64)(lo))) +#define WD_TRANS_FAIL 0 #define CURVE_PARAM_NUM 6 #define SECP256R1_KEY_SIZE 32 @@ -78,6 +81,21 @@ enum hpre_alg_name { WD_ECC }; +enum hpre_hw_msg_field { + HW_MSG_IN, + HW_MSG_OUT, + HW_MSG_KEY, +}; + +struct map_info_cache { + struct map_pair { + void *addr; + uintptr_t pa; + size_t size; + } pairs[MAP_PAIR_NUM_MAX]; + size_t cnt; +}; + /* put vendor hardware message as a user interface is not suitable here */ struct hisi_hpre_sqe { __u32 alg : 5; @@ -113,12 +131,122 @@ struct hisi_hpre_sqe { struct hisi_hpre_ctx { struct wd_ctx_config_internal config; + struct wd_mm_ops *mm_ops; + handle_t rsv_mem_ctx; }; struct hpre_ecc_ctx { __u32 enable_hpcore; }; +static void add_map_info(struct map_info_cache *cache, void *addr, uintptr_t dma, size_t size) +{ + /* The cnt is guaranteed not to exceed MAP_PAIR_NUM_MAX within hpre. */ + cache->pairs[cache->cnt].addr = addr; + cache->pairs[cache->cnt].pa = dma; + cache->pairs[cache->cnt].size = size; + cache->cnt++; +} + +static void unmap_addr_in_cache(struct wd_mm_ops *mm_ops, struct map_info_cache *cache) +{ + size_t i; + + if (mm_ops->sva_mode) + return; + + /* The cnt is guaranteed not to exceed MAP_PAIR_NUM_MAX within hpre.
*/ + for (i = 0; i < cache->cnt; i++) + mm_ops->iova_unmap(mm_ops->usr, cache->pairs[i].addr, + cache->pairs[i].pa, cache->pairs[i].size); +} + +static void unsetup_hw_msg_addr(struct wd_mm_ops *mm_ops, enum hpre_hw_msg_field t_type, + struct hisi_hpre_sqe *hw_msg, void *va, size_t data_sz) +{ + void *addr; + + if (!mm_ops || mm_ops->sva_mode || !va || !data_sz) + return; + + switch (t_type) { + case HW_MSG_KEY: + addr = VA_ADDR(hw_msg->hi_key, hw_msg->low_key); + break; + case HW_MSG_IN: + addr = VA_ADDR(hw_msg->hi_in, hw_msg->low_in); + break; + case HW_MSG_OUT: + addr = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); + break; + default: + return; + } + + if (!addr) + return; + + mm_ops->iova_unmap(mm_ops->usr, va, (void *)addr, data_sz); +} + +static uintptr_t select_addr_by_sva_mode(struct wd_mm_ops *mm_ops, void *data, + size_t data_sz, struct map_info_cache *cache) +{ + uintptr_t addr; + + if (!mm_ops->sva_mode) { + addr = (uintptr_t)mm_ops->iova_map(mm_ops->usr, data, data_sz); + if (!addr) { + WD_ERR("Failed to get mapped DMA address for hardware.\n"); + return 0; + } + add_map_info(cache, data, addr, data_sz); + } else { + addr = (uintptr_t)data; + } + + return addr; +} + +static void fill_hw_msg_addr(enum hpre_hw_msg_field t_type, struct hisi_hpre_sqe *hw_msg, + uintptr_t addr) +{ + switch (t_type) { + case HW_MSG_KEY: + hw_msg->low_key = LW_U32(addr); + hw_msg->hi_key = HI_U32(addr); + break; + case HW_MSG_IN: + hw_msg->low_in = LW_U32(addr); + hw_msg->hi_in = HI_U32(addr); + break; + case HW_MSG_OUT: + hw_msg->low_out = LW_U32(addr); + hw_msg->hi_out = HI_U32(addr); + break; + default: + return; + } +} + +static int check_hpre_mem_params(struct wd_mm_ops *mm_ops, enum wd_mem_type mm_type) +{ + if (!mm_ops) { + WD_ERR("Memory operation functions are null.\n"); + return -WD_EINVAL; + } + + if (mm_type == UADK_MEM_AUTO && !mm_ops->sva_mode) { + WD_ERR("No-SVA in auto mode is not supported yet.\n"); + return -WD_EINVAL; + } else if (mm_type > UADK_MEM_PROXY) { + WD_ERR("failed to check memory type.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + static void dump_hpre_msg(void *msg, int alg) { struct wd_rsa_msg *rsa_msg; @@ -269,7 +397,8 @@ static int fill_rsa_crt_prikey2(struct wd_rsa_prikey *prikey, *data = wd_dq->data; - return WD_SUCCESS; + return (int)(wd_dq->bsize + wd_qinv->bsize + wd_p->bsize + + wd_q->bsize + wd_dp->bsize); } static int fill_rsa_prikey1(struct wd_rsa_prikey *prikey, void **data) @@ -292,7 +421,7 @@ static int fill_rsa_prikey1(struct wd_rsa_prikey *prikey, void **data) *data = wd_d->data; - return WD_SUCCESS; + return (int)(wd_n->bsize + wd_d->bsize); } static int fill_rsa_pubkey(struct wd_rsa_pubkey *pubkey, void **data) @@ -315,7 +444,7 @@ static int fill_rsa_pubkey(struct wd_rsa_pubkey *pubkey, void **data) *data = wd_e->data; - return WD_SUCCESS; + return (int)(wd_n->bsize + wd_e->bsize); } static int fill_rsa_genkey_in(struct wd_rsa_kg_in *genkey) @@ -378,22 +507,21 @@ static int rsa_out_transfer(struct wd_rsa_msg *msg, { struct wd_rsa_req *req = &msg->req; struct wd_rsa_kg_out *key = req->dst; + struct wd_rsa_msg *target_msg; __u16 kbytes = msg->key_bytes; struct wd_dtb qinv, dq, dp; struct wd_dtb d, n; - void *data; int ret; - if (hw_msg->alg == HPRE_ALG_KG_CRT || hw_msg->alg == HPRE_ALG_KG_STD) { - /* async */ - if (qp_mode == CTX_MODE_ASYNC) { - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - key = container_of(data, struct wd_rsa_kg_out, data); - } else { - key = req->dst; - } + target_msg = (struct wd_rsa_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct rsa send msg from hardware!\n"); + return -WD_ADDR_ERR; } + if (hw_msg->alg == HPRE_ALG_KG_CRT || hw_msg->alg == HPRE_ALG_KG_STD) + key = target_msg->req.dst; + msg->result = WD_SUCCESS; if (hw_msg->alg == HPRE_ALG_KG_CRT) { req->dst_bytes = CRT_GEN_PARAMS_SZ(kbytes); @@ -424,37 +552,38 @@ return WD_SUCCESS; } -static int rsa_prepare_key(struct wd_rsa_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int rsa_prepare_key(struct wd_rsa_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_rsa_req *req = &msg->req; + uintptr_t addr; + int ret, len; void *data; - int ret; if (req->op_type == WD_RSA_SIGN) { if (hw_msg->alg == HPRE_ALG_NC_CRT) { - ret = fill_rsa_crt_prikey2((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_crt_prikey2((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; } else { - ret = fill_rsa_prikey1((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_prikey1((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; hw_msg->alg = HPRE_ALG_NC_NCRT; } } else if (req->op_type == WD_RSA_VERIFY) { - ret = fill_rsa_pubkey((void *)msg->key, &data); - if (ret) - return ret; + len = fill_rsa_pubkey((void *)msg->key, &data); + if (len <= 0) + return -WD_EINVAL; hw_msg->alg = HPRE_ALG_NC_NCRT; } else if (req->op_type == WD_RSA_GENKEY) { ret = fill_rsa_genkey_in((void *)msg->key); if (ret) return ret; - ret = wd_rsa_kg_in_data((void *)msg->key, (char **)&data); - if (ret < 0) { + len = wd_rsa_kg_in_data((void *)msg->key, (char **)&data); + if (len < 0) { WD_ERR("failed to get rsa gen key data!\n"); - return ret; + return -WD_EINVAL; } if (hw_msg->alg == HPRE_ALG_NC_CRT) hw_msg->alg = HPRE_ALG_KG_CRT; @@ -465,36 +594,53 @@ static int rsa_prepare_key(struct wd_rsa_msg *msg, return -WD_EINVAL; } - hw_msg->low_key = LW_U32((uintptr_t)data); - hw_msg->hi_key = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, len, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); return WD_SUCCESS; } -static int rsa_prepare_iot(struct wd_rsa_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int rsa_prepare_iot(struct wd_rsa_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_rsa_req *req = &msg->req; struct wd_rsa_kg_out *kout = req->dst; int ret = WD_SUCCESS; - void *out = NULL; + uintptr_t phy, out; if (req->op_type != WD_RSA_GENKEY) { - hw_msg->low_in = LW_U32((uintptr_t)req->src); - hw_msg->hi_in = HI_U32((uintptr_t)req->src); - out = req->dst; + phy = select_addr_by_sva_mode(msg->mm_ops, req->src, req->src_bytes, cache); + if (!phy) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, hw_msg, phy); + phy = select_addr_by_sva_mode(msg->mm_ops, req->dst, req->dst_bytes, cache); + if (!phy) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_OUT, hw_msg, phy); } else { hw_msg->low_in = 0; hw_msg->hi_in = 0; ret = wd_rsa_kg_out_data(kout, (char **)&out); if (ret < 0) return ret; - } - hw_msg->low_out = LW_U32((uintptr_t)out); - hw_msg->hi_out = HI_U32((uintptr_t)out); + if (!msg->mm_ops->sva_mode) { + phy = (uintptr_t)msg->mm_ops->iova_map(msg->mm_ops->usr, kout, ret); + if (!phy) { + WD_ERR("Failed to get DMA address for rsa output!\n"); + return -WD_ENOMEM; + } + add_map_info(cache, kout, phy, ret); + out = phy + out - (uintptr_t)kout; + } + + hw_msg->low_out = LW_U32(out); + hw_msg->hi_out =
HI_U32(out); + } - return ret; + return WD_SUCCESS; } static int hpre_init_qm_priv(struct wd_ctx_config_internal *config, @@ -615,10 +761,17 @@ static int rsa_send(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_rsa_msg *msg = rsa_msg; + struct map_info_cache cache = {0}; struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; int ret; + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("rsa memory params are invalid, ret is %d!\n", ret); + return ret; + } + memset(&hw_msg, 0, sizeof(struct hisi_hpre_sqe)); if (msg->key_type == WD_RSA_PRIKEY1 || @@ -631,21 +784,30 @@ static int rsa_send(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) hw_msg.task_len1 = msg->key_bytes / BYTE_BITS - 0x1; - ret = rsa_prepare_key(msg, &hw_msg); - if (ret < 0) - return ret; + ret = rsa_prepare_key(msg, &hw_msg, &cache); + if (ret) + goto rsa_fail; /* prepare in/out put */ - ret = rsa_prepare_iot(msg, &hw_msg); - if (ret < 0) - return ret; + ret = rsa_prepare_iot(msg, &hw_msg, &cache); + if (ret) + goto rsa_fail; hisi_set_msg_id(h_qp, &msg->tag); hw_msg.done = 0x1; hw_msg.etype = 0x0; - hw_msg.low_tag = msg->tag; + hw_msg.low_tag = LW_U32((uintptr_t)msg); + hw_msg.hi_tag = HI_U32((uintptr_t)msg); - return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + ret = hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + if (unlikely(ret)) + goto rsa_fail; + + return ret; + +rsa_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static void hpre_result_check(struct hisi_hpre_sqe *hw_msg, @@ -671,32 +833,29 @@ static int rsa_recv(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct hisi_hpre_sqe hw_msg = {0}; + struct wd_rsa_msg *target_msg; struct wd_rsa_msg *msg = rsa_msg; - struct wd_rsa_msg *temp_msg; + struct hisi_hpre_sqe hw_msg = {0}; + size_t ilen = 0, olen = 0; __u16 recv_cnt = 0; + __u16 kbytes; int ret; ret = hisi_qm_recv(h_qp, &hw_msg, 1, &recv_cnt); if (ret < 0) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_rsa_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; - msg->tag = LW_U16(hw_msg.low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_rsa_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg!
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; - } - + msg->tag = target_msg->tag; hpre_result_check(&hw_msg, &msg->result); if (!msg->result) { ret = rsa_out_transfer(msg, &hw_msg, qp->q_info.qp_mode); @@ -707,15 +866,36 @@ static int rsa_recv(struct wd_alg_driver *drv, handle_t ctx, void *rsa_msg) } if (unlikely(msg->result != WD_SUCCESS)) - dump_hpre_msg(temp_msg, WD_RSA); + dump_hpre_msg(target_msg, WD_RSA); + + if (!target_msg->mm_ops->sva_mode) { + kbytes = target_msg->key_bytes; + if (hw_msg.alg == HPRE_ALG_KG_CRT) { + olen = CRT_GEN_PARAMS_SZ(kbytes); + ilen = GEN_PARAMS_SZ_UL(kbytes); + } else if (hw_msg.alg == HPRE_ALG_KG_STD) { + olen = GEN_PARAMS_SZ_UL(kbytes); + ilen = GEN_PARAMS_SZ_UL(kbytes); + } else { + olen = kbytes; + ilen = kbytes; + } + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, &hw_msg, + target_msg->req.src, ilen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, &hw_msg, + target_msg->req.dst, olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, &hw_msg, + target_msg->key, target_msg->key_bytes); + } return WD_SUCCESS; } -static int fill_dh_xp_params(struct wd_dh_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int fill_dh_xp_params(struct wd_dh_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { struct wd_dh_req *req = &msg->req; + uintptr_t addr; void *x, *p; int ret; @@ -735,26 +915,30 @@ static int fill_dh_xp_params(struct wd_dh_msg *msg, return ret; } - hw_msg->low_key = LW_U32((uintptr_t)x); - hw_msg->hi_key = HI_U32((uintptr_t)x); + addr = select_addr_by_sva_mode(msg->mm_ops, x, GEN_PARAMS_SZ_UL(msg->key_bytes), cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); return WD_SUCCESS; } -static int dh_out_transfer(struct wd_dh_msg *msg, - struct hisi_hpre_sqe *hw_msg, __u8 qp_mode) +static int dh_out_transfer(struct wd_dh_msg *msg, struct hisi_hpre_sqe *hw_msg, + __u8 qp_mode) { __u16 key_bytes = (hw_msg->task_len1 + 1) * BYTE_BITS; struct wd_dh_req *req = &msg->req; + struct wd_dh_msg *target_msg; void *out; int ret; - /* async */ - if (qp_mode == CTX_MODE_ASYNC) - out = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - else - out = req->pri; + target_msg = (struct wd_dh_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + out = target_msg->req.pri; ret = hpre_bin_to_crypto_bin((char *)out, (const char *)out, key_bytes, "dh out"); if (!ret) @@ -768,12 +952,20 @@ static int dh_out_transfer(struct wd_dh_msg *msg, static int dh_send(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct map_info_cache cache = {0}; struct wd_dh_msg *msg = dh_msg; struct wd_dh_req *req = &msg->req; struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; + uintptr_t addr; int ret; + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("dh memory params are invalid, ret is %d!\n", ret); + return ret; + } + memset(&hw_msg, 0, sizeof(struct hisi_hpre_sqe)); if (msg->is_g2 && req->op_type != WD_DH_PHASE2) @@ -791,32 +983,50 @@ static int dh_send(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) WD_ERR("failed to transfer dh g para format to hpre bin!\n"); return ret; } - - hw_msg.low_in = LW_U32((uintptr_t)msg->g); - hw_msg.hi_in = HI_U32((uintptr_t)msg->g); + addr = select_addr_by_sva_mode(msg->mm_ops, msg->g, msg->key_bytes, &cache); + if (!addr) + return
-WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, &hw_msg, addr); } - ret = fill_dh_xp_params(msg, &hw_msg); - if (ret) - return ret; + ret = fill_dh_xp_params(msg, &hw_msg, &cache); + if (ret) { + WD_ERR("failed to fill dh x or p para!\n"); + goto dh_fail; + } hisi_set_msg_id(h_qp, &msg->tag); - hw_msg.low_out = LW_U32((uintptr_t)req->pri); - hw_msg.hi_out = HI_U32((uintptr_t)req->pri); hw_msg.done = 0x1; hw_msg.etype = 0x0; - hw_msg.low_tag = msg->tag; - return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + hw_msg.low_tag = LW_U32((uintptr_t)msg); + hw_msg.hi_tag = HI_U32((uintptr_t)msg); + + addr = select_addr_by_sva_mode(msg->mm_ops, req->pri, req->pri_bytes, &cache); + if (!addr) { + ret = -WD_ENOMEM; + goto dh_fail; + } + fill_hw_msg_addr(HW_MSG_OUT, &hw_msg, addr); + + ret = hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); + if (unlikely(ret)) + goto dh_fail; + + return ret; + +dh_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct wd_dh_msg *msg = dh_msg; struct hisi_hpre_sqe hw_msg = {0}; - struct wd_dh_msg *temp_msg; + struct wd_dh_msg *msg = dh_msg; + struct wd_dh_msg *target_msg; __u16 recv_cnt = 0; int ret; @@ -824,22 +1034,17 @@ static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) if (ret < 0) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_dh_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; - msg->tag = LW_U16(hw_msg.low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_dh_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg! 
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; - } - + msg->tag = target_msg->tag; hpre_result_check(&hw_msg, &msg->result); if (!msg->result) { ret = dh_out_transfer(msg, &hw_msg, qp->q_info.qp_mode); @@ -850,7 +1055,16 @@ static int dh_recv(struct wd_alg_driver *drv, handle_t ctx, void *dh_msg) } if (unlikely(msg->result != WD_SUCCESS)) - dump_hpre_msg(temp_msg, WD_DH); + dump_hpre_msg(target_msg, WD_DH); + + if (!target_msg->mm_ops->sva_mode) { + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, &hw_msg, + target_msg->req.pri, target_msg->req.pri_bytes); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, &hw_msg, + target_msg->req.x_p, GEN_PARAMS_SZ_UL(target_msg->key_bytes)); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, &hw_msg, + target_msg->g, target_msg->key_bytes); + } return WD_SUCCESS; } @@ -1081,30 +1295,49 @@ static bool is_prikey_used(__u8 op_type) op_type == HPRE_SM2_DEC; } -static int ecc_prepare_key(struct wd_ecc_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static __u32 ecc_get_prikey_size(struct wd_ecc_msg *msg) +{ + if (msg->req.op_type == WD_SM2_SIGN || + msg->req.op_type == WD_ECDSA_SIGN || + msg->req.op_type == WD_SM2_DECRYPT) + return ECC_PRIKEY_SZ(msg->key_bytes); + else if (msg->curve_id == WD_X25519 || + msg->curve_id == WD_X448) + return X_DH_HW_KEY_SZ(msg->key_bytes); + else + return ECDH_HW_KEY_SZ(msg->key_bytes); +} + +static int ecc_prepare_key(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { void *data = NULL; + uintptr_t addr; + size_t ksz; int ret; if (is_prikey_used(msg->req.op_type)) { + ksz = ecc_get_prikey_size(msg); ret = ecc_prepare_prikey((void *)msg->key, &data, msg->curve_id); if (ret) return ret; } else { + ksz = ECC_PUBKEY_SZ(msg->key_bytes); ret = ecc_prepare_pubkey((void *)msg->key, &data); if (ret) return ret; } - hw_msg->low_key = LW_U32((uintptr_t)data); - hw_msg->hi_key = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, ksz, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_KEY, hw_msg, addr); - return WD_SUCCESS; + return ret; } -static void ecc_get_io_len(__u32 atype, __u32 hsz, size_t *ilen, - size_t *olen) +static void ecc_get_io_len(__u32 atype, __u32 hsz, + size_t *ilen, size_t *olen) { if (atype == HPRE_ALG_ECDH_MULTIPLY) { *olen = ECDH_OUT_PARAMS_SZ(hsz); @@ -1467,12 +1700,12 @@ static int ecc_prepare_out(struct wd_ecc_msg *msg, void **data) } /* prepare in/out hw message */ -static int ecc_prepare_iot(struct wd_ecc_msg *msg, - struct hisi_hpre_sqe *hw_msg) +static int ecc_prepare_iot(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { + size_t i_sz, o_sz; void *data = NULL; - size_t i_sz = 0; - size_t o_sz = 0; + uintptr_t addr; __u16 kbytes; int ret; @@ -1483,8 +1716,11 @@ static int ecc_prepare_iot(struct wd_ecc_msg *msg, WD_ERR("failed to prepare ecc in!\n"); return ret; } - hw_msg->low_in = LW_U32((uintptr_t)data); - hw_msg->hi_in = HI_U32((uintptr_t)data); + + addr = select_addr_by_sva_mode(msg->mm_ops, data, i_sz, cache); + if (!addr) + return -WD_ENOMEM; + fill_hw_msg_addr(HW_MSG_IN, hw_msg, addr); ret = ecc_prepare_out(msg, &data); if (ret) { @@ -1496,8 +1732,10 @@ static int ecc_prepare_iot(struct wd_ecc_msg *msg, if (!data) return WD_SUCCESS; - hw_msg->low_out = LW_U32((uintptr_t)data); - hw_msg->hi_out = HI_U32((uintptr_t)data); + addr = select_addr_by_sva_mode(msg->mm_ops, data, o_sz, cache); + if (!addr) + return -WD_ENOMEM; + 
fill_hw_msg_addr(HW_MSG_OUT, hw_msg, addr); return WD_SUCCESS; } @@ -1613,7 +1851,7 @@ static struct wd_ecc_out *create_ecdh_out(struct wd_ecc_msg *msg) return NULL; } - out = malloc(len); + out = msg->mm_ops->alloc(msg->mm_ops->usr, len); if (!out) { WD_ERR("failed to alloc out memory, sz = %u!\n", len); return NULL; @@ -1677,7 +1915,7 @@ static struct wd_ecc_msg *create_req(struct wd_ecc_msg *src, __u8 req_idx) prikey = (struct wd_ecc_prikey *)(ecc_key + 1); ecc_key->prikey = prikey; - prikey->data = malloc(ECC_PRIKEY_SZ(src->key_bytes)); + prikey->data = src->mm_ops->alloc(src->mm_ops->usr, ECC_PRIKEY_SZ(src->key_bytes)); if (unlikely(!prikey->data)) { WD_ERR("failed to alloc prikey data!\n"); goto fail_alloc_key_data; @@ -1696,7 +1934,7 @@ static struct wd_ecc_msg *create_req(struct wd_ecc_msg *src, __u8 req_idx) return dst; fail_set_prikey: - free(prikey->data); + src->mm_ops->free(src->mm_ops->usr, prikey->data); fail_alloc_key_data: free(ecc_key); fail_alloc_key: @@ -1709,9 +1947,9 @@ static void free_req(struct wd_ecc_msg *msg) { struct wd_ecc_key *key = (void *)msg->key; - free(key->prikey->data); + msg->mm_ops->free(msg->mm_ops->usr, key->prikey->data); free(key); - free(msg->req.dst); + msg->mm_ops->free(msg->mm_ops->usr, msg->req.dst); free(msg); } @@ -1732,7 +1970,8 @@ static int split_req(struct wd_ecc_msg *src, struct wd_ecc_msg **dst) return WD_SUCCESS; } -static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) +static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, + struct map_info_cache *cache) { __u32 hw_sz = get_hw_keysz(msg->key_bytes); __u8 op_type = msg->req.op_type; @@ -1757,39 +1996,42 @@ static int ecc_fill(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) return ret; /* prepare key */ - ret = ecc_prepare_key(msg, hw_msg); + ret = ecc_prepare_key(msg, hw_msg, cache); if (ret) return ret; /* prepare in/out put */ - ret = ecc_prepare_iot(msg, hw_msg); + ret = ecc_prepare_iot(msg, hw_msg, cache); if (ret) return ret; hw_msg->done = 0x1; hw_msg->etype = 0x0; - hw_msg->low_tag = msg->tag; + + hw_msg->low_tag = LW_U32((uintptr_t)msg); + hw_msg->hi_tag = HI_U32((uintptr_t)msg); hw_msg->task_len1 = hw_sz / BYTE_BITS - 0x1; return ret; } -static int ecc_general_send(handle_t ctx, struct wd_ecc_msg *msg) +static int ecc_general_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct hisi_hpre_sqe hw_msg; __u16 send_cnt = 0; int ret; - ret = ecc_fill(msg, &hw_msg); + ret = ecc_fill(msg, &hw_msg, cache); if (ret) return ret; return hisi_qm_send(h_qp, &hw_msg, 1, &send_cnt); } - -static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) +static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_sm2_enc_in *ein = msg->req.src; @@ -1801,7 +2043,7 @@ static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) if (ein->plaintext.dsize <= HW_PLAINTEXT_BYTES_MAX && hash->type == WD_HASH_SM3) - return ecc_general_send(ctx, msg); + return ecc_general_send(ctx, msg, cache); if (unlikely(!ein->k_set)) { WD_ERR("invalid: k not set!\n"); @@ -1824,13 +2066,13 @@ static int sm2_enc_send(handle_t ctx, struct wd_ecc_msg *msg) return ret; } - ret = ecc_fill(msg_dst[0], &hw_msg[0]); + ret = ecc_fill(msg_dst[0], &hw_msg[0], cache); if (unlikely(ret)) { WD_ERR("failed to fill 1th sqe, ret = %d!\n", ret); goto fail_fill_sqe; } - ret = ecc_fill(msg_dst[1], &hw_msg[1]); + ret = 
ecc_fill(msg_dst[1], &hw_msg[1], cache); if (unlikely(ret)) { WD_ERR("failed to fill 2th sqe, ret = %d!\n", ret); goto fail_fill_sqe; @@ -1855,7 +2097,8 @@ fail_fill_sqe: return ret; } -static int sm2_dec_send(handle_t ctx, struct wd_ecc_msg *msg) +static int sm2_dec_send(handle_t ctx, struct wd_ecc_msg *msg, + struct map_info_cache *cache) { struct wd_sm2_dec_in *din = (void *)msg->req.src; struct wd_hash_mt *hash = &msg->hash; @@ -1865,7 +2108,7 @@ /* c2 data lens <= 4096 bit */ if (din->c2.dsize <= BITS_TO_BYTES(4096) && hash->type == WD_HASH_SM3) - return ecc_general_send(ctx, msg); + return ecc_general_send(ctx, msg, cache); if (unlikely(!hash->cb || hash->type >= WD_HASH_MAX)) { WD_ERR("invalid: input hash type %u is error!\n", hash->type); @@ -1891,14 +2134,14 @@ goto free_dst; } - ret = ecc_general_send(ctx, dst); + ret = ecc_general_send(ctx, dst, cache); if (unlikely(ret)) goto free_req_dst; return ret; free_req_dst: - free(dst->req.dst); + msg->mm_ops->free(msg->mm_ops->usr, dst->req.dst); free_dst: free(dst); return ret; @@ -1908,14 +2151,37 @@ static int ecc_send(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_ecc_msg *msg = ecc_msg; + struct map_info_cache cache = {0}; + int ret; + + ret = check_hpre_mem_params(msg->mm_ops, msg->mm_type); + if (ret) { + WD_ERR("ecc memory params are invalid, ret is %d!\n", ret); + return ret; + } hisi_set_msg_id(h_qp, &msg->tag); - if (msg->req.op_type == WD_SM2_ENCRYPT) - return sm2_enc_send(ctx, msg); - else if (msg->req.op_type == WD_SM2_DECRYPT) - return sm2_dec_send(ctx, msg); + if (msg->req.op_type == WD_SM2_ENCRYPT) { + ret = sm2_enc_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + return ret; + } else if (msg->req.op_type == WD_SM2_DECRYPT) { + ret = sm2_dec_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + return ret; + } - return ecc_general_send(ctx, msg); + ret = ecc_general_send(ctx, msg, &cache); + if (ret) + goto ecc_fail; + + return ret; + +ecc_fail: + unmap_addr_in_cache(msg->mm_ops, &cache); + return ret; } static int ecdh_out_transfer(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) @@ -2020,14 +2286,14 @@ static int sm2_enc_out_transfer(struct wd_ecc_msg *msg, static int ecc_out_transfer(struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg, __u8 qp_mode) { + struct wd_ecc_msg *target_msg; int ret = -WD_EINVAL; - void *va; + + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); /* async */ - if (qp_mode == CTX_MODE_ASYNC) { - va = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - msg->req.dst = container_of(va, struct wd_ecc_out, data); - } + if (qp_mode == CTX_MODE_ASYNC) + msg->req.dst = target_msg->req.dst; if (hw_msg->alg == HPRE_ALG_SM2_SIGN || hw_msg->alg == HPRE_ALG_ECDSA_SIGN) @@ -2319,19 +2585,16 @@ static int sm2_convert_dec_out(struct wd_ecc_msg *src, static int ecc_sqe_parse(struct hisi_qp *qp, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - struct wd_ecc_msg *temp_msg; + struct wd_ecc_msg *target_msg; + size_t ilen = 0; + size_t olen = 0; + __u16 kbytes; int ret; - msg->tag = LW_U16(hw_msg->low_tag); - if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { - temp_msg = wd_ecc_get_msg(qp->q_info.idx, msg->tag); - if (!temp_msg) { - WD_ERR("failed to get send msg!
idx = %u, tag = %u.\n", - qp->q_info.idx, msg->tag); - return -WD_EINVAL; - } - } else { - temp_msg = msg; + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!target_msg) { + WD_ERR("failed to get correct ecc send msg from hardware!\n"); + return -WD_ADDR_ERR; } hpre_result_check(hw_msg, &msg->result); @@ -2347,10 +2610,20 @@ static int ecc_sqe_parse(struct hisi_qp *qp, struct wd_ecc_msg *msg, goto dump_err_msg; } - return ret; + return WD_SUCCESS; dump_err_msg: - dump_hpre_msg(temp_msg, WD_ECC); + kbytes = target_msg->key_bytes; + if (!target_msg->mm_ops->sva_mode) { + ecc_get_io_len(hw_msg->alg, kbytes, &ilen, &olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_OUT, hw_msg, + target_msg->req.dst, olen); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_KEY, hw_msg, + target_msg->key, kbytes); + unsetup_hw_msg_addr(target_msg->mm_ops, HW_MSG_IN, hw_msg, + target_msg->req.src, ilen); + } + dump_hpre_msg(target_msg, WD_ECC); return ret; } @@ -2363,8 +2636,6 @@ static int parse_second_sqe(handle_t h_qp, struct wd_ecc_msg *dst; __u16 recv_cnt = 0; int cnt = 0; - void *data; - __u32 hsz; int ret; while (1) { @@ -2380,10 +2651,12 @@ static int parse_second_sqe(handle_t h_qp, break; } - data = VA_ADDR(hw_msg.hi_out, hw_msg.low_out); - hsz = (hw_msg.task_len1 + 1) * BYTE_BITS; - dst = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + dst = (struct wd_ecc_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!dst) { + WD_ERR("failed to get correct sm2 enc second send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + ret = ecc_sqe_parse((struct hisi_qp *)h_qp, dst, &hw_msg); msg->result = dst->result; *second = dst; @@ -2394,19 +2667,17 @@ static int parse_second_sqe(handle_t h_qp, static int sm2_enc_parse(handle_t h_qp, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - __u16 tag = LW_U16(hw_msg->low_tag); struct wd_ecc_msg *second = NULL; struct wd_ecc_msg *first; struct wd_ecc_msg src; - void *data; - __u32 hsz; int ret; - msg->tag = tag; - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - hsz = (hw_msg->task_len1 + 1) * BYTE_BITS; - first = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + first = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!first) { + WD_ERR("failed to get correct sm2 enc msg from hardware!\n"); + return -WD_ADDR_ERR; + } + memcpy(&src, first + 1, sizeof(src)); /* parse first sqe */ @@ -2440,17 +2711,15 @@ free_first: static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, struct hisi_hpre_sqe *hw_msg) { - __u16 tag = LW_U16(hw_msg->low_tag); struct wd_ecc_msg *dst; struct wd_ecc_msg src; - void *data; - __u32 hsz; int ret; - data = VA_ADDR(hw_msg->hi_out, hw_msg->low_out); - hsz = (hw_msg->task_len1 + 1) * BYTE_BITS; - dst = *(struct wd_ecc_msg **)((uintptr_t)data + - hsz * ECDH_OUT_PARAM_NUM); + dst = (struct wd_ecc_msg *)VA_ADDR(hw_msg->hi_tag, hw_msg->low_tag); + if (!dst) { + WD_ERR("failed to get correct sm2 dec msg from hardware!\n"); + return -WD_ADDR_ERR; + } memcpy(&src, dst + 1, sizeof(src)); /* parse first sqe */ @@ -2460,7 +2729,6 @@ static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, goto fail; } msg->result = dst->result; - msg->tag = tag; ret = sm2_convert_dec_out(&src, dst); if (unlikely(ret)) { @@ -2469,7 +2737,7 @@ static int sm2_dec_parse(handle_t ctx, struct wd_ecc_msg *msg, } fail: - free(dst->req.dst); + dst->mm_ops->free(dst->mm_ops->usr, dst->req.dst); free(dst); return ret; @@ -2479,6 +2747,7 @@ static int 
ecc_recv(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) { handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); struct wd_ecc_msg *msg = ecc_msg; + struct wd_ecc_msg *target_msg; struct hisi_hpre_sqe hw_msg; __u16 recv_cnt = 0; int ret; @@ -2487,10 +2756,17 @@ static int ecc_recv(struct wd_alg_driver *drv, handle_t ctx, void *ecc_msg) if (ret) return ret; - ret = hisi_check_bd_id(h_qp, msg->tag, hw_msg.low_tag); + target_msg = (struct wd_ecc_msg *)VA_ADDR(hw_msg.hi_tag, hw_msg.low_tag); + if (!target_msg) { + WD_ERR("failed to get correct send msg from hardware!\n"); + return -WD_ADDR_ERR; + } + + ret = hisi_check_bd_id(h_qp, msg->tag, target_msg->tag); if (ret) return ret; + msg->tag = target_msg->tag; if (hw_msg.alg == HPRE_ALG_ECDH_MULTIPLY && hw_msg.sm2_mlen == HPRE_SM2_ENC) return sm2_enc_parse(h_qp, msg, &hw_msg); @@ -2549,7 +2825,7 @@ static bool is_valid_hw_type(struct wd_alg_driver *drv) hpre_ctx = (struct hisi_hpre_ctx *)drv->priv; qp = (struct hisi_qp *)wd_ctx_get_priv(hpre_ctx->config.ctxs[0].ctx); - if (qp->q_info.hw_type < HISI_QM_API_VER3_BASE) + if (!qp || qp->q_info.hw_type < HISI_QM_API_VER3_BASE) return false; return true; } diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 9251b4c..216b80a 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -473,7 +473,7 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) { struct hisi_qp *qp = (struct hisi_qp *)h_qp; struct hisi_qm_queue_info *q_info; - __u16 free_num, send_num; + __u16 free_num; __u16 tail; if (unlikely(!qp || !req || !count)) @@ -488,11 +488,14 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) return -WD_EBUSY; } - send_num = expect > free_num ? free_num : expect; + if (expect > free_num) { + pthread_spin_unlock(&q_info->sd_lock); + return -WD_EBUSY; + } tail = q_info->sq_tail_index; - hisi_qm_fill_sqe(req, q_info, tail, send_num); - tail = (tail + send_num) % q_info->sq_depth; + hisi_qm_fill_sqe(req, q_info, tail, expect); + tail = (tail + expect) % q_info->sq_depth; /* * Before sending doorbell, check the queue status, @@ -510,9 +513,9 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) q_info->sq_tail_index = tail; /* Make sure used_num is changed before the next thread gets free sqe. 
*/ - __atomic_add_fetch(&q_info->used_num, send_num, __ATOMIC_RELAXED); + __atomic_add_fetch(&q_info->used_num, expect, __ATOMIC_RELAXED); pthread_spin_unlock(&q_info->sd_lock); - *count = send_num; + *count = expect; return 0; } diff --git a/include/drv/wd_dh_drv.h b/include/drv/wd_dh_drv.h index d205dc4..d2a6157 100644 --- a/include/drv/wd_dh_drv.h +++ b/include/drv/wd_dh_drv.h @@ -16,12 +16,15 @@ extern "C" { /* DH message format */ struct wd_dh_msg { struct wd_dh_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; __u32 tag; /* User-defined request identifier */ void *g; __u16 gbytes; __u16 key_bytes; /* Input key bytes */ __u8 is_g2; __u8 result; /* Data format, denoted by WD error code */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_dh_msg *wd_dh_get_msg(__u32 idx, __u32 tag); diff --git a/include/drv/wd_ecc_drv.h b/include/drv/wd_ecc_drv.h index 6193c8b..b123a9b 100644 --- a/include/drv/wd_ecc_drv.h +++ b/include/drv/wd_ecc_drv.h @@ -48,6 +48,8 @@ extern "C" { /* ECC message format */ struct wd_ecc_msg { struct wd_ecc_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; struct wd_hash_mt hash; __u32 tag; /* User-defined request identifier */ __u8 *key; /* Input key VA, should be DMA buffer */ @@ -55,6 +57,7 @@ struct wd_ecc_msg { __u8 curve_id; /* Ec curve denoted by enum wd_ecc_curve_type */ __u8 result; /* alg op error code */ void *drv_cfg; /* internal driver configuration */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_ecc_pubkey { diff --git a/include/drv/wd_rsa_drv.h b/include/drv/wd_rsa_drv.h index d231ecf..c12f3e0 100644 --- a/include/drv/wd_rsa_drv.h +++ b/include/drv/wd_rsa_drv.h @@ -42,11 +42,14 @@ struct wd_rsa_kg_out { /* RSA message format */ struct wd_rsa_msg { struct wd_rsa_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; __u32 tag; /* User-defined request identifier */ __u16 key_bytes; /* Input key bytes */ __u8 key_type; /* Denoted by enum wd_rsa_key_type */ __u8 result; /* Data format, denoted by WD error code */ __u8 *key; /* Input key VA pointer, should be DMA buffer */ + __u8 *rsv_out; /* reserved output data pointer */ }; struct wd_rsa_msg *wd_rsa_get_msg(__u32 idx, __u32 tag); diff --git a/include/wd_dh.h b/include/wd_dh.h index afc2f7c..235c602 100644 --- a/include/wd_dh.h +++ b/include/wd_dh.h @@ -27,6 +27,8 @@ struct wd_dh_sess_setup { __u16 key_bits; /* DH key bites */ bool is_g2; /* is g2 mode or not */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_dh_req { diff --git a/include/wd_ecc.h b/include/wd_ecc.h index 6f670e2..18c1c0d 100644 --- a/include/wd_ecc.h +++ b/include/wd_ecc.h @@ -116,6 +116,8 @@ struct wd_ecc_sess_setup { struct wd_rand_mt rand; /* rand method from user */ struct wd_hash_mt hash; /* hash method from user */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_ecc_req { diff --git a/include/wd_rsa.h b/include/wd_rsa.h index 2f4e589..9c91432 100644 --- a/include/wd_rsa.h +++ b/include/wd_rsa.h @@ -60,6 +60,8 @@ struct wd_rsa_sess_setup { __u16 key_bits; /* RSA key bits */ bool is_crt; /* CRT mode or not */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; bool wd_rsa_is_crt(handle_t sess); diff --git a/wd_dh.c b/wd_dh.c index 221322f..0c1372c 100644 --- a/wd_dh.c +++ b/wd_dh.c @@ -26,6 +26,8 @@ struct wd_dh_sess { struct wd_dtb g; struct wd_dh_sess_setup setup; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; static struct 
wd_dh_setting { @@ -326,6 +328,8 @@ static int fill_dh_msg(struct wd_dh_msg *msg, struct wd_dh_req *req, memcpy(&msg->req, req, sizeof(*req)); msg->result = WD_EINVAL; msg->key_bytes = sess->key_size; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; if (unlikely(req->pri_bytes < sess->key_size)) { WD_ERR("invalid: req pri bytes %hu is error!\n", req->pri_bytes); @@ -581,6 +585,7 @@ void wd_dh_get_g(handle_t sess, struct wd_dtb **g) handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) { struct wd_dh_sess *sess; + int ret; if (!setup) { WD_ERR("invalid: alloc dh sess setup NULL!\n"); @@ -606,10 +611,19 @@ handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = setup->key_bits >> BYTE_BITS_SHIFT; - sess->g.data = malloc(sess->key_size); - if (!sess->g.data) + ret = wd_mem_ops_init(wd_dh_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); goto sess_err; + } + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + sess->g.data = sess->mm_ops.alloc(sess->mm_ops.usr, sess->key_size); + if (!sess->g.data) { + WD_ERR("failed to malloc sess g param memory!\n"); + goto sess_err; + } sess->g.bsize = sess->key_size; /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_dh_setting.sched.sched_init( @@ -622,7 +636,7 @@ handle_t wd_dh_alloc_sess(struct wd_dh_sess_setup *setup) return (handle_t)sess; sched_err: - free(sess->g.data); + sess->mm_ops.free(sess->mm_ops.usr, sess->g.data); sess_err: free(sess); return (handle_t)0; @@ -638,7 +652,7 @@ void wd_dh_free_sess(handle_t sess) } if (sess_t->g.data) - free(sess_t->g.data); + sess_t->mm_ops.free(sess_t->mm_ops.usr, sess_t->g.data); if (sess_t->sched_key) free(sess_t->sched_key); diff --git a/wd_ecc.c b/wd_ecc.c index b1971b9..2c88d0a 100644 --- a/wd_ecc.c +++ b/wd_ecc.c @@ -52,6 +52,8 @@ struct wd_ecc_sess { struct wd_ecc_sess_setup setup; struct wd_ecc_extend_ops eops; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_ecc_curve_list { @@ -494,7 +496,7 @@ static void release_ecc_prikey(struct wd_ecc_sess *sess) struct wd_ecc_prikey *prikey = sess->key.prikey; wd_memset_zero(prikey->data, prikey->size); - free(prikey->data); + sess->mm_ops.free(sess->mm_ops.usr, prikey->data); free(prikey); sess->key.prikey = NULL; } @@ -503,7 +505,7 @@ static void release_ecc_pubkey(struct wd_ecc_sess *sess) { struct wd_ecc_pubkey *pubkey = sess->key.pubkey; - free(pubkey->data); + sess->mm_ops.free(sess->mm_ops.usr, pubkey->data); free(pubkey); sess->key.pubkey = NULL; } @@ -522,7 +524,7 @@ static struct wd_ecc_prikey *create_ecc_prikey(struct wd_ecc_sess *sess) } dsz = ECC_PRIKEY_SZ(hsz); - data = malloc(dsz); + data = sess->mm_ops.alloc(sess->mm_ops.usr, dsz); if (!data) { WD_ERR("failed to malloc prikey data, sz = %u!\n", dsz); free(prikey); @@ -551,7 +553,7 @@ static struct wd_ecc_pubkey *create_ecc_pubkey(struct wd_ecc_sess *sess) } dsz = ECC_PUBKEY_SZ(hsz); - data = malloc(dsz); + data = sess->mm_ops.alloc(sess->mm_ops.usr, dsz); if (!data) { WD_ERR("failed to malloc pubkey data, sz = %u!\n", dsz); free(pubkey); @@ -570,7 +572,7 @@ static void release_ecc_in(struct wd_ecc_sess *sess, struct wd_ecc_in *ecc_in) { wd_memset_zero(ecc_in->data, ecc_in->size); - free(ecc_in); + sess->mm_ops.free(sess->mm_ops.usr, ecc_in); } static struct wd_ecc_in *create_ecc_in(struct wd_ecc_sess *sess, __u32 num) @@ -585,7 
+587,7 @@ static struct wd_ecc_in *create_ecc_in(struct wd_ecc_sess *sess, __u32 num) hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_in) + hsz * num; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc ecc in, sz = %u!\n", len); return NULL; @@ -613,7 +615,7 @@ static struct wd_ecc_in *create_sm2_sign_in(struct wd_ecc_sess *sess, len = sizeof(struct wd_ecc_in) + ECC_SIGN_IN_PARAM_NUM * ksz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 sign in, sz = %llu!\n", len); return NULL; @@ -653,7 +655,7 @@ static struct wd_ecc_in *create_sm2_enc_in(struct wd_ecc_sess *sess, } len = sizeof(struct wd_ecc_in) + ksz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 enc in, sz = %llu!\n", len); return NULL; @@ -697,7 +699,7 @@ static void *create_sm2_ciphertext(struct wd_ecc_sess *sess, __u32 m_len, *len = (__u64)st_sz + ECC_POINT_PARAM_NUM * (__u64)sess->key_size + (__u64)m_len + (__u64)h_byts; - start = malloc(*len); + start = sess->mm_ops.alloc(sess->mm_ops.usr, *len); if (unlikely(!start)) { WD_ERR("failed to alloc start, sz = %llu!\n", *len); return NULL; @@ -745,7 +747,7 @@ static struct wd_ecc_out *create_ecc_out(struct wd_ecc_sess *sess, __u32 num) hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_out) + hsz * num; - out = malloc(len); + out = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!out) { WD_ERR("failed to malloc out, sz = %u!\n", len); return NULL; @@ -1149,13 +1151,13 @@ static void del_sess_key(struct wd_ecc_sess *sess) { if (sess->key.prikey) { wd_memset_zero(sess->key.prikey->data, sess->key.prikey->size); - free(sess->key.prikey->data); + sess->mm_ops.free(sess->mm_ops.usr, sess->key.prikey->data); free(sess->key.prikey); sess->key.prikey = NULL; } if (sess->key.pubkey) { - free(sess->key.pubkey->data); + sess->mm_ops.free(sess->mm_ops.usr, sess->key.pubkey->data); free(sess->key.pubkey); sess->key.pubkey = NULL; } @@ -1227,6 +1229,15 @@ handle_t wd_ecc_alloc_sess(struct wd_ecc_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = BITS_TO_BYTES(setup->key_bits); + /* Memory type set */ + ret = wd_mem_ops_init(wd_ecc_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; + if (wd_ecc_setting.driver->get_extend_ops) { ret = wd_ecc_setting.driver->get_extend_ops(&sess->eops); if (ret) { @@ -1508,9 +1519,10 @@ void wd_ecxdh_get_out_params(struct wd_ecc_out *out, struct wd_ecc_point **pbk) void wd_ecc_del_in(handle_t sess, struct wd_ecc_in *in) { + struct wd_ecc_sess *sess_t = (struct wd_ecc_sess *)sess; __u32 bsz; - if (!in) { + if (!sess_t || !in) { WD_ERR("invalid: del ecc in parameter error!\n"); return; } @@ -1522,14 +1534,15 @@ void wd_ecc_del_in(handle_t sess, struct wd_ecc_in *in) } wd_memset_zero(in->data, bsz); - free(in); + sess_t->mm_ops.free(sess_t->mm_ops.usr, in); } -void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) +void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) { + struct wd_ecc_sess *sess_t = (struct wd_ecc_sess *)sess; __u32 bsz; - if (!out) { + if (!sess_t || !out) { WD_ERR("invalid: del ecc out parameter error!\n"); return; } @@ -1541,7 +1554,7 @@ void wd_ecc_del_out(handle_t sess, struct wd_ecc_out *out) } 
wd_memset_zero(out->data, bsz); - free(out); + sess_t->mm_ops.free(sess_t->mm_ops.usr, out); } static int fill_ecc_msg(struct wd_ecc_msg *msg, struct wd_ecc_req *req, @@ -1551,6 +1564,8 @@ static int fill_ecc_msg(struct wd_ecc_msg *msg, struct wd_ecc_req *req, memcpy(&msg->req, req, sizeof(msg->req)); memcpy(&msg->hash, &sess->setup.hash, sizeof(msg->hash)); + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; msg->key_bytes = sess->key_size; msg->curve_id = sess->setup.cv.cfg.id; msg->drv_cfg = sess->eops.params; @@ -1922,7 +1937,7 @@ static struct wd_ecc_in *create_sm2_verf_in(struct wd_ecc_sess *sess, hsz = get_key_bsz(sess->key_size); len = sizeof(struct wd_ecc_in) + ECC_VERF_IN_PARAM_NUM * hsz + m_len; - in = malloc(len); + in = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!in) { WD_ERR("failed to malloc sm2 verf in, sz = %llu!\n", len); return NULL; @@ -2211,7 +2226,7 @@ struct wd_ecc_out *wd_sm2_new_dec_out(handle_t sess, __u32 plaintext_len) } len = sizeof(*ecc_out) + plaintext_len; - ecc_out = malloc(len); + ecc_out = sess_t->mm_ops.alloc(sess_t->mm_ops.usr, len); if (!ecc_out) { WD_ERR("failed to malloc ecc_out, sz = %llu!\n", len); return NULL; diff --git a/wd_rsa.c b/wd_rsa.c index cf9239c..bc78c6a 100644 --- a/wd_rsa.c +++ b/wd_rsa.c @@ -67,6 +67,8 @@ struct wd_rsa_sess { struct wd_rsa_prikey *prikey; struct wd_rsa_sess_setup setup; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; static struct wd_rsa_setting { @@ -373,6 +375,8 @@ static int fill_rsa_msg(struct wd_rsa_msg *msg, struct wd_rsa_req *req, memcpy(&msg->req, req, sizeof(*req)); msg->key_bytes = sess->key_size; msg->result = WD_EINVAL; + msg->mm_ops = &sess->mm_ops; + msg->mm_type = sess->mm_type; switch (msg->req.op_type) { case WD_RSA_SIGN: @@ -641,7 +645,7 @@ struct wd_rsa_kg_in *wd_rsa_new_kg_in(handle_t sess, struct wd_dtb *e, } kg_in_size = (int)GEN_PARAMS_SZ(c->key_size); - kg_in = malloc(kg_in_size + sizeof(*kg_in)); + kg_in = c->mm_ops.alloc(c->mm_ops.usr, kg_in_size + sizeof(*kg_in)); if (!kg_in) { WD_ERR("failed to malloc kg_in memory!\n"); return NULL; @@ -681,19 +685,16 @@ void wd_rsa_get_kg_in_params(struct wd_rsa_kg_in *kin, struct wd_dtb *e, p->data = (void *)kin->p; } -static void del_kg(void *k) +void wd_rsa_del_kg_in(handle_t sess, struct wd_rsa_kg_in *ki) { - if (!k) { + struct wd_rsa_sess *c = (struct wd_rsa_sess *)sess; + + if (!c || !ki) { WD_ERR("invalid: del key generate params err!\n"); return; } - free(k); -} - -void wd_rsa_del_kg_in(handle_t sess, struct wd_rsa_kg_in *ki) -{ - del_kg(ki); + c->mm_ops.free(c->mm_ops.usr, ki); } struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) @@ -719,7 +720,7 @@ struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) else kg_out_size = (int)GEN_PARAMS_SZ(c->key_size); - kg_out = malloc(kg_out_size + sizeof(*kg_out)); + kg_out = c->mm_ops.alloc(c->mm_ops.usr, kg_out_size + sizeof(*kg_out)); if (!kg_out) { WD_ERR("failed to malloc kg_out memory!\n"); return NULL; @@ -741,13 +742,15 @@ struct wd_rsa_kg_out *wd_rsa_new_kg_out(handle_t sess) void wd_rsa_del_kg_out(handle_t sess, struct wd_rsa_kg_out *kout) { - if (!kout) { + struct wd_rsa_sess *c = (struct wd_rsa_sess *)sess; + + if (!c || !kout) { WD_ERR("invalid: param null at del kg out!\n"); return; } wd_memset_zero(kout->data, kout->size); - del_kg(kout); + c->mm_ops.free(c->mm_ops.usr, kout); } void wd_rsa_get_kg_out_params(struct wd_rsa_kg_out *kout, struct wd_dtb *d, @@ -804,6 +807,11 @@ void wd_rsa_set_kg_out_crt_psz(struct wd_rsa_kg_out *kout, size_t dq_sz, 
size_t dp_sz) { + if (!kout) { + WD_ERR("invalid: input null when set kg out crt psz!\n"); + return; + } + kout->qinvbytes = qinv_sz; kout->dqbytes = dq_sz; kout->dpbytes = dp_sz; @@ -813,6 +821,11 @@ void wd_rsa_set_kg_out_psz(struct wd_rsa_kg_out *kout, size_t d_sz, size_t n_sz) { + if (!kout) { + WD_ERR("invalid: input null when set kg out psz!\n"); + return; + } + kout->dbytes = d_sz; kout->nbytes = n_sz; } @@ -862,7 +875,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, if (setup->is_crt) { len = sizeof(struct wd_rsa_prikey) + (int)CRT_PARAMS_SZ(sess->key_size); - sess->prikey = malloc(len); + sess->prikey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->prikey) { WD_ERR("failed to alloc sess prikey2!\n"); return -WD_ENOMEM; @@ -873,7 +886,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, } else { len = sizeof(struct wd_rsa_prikey) + (int)GEN_PARAMS_SZ(sess->key_size); - sess->prikey = malloc(len); + sess->prikey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->prikey) { WD_ERR("failed to alloc sess prikey1!\n"); return -WD_ENOMEM; @@ -885,7 +898,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, len = sizeof(struct wd_rsa_pubkey) + (int)GEN_PARAMS_SZ(sess->key_size); - sess->pubkey = malloc(len); + sess->pubkey = sess->mm_ops.alloc(sess->mm_ops.usr, len); if (!sess->pubkey) { free(sess->prikey); WD_ERR("failed to alloc sess pubkey!\n"); @@ -912,8 +925,8 @@ static void del_sess_key(struct wd_rsa_sess *sess) wd_memset_zero(prk->pkey.pkey2.data, CRT_PARAMS_SZ(sess->key_size)); else wd_memset_zero(prk->pkey.pkey1.data, GEN_PARAMS_SZ(sess->key_size)); - free(sess->prikey); - free(sess->pubkey); + sess->mm_ops.free(sess->mm_ops.usr, sess->prikey); + sess->mm_ops.free(sess->mm_ops.usr, sess->pubkey); } static void del_sess(struct wd_rsa_sess *c) @@ -948,6 +961,15 @@ handle_t wd_rsa_alloc_sess(struct wd_rsa_sess_setup *setup) memcpy(&sess->setup, setup, sizeof(*setup)); sess->key_size = setup->key_bits >> BYTE_BITS_SHIFT; + /* Memory type set */ + ret = wd_mem_ops_init(wd_rsa_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + sess->mm_type = setup->mm_type; ret = create_sess_key(setup, sess); if (ret) { WD_ERR("failed to create rsa sess keys!\n"); -- 2.33.0
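Taken together with the memory-pool interfaces used later in this series (wd_mempool_alloc, wd_mem_alloc and friends from wd_bmm.h), a caller wires the new mm_ops/mm_type session fields roughly as below. This is a minimal sketch: the pool sizing is illustrative, and the concrete non-SVA value of enum wd_mem_type is not shown in this excerpt, so it is passed in by the caller.

/* Sketch: create a reserved-memory pool on a ctx and bind it to an
 * RSA session through the new mm_ops/mm_type setup fields. */
static handle_t rsa_sess_with_rsv_pool(handle_t h_ctx, enum wd_mem_type mm_type)
{
	struct wd_mempool_setup pool_setup = {
		.block_size = 2560,	/* illustrative: fits 2048-bit key material */
		.block_num = 128,	/* illustrative pool depth */
		.align_size = 128,
	};
	struct wd_rsa_sess_setup setup = {0};
	void *pool;

	pool = wd_mempool_alloc(h_ctx, &pool_setup);
	if (!pool)
		return (handle_t)0;

	setup.key_bits = 2048;
	setup.mm_type = mm_type;
	setup.mm_ops.usr = pool;
	setup.mm_ops.alloc = (void *)wd_mem_alloc;
	setup.mm_ops.free = (void *)wd_mem_free;
	setup.mm_ops.iova_map = (void *)wd_mem_map;
	setup.mm_ops.iova_unmap = (void *)wd_mem_unmap;
	setup.mm_ops.get_bufsize = (void *)wd_get_bufsize;

	/* wd_rsa_alloc_sess() runs wd_mem_ops_init() and then allocates
	 * session buffers through these ops instead of malloc(). */
	return wd_rsa_alloc_sess(&setup);
}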
From: lizhi <lizhi206@huawei.com> The newly added tool interface can support unified verification scenarios for hpre sva and nosva interfaces. Signed-off-by: lizhi <lizhi206@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/hpre_protocol_data.h | 7 + uadk_tool/benchmark/hpre_uadk_benchmark.c | 323 +++++++++++++++++++--- uadk_tool/benchmark/hpre_wd_benchmark.c | 15 +- 3 files changed, 304 insertions(+), 41 deletions(-) diff --git a/uadk_tool/benchmark/hpre_protocol_data.h b/uadk_tool/benchmark/hpre_protocol_data.h index 7bdb942..dcb5c85 100644 --- a/uadk_tool/benchmark/hpre_protocol_data.h +++ b/uadk_tool/benchmark/hpre_protocol_data.h @@ -1257,6 +1257,13 @@ static char ecdh_da_secp256k1[] = { 0x64, 0xfc, 0xbe, 0x4e, 0x44, 0x4a, 0xc6, 0x54, 0x71, 0x69, 0x2e, 0x4a, 0x46, 0xa7, 0x2d, 0xa3 }; +static char x448_da[] = { + 0x71, 0x71, 0xb4, 0x5b, 0x79, 0x51, 0x94, 0x70, 0x53, 0xf9, 0x77, 0x02, 0x64, 0xef, 0xc4, 0xdb, + 0x64, 0xfc, 0xbe, 0x4e, 0x44, 0x4a, 0xc6, 0x54, 0x71, 0x69, 0x2e, 0x4a, 0x46, 0xa7, 0x2d, 0xa3, + 0x68, 0xae, 0x87, 0x7c, 0x45, 0xb3, 0x8b, 0xa8, 0xa8, 0x8e, 0x4b, 0xe5, 0x1f, 0x4e, 0xe6, 0x89, + 0xe2, 0x74, 0x69, 0xc8, 0x17, 0x6c, 0x0d, 0xca +}; + #if 0 static char ecdh_db_secp256k1[] = { 0xe2, 0x74, 0x69, 0xc8, 0x17, 0x6c, 0x0d, 0xca, 0xdd, 0x9e, 0xf6, 0x2f, 0x30, 0x9f, 0xad, 0xf3, diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c index 1457236..fa26a61 100644 --- a/uadk_tool/benchmark/hpre_uadk_benchmark.c +++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c @@ -10,12 +10,17 @@ #include "include/wd_dh.h" #include "include/wd_ecc.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" +#include "include/wd_sched.h" #define ECC_CURVE_SECP256R1 0x3 /* default set with secp256r1 */ #define HPRE_TST_PRT printf #define ERR_OPTYPE 0xFF #define SM2_DG_SZ 1024 #define WD_SECP256R1 0x18 /* consistent with wd_ecc.c */ +#define SQE_SIZE 128 +#define POOL_MULTIPLY_FACTOR 2 +#define HPRE_OP_TYPE_MAX 6 struct hpre_rsa_key_in { void *e; @@ -87,6 +92,10 @@ struct hpre_ecc_setup { //----------------------------------ECC param-------------------------------------// +struct thread_pool { + void *rsv_pool; +} hpre_uadk_pool; + typedef struct uadk_thread_res { u32 subtype; u32 keybits; @@ -94,12 +103,14 @@ typedef struct uadk_thread_res { u32 optype; u32 td_id; u32 algtype; + int mm_type; } thread_data; static struct wd_ctx_config g_ctx_cfg; static struct wd_sched *g_sched; static unsigned int g_thread_num; static unsigned int g_ctxnum; +static unsigned int g_dev_id; static const char* const alg_operations[] = { "GenKey", "ShareKey", "Encrypt", "Decrypt", "Sign", "Verify", @@ -340,6 +351,7 @@ static int hpre_uadk_param_parse(thread_data *tddata, struct acc_option *options tddata->kmode = mode; tddata->optype = optype; tddata->algtype = algtype; + tddata->mm_type = options->mem_type; HPRE_TST_PRT("%s to run %s task!\n", options->algclass, alg_operations[options->optype]); @@ -404,6 +416,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -446,7 +459,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = 0; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } - + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -489,17 +502,32 @@ static int init_hpre_ctx_config(struct acc_option 
*options) switch(subtype) { case RSA_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_rsa_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_rsa_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_rsa_poll_ctx); break; case DH_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_dh_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_dh_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_dh_poll_ctx); break; case ECDH_TYPE: case ECDSA_TYPE: case SM2_TYPE: case X25519_TYPE: case X448_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_ecc_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, HPRE_OP_TYPE_MAX, + max_node, wd_ecc_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, HPRE_OP_TYPE_MAX, + max_node, wd_ecc_poll_ctx); break; default: HPRE_TST_PRT("failed to parse alg subtype!\n"); @@ -516,6 +544,7 @@ static int init_hpre_ctx_config(struct acc_option *options) param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param); if (ret) { HPRE_TST_PRT("failed to fill hpre sched data!\n"); @@ -651,15 +680,24 @@ static int init_hpre_ctx_config2(struct acc_option *options) /* init2 */ switch (subtype) { case RSA_TYPE: - return wd_rsa_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_rsa_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_rsa_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); case DH_TYPE: - return wd_dh_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_dh_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_dh_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); case ECDH_TYPE: case ECDSA_TYPE: case SM2_TYPE: case X25519_TYPE: case X448_TYPE: - return wd_ecc_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + return wd_ecc_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + return wd_ecc_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); default: HPRE_TST_PRT("failed to parse alg subtype on uninit2!\n"); return -EINVAL; @@ -671,6 +709,108 @@ out_freectx: return ret; } +static int hpre_uadk_get_block(u32 algtype) +{ + int block_size = 512; + + switch (algtype) { + case RSA_1024: + block_size = 1280; + break; + case RSA_2048: + block_size = 2560; + break; + case RSA_3072: + block_size = 3840; + break; + case RSA_4096: + block_size = 5120; + break; + case RSA_1024_CRT: + block_size = 1280; + break; + case RSA_2048_CRT: + block_size = 2560; + break; + case RSA_3072_CRT: + block_size = 3840; + break; + case RSA_4096_CRT: + block_size = 5120; + break; + case DH_768: + block_size = 1536; + break; + case DH_1024: + block_size = 2048; + break; + case DH_1536: + block_size = 3072; + break; + case DH_2048: + block_size = 4096; + break; + case DH_3072: + block_size = 6144; + break; + case DH_4096: + block_size = 8192; + break; + default: + block_size = 576; + break; + } + + return block_size; +} + +static int init_uadk_rsv_pool(struct acc_option *options) +{ + struct wd_mempool_setup pool_setup; + char *alg = options->algclass; + handle_t h_ctx; + unsigned long
step; + + /* ctxs is NULL */ + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + HPRE_TST_PRT("Failed to find a ctx for alg:%s\n", options->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + step = hpre_uadk_get_block(options->algtype); + + pool_setup.block_size = step; + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH * POOL_MULTIPLY_FACTOR; + pool_setup.align_size = SQE_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + + hpre_uadk_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!hpre_uadk_pool.rsv_pool) { + HPRE_TST_PRT("Failed to create block pool\n"); + return -ENOMEM; + } + + pool_setup.ops.alloc = (void *)wd_mem_alloc; + pool_setup.ops.free = (void *)wd_mem_free; + pool_setup.ops.iova_map = (void *)wd_mem_map; + pool_setup.ops.iova_unmap = (void *)wd_mem_unmap; + pool_setup.ops.get_bufsize = (void *)wd_get_bufsize; + pool_setup.ops.usr = hpre_uadk_pool.rsv_pool; + + return 0; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + + if (hpre_uadk_pool.rsv_pool) + wd_mempool_free(h_ctx, hpre_uadk_pool.rsv_pool); + hpre_uadk_pool.rsv_pool = NULL; +} /*-------------------------------uadk benchmark main code-------------------------------------*/ void *hpre_uadk_poll(void *data) @@ -1049,7 +1189,7 @@ static int get_ecc_curve(struct hpre_ecc_setup *setup, u32 cid) return 0; } -static int get_ecc_key_param(struct wd_ecc_curve *param, u32 key_bits) +static int get_ecc_key_param(struct wd_ecc_curve *param, u32 key_bits) { u32 key_size = (key_bits + 7) / 8; @@ -1188,12 +1328,22 @@ static int get_ecc_param_from_sample(struct hpre_ecc_setup *setup, setup->sign_size = sizeof(sm2_sign_data); } else { - /* x448, x25519 and ecdh-256 can share same private key of ecdh_da_secp256k1*/ - setup->priv_key = ecdh_da_secp256k1; + /* + * x25519 and ecdh-256 can share same 32-bytes private key of + * ecdh_da_secp256k1, while x448 should use 56-byte private key + * to get accurate performance. 
+ */ + if (subtype == X448_TYPE) + setup->priv_key = x448_da; + else + setup->priv_key = ecdh_da_secp256k1; setup->except_pub_key = ecdh_except_b_pubkey_secp256k1; setup->pub_key = ecdh_cp_pubkey_secp256k1; setup->share_key = ecdh_cp_sharekey_secp256k1; - setup->priv_key_size = sizeof(ecdh_da_secp256k1); + if (subtype == X448_TYPE) + setup->priv_key_size = sizeof(x448_da); + else + setup->priv_key_size = sizeof(ecdh_da_secp256k1); setup->except_pub_key_size = sizeof(ecdh_except_b_pubkey_secp256k1); setup->pub_key_size = sizeof(ecdh_cp_pubkey_secp256k1); setup->share_key_size = sizeof(ecdh_cp_sharekey_secp256k1); @@ -1340,6 +1490,7 @@ static void *rsa_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_rsa_sess_setup setup; struct wd_rsa_req req; void *key_info = NULL; @@ -1349,8 +1500,21 @@ static void *rsa_uadk_sync_run(void *arg) memset(&setup, 0, sizeof(setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + setup.sched_param = (void *)&sc_param; setup.key_bits = pdata->keybits; setup.is_crt = pdata->kmode; + setup.mm_type = pdata->mm_type; + setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + setup.mm_ops.alloc = (void *)wd_mem_alloc; + setup.mm_ops.free = (void *)wd_mem_free; + setup.mm_ops.iova_map = (void *)wd_mem_map; + setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_rsa_alloc_sess(&setup); if (!h_sess) @@ -1372,7 +1536,7 @@ static void *rsa_uadk_sync_run(void *arg) rsa_key_in->p = rsa_key_in->e + key_size; rsa_key_in->q = rsa_key_in->p + (key_size >> 1); - ret = get_rsa_key_from_sample(h_sess, key_info, key_info, + ret = get_rsa_key_from_sample(h_sess, key_info, key_info, pdata->keybits, pdata->kmode); if (ret) { HPRE_TST_PRT("failed to get sample key data!\n"); @@ -1389,14 +1553,14 @@ static void *rsa_uadk_sync_run(void *arg) goto sample_release; } } else { - req.src = malloc(key_size); + req.src = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.src) { HPRE_TST_PRT("failed to alloc rsa in buffer!\n"); goto sample_release; } memset(req.src, 0, req.src_bytes); - memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); - req.dst = malloc(key_size); + memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); + req.dst = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.dst) { HPRE_TST_PRT("failed to alloc rsa out buffer!\n"); goto src_release; @@ -1435,10 +1599,10 @@ static void *rsa_uadk_sync_run(void *arg) dst_release: if (req.dst) - free(req.dst); + setup.mm_ops.free(setup.mm_ops.usr, req.dst); src_release: if (req.src) - free(req.src); + setup.mm_ops.free(setup.mm_ops.usr, req.src); sample_release: free(rsa_key_in); key_release: @@ -1460,6 +1624,7 @@ static void *rsa_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct rsa_async_tag *tag; struct wd_rsa_sess_setup setup; struct wd_rsa_req req; @@ -1471,8 +1636,21 @@ static void *rsa_uadk_async_run(void *arg) memset(&setup, 0, sizeof(setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + setup.sched_param = (void *)&sc_param; setup.key_bits = pdata->keybits; 
setup.is_crt = pdata->kmode; + setup.mm_type = pdata->mm_type; + setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + setup.mm_ops.alloc = (void *)wd_mem_alloc; + setup.mm_ops.free = (void *)wd_mem_free; + setup.mm_ops.iova_map = (void *)wd_mem_map; + setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; h_sess = wd_rsa_alloc_sess(&setup); if (!h_sess) @@ -1518,14 +1696,14 @@ static void *rsa_uadk_async_run(void *arg) goto tag_release; } } else { - req.src = malloc(key_size); + req.src = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.src) { HPRE_TST_PRT("failed to alloc rsa in buffer!\n"); goto tag_release; } memset(req.src, 0, req.src_bytes); - memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); - req.dst = malloc(key_size); + memcpy(req.src + key_size - sizeof(rsa_m), rsa_m, sizeof(rsa_m)); + req.dst = setup.mm_ops.alloc(setup.mm_ops.usr, key_size); if (!req.dst) { HPRE_TST_PRT("failed to alloc rsa out buffer!\n"); goto src_release; @@ -1581,10 +1759,10 @@ static void *rsa_uadk_async_run(void *arg) } if (req.dst) - free(req.dst); + setup.mm_ops.free(setup.mm_ops.usr, req.dst); src_release: if (req.src) - free(req.src); + setup.mm_ops.free(setup.mm_ops.usr, req.src); tag_release: free(tag); key_in_release: @@ -1691,27 +1869,27 @@ static int get_dh_param_from_sample(struct hpre_dh_param *setup, return 0; } -static int get_dh_opdata_param(handle_t h_sess, struct wd_dh_req *req, - struct hpre_dh_param *setup, int key_size) +static int get_dh_opdata_param(struct wd_dh_sess_setup *dh_setup, handle_t h_sess, + struct wd_dh_req *req, struct hpre_dh_param *setup, int key_size) { unsigned char *ag_bin = NULL; struct wd_dtb ctx_g; int ret; - ag_bin = malloc(2 * key_size); + ag_bin = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!ag_bin) return -ENOMEM; memset(ag_bin, 0, 2 * key_size); req->pv = ag_bin; - req->x_p = malloc(2 * key_size); + req->x_p = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!req->x_p) goto ag_error; memset(req->x_p, 0, 2 * key_size); - req->pri = malloc(2 * key_size); + req->pri = dh_setup->mm_ops.alloc(dh_setup->mm_ops.usr, 2 * key_size); if (!req->pri) goto xp_error; @@ -1748,11 +1926,11 @@ static int get_dh_opdata_param(handle_t h_sess, struct wd_dh_req *req, return 0; ctx_release: - free(req->pri); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->pri); xp_error: - free(req->x_p); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->x_p); ag_error: - free(req->pv); + dh_setup->mm_ops.free(dh_setup->mm_ops.usr, req->pv); return -ENOMEM; } @@ -1766,6 +1944,7 @@ static void *dh_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_dh_sess_setup dh_setup; struct rsa_async_tag *tag; struct hpre_dh_param param; @@ -1777,6 +1956,19 @@ static void *dh_uadk_async_run(void *arg) memset(&dh_setup, 0, sizeof(dh_setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + dh_setup.sched_param = (void *)&sc_param; + dh_setup.mm_type = pdata->mm_type; + dh_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + dh_setup.mm_ops.alloc = (void *)wd_mem_alloc; + dh_setup.mm_ops.free = (void *)wd_mem_free; + dh_setup.mm_ops.iova_map = (void *)wd_mem_map; + dh_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + dh_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; 
dh_setup.key_bits = pdata->keybits; if (pdata->optype == WD_DH_PHASE2) dh_setup.is_g2 = true; // G1 is 0; G2 is 1; @@ -1791,7 +1983,7 @@ static void *dh_uadk_async_run(void *arg) param.optype = pdata->optype; req.op_type = pdata->optype; - ret = get_dh_opdata_param(h_sess, &req, &param, key_size); + ret = get_dh_opdata_param(&dh_setup, h_sess, &req, &param, key_size); if (ret){ HPRE_TST_PRT("failed to fill dh key gen req!\n"); goto sess_release; @@ -1847,9 +2039,9 @@ static void *dh_uadk_async_run(void *arg) free(tag); param_release: - free(req.x_p); - free(req.pv); - free(req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.x_p); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pv); sess_release: wd_dh_free_sess(h_sess); add_send_complete(); @@ -1861,6 +2053,7 @@ static void *dh_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; struct wd_dh_sess_setup dh_setup; struct hpre_dh_param setup; struct wd_dh_req req; @@ -1870,6 +2063,19 @@ static void *dh_uadk_sync_run(void *arg) memset(&dh_setup, 0, sizeof(dh_setup)); memset(&req, 0, sizeof(req)); + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + dh_setup.sched_param = (void *)&sc_param; + dh_setup.mm_type = pdata->mm_type; + dh_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + dh_setup.mm_ops.alloc = (void *)wd_mem_alloc; + dh_setup.mm_ops.free = (void *)wd_mem_free; + dh_setup.mm_ops.iova_map = (void *)wd_mem_map; + dh_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + dh_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; dh_setup.key_bits = pdata->keybits; if (pdata->optype == WD_DH_PHASE2) dh_setup.is_g2 = true; // G1 is 0; G2 is 1; @@ -1884,7 +2090,7 @@ static void *dh_uadk_sync_run(void *arg) setup.optype = pdata->optype; req.op_type = pdata->optype; - ret = get_dh_opdata_param(h_sess, &req, &setup, key_size); + ret = get_dh_opdata_param(&dh_setup, h_sess, &req, &setup, key_size); if (ret){ HPRE_TST_PRT("failed to fill dh key gen req!\n"); goto param_release; @@ -1903,9 +2109,9 @@ static void *dh_uadk_sync_run(void *arg) } while(true); param_release: - free(req.x_p); - free(req.pv); - free(req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pri); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.x_p); + dh_setup.mm_ops.free(dh_setup.mm_ops.usr, req.pv); sess_release: wd_dh_free_sess(h_sess); cal_avg_latency(count); @@ -2247,6 +2453,7 @@ static void *ecc_uadk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; u32 cid = ECC_CURVE_SECP256R1; u32 subtype = pdata->subtype; struct wd_ecc_sess_setup sess_setup; @@ -2304,8 +2511,21 @@ static void *ecc_uadk_sync_run(void *arg) } // set def setting; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + sess_setup.sched_param = (void *)&sc_param; sess_setup.hash.cb = hpre_compute_hash; - sess_setup.hash.type = WD_HASH_SHA256; + sess_setup.hash.type = 0; + sess_setup.mm_type = pdata->mm_type; + sess_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + sess_setup.mm_ops.alloc = (void *)wd_mem_alloc; + sess_setup.mm_ops.free = (void *)wd_mem_free; + sess_setup.mm_ops.iova_map = (void *)wd_mem_map; + sess_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; +
sess_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; ret = get_ecc_param_from_sample(&setup, subtype, pdata->keybits); if (ret) @@ -2403,6 +2623,7 @@ static void *ecc_uadk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; int key_size = pdata->keybits >> 3; + struct sched_params sc_param = {0}; u32 cid = ECC_CURVE_SECP256R1; u32 subtype = pdata->subtype; struct wd_ecc_sess_setup sess_setup; @@ -2464,6 +2685,19 @@ static void *ecc_uadk_async_run(void *arg) // set def setting; sess_setup.hash.cb = hpre_compute_hash; sess_setup.hash.type = WD_HASH_SHA256; + sc_param.numa_id = 0; + sc_param.type = 0; + sc_param.mode = 0; + if (hpre_uadk_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(hpre_uadk_pool.rsv_pool); + sess_setup.sched_param = (void *)&sc_param; + sess_setup.mm_type = pdata->mm_type; + sess_setup.mm_ops.usr = hpre_uadk_pool.rsv_pool; + sess_setup.mm_ops.alloc = (void *)wd_mem_alloc; + sess_setup.mm_ops.free = (void *)wd_mem_free; + sess_setup.mm_ops.iova_map = (void *)wd_mem_map; + sess_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + sess_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; ret = get_ecc_param_from_sample(&setup, subtype, pdata->keybits); if (ret) @@ -2630,6 +2864,7 @@ static int hpre_uadk_sync_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].algtype = threads_option.algtype; + threads_args[i].mm_type = threads_option.mm_type; ret = pthread_create(&tdid[i], NULL, uadk_hpre_sync_run, &threads_args[i]); if (ret) { HPRE_TST_PRT("Create sync thread fail!\n"); @@ -2707,6 +2942,7 @@ static int hpre_uadk_async_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].td_id = i; threads_args[i].algtype = threads_option.algtype; + threads_args[i].mm_type = threads_option.mm_type; ret = pthread_create(&tdid[i], NULL, uadk_hpre_async_run, &threads_args[i]); if (ret) { HPRE_TST_PRT("Create async thread fail!\n"); @@ -2758,6 +2994,12 @@ int hpre_uadk_benchmark(struct acc_option *options) return ret; } + if (options->mem_type != UADK_AUTO) { + ret = init_uadk_rsv_pool(options); + if (ret) + return ret; + } + get_pid_cpu_time(&ptime); time_start(options->times); if (options->syncmode) @@ -2768,6 +3010,9 @@ int hpre_uadk_benchmark(struct acc_option *options) if (ret) return ret; + if (options->mem_type != UADK_AUTO) + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_hpre_ctx_config2(options->subtype); else diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c index 60ed3bd..38c45b1 100644 --- a/uadk_tool/benchmark/hpre_wd_benchmark.c +++ b/uadk_tool/benchmark/hpre_wd_benchmark.c @@ -1613,11 +1613,22 @@ static int get_ecc_param_from_sample(struct hpre_ecc_setup *setup, setup->sign_size = sizeof(sm2_sign_data); } else { - setup->priv_key = ecdh_da_secp256k1; + /* + * x25519 and ecdh-256 can share same 32-bytes private key of + * ecdh_da_secp256k1, while x448 should use 56-byte private key + * to get accurate performance. 
+ */ + if (subtype == X448_TYPE) + setup->priv_key = x448_da; + else + setup->priv_key = ecdh_da_secp256k1; setup->except_pub_key = ecdh_except_b_pubkey_secp256k1; setup->pub_key = ecdh_cp_pubkey_secp256k1; setup->share_key = ecdh_cp_sharekey_secp256k1; - setup->priv_key_size = sizeof(ecdh_da_secp256k1); + if (subtype == X448_TYPE) + setup->priv_key_size = sizeof(x448_da); + else + setup->priv_key_size = sizeof(ecdh_da_secp256k1); setup->except_pub_key_size = sizeof(ecdh_except_b_pubkey_secp256k1); setup->pub_key_size = sizeof(ecdh_cp_pubkey_secp256k1); setup->share_key_size = sizeof(ecdh_cp_sharekey_secp256k1); -- 2.33.0
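The key-size split above follows from the curve sizes: a Curve448 scalar is ceil(448/8) = 56 bytes, while curve25519 and 256-bit ECDH scalars are 32 bytes, so reusing the 32-byte secp256k1 sample for x448 would drive the hardware with an artificially short private key. Condensed, the selection both benchmarks now perform is:

	/* Sketch of the per-curve sample-key choice added above. */
	if (subtype == X448_TYPE) {
		setup->priv_key = x448_da;			/* 56 bytes */
		setup->priv_key_size = sizeof(x448_da);
	} else {
		setup->priv_key = ecdh_da_secp256k1;		/* 32 bytes */
		setup->priv_key_size = sizeof(ecdh_da_secp256k1);
	}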
From: Chenghai Huang <huangchenghai2@huawei.com> When zstd uses blk_type to inform the driver that the previous block is an uncompressible block, the repcode of the current compression block should be based on the repcode of the last compressible block, rather than always using the repcode of the previous block regardless of whether it is compressible or not. Use CTX_REPCODE1_OFFSET as the address offset for storing the repcode of the previous block, and CTX_REPCODE2_OFFSET as the address offset for storing the repcode of the last compressible block. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 16 +++++++++------- v1/drv/hisi_zip_udrv.c | 16 +++++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 87d2103..ab2b5e7 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -822,9 +822,14 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, if (msg->ctx_buf) { ctx_buf = msg->ctx_buf + RSV_OFFSET; - if (msg->alg_type == WD_LZ77_ZSTD && data->blk_type != COMP_BLK) - memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET, - msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + if (msg->alg_type == WD_LZ77_ZSTD) { + if (data->blk_type != COMP_BLK) + memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET, + msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + else + memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, + msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + } } fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out); @@ -1203,12 +1208,9 @@ static void get_data_size_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_op_ty data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) + OVERFLOW_DATA_SIZE; - if (ctx_buf) { - memcpy(ctx_buf + CTX_REPCODE2_OFFSET, - ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + if (ctx_buf) memcpy(ctx_buf + CTX_REPCODE1_OFFSET, ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); - } } } diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index ab4254e..8908e27 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -687,9 +687,14 @@ static void fill_zip_sqe_hw_info_lz77_zstd(void *ssqe, struct wcrypto_comp_msg * sqe->ctx_dw0 = *(__u32 *)msg->ctx_buf; sqe->ctx_dw1 = *(__u32 *)(msg->ctx_buf + CTX_PRIV1_OFFSET); sqe->ctx_dw2 = *(__u32 *)(msg->ctx_buf + CTX_PRIV2_OFFSET); - if (format->blk_type != COMP_BLK) - memcpy(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET, - msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + if (msg->alg_type == WCRYPTO_LZ77_ZSTD) { + if (format->blk_type != COMP_BLK) + memcpy(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET, + msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); + else + memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, + msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + } } sqe->isize = msg->isize; @@ -820,13 +825,10 @@ static void fill_priv_lz77_zstd(void *ssqe, struct wcrypto_comp_msg *recv_msg) OVERFLOW_DATA_SIZE; } - if (ctx_buf) { - memcpy(ctx_buf + CTX_REPCODE2_OFFSET, - ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + if (ctx_buf) memcpy(ctx_buf + CTX_REPCODE1_OFFSET, ctx_buf + CTX_BUFFER_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); - } } int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, -- 2.33.0
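Viewed as one state machine, the two hunks above keep a two-slot repcode history: CTX_REPCODE1_OFFSET always holds the repcode of the block just completed (refreshed from the hardware context on every receive), while CTX_REPCODE2_OFFSET is only promoted from slot 1 when the previous block was compressible. The send-side logic, condensed into one place (names as in hisi_comp.c):

	/* On send: pick the repcode base for the next block. */
	if (data->blk_type != COMP_BLK)
		/* previous block was stored raw: feed the hardware the
		 * repcode of the last block that actually compressed */
		memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET,
		       msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE);
	else
		/* previous block compressed: promote its repcode into
		 * the "last compressible" fallback slot */
		memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET,
		       msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE);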
From: Chenghai Huang <huangchenghai2@huawei.com> When the hardware configures the lit_length register, the stream mode LZ77_ZSTD algorithm needs to clear the lit_length in the context of each block input. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 5 +++++ v1/drv/hisi_zip_udrv.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index ab2b5e7..4dc0c6a 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -89,6 +89,7 @@ #define PRICE_MIN_OUT_SIZE 4096 #define ZSTD_LIT_RESV_SIZE 16 #define REPCODE_SIZE 12 +#define SEQ_LIT_LEN_SIZE 4 #define BUF_TYPE 2 @@ -829,6 +830,10 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, else memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + + /* The literal length info of each bd needs to be cleared. */ + memset(ctx_buf + CTX_HW_REPCODE_OFFSET + REPCODE_SIZE, 0, + SEQ_LIT_LEN_SIZE); } } diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 8908e27..3045e48 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -58,6 +58,7 @@ #define ZSTD_LIT_RSV_SIZE 16 #define ZSTD_FREQ_DATA_SIZE 784 #define REPCODE_SIZE 12 +#define SEQ_LIT_LEN_SIZE 4 #define OVERFLOW_DATA_SIZE 8 /* Error status 0xe indicates that dest_avail_out insufficient */ @@ -694,6 +695,10 @@ static void fill_zip_sqe_hw_info_lz77_zstd(void *ssqe, struct wcrypto_comp_msg * else memcpy(msg->ctx_buf + CTX_REPCODE2_OFFSET, msg->ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + + /* The literal length info of each bd needs to be cleared. */ + memset(msg->ctx_buf + CTX_HW_REPCODE_OFFSET + CTX_BUFFER_OFFSET + + REPCODE_SIZE, 0, SEQ_LIT_LEN_SIZE); } } -- 2.33.0
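For orientation, the four bytes being cleared sit immediately after the 12-byte repcode area inside the hardware stream context, so the layout this patch assumes is:

	/* Stream-context region touched above (offsets relative to the
	 * hardware context base, i.e. ctx_buf + RSV_OFFSET in the v2
	 * driver or msg->ctx_buf + CTX_BUFFER_OFFSET in v1):
	 *
	 *   CTX_HW_REPCODE_OFFSET + 0   repcodes, REPCODE_SIZE = 12 bytes
	 *   CTX_HW_REPCODE_OFFSET + 12  literal length, SEQ_LIT_LEN_SIZE = 4
	 *                               bytes, zeroed before every block
	 */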
From: Chenghai Huang <huangchenghai2@huawei.com> When decompressing the last block of data in stream mode, there is a possibility that the input data is fully consumed with insufficient output space, but the stream end flag is not returned. Therefore, when the tail packet data is fully consumed, check if the hardware status is 0x2. If it is 0x2, notify the user to send a request with a zero input size to flush the remaining data in the hardware. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 7 +++++++ v1/drv/hisi_zip_udrv.c | 9 +++++++++ v1/drv/hisi_zip_udrv.h | 1 + 3 files changed, 17 insertions(+) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 4dc0c6a..bcc4f17 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -49,12 +49,14 @@ #define LITLEN_OVERFLOW_POS_MASK 0xffffff #define HZ_DECOMP_NO_SPACE 0x01 +#define HZ_DECOMPING_NO_SPACE 0x02 #define HZ_DECOMP_BLK_NOSTART 0x03 #define HZ_NEGACOMPRESS 0x0d #define HZ_CRC_ERR 0x10 #define HZ_DECOMP_END 0x13 #define HZ_CTX_ST_MASK 0x000f +#define HZ_CTX_BFINAL_MASK 0x80 #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff @@ -1591,6 +1593,11 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, if (ctx_st == HZ_DECOMP_NO_SPACE) recv_msg->req.status = WD_EAGAIN; + /* last block no space when decomping, need resend null size req */ + if (ctx_st == HZ_DECOMPING_NO_SPACE && recv_msg->req.src_len == recv_msg->in_cons && + (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK)) + recv_msg->req.status = WD_EAGAIN; + /* * It need to analysis the data cache by hardware. * If the cache data is a complete huffman block, diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 3045e48..4e5ed80 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -49,6 +49,7 @@ #define HW_UNCOMP_DIF_CHECK_ERR 0x12 #define HW_DECOMP_NO_SPACE 0x01 +#define HW_DECOMPING_NO_SPACE 0x02 #define HW_DECOMP_BLK_NOSTART 0x03 #define HW_DECOMP_NO_CRC 0x04 #define ZIP_DIF_LEN 8 @@ -388,6 +389,7 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, { struct wcrypto_comp_msg *recv_msg = info->req_cache[i]; struct hisi_zip_sqe *sqe = hw_msg; + __u16 ctx_bfinal = sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK; __u16 ctx_st = sqe->ctx_dw0 & HZ_CTX_ST_MASK; __u16 lstblk = sqe->dw3 & HZ_LSTBLK_MASK; __u32 status = sqe->dw3 & HZ_STATUS_MASK; @@ -437,6 +439,9 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, info->sqe_parse_priv(sqe, WCRYPTO_COMP, tag->priv); qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); + if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && + ctx_bfinal) + recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; return 1; } @@ -841,6 +846,7 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, { struct wcrypto_comp_msg *recv_msg = info->req_cache[i]; struct hisi_zip_sqe_v3 *sqe = hw_msg; + __u16 ctx_bfinal = sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK; __u32 ctx_win_len = sqe->ctx_dw2 & CTX_WIN_LEN_MASK; __u16 ctx_st = sqe->ctx_dw0 & HZ_CTX_ST_MASK; __u16 lstblk = sqe->dw3 & HZ_LSTBLK_MASK; @@ -900,6 +906,9 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, } qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); + if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && + ctx_bfinal) + recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; /* * It need to analysis the data cache by hardware. 
diff --git a/v1/drv/hisi_zip_udrv.h b/v1/drv/hisi_zip_udrv.h index c93b01a..28a9c0f 100644 --- a/v1/drv/hisi_zip_udrv.h +++ b/v1/drv/hisi_zip_udrv.h @@ -120,6 +120,7 @@ struct hisi_zip_sqe_v3 { #define HZ_REF_VTYPE_SHIFT 12 #define HZ_BLK_SIZE_SHIFT 16 #define HZ_CTX_ST_MASK 0x000f +#define HZ_CTX_BFINAL_MASK 0x80 #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff -- 2.33.0
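From the application side, the new status turns the tail of a stream decompression into a short flush loop. A hedged sketch against the v2 wd_comp API (output-buffer management elided; consume_output() is an assumed helper that drains req.dst and points it at fresh space):

	do {
		ret = wd_do_comp_strm(h_sess, &req);
		if (ret)
			break;
		consume_output(&req);	/* assumed helper: drain req.dst */
		req.src_len = 0;	/* input already fully consumed */
	} while (req.status == WD_EAGAIN);	/* hardware still holds data */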
From: Chenghai Huang <huangchenghai2@huawei.com> Add a condition to check if the tail packet is not fully processed. When the bfinal end flag of the tail packet is in the second-to-last segment of data, it may cause the driver to incorrectly assume that the current data is the tail packet. In fact, there is still one more segment of data to be transmitted. Therefore, it is necessary to add a check for the data buffer status to notify the user to continue sending the packet. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 3 ++- v1/drv/hisi_zip_udrv.c | 4 ++-- v1/drv/hisi_zip_udrv.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index bcc4f17..3607ea2 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -57,6 +57,7 @@ #define HZ_CTX_ST_MASK 0x000f #define HZ_CTX_BFINAL_MASK 0x80 +#define HZ_CTX_STORE_MASK 0x7ffff #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff @@ -1595,7 +1596,7 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, /* last block no space when decomping, need resend null size req */ if (ctx_st == HZ_DECOMPING_NO_SPACE && recv_msg->req.src_len == recv_msg->in_cons && - (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK)) + (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK) && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->req.status = WD_EAGAIN; /* diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 4e5ed80..44e1545 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -440,7 +440,7 @@ int qm_parse_zip_sqe(void *hw_msg, const struct qm_queue_info *info, qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && - ctx_bfinal) + ctx_bfinal && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; return 1; @@ -907,7 +907,7 @@ int qm_parse_zip_sqe_v3(void *hw_msg, const struct qm_queue_info *info, qm_parse_zip_sqe_set_status(recv_msg, status, lstblk, ctx_st); if (ctx_st == HW_DECOMPING_NO_SPACE && recv_msg->in_size == recv_msg->in_cons && - ctx_bfinal) + ctx_bfinal && (sqe->ctx_dw1 & HZ_CTX_STORE_MASK)) recv_msg->status = WCRYPTO_DECOMP_END_NOSPACE; /* diff --git a/v1/drv/hisi_zip_udrv.h b/v1/drv/hisi_zip_udrv.h index 28a9c0f..1037f43 100644 --- a/v1/drv/hisi_zip_udrv.h +++ b/v1/drv/hisi_zip_udrv.h @@ -121,6 +121,7 @@ struct hisi_zip_sqe_v3 { #define HZ_BLK_SIZE_SHIFT 16 #define HZ_CTX_ST_MASK 0x000f #define HZ_CTX_BFINAL_MASK 0x80 +#define HZ_CTX_STORE_MASK 0x7ffff #define HZ_LSTBLK_MASK 0x0100 #define HZ_STATUS_MASK 0xff #define HZ_REQ_TYPE_MASK 0xff -- 2.33.0
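Gathered in one place, the full predicate after this patch reads as follows (v2 field names; the v1 variants are structurally identical):

	/* Ask the user for a zero-size resend only when the hardware is
	 * mid-decompression and out of space (ctx status 0x2), the whole
	 * input was consumed, the bfinal flag has been seen, and the
	 * store counter shows output still buffered in hardware. */
	if (ctx_st == HZ_DECOMPING_NO_SPACE &&
	    recv_msg->req.src_len == recv_msg->in_cons &&
	    (sqe->ctx_dw0 & HZ_CTX_BFINAL_MASK) &&
	    (sqe->ctx_dw1 & HZ_CTX_STORE_MASK))
		recv_msg->req.status = WD_EAGAIN;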
From: Chenghai Huang <huangchenghai2@huawei.com> During asynchronous performance testing, it is common for devices to become busy. Simply retrying the operation is sufficient. Printing too many busy messages can affect user experience. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- wd_comp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wd_comp.c b/wd_comp.c index 435f5a8..5e62462 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -913,7 +913,9 @@ int wd_do_comp_async(handle_t h_sess, struct wd_comp_req *req) ret = wd_alg_driver_send(wd_comp_setting.driver, ctx->ctx, msg); if (unlikely(ret < 0)) { - WD_ERR("wd comp send error, ret = %d!\n", ret); + if (ret != -WD_EBUSY) + WD_ERR("wd comp send error, ret = %d!\n", ret); + goto fail_with_msg; } -- 2.33.0
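-WD_EBUSY is ordinary backpressure from a full queue, and a typical asynchronous sender already absorbs it in its submit loop, e.g.:

	/* Common submit pattern this change quiets down (sketch; real
	 * callers usually poll completions between retries). */
	do {
		ret = wd_do_comp_async(h_sess, &req);
	} while (ret == -WD_EBUSY);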
From: Chenghai Huang <huangchenghai2@huawei.com> When the requested memory pool size is greater than 1M, a certain amount of redundancy is reserved to avoid memory pool allocation failure. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- v1/wd_bmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/wd_bmm.c b/v1/wd_bmm.c index c58484f..dd20ff7 100644 --- a/v1/wd_bmm.c +++ b/v1/wd_bmm.c @@ -122,7 +122,7 @@ static int wd_pool_pre_layout(struct wd_queue *q, * ensure that the allocated memory is an integer multiple of 1M. */ if (!sp->br.alloc && !qinfo->iommu_type) - p->act_mem_sz = (p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1); + p->act_mem_sz = ((p->act_mem_sz + BLK_BALANCE_SZ - 1) & ~(BLK_BALANCE_SZ - 1)) << 1; return WD_SUCCESS; } -- 2.33.0
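As a worked example, assuming BLK_BALANCE_SZ is the 1M balance size the surrounding comment describes: a computed act_mem_sz of 1.2M previously became 2M after rounding up, and now becomes 4M (rounded up to 2M, then doubled by the << 1), which is where the reserved redundancy comes from.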
From: Chenghai Huang <huangchenghai2@huawei.com> When SMMU is disabled, it is common to encounter issues with finding sufficient contiguous memory during memory pool creation. To ensure the normal operation of performance testing, a certain number of retries should be performed. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_wd_benchmark.c | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index c12edba..fd9dcbc 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -22,6 +22,7 @@ #define MAX_POOL_LENTH_COMP 512 #define CHUNK_SIZE (128 * 1024) #define MAX_UNRECV_PACKET_NUM 2 +#define MAX_POOL_CREATE_FAIL_TIME 10 #define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) @@ -284,6 +285,28 @@ static int zip_wd_param_parse(thread_data *tddata, struct acc_option *options) return 0; } +static int zip_wd_create_single_blkpool(struct thread_bd_res *bd_res, + struct wd_blkpool_setup blksetup) +{ + int retry_cnt = 0; + int ret; + + while (retry_cnt++ <= MAX_POOL_CREATE_FAIL_TIME) { + bd_res->pool = wd_blkpool_create(bd_res->queue, &blksetup); + if (bd_res->pool) + return 0; + + wd_release_queue(bd_res->queue); + ret = wd_request_queue(bd_res->queue); + if (ret) { + ZIP_TST_PRT("retry to request queue fail!\n"); + return ret; + } + } + + return -ENOMEM; +} + static int init_zip_wd_queue(struct acc_option *options) { struct wd_blkpool_setup blksetup; @@ -343,12 +366,12 @@ static int init_zip_wd_queue(struct acc_option *options) blksetup.align_size = ALIGN_SIZE; for (j = 0; j < g_thread_num; j++) { - g_thread_queue.bd_res[j].pool = wd_blkpool_create(g_thread_queue.bd_res[j].queue, &blksetup); - if (!g_thread_queue.bd_res[j].pool) { + ret = zip_wd_create_single_blkpool(&g_thread_queue.bd_res[j], blksetup); + if (ret) { ZIP_TST_PRT("create %dth pool fail!\n", j); - ret = -ENOMEM; goto pool_err; } + pool = g_thread_queue.bd_res[j].pool; g_thread_queue.bd_res[j].bds = malloc(sizeof(struct wd_bd) * MAX_POOL_LENTH_COMP); -- 2.33.0
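Note the loop bound: with retry_cnt starting at zero and a post-increment test against MAX_POOL_CREATE_FAIL_TIME, zip_wd_create_single_blkpool allows up to eleven pool-creation attempts in total (the initial one plus ten retries), and it releases and re-requests the queue between attempts, presumably so that each retry gets a fresh chance at a contiguous reservation instead of hammering the same failed mapping.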
From: Chenghai Huang <huangchenghai2@huawei.com> uadk.cnf can be used to specify which UADK driver shared libraries (.so) should be loaded. Usage Guidelines: 1. Place this file (uadk.cnf) in the same directory as your driver .so files 2. List one driver library per line (e.g., "libhisi_zip.so") Example Configuration: libhisi_zip.so libhisi_sec.so Note: UADK will not load any .so files that are not explicitly listed here. If a listed .so does not exist, it is skipped and the user is informed via a log message. If uadk.cnf is not present, the libraries (.so) are loaded by the default method. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk.cnf | 26 ++++++ wd_util.c | 264 ++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 231 insertions(+), 59 deletions(-) create mode 100644 uadk.cnf diff --git a/uadk.cnf b/uadk.cnf new file mode 100644 index 0000000..23b8f9d --- /dev/null +++ b/uadk.cnf @@ -0,0 +1,26 @@ +# UADK Driver Configuration File +# ============================= +# +# This file specifies which UADK driver shared libraries (.so) should be loaded. +# +# Usage Guidelines: +# 1. Place this file (uadk.cnf) in the same directory as your driver .so files +# 2. List one driver library per line (e.g., "libhisi_zip.so") +# +# Example Configuration: +# libhisi_zip.so +# libhisi_sec.so +# # libhisi_hpre.so (commented out - will not load) +# +# Note: +# UADK will not load any .so files that are not explicitly listed here. +# If a listed .so does not exist, it is skipped and the user is informed via a log message. +# If uadk.cnf is not present, the libraries (.so) are loaded by the default method. + +libhisi_zip.so +libhisi_sec.so +libhisi_hpre.so +libisa_ce.so +libisa_sve.so +libhisi_dae.so +libhisi_udma.so \ No newline at end of file diff --git a/wd_util.c b/wd_util.c index d0d62a1..daa7309 100644 --- a/wd_util.c +++ b/wd_util.c @@ -35,6 +35,7 @@ #define WD_SOFT_ASYNC_CTX 1 #define WD_DRV_LIB_DIR "uadk" +#define WD_DRV_CONF_FILE "uadk.cnf" #define WD_PATH_DIR_NUM 2 @@ -2279,11 +2280,64 @@ static void dladdr_empty(void) { } +static int line_check_valid(char *line) +{ + line[strcspn(line, "\n")] = 0; + if (line[0] == '\0' || line[0] == '#') + return 0; + + if (!strstr(line, ".so")) + return 0; + + return 1; +} + +static int check_uadk_config_file(const char *wd_dir, const char *lib_file) +{ + char *path_buf, *uadk_cnf_path, *line; + int ret = -WD_EINVAL; + FILE *fp; + + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path_buf.\n"); + return -WD_ENOMEM; + } + + uadk_cnf_path = path_buf; + line = path_buf + PATH_MAX; + + snprintf(uadk_cnf_path, PATH_MAX, "%s/%s/%s", wd_dir, WD_DRV_LIB_DIR, + WD_DRV_CONF_FILE); + fp = fopen(uadk_cnf_path, "r"); + if (!fp) { + ret = 0; + goto free_buf; + } + + while (fgets(line, PATH_MAX, fp)) { + if (!line_check_valid(line)) + continue; + + if (strstr(line, lib_file)) { + ret = 0; + goto close_fp; + } + } + +close_fp: + fclose(fp); +free_buf: + free(path_buf); + return ret; +} + int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) { char *path_buf, *path, *file_path; Dl_info file_info; int len, rc, i; + int ret = 0; /* Get libwd.so file's system path */ rc = dladdr(dladdr_empty, &file_info); @@ -2292,7 +2346,7 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) return -WD_EINVAL; } - path_buf = calloc(WD_PATH_DIR_NUM, sizeof(char) * PATH_MAX); + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); if (!path_buf) { WD_ERR("fail to
calloc path_buf.\n"); return -WD_ENOMEM; } @@ -2312,28 +2366,32 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) if (is_dir) { len = snprintf(lib_path, PATH_MAX, "%s/%s", file_path, WD_DRV_LIB_DIR); - if (len >= PATH_MAX) - goto free_path; } else { - len = snprintf(lib_path, PATH_MAX, "%s/%s/%s", file_path, WD_DRV_LIB_DIR, lib_file); - if (len >= PATH_MAX) + /* Confirm whether the corresponding file exists in uadk.cnf */ + ret = check_uadk_config_file(file_path, lib_file); + if (ret) goto free_path; + + len = snprintf(lib_path, PATH_MAX, "%s/%s/%s", + file_path, WD_DRV_LIB_DIR, lib_file); } - if (realpath(lib_path, path) == NULL) { - WD_ERR("invalid: %s: no such file or directory!\n", path); + if (len >= PATH_MAX) { + ret = -WD_EINVAL; goto free_path; } - free(path_buf); - return 0; + if (!realpath(lib_path, path)) { + WD_ERR("invalid: %s: no such file or directory!\n", path); + ret = -WD_EINVAL; + } free_path: free(path_buf); - return -WD_EINVAL; + return ret; } -/** +/* * There are many other .so files in this file directory (/root/lib/), * and it is necessary to screen out valid uadk driver files * through this function. @@ -2361,36 +2419,93 @@ static int file_check_valid(const char *lib_file) return 0; } -void *wd_dlopen_drv(const char *cust_lib_dir) +static void create_lib_to_list(const char *lib_path, struct drv_lib_list **head) { typedef int (*alg_ops)(struct wd_alg_driver *drv); - struct drv_lib_list *node, *head = NULL; - char lib_dir_path[PATH_MAX] = {0}; - char lib_path[PATH_MAX] = {0}; - struct dirent *lib_dir; - alg_ops dl_func = NULL; - DIR *wd_dir; + struct drv_lib_list *node; + alg_ops dl_func; + + node = calloc(1, sizeof(*node)); + if (!node) + return; + + node->dlhandle = dlopen(lib_path, RTLD_NODELETE | RTLD_NOW); + if (!node->dlhandle) { + WD_ERR("failed to open lib file: %s, skipped\n", lib_path); + free(node); + return; + } + + dl_func = dlsym(node->dlhandle, "wd_alg_driver_register"); + if (!dl_func) { + WD_ERR("dlsym failed for %s: %s\n", lib_path, dlerror()); + dlclose(node->dlhandle); + free(node); + return; + } + + if (!*head) { + *head = node; + return; + } + add_lib_to_list(*head, node); +} + +static struct drv_lib_list *load_libraries_from_config(const char *config_path, + const char *lib_dir_path) +{ + char *path_buf, *lib_path, *line; + struct drv_lib_list *head = NULL; + FILE *config_file; int ret; - if (!cust_lib_dir) { - ret = wd_get_lib_file_path(NULL, lib_dir_path, true); - if (ret) - return NULL; - } else { - if (realpath(cust_lib_dir, lib_path) == NULL) { - WD_ERR("invalid: %s: no such file or directory!\n", lib_path); - return NULL; - } - strncpy(lib_dir_path, cust_lib_dir, PATH_MAX - 1); - lib_dir_path[PATH_MAX - 1] = '\0'; + path_buf = calloc(WD_PATH_DIR_NUM, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path_buf.\n"); + return NULL; } + lib_path = path_buf; + line = path_buf + PATH_MAX; - wd_dir = opendir(lib_dir_path); - if (!wd_dir) { - WD_ERR("UADK driver lib dir: %s not exist!\n", lib_dir_path); + config_file = fopen(config_path, "r"); + if (!config_file) { + WD_ERR("Failed to open config file: %s\n", config_path); + free(path_buf); + return NULL; + } + + /* Read config file line by line */ + while (fgets(line, PATH_MAX, config_file)) { + if (!line_check_valid(line)) + continue; + + ret = snprintf(lib_path, PATH_MAX, "%s/%s", lib_dir_path, line); + if (ret < 0) + break; + + create_lib_to_list(lib_path, &head); + } + + free(path_buf); + fclose(config_file); + return head; +} + +static struct 
drv_lib_list *load_all_libraries(DIR *wd_dir, const char *lib_dir_path) +{ + struct drv_lib_list *head = NULL; + struct dirent *lib_dir; + char *lib_path; + int ret; + + lib_path = calloc(1, PATH_MAX); + if (!lib_path) { + WD_ERR("fail to alloc memory for lib_path.\n"); + return NULL; + } + + rewinddir(wd_dir); /* Ensure we're at the start of the directory */ + while ((lib_dir = readdir(wd_dir)) != NULL) { if (!strncmp(lib_dir->d_name, ".", LINUX_CRTDIR_SIZE) || !strncmp(lib_dir->d_name, "..", LINUX_PRTDIR_SIZE)) @@ -2400,43 +2515,74 @@ void *wd_dlopen_drv(const char *cust_lib_dir) if (ret) continue; - node = calloc(1, sizeof(*node)); - if (!node) - goto free_list; - ret = snprintf(lib_path, PATH_MAX, "%s/%s", lib_dir_path, lib_dir->d_name); if (ret < 0) - goto free_node; + break; - node->dlhandle = dlopen(lib_path, RTLD_NODELETE | RTLD_NOW); - if (!node->dlhandle) { - free(node); - /* there are many other files need to skip */ - continue; - } + create_lib_to_list(lib_path, &head); + } - dl_func = dlsym(node->dlhandle, "wd_alg_driver_register"); - if (dl_func == NULL) { - dlclose(node->dlhandle); - free(node); - continue; + free(lib_path); + return head; +} + +void *wd_dlopen_drv(const char *cust_lib_dir) +{ + char *path_buf, *lib_dir_path, *config_path, *lib_path; + struct drv_lib_list *head = NULL; + int ret, len; + DIR *wd_dir; + + path_buf = calloc(WD_PATH_DIR_NUM + 1, PATH_MAX); + if (!path_buf) { + WD_ERR("fail to alloc memory for path buffers.\n"); + return NULL; + } + + lib_dir_path = path_buf; + config_path = path_buf + PATH_MAX; + lib_path = config_path + PATH_MAX; + + if (!cust_lib_dir) { + ret = wd_get_lib_file_path(NULL, lib_dir_path, true); + if (ret) + goto free_path; + } else { + if (!realpath(cust_lib_dir, lib_path)) { + WD_ERR("invalid: %s: no such file or directory!\n", lib_path); + goto free_path; } - if (!head) - head = node; - else - add_lib_to_list(head, node); + len = snprintf(lib_dir_path, PATH_MAX, "%s", cust_lib_dir); + if (len < 0 || len >= PATH_MAX) + goto free_path; + + lib_dir_path[PATH_MAX - 1] = '\0'; } - closedir(wd_dir); - return (void *)head; + wd_dir = opendir(lib_dir_path); + if (!wd_dir) { + WD_ERR("UADK driver lib dir: %s not exist!\n", lib_dir_path); + goto free_path; + } -free_node: - free(node); -free_list: + len = snprintf(config_path, PATH_MAX, "%s/%s", lib_dir_path, WD_DRV_CONF_FILE); + if (len < 0 || len >= PATH_MAX) + goto close_dir; + + ret = access(config_path, F_OK); + if (!ret) + /* Load specified libraries from config file */ + head = load_libraries_from_config(config_path, lib_dir_path); + else + /* Load all valid .so files */ + head = load_all_libraries(wd_dir, lib_dir_path); + +close_dir: closedir(wd_dir); - wd_dlclose_drv(head); - return NULL; +free_path: + free(path_buf); + return (void *)head; } struct wd_alg_driver *wd_alg_drv_bind(int task_type, const char *alg_name) -- 2.33.0
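In practice the allow-list keeps deployments minimal. For a machine that only accelerates compression, a uadk.cnf placed next to the driver libraries could contain nothing but:

	# load only the ZIP driver; all other .so files stay unloaded
	libhisi_zip.so

Per line_check_valid() above, blank lines and lines starting with '#' are ignored, and a line is only considered at all if it mentions ".so".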
From: Chenghai Huang <huangchenghai2@huawei.com> The sqe addr filling for lz77, lz4, and deflate (including sgl and pbuffer) can be merged into a single interface. Additionally, a struct is used to reduce the number of input parameters. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 115 ++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 3607ea2..7d6f94d 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -156,6 +156,13 @@ struct hisi_comp_buf { struct wd_datalist list_dst; }; +struct hisi_comp_sqe_addr { + void *src_addr; + void *dst_addr; + void *lit_addr; + void *ctx_addr; +}; + struct hisi_zip_sqe { __u32 consumed; __u32 produced; @@ -234,7 +241,7 @@ struct hisi_zip_sqe_ops { }; struct hisi_zip_ctx { - struct wd_ctx_config_internal config; + struct wd_ctx_config_internal config; }; struct comp_sgl { @@ -415,21 +422,29 @@ static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = out_size; } -static void fill_buf_addr_deflate(struct hisi_zip_sqe *sqe, void *src, - void *dst, void *ctx_buf) +static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr) { - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(dst); - sqe->dest_addr_h = upper_32_bits(dst); - sqe->stream_ctx_addr_l = lower_32_bits(ctx_buf); - sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf); + sqe->source_addr_l = lower_32_bits(addr->src_addr); + sqe->source_addr_h = upper_32_bits(addr->src_addr); + sqe->dest_addr_l = lower_32_bits(addr->dst_addr); + sqe->dest_addr_h = upper_32_bits(addr->dst_addr); + + if (addr->lit_addr) { + sqe->literals_addr_l = lower_32_bits(addr->lit_addr); + sqe->literals_addr_h = upper_32_bits(addr->lit_addr); + } + + if (addr->ctx_addr) { + sqe->stream_ctx_addr_l = lower_32_bits(addr->ctx_addr); + sqe->stream_ctx_addr_h = upper_32_bits(addr->ctx_addr); + } } static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg, const char *head, int head_size) { + struct hisi_comp_sqe_addr addr = {0}; __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; struct hisi_comp_buf *buf; @@ -485,7 +500,10 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, if (msg->ctx_buf) ctx_buf = msg->ctx_buf + RSV_OFFSET; - fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + addr.src_addr = src; + addr.dst_addr = dst; + addr.ctx_addr = ctx_buf; + fill_buf_addr(sqe, &addr); return 0; } @@ -508,14 +526,6 @@ static int fill_buf_gzip(handle_t h_qp, struct hisi_zip_sqe *sqe, return fill_buf_deflate_generic(sqe, msg, GZIP_HEADER, GZIP_HEADER_SZ); } -static void fill_buf_addr_lz4(struct hisi_zip_sqe *sqe, void *src, void *dst) -{ - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(dst); - sqe->dest_addr_h = upper_32_bits(dst); -} - static int check_lz4_msg(struct wd_comp_msg *msg, enum wd_buff_type buf_type) { /* LZ4 only support for compress and block mode */ @@ -546,6 +556,7 @@ static int check_lz4_msg(struct wd_comp_msg *msg, enum wd_buff_type buf_type) static int fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; void *src = msg->req.src; void *dst = msg->req.dst; int ret; @@ -556,7 +567,9 @@ static int 
fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - fill_buf_addr_lz4(sqe, src, dst); + addr.src_addr = src; + addr.dst_addr = dst; + fill_buf_addr(sqe, &addr); return 0; } @@ -570,10 +583,10 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) sqe->dw9 = val; } -static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, - struct wd_datalist *list_dst) +static int fill_buf_addr_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_datalist *list_src, struct wd_datalist *list_dst) { + struct hisi_comp_sqe_addr addr = {0}; struct comp_sgl c_sgl; int ret; @@ -585,7 +598,9 @@ static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_addr_deflate(sqe, c_sgl.in, c_sgl.out, NULL); + addr.src_addr = c_sgl.in; + addr.dst_addr = c_sgl.out; + fill_buf_addr(sqe, &addr); return 0; } @@ -639,7 +654,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_type_sgl(sqe); - ret = fill_buf_addr_deflate_sgl(h_qp, sqe, list_src, list_dst); + ret = fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); if (unlikely(ret)) return ret; @@ -699,20 +714,6 @@ static void fill_buf_size_lz77_zstd(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = seqs_size; } -static void fill_buf_addr_lz77_zstd(struct hisi_zip_sqe *sqe, - void *src, void *lits_start, - void *seqs_start, void *ctx_buf) -{ - sqe->source_addr_l = lower_32_bits(src); - sqe->source_addr_h = upper_32_bits(src); - sqe->dest_addr_l = lower_32_bits(seqs_start); - sqe->dest_addr_h = upper_32_bits(seqs_start); - sqe->literals_addr_l = lower_32_bits(lits_start); - sqe->literals_addr_h = upper_32_bits(lits_start); - sqe->stream_ctx_addr_l = lower_32_bits(ctx_buf); - sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf); -} - static int lz77_zstd_buf_check(struct wd_comp_msg *msg) { __u32 in_size = msg->req.src_len; @@ -804,6 +805,7 @@ static int lz77_buf_check(struct wd_comp_msg *msg) static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; struct wd_comp_req *req = &msg->req; struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; @@ -842,11 +844,15 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out); - fill_buf_addr_lz77_zstd(sqe, req->src, req->dst, req->dst + lits_size, ctx_buf); - data->literals_start = req->dst; data->sequences_start = req->dst + lits_size; + addr.src_addr = req->src; + addr.dst_addr = req->dst + lits_size; + addr.lit_addr = req->dst; + addr.ctx_addr = ctx_buf; + fill_buf_addr(sqe, &addr); + return 0; } @@ -965,6 +971,7 @@ static int lz77_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { + struct hisi_comp_sqe_addr addr = {0}; struct wd_comp_req *req = &msg->req; struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; @@ -1004,28 +1011,10 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_addr_lz77_zstd(sqe, c_sgl.in, c_sgl.out, - c_sgl.out_seq, NULL); - - return 0; -} - -static int fill_buf_addr_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, - struct wd_datalist *list_dst) -{ - struct comp_sgl c_sgl; - int 
ret; - - c_sgl.list_src = list_src; - c_sgl.list_dst = list_dst; - c_sgl.seq_start = NULL; - - ret = get_sgl_from_pool(h_qp, &c_sgl); - if (unlikely(ret)) - return ret; - - fill_buf_addr_lz4(sqe, c_sgl.in, c_sgl.out); + addr.src_addr = c_sgl.in; + addr.dst_addr = c_sgl.out_seq; + addr.lit_addr = c_sgl.out; + fill_buf_addr(sqe, &addr); return 0; } @@ -1045,7 +1034,7 @@ static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - return fill_buf_addr_lz4_sgl(h_qp, sqe, list_src, list_dst); + return fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); } static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) -- 2.33.0
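The merged helper leans on zero-initialization as its "field absent" encoding: fill_buf_addr() always programs the source and dest address words, but touches the literals and stream-context words only when the matching pointer in struct hisi_comp_sqe_addr is non-NULL. Condensed from the hunks above (error handling elided), a pbuffer deflate caller reduces to:

    struct hisi_comp_sqe_addr addr = {0};

    addr.src_addr = src;
    addr.dst_addr = dst;
    addr.ctx_addr = ctx_buf;   /* NULL for stateless requests */
    /* addr.lit_addr stays NULL, so the literals words are left untouched */
    fill_buf_addr(sqe, &addr);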
From: Chenghai Huang <huangchenghai2@huawei.com> The current UADK v2 supports the nosva feature, which allows business operations to be executed when the driver is loaded with uacce_mode=2. However, it requires using the reserved memory allocated through uacce. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_comp.c | 260 +++++++++++++++++++++++++------------- include/drv/wd_comp_drv.h | 13 ++ include/wd_comp.h | 2 + wd_comp.c | 43 ++++++- 4 files changed, 228 insertions(+), 90 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 7d6f94d..9d2a9af 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -77,6 +77,7 @@ #define max_in_data_size(outl) ((__u32)(((__u64)(outl) << 3) / 9) & 0xfffffffc) #define HZ_MAX_SIZE (8 * 1024 * 1024) +#define HW_CTX_SIZE 0x10000 #define RSV_OFFSET 64 #define CTX_DW1_OFFSET 4 @@ -244,15 +245,6 @@ struct hisi_zip_ctx { struct wd_ctx_config_internal config; }; -struct comp_sgl { - void *in; - void *out; - void *out_seq; - struct wd_datalist *list_src; - struct wd_datalist *list_dst; - struct wd_datalist *seq_start; -}; - static void dump_zip_msg(struct wd_comp_msg *msg) { WD_ERR("dump zip message after a task error occurs.\n"); @@ -376,11 +368,11 @@ static int check_enable_store_buf(struct wd_comp_msg *msg, __u32 out_size, int h return 0; } -static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl) +static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) { handle_t h_sgl_pool; - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; @@ -415,6 +407,23 @@ err_free_sgl_in: return -WD_ENOMEM; } +static void free_hw_sgl(handle_t h_qp, struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) +{ + handle_t h_sgl_pool; + + h_sgl_pool = hisi_qm_get_sglpool(h_qp, mm_ops); + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool!\n"); + return; + } + + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->in); + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->out); + + if (c_sgl->seq_start) + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->out_seq); +} + static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, __u32 out_size) { @@ -422,8 +431,124 @@ static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, sqe->dest_avail_out = out_size; } -static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr) +static int zip_mem_map(struct wd_mm_ops *mm_ops, struct hisi_zip_sqe *sqe, + struct hisi_comp_sqe_addr *addr) +{ + void *phy_dst, *phy_ctx, *mempool; + void *phy_lit = NULL; + void *phy_src = NULL; + + mempool = mm_ops->usr; + /* When the src len is 0, map is not required */ + if (sqe->input_data_length) { + phy_src = mm_ops->iova_map(mempool, addr->src_addr, sqe->input_data_length); + if (!phy_src) { + WD_ERR("get zip src dma address fail!\n"); + return -WD_ENOMEM; + } + + sqe->source_addr_l = lower_32_bits(phy_src); + sqe->source_addr_h = upper_32_bits(phy_src); + } + + if (addr->lit_addr) { + phy_lit = mm_ops->iova_map(mempool, addr->lit_addr, sqe->dw13); + if (!phy_lit) { + WD_ERR("get zip lits dma address fail!\n"); + goto unmap_src; + } + + sqe->literals_addr_l = lower_32_bits(phy_lit); + sqe->literals_addr_h = upper_32_bits(phy_lit); + } + + phy_dst = mm_ops->iova_map(mempool, addr->dst_addr, sqe->dest_avail_out); + if (!phy_dst) { + WD_ERR("get zip dst dma address fail!\n"); + goto unmap_lit; + } + 
sqe->dest_addr_l = lower_32_bits(phy_dst); + sqe->dest_addr_h = upper_32_bits(phy_dst); + + /* The source addr needs to be filled with a valid address when length is 0 */ + if (!sqe->input_data_length) { + sqe->source_addr_l = sqe->dest_addr_l; + sqe->source_addr_h = sqe->dest_addr_h; + } + + if (addr->ctx_addr) { + phy_ctx = mm_ops->iova_map(mempool, addr->ctx_addr, HW_CTX_SIZE); + if (!phy_ctx) { + WD_ERR("get zip ctx dma address fail!\n"); + goto unmap_dst; + } + + sqe->stream_ctx_addr_l = lower_32_bits(phy_ctx); + sqe->stream_ctx_addr_h = upper_32_bits(phy_ctx); + } + + return 0; + +unmap_dst: + mm_ops->iova_unmap(mempool, addr->dst_addr, phy_dst, sqe->dest_avail_out); +unmap_lit: + if (addr->lit_addr) + mm_ops->iova_unmap(mempool, addr->lit_addr, phy_lit, sqe->dw13); +unmap_src: + if (sqe->input_data_length) + mm_ops->iova_unmap(mempool, addr->src_addr, phy_src, sqe->input_data_length); + return -WD_ENOMEM; +} + +static void zip_mem_unmap(struct wd_comp_msg *msg, struct hisi_zip_sqe *sqe) +{ + void *dma_addr, *src_addr, *seq_addr, *lit_addr; + struct wd_mm_ops *mm_ops = msg->mm_ops; + struct wd_comp_req *req = &msg->req; + void *mempool = mm_ops->usr; + + if (msg->data_fmt == WD_SGL_BUF) + src_addr = msg->c_sgl.in; + else + src_addr = req->src; + + if (sqe->input_data_length) { + dma_addr = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); + mm_ops->iova_unmap(mempool, src_addr, dma_addr, sqe->input_data_length); + } + + if (msg->alg_type == WD_LZ77_ZSTD || msg->alg_type == WD_LZ77_ONLY) { + if (msg->data_fmt == WD_SGL_BUF) { + seq_addr = msg->c_sgl.out_seq; + lit_addr = msg->c_sgl.out; + } else { + seq_addr = req->dst + sqe->dw13; + lit_addr = req->dst; + } + + dma_addr = VA_ADDR(sqe->literals_addr_h, sqe->literals_addr_l); + mm_ops->iova_unmap(mempool, lit_addr, dma_addr, sqe->dw13); + dma_addr = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); + mm_ops->iova_unmap(mempool, seq_addr, dma_addr, sqe->dest_avail_out); + } else { + dma_addr = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); + mm_ops->iova_unmap(mempool, req->dst, dma_addr, sqe->dest_avail_out); + } + + if (msg->stream_mode == WD_COMP_STATEFUL) { + dma_addr = VA_ADDR(sqe->stream_ctx_addr_h, sqe->stream_ctx_addr_l); + mm_ops->iova_unmap(mempool, msg->ctx_buf, dma_addr, HW_CTX_SIZE); + } +} + +static int fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *addr, + struct wd_mm_ops *mm_ops) { + /* No-SVA mode and Memory is USER mode or PROXY mode */ + if (!mm_ops->sva_mode) + return zip_mem_map(mm_ops, sqe, addr); + sqe->source_addr_l = lower_32_bits(addr->src_addr); sqe->source_addr_h = upper_32_bits(addr->src_addr); sqe->dest_addr_l = lower_32_bits(addr->dst_addr); @@ -438,6 +563,8 @@ static void fill_buf_addr(struct hisi_zip_sqe *sqe, struct hisi_comp_sqe_addr *a sqe->stream_ctx_addr_l = lower_32_bits(addr->ctx_addr); sqe->stream_ctx_addr_h = upper_32_bits(addr->ctx_addr); } + + return 0; } static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, @@ -497,15 +624,13 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, in_size, out_size); - if (msg->ctx_buf) + if (msg->stream_mode == WD_COMP_STATEFUL) ctx_buf = msg->ctx_buf + RSV_OFFSET; addr.src_addr = src; addr.dst_addr = dst; addr.ctx_addr = ctx_buf; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static int fill_buf_deflate(handle_t h_qp, struct hisi_zip_sqe *sqe, @@ -569,9 +694,7 @@ static int fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, addr.src_addr = src; 
addr.dst_addr = dst; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) @@ -584,25 +707,28 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) } static int fill_buf_addr_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_datalist *list_src, struct wd_datalist *list_dst) + struct comp_sgl *c_sgl, struct wd_mm_ops *mm_ops) { struct hisi_comp_sqe_addr addr = {0}; - struct comp_sgl c_sgl; int ret; - c_sgl.list_src = list_src; - c_sgl.list_dst = list_dst; - c_sgl.seq_start = NULL; - - ret = get_sgl_from_pool(h_qp, &c_sgl); + ret = get_sgl_from_pool(h_qp, c_sgl, mm_ops); if (unlikely(ret)) return ret; - addr.src_addr = c_sgl.in; - addr.dst_addr = c_sgl.out; - fill_buf_addr(sqe, &addr); + addr.src_addr = c_sgl->in; + if (c_sgl->seq_start) { + addr.lit_addr = c_sgl->out; + addr.dst_addr = c_sgl->out_seq; + } else { + addr.dst_addr = c_sgl->out; + } + + ret = fill_buf_addr(sqe, &addr, mm_ops); + if (unlikely(ret)) + free_hw_sgl(h_qp, c_sgl, mm_ops); - return 0; + return ret; } static void fill_buf_sgl_skip(struct hisi_zip_sqe *sqe, __u32 src_skip, @@ -624,7 +750,6 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, int head_size) { struct wd_comp_req *req = &msg->req; - struct wd_datalist *list_src = req->list_src; struct wd_datalist *list_dst = req->list_dst; __u32 out_size = msg->avail_out; __u32 in_size = req->src_len; @@ -654,10 +779,6 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_type_sgl(sqe); - ret = fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); - if (unlikely(ret)) - return ret; - if (head != NULL && msg->req.op_type == WD_DIR_COMPRESS) { memcpy(req->list_dst->data, head, head_size); dst_skip = head_size; @@ -681,7 +802,10 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, in_size, out_size); - return 0; + msg->c_sgl.list_src = req->list_src; + msg->c_sgl.list_dst = list_dst; + msg->c_sgl.seq_start = NULL; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static int fill_buf_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, @@ -826,7 +950,7 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(seq_avail_out > HZ_MAX_SIZE)) seq_avail_out = HZ_MAX_SIZE; - if (msg->ctx_buf) { + if (msg->stream_mode == WD_COMP_STATEFUL) { ctx_buf = msg->ctx_buf + RSV_OFFSET; if (msg->alg_type == WD_LZ77_ZSTD) { if (data->blk_type != COMP_BLK) @@ -851,9 +975,7 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, addr.dst_addr = req->dst + lits_size; addr.lit_addr = req->dst; addr.ctx_addr = ctx_buf; - fill_buf_addr(sqe, &addr); - - return 0; + return fill_buf_addr(sqe, &addr, msg->mm_ops); } static struct wd_datalist *get_seq_start_list(struct wd_comp_req *req) @@ -977,7 +1099,6 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; struct wd_datalist *seq_start; - struct comp_sgl c_sgl; __u32 lits_size; int ret; @@ -1003,27 +1124,15 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size); - c_sgl.list_src = req->list_src; - c_sgl.list_dst = req->list_dst; - c_sgl.seq_start = seq_start; - - ret = get_sgl_from_pool(h_qp, &c_sgl); - if (unlikely(ret)) - return ret; - - addr.src_addr = c_sgl.in; - addr.dst_addr = 
c_sgl.out_seq; - addr.lit_addr = c_sgl.out; - fill_buf_addr(sqe, &addr); - - return 0; + msg->c_sgl.list_src = req->list_src; + msg->c_sgl.list_dst = req->list_dst; + msg->c_sgl.seq_start = seq_start; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { - struct wd_datalist *list_src = msg->req.list_src; - struct wd_datalist *list_dst = msg->req.list_dst; int ret; ret = check_lz4_msg(msg, WD_SGL_BUF); @@ -1034,7 +1143,10 @@ static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); - return fill_buf_addr_sgl(h_qp, sqe, list_src, list_dst); + msg->c_sgl.list_src = msg->req.list_src; + msg->c_sgl.list_dst = msg->req.list_dst; + msg->c_sgl.seq_start = NULL; + return fill_buf_addr_sgl(h_qp, sqe, &msg->c_sgl, msg->mm_ops); } static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) @@ -1419,31 +1531,6 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, return 0; } -static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, - enum wd_comp_alg_type alg_type) -{ - void *hw_sgl_in, *hw_sgl_out; - handle_t h_sgl_pool; - - h_sgl_pool = hisi_qm_get_sglpool(h_qp, NULL); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool to free hw sgl!\n"); - return; - } - - hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); - - hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); - - if (alg_type == WD_LZ77_ZSTD || alg_type == WD_LZ77_ONLY) { - hw_sgl_out = VA_ADDR(sqe->literals_addr_h, - sqe->literals_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); - } -} - static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *comp_msg) { struct hisi_qp *qp = wd_ctx_get_priv(ctx); @@ -1467,7 +1554,7 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (unlikely(ret < 0)) { if (msg->req.data_fmt == WD_SGL_BUF) - free_hw_sgl(h_qp, &sqe, msg->alg_type); + free_hw_sgl(h_qp, &msg->c_sgl, msg->mm_ops); if (ret != -WD_EBUSY) WD_ERR("failed to send to hardware, ret = %d!\n", ret); @@ -1619,8 +1706,11 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, recv_msg->checksum = sqe->dw31; recv_msg->alg_type = alg_type; + if (recv_msg->mm_ops && !recv_msg->mm_ops->sva_mode) + zip_mem_unmap(recv_msg, sqe); + if (buf_type == WD_SGL_BUF) - free_hw_sgl((handle_t)qp, sqe, alg_type); + free_hw_sgl((handle_t)qp, &recv_msg->c_sgl, recv_msg->mm_ops); if (unlikely(recv_msg->req.status == WD_IN_EPARA)) dump_zip_msg(recv_msg); diff --git a/include/drv/wd_comp_drv.h b/include/drv/wd_comp_drv.h index 213cf2d..95f7fb7 100644 --- a/include/drv/wd_comp_drv.h +++ b/include/drv/wd_comp_drv.h @@ -24,9 +24,22 @@ enum wd_comp_state { WD_COMP_STATELESS, }; +struct comp_sgl { + void *in; + void *out; + void *out_seq; + struct wd_datalist *list_src; + struct wd_datalist *list_dst; + struct wd_datalist *seq_start; +}; + /* fixme wd_comp_msg */ struct wd_comp_msg { struct wd_comp_req req; + struct wd_mm_ops *mm_ops; + enum wd_mem_type mm_type; + /* Store sgl addr for nosva */ + struct comp_sgl c_sgl; /* Denoted HW ctx cache, for stream mode */ void *ctx_buf; /* Denoted by enum wd_comp_alg_type */ diff --git a/include/wd_comp.h b/include/wd_comp.h index 8e056d1..5d09536 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ 
-151,6 +151,8 @@ struct wd_comp_sess_setup { enum wd_comp_winsz_type win_sz; /* Denoted by enum wd_comp_winsz_type */ enum wd_comp_op_type op_type; /* Denoted by enum wd_comp_op_type */ void *sched_param; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; /** diff --git a/wd_comp.c b/wd_comp.c index 5e62462..c67b7f1 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -15,7 +15,7 @@ #include "drv/wd_comp_drv.h" #include "wd_comp.h" -#define HW_CTX_SIZE (64 * 1024) +#define HW_CTX_SIZE 0x10000 #define STREAM_CHUNK (128 * 1024) #define WD_ZLIB_HEADER_SZ 2 #define WD_GZIP_HEADER_SZ 10 @@ -41,6 +41,8 @@ struct wd_comp_sess { __u32 checksum; __u8 *ctx_buf; void *sched_key; + struct wd_mm_ops mm_ops; + enum wd_mem_type mm_type; }; struct wd_comp_setting { @@ -437,6 +439,24 @@ static int wd_comp_check_sess_params(struct wd_comp_sess_setup *setup) return WD_SUCCESS; } +static int wd_alloc_ctx_buf(struct wd_mm_ops *mm_ops, struct wd_comp_sess *sess) +{ + + sess->ctx_buf = mm_ops->alloc(mm_ops->usr, HW_CTX_SIZE); + if (!sess->ctx_buf) + return -WD_ENOMEM; + + memset(sess->ctx_buf, 0, HW_CTX_SIZE); + + return WD_SUCCESS; +} + +static void wd_free_ctx_buf(struct wd_mm_ops *mm_ops, struct wd_comp_sess *sess) +{ + mm_ops->free(mm_ops->usr, sess->ctx_buf); + sess->ctx_buf = NULL; +} + handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) { struct wd_comp_sess *sess; @@ -453,8 +473,15 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) if (!sess) return (handle_t)0; - sess->ctx_buf = calloc(1, HW_CTX_SIZE); - if (!sess->ctx_buf) + /* Memory type set */ + ret = wd_mem_ops_init(wd_comp_setting.config.ctxs[0].ctx, &setup->mm_ops, setup->mm_type); + if (ret) { + WD_ERR("failed to init memory ops!\n"); + goto sess_err; + } + + ret = wd_alloc_ctx_buf(&setup->mm_ops, sess); + if (ret) goto sess_err; sess->alg_type = setup->alg_type; @@ -462,6 +489,9 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) sess->win_sz = setup->win_sz; sess->stream_pos = WD_COMP_STREAM_NEW; + sess->mm_type = setup->mm_type; + memcpy(&sess->mm_ops, &setup->mm_ops, sizeof(struct wd_mm_ops)); + /* Some simple scheduler don't need scheduling parameters */ sess->sched_key = (void *)wd_comp_setting.sched.sched_init( wd_comp_setting.sched.h_sched_ctx, setup->sched_param); @@ -473,7 +503,7 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) return (handle_t)sess; sched_err: - free(sess->ctx_buf); + wd_free_ctx_buf(&setup->mm_ops, sess); sess_err: free(sess); return (handle_t)0; @@ -487,7 +517,7 @@ void wd_comp_free_sess(handle_t h_sess) return; if (sess->ctx_buf) - free(sess->ctx_buf); + wd_free_ctx_buf(&sess->mm_ops, sess); if (sess->sched_key) free(sess->sched_key); @@ -522,6 +552,9 @@ static void fill_comp_msg(struct wd_comp_sess *sess, struct wd_comp_msg *msg, msg->win_sz = sess->win_sz; msg->avail_out = req->dst_len; + msg->mm_type = sess->mm_type; + msg->mm_ops = &sess->mm_ops; + msg->req.last = 1; } -- 2.33.0
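Seen from the caller, the new contract is: create a reserved-memory pool, point a wd_mm_ops at the pool helpers, and pass both through wd_comp_sess_setup. wd_comp_alloc_sess() then draws the stream ctx_buf from the pool, and in non-SVA mode the driver translates every buffer through iova_map()/iova_unmap() around each request. A minimal sketch, assuming rsv_pool was created beforehand (e.g. with wd_mempool_alloc()) and mem_type holds a non-auto memory type; the alg/op values are illustrative and error handling is elided:

    struct wd_comp_sess_setup setup = {0};
    handle_t h_sess;

    setup.alg_type = WD_LZ77_ZSTD;
    setup.op_type = WD_DIR_COMPRESS;
    setup.mm_type = mem_type;                  /* non-auto selects the nosva path */
    setup.mm_ops.alloc = (void *)wd_mem_alloc;
    setup.mm_ops.free = (void *)wd_mem_free;
    setup.mm_ops.iova_map = (void *)wd_mem_map;
    setup.mm_ops.iova_unmap = (void *)wd_mem_unmap;
    setup.mm_ops.usr = rsv_pool;               /* pool handle consumed by the ops */

    h_sess = wd_comp_alloc_sess(&setup);

Request buffers (src, dst and the stream context) must come from the same pool, since zip_mem_map() resolves each address with mm_ops->iova_map(mm_ops->usr, ...).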
From: Chenghai Huang <huangchenghai2@huawei.com> We can test nosva performance using UADK v2 by specifying --memory 2; the --init2 initialization method is also supported. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/zip_uadk_benchmark.c | 350 ++++++++++++++++++++--- uadk_tool/benchmark/zip_wd_benchmark.c | 20 +- 2 files changed, 315 insertions(+), 55 deletions(-) diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index 030f9bf..fc81c2b 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -6,10 +6,13 @@ #include "zip_uadk_benchmark.h" #include "include/wd_comp.h" #include "include/wd_sched.h" +#include "include/wd_bmm.h" #include "include/fse.h" #define ZIP_TST_PRT printf #define PATH_SIZE 64 +#define ALIGN_SIZE 64 +#define BLOCK_NUM 64 #define ZIP_FILE "./zip" #define COMP_LEN_RATE 2 #define DECOMP_LEN_RATE 2 @@ -30,6 +33,7 @@ struct bd_pool { struct thread_pool { struct bd_pool *pool; + void *rsv_pool; } g_zip_pool; enum ZIP_OP_MODE { @@ -62,6 +66,7 @@ typedef struct uadk_thread_res { struct zip_async_tag *tag; COMP_TUPLE_TAG *ftuple; char *hw_buff_out; + int mm_type; } thread_data; struct zip_file_head { @@ -78,6 +83,7 @@ static unsigned int g_ctxnum; static unsigned int g_pktlen; static unsigned int g_prefetch; static unsigned int g_state; +static unsigned int g_dev_id; #ifndef ZLIB_FSE static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level) @@ -348,7 +354,10 @@ static int init_ctx_config2(struct acc_option *options) } /* init */ - ret = wd_comp_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + if (options->mem_type == UADK_AUTO) + ret = wd_comp_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &cparams); + else + ret = wd_comp_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &cparams); if (ret) { ZIP_TST_PRT("failed to do comp init2!\n"); return ret; @@ -419,6 +428,7 @@ static int specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = options->optype % WD_DIR_MAX; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); wd_free_list_accels(list); return 0; @@ -461,6 +471,7 @@ static int non_specified_device_request_ctx(struct acc_option *options) g_ctx_cfg.ctxs[i].op_type = options->optype % WD_DIR_MAX; g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; } + g_dev_id = uadk_parse_dev_id(dev->char_dev_path); free(dev); } @@ -502,7 +513,10 @@ static int init_ctx_config(struct acc_option *options) goto free_ctxs; } - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); + if (options->mem_type == UADK_AUTO) + g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); + else + g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, 2, max_node, wd_comp_poll_ctx); if (!g_sched) { ZIP_TST_PRT("failed to alloc sched!\n"); ret = -ENOMEM; @@ -520,6 +534,7 @@ param.mode = mode; param.begin = 0; param.end = g_ctxnum - 1; + param.dev_id = g_dev_id; ret = wd_sched_rr_instance(g_sched, &param); if (ret) { ZIP_TST_PRT("failed to fill sched data!\n"); @@ -576,33 +591,34 @@ static int init_uadk_bd_pool(u32 optype) outsize = g_pktlen * DECOMP_LEN_RATE; } - g_zip_pool.pool = malloc(g_thread_num * sizeof(struct bd_pool)); + g_zip_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); if (!g_zip_pool.pool) { ZIP_TST_PRT("init uadk pool alloc thread failed!\n"); return -ENOMEM; - } else { - for (i = 0; i 
< g_thread_num; i++) { - g_zip_pool.pool[i].bds = malloc(MAX_POOL_LENTH_COMP * - sizeof(struct uadk_bd)); - if (!g_zip_pool.pool[i].bds) { - ZIP_TST_PRT("init uadk bds alloc failed!\n"); - goto malloc_error1; - } - for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - g_zip_pool.pool[i].bds[j].src = calloc(1, insize); - if (!g_zip_pool.pool[i].bds[j].src) - goto malloc_error2; - g_zip_pool.pool[i].bds[j].src_len = insize; - - g_zip_pool.pool[i].bds[j].dst = malloc(outsize); - if (!g_zip_pool.pool[i].bds[j].dst) - goto malloc_error3; - g_zip_pool.pool[i].bds[j].dst_len = outsize; - - get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); - if (g_prefetch) - get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); - } + } + + for (i = 0; i < g_thread_num; i++) { + g_zip_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH_COMP * + sizeof(struct uadk_bd)); + if (!g_zip_pool.pool[i].bds) { + ZIP_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + g_zip_pool.pool[i].bds[j].src = calloc(1, insize); + if (!g_zip_pool.pool[i].bds[j].src) + goto malloc_error2; + g_zip_pool.pool[i].bds[j].src_len = insize; + + g_zip_pool.pool[i].bds[j].dst = malloc(outsize); + if (!g_zip_pool.pool[i].bds[j].dst) + goto malloc_error3; + g_zip_pool.pool[i].bds[j].dst_len = outsize; + + get_rand_data(g_zip_pool.pool[i].bds[j].src, + insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); } } @@ -649,6 +665,131 @@ static void free_uadk_bd_pool(void) g_zip_pool.pool = NULL; } +static int init_uadk_rsv_pool(struct acc_option *option) +{ + struct wd_mempool_setup pool_setup; + char *alg = option->algclass; + u32 insize = g_pktlen; + handle_t h_ctx; + u32 outsize; + int i, j; + + h_ctx = wd_find_ctx(alg); + if (!h_ctx) { + ZIP_TST_PRT("failed to find a ctx for alg: %s\n", option->algname); + return -EINVAL; + } + g_ctx_cfg.priv = (void *)h_ctx; + + if (option->algtype != LZ77_ZSTD) + outsize = g_pktlen + ALIGN_SIZE; + else + outsize = g_pktlen * DECOMP_LEN_RATE; + + pool_setup.block_size = outsize > CHUNK_SIZE ? 
outsize : CHUNK_SIZE; + pool_setup.block_num = g_thread_num * MAX_POOL_LENTH_COMP * BLOCK_NUM; + pool_setup.align_size = ALIGN_SIZE; + pool_setup.ops.alloc = NULL; + pool_setup.ops.free = NULL; + g_zip_pool.rsv_pool = wd_mempool_alloc(h_ctx, &pool_setup); + if (!g_zip_pool.rsv_pool) { + ZIP_TST_PRT("failed to create block pool\n"); + return -ENOMEM; + } + + pool_setup.ops.alloc = (void *)wd_mem_alloc; + pool_setup.ops.free = (void *)wd_mem_free; + pool_setup.ops.iova_map = (void *)wd_mem_map; + pool_setup.ops.iova_unmap = (void *)wd_mem_unmap; + pool_setup.ops.get_bufsize = (void *)wd_get_bufsize; + pool_setup.ops.usr = g_zip_pool.rsv_pool; + + g_zip_pool.pool = calloc(1, g_thread_num * sizeof(struct bd_pool)); + if (!g_zip_pool.pool) { + ZIP_TST_PRT("init uadk pool alloc thread failed!\n"); + goto free_pool; + } + + for (i = 0; i < g_thread_num; i++) { + g_zip_pool.pool[i].bds = calloc(1, MAX_POOL_LENTH_COMP * sizeof(struct uadk_bd)); + if (!g_zip_pool.pool[i].bds) { + ZIP_TST_PRT("init uadk bds alloc failed!\n"); + goto malloc_error1; + } + + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + g_zip_pool.pool[i].bds[j].src = wd_mem_alloc(g_zip_pool.rsv_pool, insize); + if (!g_zip_pool.pool[i].bds[j].src) { + ZIP_TST_PRT("Failed to alloc src block\n"); + goto malloc_error2; + } + g_zip_pool.pool[i].bds[j].src_len = insize; + + g_zip_pool.pool[i].bds[j].dst = wd_mem_alloc(g_zip_pool.rsv_pool, outsize); + if (!g_zip_pool.pool[i].bds[j].dst) { + ZIP_TST_PRT("Failed to alloc dst block\n"); + goto malloc_error3; + } + g_zip_pool.pool[i].bds[j].dst_len = outsize; + + get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + } + } + + return 0; + +malloc_error3: + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + +malloc_error2: + for (j--; j >= 0; j--) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } +malloc_error1: + for (i--; i >= 0; i--) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } + free(g_zip_pool.pool[i].bds); + g_zip_pool.pool[i].bds = NULL; + } + free(g_zip_pool.pool); + g_zip_pool.pool = NULL; + +free_pool: + wd_mempool_free(h_ctx, g_zip_pool.rsv_pool); + g_zip_pool.rsv_pool = NULL; + + ZIP_TST_PRT("init uadk bd pool alloc failed!\n"); + return -ENOMEM; +} + +static void free_uadk_rsv_pool(struct acc_option *option) +{ + handle_t h_ctx = (handle_t)g_ctx_cfg.priv; + int i, j; + + for (i = 0; i < g_thread_num; i++) { + if (g_zip_pool.pool[i].bds) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].src); + wd_mem_free(g_zip_pool.rsv_pool, g_zip_pool.pool[i].bds[j].dst); + } + } + free(g_zip_pool.pool[i].bds); + g_zip_pool.pool[i].bds = NULL; + } + free(g_zip_pool.pool); + g_zip_pool.pool = NULL; + + wd_mempool_free(h_ctx, g_zip_pool.rsv_pool); + g_zip_pool.rsv_pool = NULL; +} + /*-------------------------------uadk benchmark main code-------------------------------------*/ static void *zip_lz77_async_cb(struct wd_comp_req *req, void *data) { @@ -748,6 +889,7 @@ static void *zip_uadk_blk_lz77_sync_run(void *arg) thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; ZSTD_CCtx *cctx = zstd_soft_fse_init(15); + struct sched_params sc_param = {0}; ZSTD_inBuffer 
zstd_input = {0}; ZSTD_outBuffer zstd_output = {0}; COMP_TUPLE_TAG *ftuple = NULL; @@ -768,11 +910,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -789,7 +945,10 @@ if (!ftuple) goto fse_err; - hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + if (pdata->mm_type == UADK_MEM_AUTO) + hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + else + hw_buff_out = wd_mem_alloc(g_zip_pool.rsv_pool, out_len * MAX_POOL_LENTH_COMP); if (!hw_buff_out) goto hw_buff_err; memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); @@ -823,7 +982,10 @@ } hw_buff_err: - free(hw_buff_out); + if (pdata->mm_type == UADK_MEM_AUTO) + free(hw_buff_out); + else + wd_mem_free(g_zip_pool.rsv_pool, hw_buff_out); fse_err: free(ftuple); wd_comp_free_sess(h_sess); @@ -841,6 +1003,7 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; COMP_TUPLE_TAG *ftuple = NULL; struct bd_pool *uadk_pool; struct wd_comp_req creq; @@ -858,11 +1021,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -887,11 +1064,16 @@ while (in_len > 0) { creq.src_len = in_len > CHUNK_SIZE ? CHUNK_SIZE : in_len; - creq.dst_len = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len; + creq.dst_len = out_len > creq.src_len * 2 ? 
creq.src_len * 2 : out_len; creq.src = src; creq.dst = dst; creq.priv = &ftuple[i]; + if (creq.op_type == WD_DIR_COMPRESS) { + if (in_len <= CHUNK_SIZE) + creq.last = 1; + } + ret = wd_do_comp_strm(h_sess, &creq); if (ret < 0 || creq.status == WD_IN_EPARA) { ZIP_TST_PRT("wd comp, invalid or incomplete data! " @@ -899,12 +1081,13 @@ break; } - src += CHUNK_SIZE; - in_len -= CHUNK_SIZE; - dst += 2 * CHUNK_SIZE; - out_len -= 2 * CHUNK_SIZE; + src += creq.src_len; + in_len -= creq.src_len; + dst += creq.dst_len; + out_len -= creq.dst_len; } + wd_comp_reset_sess(h_sess); count++; if (get_run_state() == 0) @@ -926,6 +1109,7 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; ZSTD_CCtx *cctx = zstd_soft_fse_init(15); + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -941,11 +1125,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1001,6 +1199,7 @@ static void *zip_uadk_blk_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1015,11 +1214,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1061,6 +1274,7 @@ static void *zip_uadk_stm_sync_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1075,11 +1289,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + 
sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1125,6 +1353,7 @@ static void *zip_uadk_blk_async_run(void *arg) { thread_data *pdata = (thread_data *)arg; struct wd_comp_sess_setup comp_setup = {0}; + struct sched_params sc_param = {0}; struct bd_pool *uadk_pool; struct wd_comp_req creq; handle_t h_sess; @@ -1140,11 +1369,25 @@ memset(&comp_setup, 0, sizeof(comp_setup)); memset(&creq, 0, sizeof(creq)); + sc_param.numa_id = param.numa_id; + sc_param.type = param.type; + sc_param.mode = param.mode; + sc_param.begin = param.begin; + sc_param.end = param.end; + if (g_zip_pool.rsv_pool) + sc_param.dev_id = wd_get_dev_id(g_zip_pool.rsv_pool); + comp_setup.alg_type = pdata->alg; comp_setup.op_type = pdata->optype; comp_setup.win_sz = pdata->win_sz; comp_setup.comp_lv = pdata->comp_lv; - comp_setup.sched_param = &param; + comp_setup.sched_param = &sc_param; + comp_setup.mm_type = pdata->mm_type; + comp_setup.mm_ops.alloc = (void *)wd_mem_alloc; + comp_setup.mm_ops.free = (void *)wd_mem_free; + comp_setup.mm_ops.iova_map = (void *)wd_mem_map; + comp_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; + comp_setup.mm_ops.usr = g_zip_pool.rsv_pool; h_sess = wd_comp_alloc_sess(&comp_setup); if (!h_sess) return NULL; @@ -1228,6 +1471,7 @@ threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].mm_type = options->mem_type; threads_args[i].td_id = i; ret = pthread_create(&tdid[i], NULL, uadk_zip_sync_run, &threads_args[i]); if (ret) { @@ -1293,6 +1537,7 @@ threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].mm_type = options->mem_type; threads_args[i].td_id = i; if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) { struct bd_pool *uadk_pool = &g_zip_pool.pool[i]; @@ -1305,11 +1550,15 @@ goto lz77_free; } - threads_args[i].hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + if (options->mem_type == UADK_MEM_AUTO) + threads_args[i].hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); + else + threads_args[i].hw_buff_out = wd_mem_alloc(g_zip_pool.rsv_pool, out_len * MAX_POOL_LENTH_COMP); if (!threads_args[i].hw_buff_out) { ZIP_TST_PRT("failed to malloc lz77 hw_buff_out!\n"); goto lz77_free; } + memset(threads_args[i].hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); } threads_args[i].tag = malloc(sizeof(struct zip_async_tag) * MAX_POOL_LENTH_COMP); @@ -1362,8 +1611,93 @@ lz77_free: if 
(threads_args[i].ftuple) free(threads_args[i].ftuple); - if (threads_args[i].hw_buff_out) - free(threads_args[i].hw_buff_out); + if (threads_args[i].hw_buff_out) { + if (options->mem_type == UADK_MEM_AUTO) + free(threads_args[i].hw_buff_out); + else + wd_mem_free(g_zip_pool.rsv_pool, + threads_args[i].hw_buff_out); + } } } async_error: @@ -1393,7 +1647,11 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret; - ret = init_uadk_bd_pool(options->optype); + if (options->mem_type == UADK_MEM_AUTO) + ret = init_uadk_bd_pool(options->optype); + else + ret = init_uadk_rsv_pool(options); + if (ret) return ret; @@ -1415,7 +1673,11 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret; - free_uadk_bd_pool(); + if (options->mem_type == UADK_MEM_AUTO) + free_uadk_bd_pool(); + else + free_uadk_rsv_pool(options); + if (options->inittype == INIT2_TYPE) uninit_ctx_config2(); else diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index fd9dcbc..8388fd0 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -16,7 +16,6 @@ #define WCRYPTO_DIR_MAX (WCRYPTO_INFLATE + 1) #define ALIGN_SIZE 64 -#define COMP_LEN_RATE 2 #define DECOMP_LEN_RATE 2 #define COMPRESSION_RATIO_FACTOR 0.7 #define MAX_POOL_LENTH_COMP 512 @@ -311,21 +310,18 @@ static int init_zip_wd_queue(struct acc_option *options) { struct wd_blkpool_setup blksetup; struct wd_bd *bds = NULL; + u32 insize = g_pktlen; void *pool = NULL; u32 outsize; - u32 insize; u8 op_type; int i, j, k; int ret = 0; op_type = options->optype % WCRYPTO_DIR_MAX; - if (op_type == WCRYPTO_DEFLATE) {//compress - insize = g_pktlen; - outsize = g_pktlen * COMP_LEN_RATE; - } else { // decompress - insize = g_pktlen; + if (options->algtype != LZ77_ZSTD) + outsize = g_pktlen + ALIGN_SIZE; + else outsize = g_pktlen * DECOMP_LEN_RATE; - } g_thread_queue.bd_res = malloc(g_thread_num * sizeof(struct thread_bd_res)); if (!g_thread_queue.bd_res) { @@ -873,7 +869,7 @@ static void *zip_wd_blk_sync_run(void *arg) static void *zip_wd_stm_sync_run(void *arg) { - u32 in_len, out_len, total_out, count = 0; + u32 in_len, out_len, total_out, total_avail_out; thread_data *pdata = (thread_data *)arg; struct wcrypto_comp_ctx_setup comp_setup; struct wcrypto_comp_op_data opdata; @@ -881,6 +877,7 @@ static void *zip_wd_stm_sync_run(void *arg) struct wd_queue *queue; struct wd_bd *bd_pool; void *src, *dst; + u32 count = 0; int ret, i; if (pdata->td_id > g_thread_num) @@ -911,13 +908,14 @@ static void *zip_wd_stm_sync_run(void *arg) opdata.alg_type = pdata->alg; opdata.priv = NULL; opdata.status = 0; + total_avail_out = bd_pool[0].dst_len; while(1) { i = count % MAX_POOL_LENTH_COMP; src = bd_pool[i].src; dst = bd_pool[i].dst; in_len = bd_pool[i].src_len; - out_len = g_pktlen * DECOMP_LEN_RATE; + out_len = total_avail_out; total_out = 0; opdata.stream_pos = WCRYPTO_COMP_STREAM_NEW; @@ -926,7 +924,7 @@ static void *zip_wd_stm_sync_run(void *arg) opdata.avail_out = out_len > 2 * CHUNK_SIZE ? 2 * CHUNK_SIZE : out_len; opdata.in = src; opdata.out = dst; - opdata.flush = in_len ? WCRYPTO_SYNC_FLUSH : WCRYPTO_FINISH; + opdata.flush = in_len > CHUNK_SIZE ? WCRYPTO_SYNC_FLUSH : WCRYPTO_FINISH; ret = wcrypto_do_comp(ctx, &opdata, NULL); if (ret || opdata.status == WCRYPTO_DECOMP_END_NOSPACE || -- 2.33.0
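On sizing: the reserved pool is provisioned with block_num = g_thread_num * MAX_POOL_LENTH_COMP * BLOCK_NUM blocks of at least CHUNK_SIZE bytes each, so with a pool length of 512 (the value the v1 benchmark uses; assumed to match here), 4 threads already request 4 * 512 * 64 = 131072 blocks, and the uacce reserved region must be sized to match. An illustrative invocation of the new path; only --memory 2 and --init2 come from this patch, while the uadk_tool benchmark entry point and the remaining options are assumed or elided:

    uadk_tool benchmark ... --memory 2 --init2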
From: Junchong Pan <panjunchong@h-partners.com> Add --sgl operations for sec and zip. Signed-off-by: Junchong Pan <panjunchong@h-partners.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 205 +++++++++++++++++++---- uadk_tool/benchmark/uadk_benchmark.c | 13 ++ uadk_tool/benchmark/uadk_benchmark.h | 3 + uadk_tool/benchmark/zip_uadk_benchmark.c | 204 ++++++++++++++++++---- 4 files changed, 359 insertions(+), 66 deletions(-) diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index 141b161..4a64c94 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -72,6 +72,7 @@ static unsigned int g_algtype; static unsigned int g_optype; static unsigned int g_maclen; static unsigned int g_dev_id; +static unsigned int g_data_fmt; struct aead_alg_info { int index; @@ -970,6 +971,9 @@ static void save_aead_dst_data(u8 *addr, u32 size) static void read_aead_dst_data(u8 *addr, u32 len) { char file_name[SEC_SAVE_FILE_LEN] = {0}; + struct wd_datalist *current; + size_t bytes_to_read = 0; + size_t bytes_read = 0; char *alg_name; FILE *fp; int size; @@ -992,10 +996,29 @@ static void read_aead_dst_data(u8 *addr, u32 len) size = ftell(fp); rewind(fp); - size = fread(addr, 1, size, fp); - addr[size] = '\0'; - memcpy(g_save_mac, (char *)addr + len, SEC_MAX_MAC_LEN); + if (!g_data_fmt) { + size = fread(addr, 1, size, fp); + addr[size] = '\0'; + + memcpy(g_save_mac, (char *)addr + len, SEC_MAX_MAC_LEN); + } else { + current = (struct wd_datalist *)addr; + while (current && size > 0) { + bytes_to_read = current->len; + if (bytes_to_read > size) + bytes_to_read = size; + bytes_read = fread(current->data, 1, bytes_to_read, fp); + + if (bytes_read != bytes_to_read) { + SEC_TST_PRT("partial read: expected %zu, got %zu\n", bytes_to_read, bytes_read); + fclose(fp); + return; + } + size -= bytes_read; + current = current->next; + } + } fclose(fp); } @@ -1068,14 +1091,114 @@ static void free_ivkey_source(void) free(g_uadk_pool.iv); } +/* + * Calculate SGL unit size. + */ +static inline size_t cal_unit_sz(size_t sz) +{ + return (sz + SGL_ALIGNED_BYTES - 1) & ~(SGL_ALIGNED_BYTES - 1); +} + +/* + * Create SGL or common memory buffer. + */ +static void *create_buf(int sgl, size_t sz, size_t unit_sz) +{ + struct wd_datalist *head, *p, *q; + int i, tail_sz, sgl_num; + void *buf; + + buf = malloc(sz); + if (!buf) { + SEC_TST_PRT("Fail to allocate buffer %ld size!\n", sz); + return NULL; + } + + memset_buf(buf, sz); + + if (sgl == WD_FLAT_BUF) + return buf; + + if (g_alg != AEAD_TYPE) { + get_rand_data(buf, g_pktlen); + } else { + if (!g_optype) + get_aead_data(buf, g_pktlen + SEC_AEAD_LEN); + } + + tail_sz = sz % unit_sz; + sgl_num = sz / unit_sz; /* the number with unit_sz bytes */ + + /* the additional slot is for tail_sz */ + head = calloc(sgl_num + (tail_sz ? 
1 : 0), sizeof(struct wd_datalist)); + if (!head) { + SEC_TST_PRT("Fail to allocate memory for SGL head!\n"); + goto out; + } + + q = NULL; + for (i = 0; i < sgl_num; i++) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = unit_sz; + if (q) + q->next = p; + q = p; + } + + if (tail_sz) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = tail_sz; + if (q) + q->next = p; + q = p; + } + + if (q) + q->next = NULL; + + return head; +out: + free(buf); + return NULL; +} + +static void free_buf(int sgl, void *buf) +{ + struct wd_datalist *head = buf; + struct wd_datalist *p = head; + void *data_buf = NULL; + + if (!buf) + return; + + if (sgl == WD_FLAT_BUF) { + free(buf); + return; + } + + if (head) + data_buf = head->data; + + /* free the whole data buffer of SGL */ + if (data_buf) + free(p->data); + + /* free SGL headers */ + free(buf); +} + static int init_uadk_bd_pool(void) { unsigned long step; + int unit_sz; int i, j; int ret; // make the block not align to 4K step = sizeof(char) * g_pktlen * 2; + unit_sz = cal_unit_sz(step); ret = init_ivkey_source(); if (ret) { @@ -1097,12 +1220,13 @@ static int init_uadk_bd_pool(void) goto malloc_error1; } for (j = 0; j < MAX_POOL_LENTH; j++) { - g_uadk_pool.pool[i].bds[j].src = malloc(step); - memset(g_uadk_pool.pool[i].bds[j].src, 0, step); + g_uadk_pool.pool[i].bds[j].src = create_buf(g_data_fmt, step, unit_sz); if (!g_uadk_pool.pool[i].bds[j].src) goto malloc_error2; - g_uadk_pool.pool[i].bds[j].dst = malloc(step); - memset(g_uadk_pool.pool[i].bds[j].dst, 0, step); + if (g_alg == DIGEST_TYPE) + g_uadk_pool.pool[i].bds[j].dst = malloc(step); + else + g_uadk_pool.pool[i].bds[j].dst = create_buf(g_data_fmt, step, unit_sz); if (!g_uadk_pool.pool[i].bds[j].dst) goto malloc_error3; g_uadk_pool.pool[i].bds[j].mac = malloc(SEC_MAX_MAC_LEN); @@ -1110,19 +1234,29 @@ static int init_uadk_bd_pool(void) if (!g_uadk_pool.pool[i].bds[j].mac) goto malloc_error4; - if (g_alg != AEAD_TYPE) { - get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); - if (g_prefetch) - get_rand_data(g_uadk_pool.pool[i].bds[j].dst, - g_pktlen); - } else { - if (!g_optype) - get_aead_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); - else { + if (g_data_fmt == WD_FLAT_BUF) { + if (g_alg != AEAD_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); + if (g_prefetch) + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, + g_pktlen); + } else { + if (!g_optype) + get_aead_data(g_uadk_pool.pool[i].bds[j].src, + g_pktlen + SEC_AEAD_LEN); + else { + read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, + g_pktlen + SEC_AEAD_LEN); + memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } + } + } else if (g_data_fmt == WD_SGL_BUF) { + if (g_optype && g_alg == AEAD_TYPE) { read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); + g_pktlen + SEC_AEAD_LEN); memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); + } else if (g_prefetch && g_alg == DIGEST_TYPE) { + get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); } } } @@ -1132,20 +1266,23 @@ static int init_uadk_bd_pool(void) return 0; malloc_error4: - free(g_uadk_pool.pool[i].bds[j].dst); + if (g_alg == DIGEST_TYPE) + free(g_uadk_pool.pool[i].bds[j].dst); + else + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); malloc_error3: - free(g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + 
free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH; j++) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } free(g_uadk_pool.pool[i].bds); @@ -1173,8 +1310,11 @@ static void free_uadk_bd_pool(void) for (i = 0; i < g_thread_num; i++) { if (g_uadk_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH; j++) { - free(g_uadk_pool.pool[i].bds[j].src); - free(g_uadk_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].src); + if (g_alg == DIGEST_TYPE) + free(g_uadk_pool.pool[i].bds[j].dst); + else + free_buf(g_data_fmt, g_uadk_pool.pool[i].bds[j].dst); free(g_uadk_pool.pool[i].bds[j].mac); } } @@ -1655,7 +1795,7 @@ static void *sec_uadk_cipher_async(void *arg) creq.in_bytes = g_pktlen; creq.out_bytes = g_pktlen; creq.out_buf_bytes = g_pktlen; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.state = 0; creq.cb = cipher_async_cb; @@ -1787,7 +1927,7 @@ static void *sec_uadk_aead_async(void *arg) else areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - areq.data_fmt = 0; + areq.data_fmt = g_data_fmt; areq.state = 0; areq.cb = aead_async_cb; @@ -1895,7 +2035,7 @@ static void *sec_uadk_digest_async(void *arg) dreq.in_bytes = g_pktlen; dreq.out_bytes = pdata->d_outbytes; dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; + dreq.data_fmt = g_data_fmt; dreq.state = 0; dreq.has_next = 0; dreq.cb = digest_async_cb; @@ -1997,7 +2137,7 @@ static void *sec_uadk_cipher_sync(void *arg) creq.in_bytes = g_pktlen; creq.out_bytes = g_pktlen; creq.out_buf_bytes = g_pktlen; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.state = 0; while(1) { @@ -2101,7 +2241,7 @@ static void *sec_uadk_aead_sync(void *arg) else areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - areq.data_fmt = 0; + areq.data_fmt = g_data_fmt; areq.state = 0; while(1) { @@ -2176,7 +2316,7 @@ static void *sec_uadk_digest_sync(void *arg) dreq.in_bytes = g_pktlen; dreq.out_bytes = pdata->d_outbytes; dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; + dreq.data_fmt = g_data_fmt; dreq.state = 0; dreq.has_next = 0; @@ -2355,6 +2495,7 @@ int sec_uadk_benchmark(struct acc_option *options) g_alg = options->subtype; g_optype = options->optype; g_algtype = options->algtype; + g_data_fmt = options->data_fmt; if (g_alg == AEAD_TYPE) { g_maclen = get_aead_mac_len(g_algtype); diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 24737c5..f57c4f9 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -370,6 +370,15 @@ void segmentfault_handler(int sig) exit(1); } +void memset_buf(void *buf, size_t sz) +{ + char *ch = (char *)buf; + size_t i; + + for (i = 0; i < sz; i++) + ch[i] = 0; +} + /*-------------------------------------main code------------------------------------------------------*/ static void parse_alg_param(struct acc_option *option) { @@ -741,6 +750,7 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, {"memory", required_argument, 0, 19}, + {"sgl", no_argument, 0, 20}, {0, 0, 0, 0} }; @@ -815,6 +825,9 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option 
*option) case 19: option->mem_type = strtol(optarg, NULL, 0); break; + case 20: + option->data_fmt = WD_SGL_BUF; + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 81ace1b..83fd7fa 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -37,6 +37,7 @@ #define SEND_USLEEP 100 #define SEC_2_USEC 1000000 #define HASH_ZISE 16 +#define SGL_ALIGNED_BYTES 64 #define SCHED_SINGLE "sched_single" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -82,6 +83,7 @@ struct acc_option { u32 sched_type; int task_type; int mem_type; + u32 data_fmt; }; enum uadk_mem_mode { @@ -230,6 +232,7 @@ extern u32 get_recv_time(void); extern void cal_avg_latency(u32 count); extern int get_alg_name(int alg, char *alg_name); extern void segmentfault_handler(int sig); +extern void memset_buf(void *buf, size_t sz); int uadk_parse_dev_id(char *dev_name); int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index fc81c2b..092f710 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 */ #include <numa.h> +#include <math.h> #include "uadk_benchmark.h" #include "zip_uadk_benchmark.h" @@ -84,6 +85,7 @@ static unsigned int g_pktlen; static unsigned int g_prefetch; static unsigned int g_state; static unsigned int g_dev_id; +static unsigned int g_data_fmt; #ifndef ZLIB_FSE static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level) @@ -175,8 +177,15 @@ fd_error: static int load_file_data(const char *alg, u32 pkg_len, u32 optype) { struct zip_file_head *fhead = NULL; + struct wd_datalist *src_curr = NULL; + struct wd_datalist *dst_curr = NULL; char file_path[PATH_SIZE]; + size_t total_read = 0; + size_t total_len = 0; + size_t remaining = 0; ssize_t size = 0xff; + size_t copied = 0; + size_t len = 0; int i, j, fd; int ret; @@ -216,30 +225,63 @@ static int load_file_data(const char *alg, u32 pkg_len, u32 optype) // read data for one buffer one buffer from file line for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - memset(g_zip_pool.pool[0].bds[j].src, 0x0, - g_zip_pool.pool[0].bds[j].src_len); - if (size != 0) { // zero size buffer no need to read; - size = read(fd, g_zip_pool.pool[0].bds[j].src, - fhead->blk_sz[j]); - if (size < 0) { - ZIP_TST_PRT("Decompress read data error size: %lu!\n", size); - ret = -EINVAL; - goto read_err; - } else if (size == 0) { - ZIP_TST_PRT("Read file to the end!"); + if (g_data_fmt == 0) { + if (fhead->blk_sz[j] != 0) { + size = read(fd, g_zip_pool.pool[0].bds[j].src, fhead->blk_sz[j]); + if (size < 0) { + ZIP_TST_PRT("Decompress read data error size: %ld!\n", size); + ret = -EINVAL; + goto read_err; + } + g_zip_pool.pool[0].bds[j].src_len = size; + } else { + g_zip_pool.pool[0].bds[j].src_len = 0; } + } else { + src_curr = (struct wd_datalist *)g_zip_pool.pool[0].bds[j].src; + remaining = fhead->blk_sz[j]; + total_read = 0; + while (src_curr && remaining > 0) { + len = fmin(remaining, src_curr->len); + size = read(fd, src_curr->data, len); + if (size < 0) { + ZIP_TST_PRT("Decompress read data error at block %d!\n", j); + ret = -EINVAL; + goto read_err; + } + total_read += size; + remaining -= size; + src_curr = src_curr->next; + } + g_zip_pool.pool[0].bds[j].src_len = total_read; } - g_zip_pool.pool[0].bds[j].src_len = 
size; } for (i = 1; i < g_thread_num; i++) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - if (g_zip_pool.pool[0].bds[j].src_len) + if (g_zip_pool.pool[0].bds[j].src_len == 0) + continue; + + if (g_data_fmt == 0) { memcpy(g_zip_pool.pool[i].bds[j].src, g_zip_pool.pool[0].bds[j].src, g_zip_pool.pool[0].bds[j].src_len); - g_zip_pool.pool[i].bds[j].src_len = - g_zip_pool.pool[0].bds[j].src_len; + g_zip_pool.pool[i].bds[j].src_len = g_zip_pool.pool[0].bds[j].src_len; + } else { + src_curr = (struct wd_datalist *)g_zip_pool.pool[0].bds[j].src; + dst_curr = (struct wd_datalist *)g_zip_pool.pool[i].bds[j].src; + total_len = g_zip_pool.pool[0].bds[j].src_len; + copied = 0; + while (src_curr && dst_curr && copied < total_len) { + len = fmin(src_curr->len, dst_curr->len); + len = fmin(len, total_len - copied); + memcpy(dst_curr->data, src_curr->data, len); + copied += len; + src_curr = src_curr->next; + dst_curr = dst_curr->next; + } + g_zip_pool.pool[i].bds[j].src_len = total_len; + } } } @@ -575,8 +617,98 @@ static void uninit_ctx_config(void) wd_sched_rr_release(g_sched); } +/* + * Calculate SGL unit size. + */ +static inline size_t cal_unit_sz(size_t sz) +{ + return (sz + SGL_ALIGNED_BYTES - 1) & ~(SGL_ALIGNED_BYTES - 1); +} + +/* + * Create SGL or common memory buffer. + */ +static void *create_buf(int sgl, size_t sz, size_t unit_sz) +{ + struct wd_datalist *head, *p, *q; + int i, tail_sz, sgl_num; + void *buf; + + buf = malloc(sz); + if (!buf) { + ZIP_TST_PRT("Fail to allocate buffer %ld size!\n", sz); + return NULL; + } + + memset_buf(buf, sz); + + if (sgl == WD_FLAT_BUF) + return buf; + + if (sz == g_pktlen) { + get_rand_data(buf, sz * COMPRESSION_RATIO_FACTOR); + } else { + if (g_prefetch) + get_rand_data(buf, sz); + } + tail_sz = sz % unit_sz; + sgl_num = sz / unit_sz; /* the number with unit_sz bytes */ + + /* the additional slot is for tail_sz */ + head = calloc(sgl_num + 1, sizeof(struct wd_datalist)); + if (!head) { + ZIP_TST_PRT("Fail to allocate memory for SGL head!\n"); + goto out; + } + + q = NULL; + for (i = 0; i < sgl_num; i++) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = unit_sz; + if (q) + q->next = p; + q = p; + } + + if (tail_sz) { + p = &head[i]; + p->data = buf + i * unit_sz; + p->len = tail_sz; + if (q) + q->next = p; + q = p; + } + + if (q) + q->next = NULL; + + return head; +out: + free(buf); + return NULL; +} + +static void free_buf(int sgl, void *buf) +{ + struct wd_datalist *p; + + if (!buf) + return; + if (sgl == WD_FLAT_BUF) { + free(buf); + return; + } + p = (struct wd_datalist *)buf; + /* free the whole data buffer of SGL */ + free(p->data); + /* free SGL headers */ + free(buf); +} + static int init_uadk_bd_pool(u32 optype) { + int unit_sz; u32 outsize; u32 insize; int i, j; @@ -605,37 +737,40 @@ static int init_uadk_bd_pool(u32 optype) goto malloc_error1; } for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - g_zip_pool.pool[i].bds[j].src = calloc(1, insize); + unit_sz = cal_unit_sz(insize); + g_zip_pool.pool[i].bds[j].src = create_buf(g_data_fmt, insize, unit_sz); if (!g_zip_pool.pool[i].bds[j].src) goto malloc_error2; g_zip_pool.pool[i].bds[j].src_len = insize; - g_zip_pool.pool[i].bds[j].dst = malloc(outsize); + unit_sz = cal_unit_sz(outsize); + g_zip_pool.pool[i].bds[j].dst = create_buf(g_data_fmt, outsize, unit_sz); if (!g_zip_pool.pool[i].bds[j].dst) goto malloc_error3; g_zip_pool.pool[i].bds[j].dst_len = outsize; - get_rand_data(g_zip_pool.pool[i].bds[j].src, - insize * COMPRESSION_RATIO_FACTOR); - if (g_prefetch) - 
get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + if (g_data_fmt == WD_FLAT_BUF) { + get_rand_data(g_zip_pool.pool[i].bds[j].src, insize * COMPRESSION_RATIO_FACTOR); + if (g_prefetch) + get_rand_data(g_zip_pool.pool[i].bds[j].dst, outsize); + } } } return 0; malloc_error3: - free(g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); malloc_error2: for (j--; j >= 0; j--) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } malloc_error1: for (i--; i >= 0; i--) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } free(g_zip_pool.pool[i].bds); g_zip_pool.pool[i].bds = NULL; @@ -654,8 +789,8 @@ static void free_uadk_bd_pool(void) for (i = 0; i < g_thread_num; i++) { if (g_zip_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - free(g_zip_pool.pool[i].bds[j].src); - free(g_zip_pool.pool[i].bds[j].dst); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].src); + free_buf(g_data_fmt, g_zip_pool.pool[i].bds[j].dst); } } free(g_zip_pool.pool[i].bds); @@ -938,7 +1073,7 @@ static void *zip_uadk_blk_lz77_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP); @@ -1048,7 +1183,7 @@ static void *zip_uadk_stm_lz77_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP); @@ -1153,7 +1288,7 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = zip_lz77_async_cb; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.status = 0; while(1) { @@ -1242,7 +1377,7 @@ static void *zip_uadk_blk_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1317,7 +1452,7 @@ static void *zip_uadk_stm_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = NULL; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1397,7 +1532,7 @@ static void *zip_uadk_blk_async_run(void *arg) out_len = uadk_pool->bds[0].dst_len; creq.cb = zip_async_cb; - creq.data_fmt = 0; + creq.data_fmt = g_data_fmt; creq.priv = 0; creq.status = 0; @@ -1634,6 +1769,7 @@ int zip_uadk_benchmark(struct acc_option *options) g_pktlen = options->pktlen; g_ctxnum = options->ctxnums; g_prefetch = options->prefetch; + g_data_fmt = options->data_fmt; if (options->optype >= WD_DIR_MAX * 2) { ZIP_TST_PRT("ZIP optype error: %u\n", options->optype); -- 2.33.0
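[Editor's note] A minimal, self-contained sketch of the SGL scheme the benchmark patch above adds: create_buf() carves one flat allocation into unit_sz slices and chains them through wd_datalist nodes taken from a single calloc()'d array, so free_buf() only needs to free the first node's data pointer plus the node array. The struct below mirrors only the fields the diff relies on (data/len/next) and is illustrative rather than the uadk header; the helper names and the 64-byte alignment stand in for SGL_ALIGNED_BYTES and cal_unit_sz().

#include <stdio.h>
#include <stdlib.h>

struct wd_datalist {
	void *data;
	unsigned int len;
	struct wd_datalist *next;
};

/* round sz up to the next multiple of a power-of-two alignment */
static size_t unit_align(size_t sz, size_t align)
{
	return (sz + align - 1) & ~(align - 1); /* e.g. 100 -> 128 for align 64 */
}

static struct wd_datalist *sgl_from_flat(void *buf, size_t sz, size_t unit_sz)
{
	size_t sgl_num = sz / unit_sz;
	size_t tail_sz = sz % unit_sz;
	struct wd_datalist *head, *q = NULL;
	size_t i;

	/* one extra node slot is reserved for a possible tail slice */
	head = calloc(sgl_num + (tail_sz ? 1 : 0), sizeof(*head));
	if (!head)
		return NULL;

	for (i = 0; i < sgl_num; i++) {
		head[i].data = (char *)buf + i * unit_sz;
		head[i].len = unit_sz;
		if (q)
			q->next = &head[i];
		q = &head[i];
	}
	if (tail_sz) {
		head[i].data = (char *)buf + i * unit_sz;
		head[i].len = tail_sz;
		if (q)
			q->next = &head[i];
	}
	return head; /* calloc() already left the last node's next NULL */
}

int main(void)
{
	size_t sz = 1000, unit = unit_align(64, 64); /* 15 slices + 40-byte tail */
	void *buf = calloc(1, sz);
	struct wd_datalist *p, *sgl;
	size_t total = 0;

	if (!buf)
		return 1;
	sgl = sgl_from_flat(buf, sz, unit);
	if (!sgl) {
		free(buf);
		return 1;
	}
	for (p = sgl; p; p = p->next)
		total += p->len;
	printf("chained %zu bytes in SGL\n", total); /* prints 1000 */

	/* as in free_buf(): node[0].data is the whole flat data buffer */
	free(sgl->data);
	free(sgl);
	return 0;
}

Using one allocation for the data and one for the node array keeps teardown at two free() calls regardless of chain length, which is why free_buf() in the patch can stay so small.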
From: Wenkai Lin <linwenkai6@hisilicon.com> 1. Remove the unused lock from the hashjoin ctx. 2. Avoid a numeric value overflow in the index row number check. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae_join_gather.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drv/hisi_dae_join_gather.c b/drv/hisi_dae_join_gather.c index 9b9c07d..63c7670 100644 --- a/drv/hisi_dae_join_gather.c +++ b/drv/hisi_dae_join_gather.c @@ -86,7 +86,6 @@ struct join_gather_ctx { struct join_gather_col_data cols_data; struct hash_table_data table_data; struct hash_table_data rehash_table; - pthread_spinlock_t lock; __u32 hash_table_row_size; __u32 batch_row_size[DAE_MAX_TABLE_NUM]; }; @@ -365,7 +364,8 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) { struct wd_probe_out_info *output; struct wd_gather_req *greq; - __u64 row_num, size; + __u32 row_num; + __u64 size; if (!msg) { WD_ERR("invalid: input join gather msg is NULL!\n"); @@ -398,10 +398,10 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) return -WD_EINVAL; } if (msg->index_type == WD_BATCH_ADDR_INDEX) { - row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; - if (output->build_index.row_num < row_num) { - WD_ERR("build index row number is less than: %llu\n", - row_num); + row_num = output->build_index.row_num >> DAE_ADDR_INDEX_SHIFT; + if (row_num < msg->req.output_row_num) { + WD_ERR("build index row number(%u) is less than needed\n", + output->build_index.row_num); return -WD_EINVAL; } } @@ -438,10 +438,10 @@ static int check_join_gather_param(struct wd_join_gather_msg *msg) return -WD_EINVAL; } } else { - row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; - if (greq->index.row_num < row_num) { - WD_ERR("build index row number is less than: %llu\n", - row_num); + row_num = greq->index.row_num >> DAE_ADDR_INDEX_SHIFT; + if (row_num < msg->req.output_row_num) { + WD_ERR("build index row number(%u) is less than needed\n", + greq->index.row_num); return -WD_EINVAL; } } @@ -898,7 +898,6 @@ static void join_gather_sess_priv_uninit(struct wd_alg_driver *drv, void *priv) return; } - pthread_spin_destroy(&ctx->lock); free(ctx); } @@ -930,10 +929,6 @@ static int join_gather_sess_priv_init(struct wd_alg_driver *drv, if (ret) goto free_ctx; - ret = pthread_spin_init(&ctx->lock, PTHREAD_PROCESS_SHARED); - if (ret) - goto free_ctx; - *priv = ctx; return WD_SUCCESS; -- 2.33.0
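[Editor's note] A sketch of the overflow-safe comparison the hunks above switch to. Left-shifting the 32-bit requested row count is evaluated in 32 bits and can wrap before the compare, while right-shifting the available count can only shrink it; since floor(avail / 2^S) >= req is exactly equivalent to avail >= req * 2^S for unsigned integers, no precision is lost. SHIFT and the field widths are assumed stand-ins (DAE_ADDR_INDEX_SHIFT's value is not visible in the diff).

#include <stdint.h>
#include <stdio.h>

#define SHIFT 3

/* old pattern: req << SHIFT is computed modulo 2^32 and wraps for large req */
static int check_wraps(uint32_t avail, uint32_t req)
{
	return avail >= (req << SHIFT);
}

/* new pattern: avail >> SHIFT can only shrink, never wrap */
static int check_safe(uint32_t avail, uint32_t req)
{
	return (avail >> SHIFT) >= req;
}

int main(void)
{
	uint32_t avail = 8, req = UINT32_C(1) << 29; /* req << 3 wraps to 0 */

	printf("wrapping check: %d (wrongly passes)\n", check_wraps(avail, req));
	printf("safe check:     %d (correctly fails)\n", check_safe(avail, req));
	return 0;
}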
From: Wenkai Lin <linwenkai6@hisilicon.com> Extract a common function fill_hashagg_data_info. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> Signed-off-by: Zongyu Wu <wuzongyu1@huawei.com> --- drv/hisi_dae.c | 56 +++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index 4f4d13c..49387aa 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -26,7 +26,7 @@ #define DAE_VCHAR_OFFSET_SIZE 2 #define DAE_COL_BIT_NUM 4 #define DAE_AGG_START_COL 16 -#define DAE_HASHAGG_MAX_ROW_NUN 50000 +#define DAE_HASHAGG_MAX_ROW_NUM 50000 /* align size */ #define DAE_CHAR_ALIGN_SIZE 4 @@ -294,23 +294,8 @@ static void fill_hashagg_merge_key_data(struct dae_sqe *sqe, struct dae_ext_sqe } } -static void fill_hashagg_normal_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, - struct hashagg_col_data *cols_data, __u32 agg_cols_num) -{ - struct hw_agg_data *agg_data = cols_data->input_data; - __u32 i; - - for (i = 0; i < agg_cols_num; i++) { - sqe->agg_data_type[i] = agg_data[i].hw_type; - sqe->agg_data_type[i] |= agg_data[i].sum_outtype << DAE_COL_BIT_NUM; - ext_sqe->agg_data_info[i] = agg_data[i].data_info; - } - - sqe->agg_col_bitmap = GENMASK(agg_cols_num + DAE_AGG_START_COL - 1, DAE_AGG_START_COL); -} - -static void fill_hashagg_rehash_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, - struct hw_agg_data *agg_data, __u32 agg_cols_num) +static void fill_hashagg_data_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct hw_agg_data *agg_data, __u32 agg_cols_num) { __u32 i; @@ -344,14 +329,14 @@ static void fill_hashagg_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext hw_agg_addr = &addr_list->input_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.agg_cols; agg_col_num = msg->agg_cols_num; - fill_hashagg_normal_info(sqe, ext_sqe, cols_data, agg_col_num); + fill_hashagg_data_info(sqe, ext_sqe, agg_data, agg_col_num); break; case WD_AGG_REHASH_INPUT: agg_data = cols_data->output_data; hw_agg_addr = &addr_list->input_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.agg_cols; agg_col_num = cols_data->output_num; - fill_hashagg_rehash_info(sqe, ext_sqe, agg_data, agg_col_num); + fill_hashagg_data_info(sqe, ext_sqe, agg_data, agg_col_num); break; case WD_AGG_STREAM_OUTPUT: case WD_AGG_REHASH_OUTPUT: @@ -359,7 +344,7 @@ static void fill_hashagg_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext hw_agg_addr = &addr_list->output_addr[DAE_AGG_START_COL]; usr_agg_addr = msg->req.out_agg_cols; agg_col_num = cols_data->output_num; - fill_hashagg_normal_info(sqe, ext_sqe, cols_data, cols_data->input_num); + fill_hashagg_data_info(sqe, ext_sqe, cols_data->input_data, cols_data->input_num); break; } @@ -385,7 +370,7 @@ static void fill_hashagg_merge_input_data(struct dae_sqe *sqe, struct dae_ext_sq struct hashagg_ctx *agg_ctx = msg->priv; struct hashagg_col_data *cols_data = &agg_ctx->cols_data; - fill_hashagg_rehash_info(sqe, ext_sqe, cols_data->output_data, msg->agg_cols_num); + fill_hashagg_data_info(sqe, ext_sqe, cols_data->output_data, msg->agg_cols_num); } static void fill_hashagg_ext_addr(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, @@ -422,26 +407,15 @@ static void fill_hashagg_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, static int check_hashagg_param(struct wd_agg_msg *msg) { - struct hashagg_col_data *cols_data; - struct hashagg_ctx *agg_ctx; - if (!msg) { WD_ERR("invalid: input hashagg msg is NULL!\n"); return -WD_EINVAL; } - agg_ctx = msg->priv; - cols_data = 
&agg_ctx->cols_data; - if (cols_data->output_num > DAE_MAX_OUTPUT_COLS) { - WD_ERR("invalid: input hashagg output num %u is more than %d!\n", - cols_data->output_num, DAE_MAX_OUTPUT_COLS); - return -WD_EINVAL; - } - if ((msg->pos == WD_AGG_STREAM_INPUT || msg->pos == WD_AGG_REHASH_INPUT) && - msg->row_count > DAE_HASHAGG_MAX_ROW_NUN) { + msg->row_count > DAE_HASHAGG_MAX_ROW_NUM) { WD_ERR("invalid: input hashagg row count %u is more than %d!\n", - msg->row_count, DAE_HASHAGG_MAX_ROW_NUN); + msg->row_count, DAE_HASHAGG_MAX_ROW_NUM); return -WD_EINVAL; } @@ -1005,6 +979,7 @@ static int transfer_input_col_info(struct wd_agg_col_info *agg_cols, struct hw_agg_data *user_output_data, __u32 cols_num, __u32 *output_num) { + __u32 tmp = *output_num; __u32 i, j, k = 0; int ret; @@ -1013,7 +988,15 @@ static int transfer_input_col_info(struct wd_agg_col_info *agg_cols, WD_ERR("invalid: col alg num(%u) more than 2!\n", agg_cols[i].col_alg_num); return -WD_EINVAL; } + tmp += agg_cols[i].col_alg_num; + } + if (tmp > DAE_MAX_OUTPUT_COLS) { + WD_ERR("invalid: output col num is more than %d!\n", DAE_MAX_OUTPUT_COLS); + return -WD_EINVAL; + } + + for (i = 0; i < cols_num; i++) { for (j = 0; j < agg_cols[i].col_alg_num; j++) { ret = hashagg_check_input_data(&agg_cols[i], &user_input_data[i], &user_output_data[k], j); @@ -1137,6 +1120,9 @@ static int transfer_data_to_hw_type(struct hashagg_col_data *cols_data, struct wd_agg_col_info *agg_cols = setup->agg_cols_info; int ret; + if (setup->is_count_all) + cols_data->output_num++; + ret = transfer_input_col_info(agg_cols, user_input_data, user_output_data, setup->agg_cols_num, &cols_data->output_num); if (ret) -- 2.33.0
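[Editor's note] A minimal sketch of the validate-before-fill pattern the transfer_input_col_info() hunk above moves to: a first pass sums the per-column output slots and rejects the request before the second pass commits anything, so a too-large configuration can no longer leave partially written state. MAX_OUT, the struct, and the function names are illustrative stand-ins for DAE_MAX_OUTPUT_COLS and the driver's column bookkeeping; the pre-seeded count models the is_count_all slot reserved in transfer_data_to_hw_type().

#include <stdio.h>

#define MAX_OUT 16

struct col_info {
	unsigned int col_alg_num; /* output slots produced by this input column */
};

static int transfer_cols(const struct col_info *cols, unsigned int num,
			 unsigned int *out_num)
{
	unsigned int total = *out_num; /* may already include a count-all slot */
	unsigned int i;

	/* pass 1: bound-check the grand total before touching any state */
	for (i = 0; i < num; i++) {
		if (cols[i].col_alg_num > 2)
			return -1;
		total += cols[i].col_alg_num;
	}
	if (total > MAX_OUT)
		return -1;

	/* pass 2: now safe to commit the per-column output slots */
	for (i = 0; i < num; i++)
		*out_num += cols[i].col_alg_num;
	return 0;
}

int main(void)
{
	struct col_info cols[] = { {2}, {1}, {2} };
	unsigned int out = 1; /* models is_count_all reserving one column */

	if (!transfer_cols(cols, 3, &out))
		printf("output columns: %u\n", out); /* prints 6 */
	return 0;
}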