[PATCH 0/9] bugfix for uadk

Qi Tao

23 Jul 2024 23 Jul '24

7:18 p.m.

*** BLURB HERE *** Longfang Liu (2): uadk_tools: add segfault locating function uadk: bugfix CE driver initialization problem Wenkai Lin (5): uadk/v1: fix for atomic memory order uadk: replace wd_lock to pthread_spinlock uadk/v1: fix for wd_lock implementation uadk: fix for env uninit segment fault uadk/v1: replace wd_spinlock to pthread_spin_lock Yang Shen (1): uadk/v1/drv: hisi_zip_udrv - fix the wrong literal buffer size Zhiqi Song (1): uadk/v1/hpre: remove redundant comments drv/hash_mb/hash_mb.c | 4 ++ drv/isa_ce_sm3.c | 6 +- drv/isa_ce_sm4.c | 4 ++ uadk_tool/benchmark/hpre_uadk_benchmark.c | 1 + uadk_tool/benchmark/hpre_wd_benchmark.c | 1 + uadk_tool/benchmark/sec_soft_benchmark.c | 1 + uadk_tool/benchmark/sec_uadk_benchmark.c | 1 + uadk_tool/benchmark/sec_wd_benchmark.c | 1 + uadk_tool/benchmark/trng_wd_benchmark.c | 1 + uadk_tool/benchmark/uadk_benchmark.c | 15 +++++ uadk_tool/benchmark/uadk_benchmark.h | 3 + uadk_tool/benchmark/zip_uadk_benchmark.c | 1 + uadk_tool/benchmark/zip_wd_benchmark.c | 1 + v1/drv/hisi_hpre_udrv.c | 14 +---- v1/drv/hisi_qm_udrv.c | 51 +++++++++++----- v1/drv/hisi_qm_udrv.h | 4 +- v1/drv/hisi_rng_udrv.c | 25 +++++--- v1/drv/hisi_rng_udrv.h | 2 +- v1/drv/hisi_zip_udrv.c | 14 ++--- v1/wd_util.c | 17 ++++-- wd_mempool.c | 74 +++++++++++------------ wd_util.c | 1 + 22 files changed, 154 insertions(+), 88 deletions(-) -- 2.33.0

Show replies by date

Qi Tao

23 Jul 23 Jul

7:18 p.m.

New subject: [PATCH 1/9] uadk/v1/hpre: remove redundant comments

From: Zhiqi Song <songzhiqi1@huawei.com> Remove redundant comments, as the API name clearly describes the function. And fix code wrap. Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com> --- v1/drv/hisi_hpre_udrv.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/v1/drv/hisi_hpre_udrv.c b/v1/drv/hisi_hpre_udrv.c index 84ec710..05518ab 100644 --- a/v1/drv/hisi_hpre_udrv.c +++ b/v1/drv/hisi_hpre_udrv.c @@ -490,12 +490,10 @@ int qm_fill_rsa_sqe(void *message, struct qm_queue_info *info, __u16 i) return -WD_EINVAL; hw_msg->task_len1 = msg->key_bytes / BYTE_BITS - 0x1; - /* prepare rsa key */ ret = qm_rsa_prepare_key(msg, q, hw_msg, &va, &size); if (unlikely(ret)) return ret; - /* prepare in/out put */ ret = qm_rsa_prepare_iot(msg, q, hw_msg); if (unlikely(ret)) { rsa_key_unmap(msg, q, hw_msg, va, size); @@ -576,13 +574,11 @@ static int fill_dh_g_param(struct wd_queue *q, struct wcrypto_dh_msg *msg, int ret; ret = qm_crypto_bin_to_hpre_bin((char *)msg->g, - (const char *)msg->g, msg->key_bytes, - msg->gbytes, "dh g"); + (const char *)msg->g, msg->key_bytes, msg->gbytes, "dh g"); if (unlikely(ret)) return ret; - phy = (uintptr_t)drv_iova_map(q, (void *)msg->g, - msg->key_bytes); + phy = (uintptr_t)drv_iova_map(q, (void *)msg->g, msg->key_bytes); if (unlikely(!phy)) { WD_ERR("Get dh g parameter dma address fail!\n"); return -WD_ENOMEM; @@ -1338,8 +1334,7 @@ static int qm_ecc_prepare_in(struct wcrypto_ecc_msg *msg, hw_msg->bd_rsv2 = 1; /* fall through */ case WCRYPTO_ECXDH_GEN_KEY: /* fall through */ case WCRYPTO_SM2_KG: - ret = ecc_prepare_dh_gen_in((void *)in, - data); + ret = ecc_prepare_dh_gen_in((void *)in, data); break; case WCRYPTO_ECXDH_COMPUTE_KEY: /* @@ -1667,17 +1662,14 @@ static int qm_fill_ecc_sqe_general(void *message, struct qm_queue_info *info, memset(hw_msg, 0, sizeof(struct hisi_hpre_sqe)); hw_msg->task_len1 = ((msg->key_bytes) >> BYTE_BITS_SHIFT) - 0x1; - /* prepare algorithm */ ret = qm_ecc_prepare_alg(hw_msg, msg); if 
(unlikely(ret)) return ret; - /* prepare key */ ret = qm_ecc_prepare_key(msg, q, hw_msg, &va, &size); if (unlikely(ret)) return ret; - /* prepare in/out put */ ret = qm_ecc_prepare_iot(msg, q, hw_msg); if (unlikely(ret)) goto map_key_fail; -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 2/9] uadk/v1: fix for atomic memory order

From: Wenkai Lin <linwenkai6@hisilicon.com> If an atomic operation uses an improper memory order, errors can occur when multiple threads are running. The load operation should use __ATOMIC_ACQUIRE so that no memory access of the current thread can be reordered before it, and so that it observes the release store performed by other threads. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- v1/wd_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/wd_util.c b/v1/wd_util.c index d441805..f44da99 100644 --- a/v1/wd_util.c +++ b/v1/wd_util.c @@ -25,7 +25,7 @@ void wd_spinlock(struct wd_lock *lock) { while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) - while (__atomic_load_n(&lock->lock, __ATOMIC_RELAXED)) + while (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE)) ; } -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 3/9] uadk: replace wd_lock to pthread_spinlock

From: Wenkai Lin <linwenkai6@hisilicon.com> pthread_spinlock was proved to be more reliable than self-implemented lock, so we replaced wd_lock. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- wd_mempool.c | 74 +++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/wd_mempool.c b/wd_mempool.c index 2d21a0d..22db843 100644 --- a/wd_mempool.c +++ b/wd_mempool.c @@ -36,21 +36,6 @@ #define WD_HUNDRED 100 #define PAGE_SIZE_OFFSET 10 -struct wd_lock { - __u32 lock; -}; - -static inline void wd_spinlock(struct wd_lock *lock) -{ - while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) - while (__atomic_load_n(&lock->lock, __ATOMIC_RELAXED)); -} - -static inline void wd_unspinlock(struct wd_lock *lock) -{ - __atomic_clear(&lock->lock, __ATOMIC_RELEASE); -} - struct wd_ref { __u32 ref; }; @@ -127,7 +112,7 @@ struct blkpool { struct mempool *mp; struct memzone_list mz_list; unsigned long free_block_num; - struct wd_lock lock; + pthread_spinlock_t lock; struct wd_ref ref; }; @@ -161,8 +146,7 @@ struct mempool { size_t size; size_t real_size; struct bitmap *bitmap; - /* use self-define lock to avoid to use pthread lib in libwd */ - struct wd_lock lock; + pthread_spinlock_t lock; struct wd_ref ref; struct sys_hugepage_list hp_list; unsigned long free_blk_num; @@ -314,16 +298,16 @@ void *wd_block_alloc(handle_t blkpool) return NULL; } - wd_spinlock(&bp->lock); + pthread_spin_lock(&bp->lock); if (bp->top > 0) { bp->top--; bp->free_block_num--; p = bp->blk_elem[bp->top]; - wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); return p; } - wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); wd_atomic_sub(&bp->ref, 1); return NULL; @@ -336,17 +320,17 @@ void wd_block_free(handle_t blkpool, void *addr) if (!bp || !addr) return; - wd_spinlock(&bp->lock); + pthread_spin_lock(&bp->lock); if (bp->top < bp->depth) { bp->blk_elem[bp->top] = addr; bp->top++; bp->free_block_num++; - 
wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); wd_atomic_sub(&bp->ref, 1); return; } - wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); } static int alloc_memzone(struct blkpool *bp, void *addr, size_t blk_num, @@ -392,9 +376,9 @@ static void free_mem_to_mempool(struct blkpool *bp) { struct mempool *mp = bp->mp; - wd_spinlock(&mp->lock); + pthread_spin_lock(&mp->lock); free_mem_to_mempool_nolock(bp); - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); } static int check_mempool_real_size(struct mempool *mp, struct blkpool *bp) @@ -455,7 +439,7 @@ static int alloc_mem_multi_in_one(struct mempool *mp, struct blkpool *bp) int ret = -WD_ENOMEM; int pos = 0; - wd_spinlock(&mp->lock); + pthread_spin_lock(&mp->lock); if (check_mempool_real_size(mp, bp)) goto err_check_size; @@ -471,13 +455,13 @@ static int alloc_mem_multi_in_one(struct mempool *mp, struct blkpool *bp) pos = ret; } - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); return 0; err_free_memzone: free_mem_to_mempool_nolock(bp); err_check_size: - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); return ret; } @@ -493,7 +477,7 @@ static int alloc_mem_one_need_multi(struct mempool *mp, struct blkpool *bp) int ret = -WD_ENOMEM; int pos = 0; - wd_spinlock(&mp->lock); + pthread_spin_lock(&mp->lock); if (check_mempool_real_size(mp, bp)) goto err_check_size; @@ -509,13 +493,13 @@ static int alloc_mem_one_need_multi(struct mempool *mp, struct blkpool *bp) mp->real_size -= mp->blk_size * mem_combined_num; } - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); return 0; err_free_memzone: free_mem_to_mempool_nolock(bp); err_check_size: - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); return ret; } @@ -576,10 +560,13 @@ handle_t wd_blockpool_create(handle_t mempool, size_t block_size, bp->blk_size = block_size; bp->free_block_num = block_num; bp->mp = mp; + ret = pthread_spin_init(&bp->lock, PTHREAD_PROCESS_PRIVATE); + if (ret < 0) + goto 
err_free_bp; ret = alloc_mem_from_mempool(mp, bp); if (ret < 0) - goto err_free_bp; + goto err_uninit_lock; ret = init_blkpool_elem(bp); if (ret < 0) @@ -590,6 +577,8 @@ handle_t wd_blockpool_create(handle_t mempool, size_t block_size, err_free_mem: free_mem_to_mempool(bp); +err_uninit_lock: + pthread_spin_destroy(&bp->lock); err_free_bp: free(bp); err_sub_ref: @@ -613,6 +602,7 @@ void wd_blockpool_destroy(handle_t blkpool) sched_yield(); free_mem_to_mempool(bp); + pthread_spin_destroy(&bp->lock); free(bp->blk_elem); free(bp); wd_atomic_sub(&mp->ref, 1); @@ -919,10 +909,13 @@ handle_t wd_mempool_create(size_t size, int node) mp->node = node; mp->size = tmp; mp->blk_size = WD_MEMPOOL_BLOCK_SIZE; + ret = pthread_spin_init(&mp->lock, PTHREAD_PROCESS_PRIVATE); + if (ret < 0) + goto free_pool; ret = alloc_mem_from_hugepage(mp); if (ret < 0) - goto free_pool; + goto uninit_lock; ret = init_mempool(mp); if (ret < 0) @@ -933,6 +926,8 @@ handle_t wd_mempool_create(size_t size, int node) free_pool_memory: free_hugepage_mem(mp); +uninit_lock: + pthread_spin_destroy(&mp->lock); free_pool: free(mp); return (handle_t)(-WD_ENOMEM); @@ -951,6 +946,7 @@ void wd_mempool_destroy(handle_t mempool) while(wd_atomic_load(&mp->ref)); uninit_mempool(mp); free_hugepage_mem(mp); + pthread_spin_destroy(&mp->lock); free(mp); } @@ -968,7 +964,7 @@ void wd_mempool_stats(handle_t mempool, struct wd_mempool_stats *stats) return; } - wd_spinlock(&mp->lock); + pthread_spin_lock(&mp->lock); stats->page_type = mp->page_type; stats->page_size = mp->page_size; @@ -979,7 +975,7 @@ void wd_mempool_stats(handle_t mempool, struct wd_mempool_stats *stats) stats->blk_usage_rate = (stats->blk_num - mp->free_blk_num) / stats->blk_num * WD_HUNDRED; - wd_unspinlock(&mp->lock); + pthread_spin_unlock(&mp->lock); } void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) @@ -993,7 +989,7 @@ void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) return; } - 
wd_spinlock(&bp->lock); + pthread_spin_lock(&bp->lock); stats->block_size = bp->blk_size; stats->block_num = bp->depth; @@ -1006,12 +1002,12 @@ void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) if (!size) { WD_ERR("invalid: blkpool size is zero!\n"); - wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); return; } stats->mem_waste_rate = (size - bp->blk_size * bp->depth) / size * WD_HUNDRED; - wd_unspinlock(&bp->lock); + pthread_spin_unlock(&bp->lock); } -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 4/9] uadk/v1: fix for wd_lock implementation

From: Wenkai Lin <linwenkai6@hisilicon.com> struct wd_lock has synchronization problems. Therefore, modify wd_spinlock by referring to the implementation of pthread_spin_lock. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- v1/wd_util.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/v1/wd_util.c b/v1/wd_util.c index f44da99..29f6579 100644 --- a/v1/wd_util.c +++ b/v1/wd_util.c @@ -24,14 +24,23 @@ void wd_spinlock(struct wd_lock *lock) { - while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) - while (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE)) - ; + int val = 0; + + if (__atomic_compare_exchange_n(&lock->lock, &val, 1, 1, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + return; + + do { + do { + val = __atomic_load_n(&lock->lock, __ATOMIC_RELAXED); + } while (val != 0); + } while (!__atomic_compare_exchange_n(&lock->lock, &val, 1, 1, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); } void wd_unspinlock(struct wd_lock *lock) { - __atomic_clear(&lock->lock, __ATOMIC_RELEASE); + __atomic_store_n(&lock->lock, 0, __ATOMIC_RELEASE); } void *drv_iova_map(struct wd_queue *q, void *va, size_t sz) -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 5/9] uadk: fix for env uninit segment fault

From: Wenkai Lin <linwenkai6@hisilicon.com> config->ctx_config should not remain set if init fails. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- wd_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/wd_util.c b/wd_util.c index 99be973..76548c9 100644 --- a/wd_util.c +++ b/wd_util.c @@ -1212,6 +1212,7 @@ err_free_ctxs: free(ctx_config->ctxs); err_free_ctx_config: free(ctx_config); + config->ctx_config = NULL; return ret; } -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 6/9] uadk/v1/drv: hisi_zip_udrv - fix the wrong literal buffer size

From: Yang Shen <shenyang39@huawei.com> The driver reserves 16 more bytes for the literal output buffer needed by hardware, but it forgets to add this offset to the beginning of the sequence buffer. As a result, the literal and sequence buffers overlap by 16 bytes, and in some cases the sequence data will be overwritten. Signed-off-by: Yang Shen <shenyang39@huawei.com> --- v1/drv/hisi_zip_udrv.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index 9c9694b..cc55ef5 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -177,13 +177,11 @@ int qm_fill_zip_sqe(void *smsg, struct qm_queue_info *info, __u16 i) return -WD_EINVAL; } - if (unlikely(msg->data_fmt != WD_SGL_BUF && - msg->in_size > MAX_BUFFER_SIZE)) { + if (unlikely(msg->data_fmt != WD_SGL_BUF && msg->in_size > MAX_BUFFER_SIZE)) { WD_ERR("The in_len is out of range in_len(%u)!\n", msg->in_size); return -WD_EINVAL; } - if (unlikely(msg->data_fmt != WD_SGL_BUF && - msg->avail_out > MAX_BUFFER_SIZE)) { + if (unlikely(msg->data_fmt != WD_SGL_BUF && msg->avail_out > MAX_BUFFER_SIZE)) { WD_ERR("warning: avail_out is out of range (%u), will set 8MB size max!\n", msg->avail_out); msg->avail_out = MAX_BUFFER_SIZE; @@ -500,8 +498,10 @@ static int fill_zip_addr_lz77_zstd(void *ssqe, } else { sqe->cipher_key_addr_l = lower_32_bits((__u64)addr.dest_addr); sqe->cipher_key_addr_h = upper_32_bits((__u64)addr.dest_addr); - sqe->dest_addr_l = lower_32_bits((__u64)addr.dest_addr + msg->in_size); - sqe->dest_addr_h = upper_32_bits((__u64)addr.dest_addr + msg->in_size); + sqe->dest_addr_l = lower_32_bits((__u64)addr.dest_addr + + msg->in_size + ZSTD_LIT_RSV_SIZE); + sqe->dest_addr_h = upper_32_bits((__u64)addr.dest_addr + + msg->in_size + ZSTD_LIT_RSV_SIZE); } sqe->stream_ctx_addr_l = lower_32_bits((__u64)addr.ctxbuf_addr); @@ -671,7 +671,7 @@ static void fill_priv_lz77_zstd(void *ssqe, struct wcrypto_comp_msg *recv_msg) format->sequences_start = 
zstd_out->sequence; } else { format->literals_start = recv_msg->dst; - format->sequences_start = recv_msg->dst + recv_msg->in_size; + format->sequences_start = recv_msg->dst + recv_msg->in_size + ZSTD_LIT_RSV_SIZE; format->freq = (void *)(&format->lit_length_overflow_pos + 1); } -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 7/9] uadk/v1: replace wd_spinlock to pthread_spin_lock

From: Wenkai Lin <linwenkai6@hisilicon.com> Due to memory differences, using wd_spinlock may cause synchronization problems, it is better to use the standard pthread spin lock of glibc. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- v1/drv/hisi_qm_udrv.c | 51 +++++++++++++++++++++++++++++------------- v1/drv/hisi_qm_udrv.h | 4 ++-- v1/drv/hisi_rng_udrv.c | 25 +++++++++++++++------ v1/drv/hisi_rng_udrv.h | 2 +- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/v1/drv/hisi_qm_udrv.c b/v1/drv/hisi_qm_udrv.c index 175a5c4..1d4f1d8 100644 --- a/v1/drv/hisi_qm_udrv.c +++ b/v1/drv/hisi_qm_udrv.c @@ -20,6 +20,7 @@ #include <sys/mman.h> #include <string.h> #include <stdint.h> +#include <pthread.h> #include <sys/ioctl.h> #include <sys/epoll.h> #include <sys/eventfd.h> @@ -458,6 +459,11 @@ static int qm_init_queue_info(struct wd_queue *q) struct hisi_qp_ctx qp_ctx = {0}; int ret; + if (!info->sqe_size) { + WD_ERR("invalid: sqe size is 0!\n"); + return -WD_EINVAL; + } + info->sq_tail_index = 0; info->cq_head_index = 0; info->cqc_phase = 1; @@ -502,11 +508,6 @@ static int qm_set_queue_info(struct wd_queue *q) ret = qm_set_queue_regions(q); if (ret) return -WD_EINVAL; - if (!info->sqe_size) { - WD_ERR("sqe size =%d err!\n", info->sqe_size); - ret = -WD_EINVAL; - goto err_with_regions; - } info->cq_base = (void *)((uintptr_t)info->sq_base + info->sqe_size * info->sq_depth); @@ -534,8 +535,24 @@ static int qm_set_queue_info(struct wd_queue *q) goto err_with_regions; } + ret = pthread_spin_init(&info->sd_lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { + WD_ERR("failed to init qinfo sd_lock!\n"); + goto free_cache; + } + + ret = pthread_spin_init(&info->rc_lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { + WD_ERR("failed to init qinfo rc_lock!\n"); + goto uninit_lock; + } + return 0; +uninit_lock: + pthread_spin_destroy(&info->sd_lock); +free_cache: + free(info->req_cache); err_with_regions: qm_unset_queue_regions(q); return ret; @@ -576,8 +593,10 @@ void 
qm_uninit_queue(struct wd_queue *q) struct q_info *qinfo = q->qinfo; struct qm_queue_info *info = qinfo->priv; - qm_unset_queue_regions(q); + pthread_spin_destroy(&info->rc_lock); + pthread_spin_destroy(&info->sd_lock); free(info->req_cache); + qm_unset_queue_regions(q); free(qinfo->priv); qinfo->priv = NULL; } @@ -605,10 +624,10 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) int ret; __u32 i; - wd_spinlock(&info->sd_lock); + pthread_spin_lock(&info->sd_lock); if (unlikely((__u32)__atomic_load_n(&info->used, __ATOMIC_RELAXED) > info->sq_depth - num - 1)) { - wd_unspinlock(&info->sd_lock); + pthread_spin_unlock(&info->sd_lock); WD_ERR("queue is full!\n"); return -WD_EBUSY; } @@ -617,7 +636,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) ret = info->sqe_fill[qinfo->atype](req[i], qinfo->priv, info->sq_tail_index); if (unlikely(ret != WD_SUCCESS)) { - wd_unspinlock(&info->sd_lock); + pthread_spin_unlock(&info->sd_lock); WD_ERR("sqe fill error, ret %d!\n", ret); return -WD_EINVAL; } @@ -629,7 +648,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) } ret = qm_tx_update(info, num); - wd_unspinlock(&info->sd_lock); + pthread_spin_unlock(&info->sd_lock); return ret; } @@ -662,9 +681,9 @@ static int check_ds_rx_base(struct qm_queue_info *info, return 0; if (before) { - wd_spinlock(&info->rc_lock); + pthread_spin_lock(&info->rc_lock); qm_rx_from_cache(info, resp, num); - wd_unspinlock(&info->rc_lock); + pthread_spin_unlock(&info->rc_lock); WD_ERR("wd queue hw error happened before qm receive!\n"); } else { WD_ERR("wd queue hw error happened after qm receive!\n"); @@ -705,7 +724,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) if (unlikely(ret)) return ret; - wd_spinlock(&info->rc_lock); + pthread_spin_lock(&info->rc_lock); for (i = 0; i < num; i++) { cqe = info->cq_base + info->cq_head_index * sizeof(struct cqe); if (info->cqc_phase != CQE_PHASE(cqe)) @@ -714,7 +733,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) mb(); 
/* make sure the data is all in memory before read */ sq_head = CQE_SQ_HEAD_INDEX(cqe); if (unlikely(sq_head >= info->sq_depth)) { - wd_unspinlock(&info->rc_lock); + pthread_spin_unlock(&info->rc_lock); WD_ERR("CQE_SQ_HEAD_INDEX(%u) error\n", sq_head); return -WD_EIO; } @@ -726,7 +745,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) if (!ret) { break; } else if (ret < 0) { - wd_unspinlock(&info->rc_lock); + pthread_spin_unlock(&info->rc_lock); WD_ERR("recv sqe error %u\n", sq_head); return ret; } @@ -747,7 +766,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) ret = i; } - wd_unspinlock(&info->rc_lock); + pthread_spin_unlock(&info->rc_lock); return ret; } diff --git a/v1/drv/hisi_qm_udrv.h b/v1/drv/hisi_qm_udrv.h index 4d54cf6..06ac66a 100644 --- a/v1/drv/hisi_qm_udrv.h +++ b/v1/drv/hisi_qm_udrv.h @@ -166,8 +166,8 @@ struct qm_queue_info { qm_sqe_parse sqe_parse[WCRYPTO_MAX_ALG]; hisi_qm_sqe_fill_priv sqe_fill_priv; hisi_qm_sqe_parse_priv sqe_parse_priv; - struct wd_lock sd_lock; - struct wd_lock rc_lock; + pthread_spinlock_t sd_lock; + pthread_spinlock_t rc_lock; struct wd_queue *q; int (*sgl_info)(struct hw_sgl_info *info); int (*sgl_init)(void *pool, struct wd_sgl *sgl); diff --git a/v1/drv/hisi_rng_udrv.c b/v1/drv/hisi_rng_udrv.c index 86a20cb..605ef27 100644 --- a/v1/drv/hisi_rng_udrv.c +++ b/v1/drv/hisi_rng_udrv.c @@ -17,6 +17,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdio.h> +#include <pthread.h> #include <sys/mman.h> #include <string.h> #include <stdint.h> @@ -34,6 +35,7 @@ int rng_init_queue(struct wd_queue *q) { struct q_info *qinfo = q->qinfo; struct rng_queue_info *info; + int ret; info = calloc(1, sizeof(*info)); if (!info) { @@ -41,12 +43,20 @@ int rng_init_queue(struct wd_queue *q) return -ENOMEM; } + ret = pthread_spin_init(&info->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { + free(info); + WD_ERR("failed to init rng qinfo lock!\n"); + return ret; + } + qinfo->priv = info; info->mmio_base = wd_drv_mmap_qfr(q, 
WD_UACCE_QFRT_MMIO, 0); if (info->mmio_base == MAP_FAILED) { info->mmio_base = NULL; - free(qinfo->priv); qinfo->priv = NULL; + pthread_spin_destroy(&info->lock); + free(info); WD_ERR("mmap trng mmio fail\n"); return -ENOMEM; } @@ -63,6 +73,7 @@ void rng_uninit_queue(struct wd_queue *q) free(qinfo->priv); qinfo->priv = NULL; + pthread_spin_destroy(&info->lock); } int rng_send(struct wd_queue *q, void **req, __u32 num) @@ -70,14 +81,14 @@ int rng_send(struct wd_queue *q, void **req, __u32 num) struct q_info *qinfo = q->qinfo; struct rng_queue_info *info = qinfo->priv; - wd_spinlock(&info->lock); + pthread_spin_lock(&info->lock); if (!info->req_cache[info->send_idx]) { info->req_cache[info->send_idx] = req[0]; info->send_idx++; - wd_unspinlock(&info->lock); + pthread_spin_unlock(&info->lock); return 0; } - wd_unspinlock(&info->lock); + pthread_spin_unlock(&info->lock); WD_ERR("queue is full!\n"); return -WD_EBUSY; @@ -128,16 +139,16 @@ int rng_recv(struct wd_queue *q, void **resp, __u32 num) struct wcrypto_cb_tag *tag; __u32 currsize = 0; - wd_spinlock(&info->lock); + pthread_spin_lock(&info->lock); msg = info->req_cache[info->recv_idx]; if (!msg) { - wd_unspinlock(&info->lock); + pthread_spin_unlock(&info->lock); return 0; } info->req_cache[info->recv_idx] = NULL; info->recv_idx++; - wd_unspinlock(&info->lock); + pthread_spin_unlock(&info->lock); tag = (void *)(uintptr_t)msg->usr_tag; if (usr && tag->ctx_id != usr) diff --git a/v1/drv/hisi_rng_udrv.h b/v1/drv/hisi_rng_udrv.h index 56814a4..3efa10e 100644 --- a/v1/drv/hisi_rng_udrv.h +++ b/v1/drv/hisi_rng_udrv.h @@ -29,7 +29,7 @@ struct rng_queue_info { void *req_cache[TRNG_Q_DEPTH]; __u8 send_idx; __u8 recv_idx; - struct wd_lock lock; + pthread_spinlock_t lock; }; int rng_init_queue(struct wd_queue *q); -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 8/9] uadk_tools: add segfault locating function

From: Longfang Liu <liulongfang@huawei.com> When a segfault occurs within a test thread, it is generally difficult to locate the problem. In order to improve the efficiency of problem location, a segmentation fault capture entry is added to each business thread entry. And register a segfault callback handler function. As long as a segfault occurs within the thread, the callback is triggered and the segfault error message is output. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- uadk_tool/benchmark/hpre_uadk_benchmark.c | 1 + uadk_tool/benchmark/hpre_wd_benchmark.c | 1 + uadk_tool/benchmark/sec_soft_benchmark.c | 1 + uadk_tool/benchmark/sec_uadk_benchmark.c | 1 + uadk_tool/benchmark/sec_wd_benchmark.c | 1 + uadk_tool/benchmark/trng_wd_benchmark.c | 1 + uadk_tool/benchmark/uadk_benchmark.c | 15 +++++++++++++++ uadk_tool/benchmark/uadk_benchmark.h | 3 +++ uadk_tool/benchmark/zip_uadk_benchmark.c | 1 + uadk_tool/benchmark/zip_wd_benchmark.c | 1 + 10 files changed, 26 insertions(+) diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c index 0148e56..5dd6a39 100644 --- a/uadk_tool/benchmark/hpre_uadk_benchmark.c +++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c @@ -2706,6 +2706,7 @@ int hpre_uadk_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; g_ctxnum = options->ctxnums; diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c index 5545ad8..0196e62 100644 --- a/uadk_tool/benchmark/hpre_wd_benchmark.c +++ b/uadk_tool/benchmark/hpre_wd_benchmark.c @@ -2564,6 +2564,7 @@ int hpre_wd_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; if (options->optype >= (WCRYPTO_EC_OP_MAX - WCRYPTO_ECDSA_VERIFY)) { diff --git a/uadk_tool/benchmark/sec_soft_benchmark.c b/uadk_tool/benchmark/sec_soft_benchmark.c index 84dab63..8fa523c 
100644 --- a/uadk_tool/benchmark/sec_soft_benchmark.c +++ b/uadk_tool/benchmark/sec_soft_benchmark.c @@ -1277,6 +1277,7 @@ int sec_soft_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; g_pktlen = options->pktlen; g_jobsnum = options->ctxnums; diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index 56f4fa6..41b7416 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -1777,6 +1777,7 @@ int sec_uadk_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; g_pktlen = options->pktlen; g_ctxnum = options->ctxnums; diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c index bb47d61..e022dcb 100644 --- a/uadk_tool/benchmark/sec_wd_benchmark.c +++ b/uadk_tool/benchmark/sec_wd_benchmark.c @@ -1630,6 +1630,7 @@ int sec_wd_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_alg = options->subtype; g_algtype = options->algtype; g_optype = options->optype; diff --git a/uadk_tool/benchmark/trng_wd_benchmark.c b/uadk_tool/benchmark/trng_wd_benchmark.c index 3ce329a..2f058d4 100644 --- a/uadk_tool/benchmark/trng_wd_benchmark.c +++ b/uadk_tool/benchmark/trng_wd_benchmark.c @@ -312,6 +312,7 @@ int trng_wd_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; ret = init_trng_wd_queue(options); diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 1bf9fee..0f01fdf 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -331,6 +331,21 @@ void cal_avg_latency(u32 count) ACC_TST_PRT("thread<%lu> avg latency: %.1fus\n", gettid(), latency); } +void segmentfault_handler(int sig) +{ +#define BUF_SZ 
64 + void *array[BUF_SZ]; + size_t size; + + /* Get void*'s for all entries on the stack */ + size = backtrace(array, BUF_SZ); + + /* Print out all the frames to stderr */ + fprintf(stderr, "Error: signal %d:\n", sig); + backtrace_symbols_fd(array, size, STDERR_FILENO); + exit(1); +} + /*-------------------------------------main code------------------------------------------------------*/ static void parse_alg_param(struct acc_option *option) { diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index c493ac3..0def4b9 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -4,6 +4,7 @@ #include <ctype.h> #include <errno.h> +#include <execinfo.h> #include <fcntl.h> #include <getopt.h> #include <linux/random.h> @@ -15,6 +16,7 @@ #include <signal.h> #include <sys/syscall.h> #include <sys/time.h> +#include <signal.h> #include <unistd.h> #define ACC_TST_PRT printf @@ -217,6 +219,7 @@ extern void add_send_complete(void); extern u32 get_recv_time(void); extern void cal_avg_latency(u32 count); extern int get_alg_name(int alg, char *alg_name); +extern void segmentfault_handler(int sig); int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); int acc_default_case(struct acc_option *option); diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index ecb688f..22aa916 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -1331,6 +1331,7 @@ int zip_uadk_benchmark(struct acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; g_pktlen = options->pktlen; g_ctxnum = options->ctxnums; diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c index cbe07fc..8ad3e96 100644 --- a/uadk_tool/benchmark/zip_wd_benchmark.c +++ b/uadk_tool/benchmark/zip_wd_benchmark.c @@ -1162,6 +1162,7 @@ int zip_wd_benchmark(struct 
acc_option *options) u32 ptime; int ret; + signal(SIGSEGV, segmentfault_handler); g_thread_num = options->threads; g_pktlen = options->pktlen; -- 2.33.0

Qi Tao

7:18 p.m.

New subject: [PATCH 9/9] uadk: bugfix CE driver initialization problem

From: Longfang Liu <liulongfang@huawei.com> Upstream: Yes DTS:DTS2024061518375 Bugfix or Feature: Bugfix When using the UADK provider, the default business type TASK_MIX causes driver initialization to fail. Analysis found that the CE driver is initialized by fallback, and NULL is passed as the input parameter during initialization. This NULL parameter causes a segmentation fault during CE driver initialization. Therefore, initialization is skipped for NULL parameters in the CE driver. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- drv/hash_mb/hash_mb.c | 4 ++++ drv/isa_ce_sm3.c | 6 +++++- drv/isa_ce_sm4.c | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drv/hash_mb/hash_mb.c b/drv/hash_mb/hash_mb.c index a73c698..e4a9564 100644 --- a/drv/hash_mb/hash_mb.c +++ b/drv/hash_mb/hash_mb.c @@ -192,6 +192,10 @@ static int hash_mb_init(struct wd_alg_driver *drv, void *conf) struct hash_mb_ctx *priv; int ret; + /* Fallback init is NULL */ + if (!drv || !conf) + return 0; + priv = malloc(sizeof(struct hash_mb_ctx)); if (!priv) return -WD_ENOMEM; diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c index 0309861..59f3940 100644 --- a/drv/isa_ce_sm3.c +++ b/drv/isa_ce_sm3.c @@ -375,7 +375,11 @@ static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf) struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf; struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv; - config->epoll_en = false; + /* Fallback init is NULL */ + if (!drv || !conf) + return 0; + + config->epoll_en = 0; /* return if already inited */ if (sctx) diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c index 6961471..e937893 100644 --- a/drv/isa_ce_sm4.c +++ b/drv/isa_ce_sm4.c @@ -36,6 +36,10 @@ static int isa_ce_init(struct wd_alg_driver *drv, void *conf) struct wd_ctx_config_internal *config = conf; struct sm4_ce_drv_ctx *sctx = drv->priv; + /* Fallback init is NULL */ + if (!drv || !conf) + return 0; + config->epoll_en = 0; 
memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal)); -- 2.33.0

344

Age (days ago)

344

Last active (days ago)

List overview

9 comments

1 participants

participants (1)

Qi Tao

[PATCH 0/9] bugfix for uadk

tags

participants (1)