[PATCH 00/22] support new algorithms and add some bugfixes

This patch series adds support for several new algorithms in UADK (lz4 and lz77_only compression, aead, hashjoin/gather, hash agg max/min, and udma data move), removes the legacy shared-memory queue interface, and fixes a number of issues in the drivers, algorithm layers and uadk_tool benchmarks. Chenghai Huang (7): uadk: fix definition coding standard issues uadk: add or remove some store buf condition judgments uadk: add new alg called lz77_only uadk: remove redundant checks on bit read results uadk_tool: modify unrecv num in async benchmark test uadk_tool: add lz77_only alg in zip benchmark test uadk_tool: add lz4 alg in zip benchmark test Longfang Liu (3): uadk: resolve some code issues uadk: removal of the Shared Memory Interface for Queues uadk_tools: Removal of the Shared Memory Interface for Queues Qi Tao (1): uadk_tool: add aead algorithm Qinxin Xia (3): uadk: hisi_comp - abstract get sgl function and general deflate functions uadk: hisi_comp - support the new algorithm 'lz4' uadk: wd_comp - support the new algorithm 'lz4' Weili Qian (1): uadk: support data move Wenkai Lin (2): uadk: fix for rehash invalid size uadk: support hashjoin and gather algorithm Zhushuai Yin (5): uadk: Add max and min operations at the hash algorithm layer uadk: hash agg adapter drv parameter uadk:Add max,min,and rehash implementations uadk:zip algorithm increases buffer len interception uadk:fix dh prov segmentation issue Makefile.am | 31 +- drv/hisi_comp.c | 580 +++++-- drv/hisi_comp_huf.c | 11 +- drv/hisi_dae.c | 827 +++------- drv/hisi_dae.h | 229 +++ drv/hisi_dae_common.c | 387 +++++ drv/hisi_dae_join_gather.c | 1040 ++++++++++++ drv/hisi_qm_udrv.h | 3 +- drv/hisi_udma.c | 566 +++++++ include/drv/wd_agg_drv.h | 10 +- include/drv/wd_join_gather_drv.h | 52 + include/drv/wd_udma_drv.h | 34 + include/wd_agg.h | 9 +- include/wd_alg.h | 4 + include/wd_comp.h | 2 + include/wd_dae.h | 12 + include/wd_join_gather.h | 352 +++++ include/wd_udma.h | 124 ++ include/wd_util.h | 2 + libwd_dae.map | 34 +- uadk_tool/benchmark/sec_uadk_benchmark.c | 34 + uadk_tool/benchmark/sec_wd_benchmark.c | 34 + uadk_tool/benchmark/uadk_benchmark.c | 14 + uadk_tool/benchmark/uadk_benchmark.h | 4 + uadk_tool/benchmark/zip_uadk_benchmark.c | 24 +- v1/drv/hisi_zip_huf.c | 11 +- 
v1/drv/hisi_zip_udrv.c | 2 +- v1/test/hisi_hpre_test/hpre_test_tools.c | 392 ----- v1/test/hisi_zip_test_sgl/wd_sched_sgl.c | 310 ++-- v1/test/test_mm/test_wd_mem.c | 8 +- v1/test/wd_sched.c | 247 +-- v1/uacce.h | 1 - v1/wd.c | 57 +- v1/wd.h | 2 - wd.c | 2 +- wd_agg.c | 75 +- wd_comp.c | 56 +- wd_join_gather.c | 1823 ++++++++++++++++++++++ wd_sched.c | 3 +- wd_udma.c | 511 ++++++ wd_util.c | 34 +- 41 files changed, 6435 insertions(+), 1518 deletions(-) create mode 100644 drv/hisi_dae.h create mode 100644 drv/hisi_dae_common.c create mode 100644 drv/hisi_dae_join_gather.c create mode 100644 drv/hisi_udma.c create mode 100644 include/drv/wd_join_gather_drv.h create mode 100644 include/drv/wd_udma_drv.h create mode 100644 include/wd_join_gather.h create mode 100644 include/wd_udma.h create mode 100644 wd_join_gather.c create mode 100644 wd_udma.c -- 2.33.0

From: Zhushuai Yin <yinzhushuai@huawei.com> The hash agg feature has been enhanced with max and min functions, and external parameters have been added at the algorithm level. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> --- include/wd_agg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/wd_agg.h b/include/wd_agg.h index aac3d406..fed08c50 100644 --- a/include/wd_agg.h +++ b/include/wd_agg.h @@ -20,6 +20,8 @@ extern "C" { enum wd_agg_alg { WD_AGG_SUM, WD_AGG_COUNT, + WD_AGG_MAX, + WD_AGG_MIN, WD_AGG_ALG_TYPE_MAX, }; @@ -141,10 +143,9 @@ struct wd_agg_req { }; /** - * wd_agg_init() - A simplify interface to initializate uadk - * encryption and decryption. This interface keeps most functions of - * wd_agg_init(). Users just need to descripe the deployment of - * business scenarios. Then the initialization will request appropriate + * wd_agg_init() - A simplified interface to initialize uadk hash agg. + * Users just need to describe the deployment of business scenarios. + * Then the initialization will request appropriate * resources to support the business scenarios. * To make the initializate simpler, ctx_params support set NULL. * And then the function will set them as driver's default. -- 2.33.0

From: Zhushuai Yin <yinzhushuai@huawei.com> Add the drv parameter in hash agg sess init and uninit to achieve uniformity. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> --- drv/hisi_dae.c | 10 ++++++---- include/drv/wd_agg_drv.h | 10 ++++++---- wd_agg.c | 12 ++++++------ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index ae91475b..52f7a213 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -1266,7 +1266,7 @@ static int hashagg_fill_agg_ctx(struct hashagg_ctx *agg_ctx, struct wd_agg_sess_ return WD_SUCCESS; } -static void hashagg_sess_priv_uninit(void *priv) +static void hashagg_sess_priv_uninit(struct wd_alg_driver *drv, void *priv) { struct hashagg_ctx *agg_ctx = priv; @@ -1279,7 +1279,8 @@ static void hashagg_sess_priv_uninit(void *priv) free(agg_ctx); } -static int hashagg_sess_priv_init(struct wd_agg_sess_setup *setup, void **priv) +static int hashagg_sess_priv_init(struct wd_alg_driver *drv, + struct wd_agg_sess_setup *setup, void **priv) { struct hashagg_ctx *agg_ctx; int ret; @@ -1365,7 +1366,7 @@ free_ext_addr: return ret; } -static int dae_get_row_size(void *param) +static int dae_get_row_size(struct wd_alg_driver *drv, void *param) { struct hashagg_ctx *agg_ctx = param; @@ -1514,7 +1515,8 @@ static int dae_std_table_init(struct hash_table_data *hw_table, return WD_SUCCESS; } -static int dae_hash_table_init(struct wd_dae_hash_table *hash_table, void *priv) +static int dae_hash_table_init(struct wd_alg_driver *drv, + struct wd_dae_hash_table *hash_table, void *priv) { struct hashagg_ctx *agg_ctx = priv; struct hash_table_data *hw_table; diff --git a/include/drv/wd_agg_drv.h b/include/drv/wd_agg_drv.h index 3d26eeb3..978c2d34 100644 --- a/include/drv/wd_agg_drv.h +++ b/include/drv/wd_agg_drv.h @@ -42,10 +42,12 @@ struct wd_agg_msg { }; struct wd_agg_ops { - int (*get_row_size)(void *priv); - int (*sess_init)(struct wd_agg_sess_setup *setup, void **priv); - void (*sess_uninit)(void *priv); - int 
(*hash_table_init)(struct wd_dae_hash_table *hash_table, void *priv); + int (*get_row_size)(struct wd_alg_driver *drv, void *priv); + int (*sess_init)(struct wd_alg_driver *drv, + struct wd_agg_sess_setup *setup, void **priv); + void (*sess_uninit)(struct wd_alg_driver *drv, void *priv); + int (*hash_table_init)(struct wd_alg_driver *drv, + struct wd_dae_hash_table *hash_table, void *priv); }; struct wd_agg_msg *wd_agg_get_msg(__u32 idx, __u32 tag); diff --git a/wd_agg.c b/wd_agg.c index 615c25b9..8869ab84 100644 --- a/wd_agg.c +++ b/wd_agg.c @@ -360,7 +360,7 @@ static int wd_agg_init_sess_priv(struct wd_agg_sess *sess, struct wd_agg_sess_se WD_ERR("failed to get session uninit ops!\n"); return -WD_EINVAL; } - ret = sess->ops.sess_init(setup, &sess->priv); + ret = sess->ops.sess_init(wd_agg_setting.driver, setup, &sess->priv); if (ret) { WD_ERR("failed to init session priv!\n"); return ret; @@ -368,10 +368,10 @@ static int wd_agg_init_sess_priv(struct wd_agg_sess *sess, struct wd_agg_sess_se } if (sess->ops.get_row_size) { - ret = sess->ops.get_row_size(sess->priv); + ret = sess->ops.get_row_size(wd_agg_setting.driver, sess->priv); if (ret <= 0) { if (sess->ops.sess_uninit) - sess->ops.sess_uninit(sess->priv); + sess->ops.sess_uninit(wd_agg_setting.driver, sess->priv); WD_ERR("failed to get hash table row size: %d!\n", ret); return -WD_EINVAL; } @@ -436,7 +436,7 @@ handle_t wd_agg_alloc_sess(struct wd_agg_sess_setup *setup) uninit_priv: if (sess->ops.sess_uninit) - sess->ops.sess_uninit(sess->priv); + sess->ops.sess_uninit(wd_agg_setting.driver, sess->priv); free_key: free(sess->sched_key); free_sess: @@ -458,7 +458,7 @@ void wd_agg_free_sess(handle_t h_sess) free(sess->key_conf.data_size); if (sess->ops.sess_uninit) - sess->ops.sess_uninit(sess->priv); + sess->ops.sess_uninit(wd_agg_setting.driver, sess->priv); if (sess->sched_key) free(sess->sched_key); @@ -551,7 +551,7 @@ int wd_agg_set_hash_table(handle_t h_sess, struct wd_dae_hash_table *info) 
memcpy(hash_table, info, sizeof(struct wd_dae_hash_table)); if (sess->ops.hash_table_init) { - ret = sess->ops.hash_table_init(hash_table, sess->priv); + ret = sess->ops.hash_table_init(wd_agg_setting.driver, hash_table, sess->priv); if (ret) { memcpy(hash_table, rehash_table, sizeof(struct wd_dae_hash_table)); memset(rehash_table, 0, sizeof(struct wd_dae_hash_table)); -- 2.33.0

From: Zhushuai Yin <yinzhushuai@huawei.com> The hardware has been updated to support the hashagg max/min functionality and the new rehash operation, with the driver layer being adapted accordingly. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> --- drv/hisi_dae.c | 234 +++++++++++++++++++++++++++++++++++++++++---- drv/hisi_qm_udrv.h | 3 +- 2 files changed, 216 insertions(+), 21 deletions(-) diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index 52f7a213..22a780dd 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -18,8 +18,6 @@ #define DAE_CTX_Q_NUM_DEF 1 /* will remove in next version */ -#define DAE_HASHAGG_SUM 0x1 -#define DAE_HASHAGG_COUNT 0x2 #define DAE_HASH_COUNT_ALL 0x1 /* column information */ @@ -60,6 +58,9 @@ #define HASH_TABLE_OFFSET_3ROW 3 #define HASH_TABLE_OFFSET_1ROW 1 +/* hash agg operations col max num */ +#define DAE_AGG_COL_ALG_MAX_NUM 2 + #define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((uintptr_t)(p), (a))) @@ -73,10 +74,14 @@ enum dae_stage { DAE_HASH_AGGREGATE = 0x0, DAE_HASHAGG_OUTPUT = 0x7, + /* new platform rehash new operation */ + DAE_HASHAGG_MERGE = 0x6, }; enum dae_op_type { DAE_COUNT = 0x1, + DAE_MAX = 0x3, + DAE_MIN = 0x4, DAE_SUM = 0x5, }; @@ -127,6 +132,18 @@ enum dae_sum_optype { DECIMAL64_TO_DECIMAL128 = 0x3, }; +enum dae_alg_optype { + DAE_HASHAGG_SUM = 0x1, + DAE_HASHAGG_COUNT = 0x2, + DAE_HASHAGG_MAX = 0x4, + DAE_HASHAGG_MIN = 0x8, +}; + +enum dae_bd_type { + DAE_BD_TYPE_V1 = 0x0, + DAE_BD_TYPE_V2 = 0x1, +}; + struct dae_sqe { __u32 bd_type : 6; __u32 resv1 : 2; @@ -292,7 +309,7 @@ static void put_ext_addr(struct dae_extend_addr *ext_addr, int idx) __atomic_clear(&ext_addr->addr_status[idx], __ATOMIC_RELEASE); } -static void fill_hashagg_task_type(struct wd_agg_msg *msg, struct dae_sqe *sqe) +static void fill_hashagg_task_type(struct wd_agg_msg *msg, struct dae_sqe *sqe, __u16 hw_type) { /* * The variable 'pos' is 
enumeration type, and the case branches @@ -304,9 +321,14 @@ static void fill_hashagg_task_type(struct wd_agg_msg *msg, struct dae_sqe *sqe) sqe->task_type_ext = DAE_HASH_AGGREGATE; break; case WD_AGG_STREAM_OUTPUT: - case WD_AGG_REHASH_OUTPUT: sqe->task_type_ext = DAE_HASHAGG_OUTPUT; break; + case WD_AGG_REHASH_OUTPUT: + if (hw_type >= HISI_QM_API_VER5_BASE) + sqe->task_type_ext = DAE_HASHAGG_MERGE; + else + sqe->task_type_ext = DAE_HASHAGG_OUTPUT; + break; } } @@ -335,8 +357,26 @@ static void fill_hashagg_output_order(struct dae_sqe *sqe, struct dae_ext_sqe *e } } +static void fill_hashagg_merge_output_order(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct wd_agg_msg *msg) +{ + struct hashagg_ctx *agg_ctx = msg->priv; + struct hashagg_col_data *cols_data = &agg_ctx->cols_data; + struct hashagg_output_src *output_src; + __u32 offset = 0; + __u32 i; + + output_src = cols_data->rehash_output; + + for (i = 0; i < cols_data->output_num; i++) { + ext_sqe->out_from_in_idx |= (__u64)output_src[i].out_from_in_idx << offset; + ext_sqe->out_optype |= (__u64)output_src[i].out_optype << offset; + offset += DAE_COL_BIT_NUM; + } +} + static void fill_hashagg_table_data(struct dae_sqe *sqe, struct dae_addr_list *addr_list, - struct wd_agg_msg *msg) + struct wd_agg_msg *msg) { struct hashagg_ctx *agg_ctx = (struct hashagg_ctx *)msg->priv; struct hash_table_data *table_data = &agg_ctx->table_data; @@ -370,6 +410,31 @@ static void fill_hashagg_table_data(struct dae_sqe *sqe, struct dae_addr_list *a hw_table->ext_table_size = table_data->ext_table_size; } +static void fill_hashagg_merge_table_data(struct dae_sqe *sqe, + struct dae_addr_list *addr_list, + struct wd_agg_msg *msg) +{ + struct hashagg_ctx *agg_ctx = (struct hashagg_ctx *)msg->priv; + struct hash_table_data *table_data_src = &agg_ctx->rehash_table; + struct hash_table_data *table_data_dst = &agg_ctx->table_data; + struct dae_table_addr *hw_table_src = &addr_list->src_table; + struct dae_table_addr *hw_table_dst 
= &addr_list->dst_table; + + sqe->table_row_size = agg_ctx->row_size; + sqe->src_table_width = table_data_src->table_width; + sqe->dst_table_width = table_data_dst->table_width; + + hw_table_dst->std_table_addr = (__u64)(uintptr_t)table_data_dst->std_table; + hw_table_dst->std_table_size = table_data_dst->std_table_size; + hw_table_dst->ext_table_addr = (__u64)(uintptr_t)table_data_dst->ext_table; + hw_table_dst->ext_table_size = table_data_dst->ext_table_size; + + hw_table_src->std_table_addr = (__u64)(uintptr_t)table_data_src->std_table; + hw_table_src->std_table_size = table_data_src->std_table_size; + hw_table_src->ext_table_addr = (__u64)(uintptr_t)table_data_src->ext_table; + hw_table_src->ext_table_size = table_data_src->ext_table_size; +} + static void fill_hashagg_key_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, struct dae_addr_list *addr_list, struct wd_agg_msg *msg) { @@ -416,6 +481,21 @@ static void fill_hashagg_key_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_s } } +static void fill_hashagg_merge_key_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct dae_addr_list *addr_list, struct wd_agg_msg *msg) +{ + struct hashagg_ctx *agg_ctx = msg->priv; + struct hw_agg_data *key_data = agg_ctx->cols_data.key_data; + __u32 i; + + sqe->key_col_bitmap = GENMASK(msg->key_cols_num - 1, 0); + + for (i = 0; i < msg->key_cols_num; i++) { + sqe->key_data_type[i] = key_data[i].hw_type; + ext_sqe->key_data_info[i] = key_data[i].data_info; + } +} + static void fill_hashagg_normal_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, struct hashagg_col_data *cols_data, __u32 agg_cols_num) { @@ -501,6 +581,15 @@ static void fill_hashagg_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext } } +static void fill_hashagg_merge_input_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct dae_addr_list *addr_list, struct wd_agg_msg *msg) +{ + struct hashagg_ctx *agg_ctx = msg->priv; + struct hashagg_col_data *cols_data = 
&agg_ctx->cols_data; + + fill_hashagg_rehash_info(sqe, ext_sqe, cols_data->output_data, msg->agg_cols_num); +} + static void fill_hashagg_ext_addr(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, struct dae_addr_list *addr_list) { @@ -511,13 +600,46 @@ static void fill_hashagg_ext_addr(struct dae_sqe *sqe, struct dae_ext_sqe *ext_s addr_list->ext_sqe_size = DAE_EXT_SQE_SIZE; } +static void fill_hashagg_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe, + struct dae_addr_list *addr_list, struct wd_agg_msg *msg, + __u16 hw_type) +{ + fill_hashagg_ext_addr(sqe, ext_sqe, addr_list); + + if (hw_type >= HISI_QM_API_VER5_BASE) + sqe->bd_type = DAE_BD_TYPE_V2; + + if (sqe->task_type_ext == DAE_HASHAGG_MERGE) { + fill_hashagg_merge_table_data(sqe, addr_list, msg); + fill_hashagg_merge_key_data(sqe, ext_sqe, addr_list, msg); + fill_hashagg_merge_input_data(sqe, ext_sqe, addr_list, msg); + fill_hashagg_merge_output_order(sqe, ext_sqe, msg); + } else { + fill_hashagg_table_data(sqe, addr_list, msg); + fill_hashagg_key_data(sqe, ext_sqe, addr_list, msg); + fill_hashagg_input_data(sqe, ext_sqe, addr_list, msg); + fill_hashagg_output_order(sqe, ext_sqe, msg); + } +} + static int check_hashagg_param(struct wd_agg_msg *msg) { + struct hashagg_col_data *cols_data; + struct hashagg_ctx *agg_ctx; + if (!msg) { WD_ERR("invalid: input hashagg msg is NULL!\n"); return -WD_EINVAL; } + agg_ctx = msg->priv; + cols_data = &agg_ctx->cols_data; + if (cols_data->output_num > DAE_MAX_OUTPUT_COLS) { + WD_ERR("invalid: input hashagg output num %u is more than %d!\n", + cols_data->output_num, DAE_MAX_OUTPUT_COLS); + return -WD_EINVAL; + } + if ((msg->pos == WD_AGG_STREAM_INPUT || msg->pos == WD_AGG_REHASH_INPUT) && msg->row_count > DAE_HASHAGG_MAX_ROW_NUN) { WD_ERR("invalid: input hashagg row count %u is more than %d!\n", @@ -544,7 +666,11 @@ static int hashagg_send(struct wd_alg_driver *drv, handle_t ctx, void *hashagg_m if (ret) return ret; - fill_hashagg_task_type(msg, &sqe); + if 
(qp->q_info.hw_type >= HISI_QM_API_VER5_BASE && + qp->q_info.qp_mode == CTX_MODE_SYNC && msg->pos == WD_AGG_REHASH_INPUT) + return WD_SUCCESS; + + fill_hashagg_task_type(msg, &sqe, qp->q_info.hw_type); sqe.row_num = msg->row_count; idx = get_free_ext_addr(ext_addr); @@ -553,11 +679,7 @@ static int hashagg_send(struct wd_alg_driver *drv, handle_t ctx, void *hashagg_m addr_list = &ext_addr->addr_list[idx]; ext_sqe = &ext_addr->ext_sqe[idx]; - fill_hashagg_ext_addr(&sqe, ext_sqe, addr_list); - fill_hashagg_table_data(&sqe, addr_list, msg); - fill_hashagg_key_data(&sqe, ext_sqe, addr_list, msg); - fill_hashagg_input_data(&sqe, ext_sqe, addr_list, msg); - fill_hashagg_output_order(&sqe, ext_sqe, msg); + fill_hashagg_info(&sqe, ext_sqe, addr_list, msg, qp->q_info.hw_type); hisi_set_msg_id(h_qp, &msg->tag); sqe.low_tag = msg->tag; @@ -611,6 +733,9 @@ static void fill_hashagg_msg_task_done(struct dae_sqe *sqe, struct wd_agg_msg *m if (sqe->task_type_ext == DAE_HASHAGG_OUTPUT) { msg->out_row_count = sqe->out_raw_num; msg->output_done = sqe->output_end; + } else if (sqe->task_type_ext == DAE_HASHAGG_MERGE) { + msg->out_row_count = temp_msg->row_count; + msg->output_done = sqe->output_end; } else { msg->in_row_count = temp_msg->row_count; } @@ -673,6 +798,12 @@ static int hashagg_recv(struct wd_alg_driver *drv, handle_t ctx, void *hashagg_m __u16 recv_cnt = 0; int ret; + if (qp->q_info.hw_type >= HISI_QM_API_VER5_BASE && + qp->q_info.qp_mode == CTX_MODE_SYNC && msg->pos == WD_AGG_REHASH_INPUT) { + msg->result = WD_AGG_TASK_DONE; + return WD_SUCCESS; + } + ret = hisi_qm_recv(h_qp, &sqe, 1, &recv_cnt); if (ret) return ret; @@ -768,7 +899,8 @@ static int agg_get_output_num(enum wd_dae_data_type type, return WD_SUCCESS; } -static int agg_output_num_check(struct wd_agg_col_info *agg_cols, __u32 cols_num, bool is_count_all) +static int agg_output_num_check(struct wd_agg_col_info *agg_cols, __u32 cols_num, + bool is_count_all, __u16 hw_type) { __u32 size8 = 0, size16 = 0; __u32 i, 
j, count_num; @@ -786,11 +918,13 @@ static int agg_output_num_check(struct wd_agg_col_info *agg_cols, __u32 cols_num if (is_count_all) size8++; - if (size8 > DAE_MAX_8B_COLS_NUM || size16 > DAE_MAX_16B_COLS_NUM) { + if (hw_type < HISI_QM_API_VER5_BASE && + (size8 > DAE_MAX_8B_COLS_NUM || size16 > DAE_MAX_16B_COLS_NUM)) { WD_ERR("invalid: output col num 8B-16B %u-%u is more than support %d-%d !\n", size8, size16, DAE_MAX_8B_COLS_NUM, DAE_MAX_16B_COLS_NUM); return -WD_EINVAL; } + count_num = size8 + size16; if (count_num > DAE_MAX_OUTPUT_COLS) { WD_ERR("invalid: agg output cols num %u is more than device support %d!\n", @@ -801,7 +935,7 @@ static int agg_output_num_check(struct wd_agg_col_info *agg_cols, __u32 cols_num return WD_SUCCESS; } -static int hashagg_init_param_check(struct wd_agg_sess_setup *setup) +static int hashagg_init_param_check(struct wd_agg_sess_setup *setup, __u16 hw_type) { int ret; @@ -827,7 +961,8 @@ static int hashagg_init_param_check(struct wd_agg_sess_setup *setup) if (ret) return -WD_EINVAL; - return agg_output_num_check(setup->agg_cols_info, setup->agg_cols_num, setup->is_count_all); + return agg_output_num_check(setup->agg_cols_info, setup->agg_cols_num, + setup->is_count_all, hw_type); } static __u32 hashagg_get_data_type_size(enum dae_data_type type, __u16 data_info) @@ -1027,9 +1162,35 @@ static int hashagg_check_count_info(enum wd_dae_data_type input_type, return WD_SUCCESS; } +static int hashagg_check_max_min_info(struct wd_agg_col_info *agg_col, + struct hw_agg_data *user_input_data, + struct hw_agg_data *user_output_data) +{ + switch (agg_col->input_data_type) { + case WD_DAE_LONG: + user_input_data->hw_type = DAE_SINT64; + user_output_data->hw_type = DAE_SINT64; + break; + case WD_DAE_SHORT_DECIMAL: + user_input_data->hw_type = DAE_DECIMAL64; + user_output_data->hw_type = DAE_DECIMAL64; + break; + case WD_DAE_LONG_DECIMAL: + user_input_data->hw_type = DAE_DECIMAL128; + user_output_data->hw_type = DAE_DECIMAL128; + break; + default: 
+ WD_ERR("invalid: device not support col data type %u do max or min!\n", + agg_col->input_data_type); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + static int hashagg_check_input_data(struct wd_agg_col_info *agg_col, - struct hw_agg_data *user_input_data, - struct hw_agg_data *user_output_data, __u32 index) + struct hw_agg_data *user_input_data, + struct hw_agg_data *user_output_data, __u32 index) { int ret; @@ -1050,6 +1211,20 @@ static int hashagg_check_input_data(struct wd_agg_col_info *agg_col, user_output_data->hw_type = DAE_SINT64; user_output_data->optype = DAE_COUNT; break; + case WD_AGG_MAX: + ret = hashagg_check_max_min_info(agg_col, user_input_data, user_output_data); + if (ret) + return ret; + user_input_data->optype |= DAE_HASHAGG_MAX; + user_output_data->optype = DAE_MAX; + break; + case WD_AGG_MIN: + ret = hashagg_check_max_min_info(agg_col, user_input_data, user_output_data); + if (ret) + return ret; + user_input_data->optype |= DAE_HASHAGG_MIN; + user_output_data->optype = DAE_MIN; + break; default: WD_ERR("invalid: device not support alg %u!\n", agg_col->output_col_algs[index]); return -WD_EINVAL; @@ -1068,6 +1243,11 @@ static int transfer_input_col_info(struct wd_agg_col_info *agg_cols, int ret; for (i = 0; i < cols_num; i++) { + if (agg_cols[i].col_alg_num > DAE_AGG_COL_ALG_MAX_NUM) { + WD_ERR("invalid: col alg num(%u) more than 2!\n", agg_cols[i].col_alg_num); + return -WD_EINVAL; + } + for (j = 0; j < agg_cols[i].col_alg_num; j++) { ret = hashagg_check_input_data(&agg_cols[i], &user_input_data[i], &user_output_data[k], j); @@ -1102,8 +1282,8 @@ static void hashagg_swap_out_index(struct hw_agg_data *user_output_data, } static void transfer_input_to_hw_order(struct hashagg_col_data *cols_data, - struct hw_agg_data *user_input_data, - struct hw_agg_data *user_output_data) + struct hw_agg_data *user_input_data, + struct hw_agg_data *user_output_data) { struct hw_agg_data *input_data = cols_data->input_data; __u32 type_num = 
ARRAY_SIZE(hw_data_type_order); @@ -1282,15 +1462,29 @@ static void hashagg_sess_priv_uninit(struct wd_alg_driver *drv, void *priv) static int hashagg_sess_priv_init(struct wd_alg_driver *drv, struct wd_agg_sess_setup *setup, void **priv) { + struct wd_ctx_config_internal *config; + struct hisi_dae_ctx *dae_priv; struct hashagg_ctx *agg_ctx; + struct hisi_qp *qp; + handle_t h_qp; int ret; + if (!drv || !drv->priv) { + WD_ERR("invalid: dae drv is NULL!\n"); + return -WD_EINVAL; + } + if (!setup || !priv) { WD_ERR("invalid: dae sess priv is NULL!\n"); return -WD_EINVAL; } - ret = hashagg_init_param_check(setup); + dae_priv = (struct hisi_dae_ctx *)drv->priv; + config = &dae_priv->config; + h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[0].ctx); + qp = (struct hisi_qp *)h_qp; + + ret = hashagg_init_param_check(setup, qp->q_info.hw_type); if (ret) return -WD_EINVAL; diff --git a/drv/hisi_qm_udrv.h b/drv/hisi_qm_udrv.h index b02e8e7f..e7871932 100644 --- a/drv/hisi_qm_udrv.h +++ b/drv/hisi_qm_udrv.h @@ -41,7 +41,8 @@ enum hisi_qm_sgl_copy_dir { enum hisi_hw_type { HISI_QM_API_VER_BASE = 1, HISI_QM_API_VER2_BASE, - HISI_QM_API_VER3_BASE + HISI_QM_API_VER3_BASE, + HISI_QM_API_VER5_BASE = 5, }; struct hisi_qm_priv { -- 2.33.0

From: Weili Qian <qianweili@huawei.com> UADK supports hardware acceleration for the data move. Currently, data copying and initialization are supported. Signed-off-by: Weili Qian <qianweili@huawei.com> --- Makefile.am | 27 +- drv/hisi_udma.c | 566 ++++++++++++++++++++++++++++++++++++++ include/drv/wd_udma_drv.h | 34 +++ include/wd_alg.h | 2 + include/wd_udma.h | 124 +++++++++ include/wd_util.h | 1 + libwd_dae.map | 15 +- wd_udma.c | 511 ++++++++++++++++++++++++++++++++++ wd_util.c | 2 + 9 files changed, 1278 insertions(+), 4 deletions(-) create mode 100644 drv/hisi_udma.c create mode 100644 include/drv/wd_udma_drv.h create mode 100644 include/wd_udma.h create mode 100644 wd_udma.c diff --git a/Makefile.am b/Makefile.am index c4b9c526..df756f72 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,16 +36,17 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ include/wd_comp.h include/wd_dh.h include/wd_digest.h \ include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ - include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h + include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ + include/wd_udma.h nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h -lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la +lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la libwd_udma.la uadk_driversdir=$(libdir)/uadk uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ - libisa_ce.la libisa_sve.la libhisi_dae.la + libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ @@ -69,6 +70,9 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ v1/drv/hisi_sec_udrv.c v1/drv/hisi_sec_udrv.h \ v1/drv/hisi_rng_udrv.c v1/drv/hisi_rng_udrv.h 
+libwd_udma_la_SOURCES=wd_udma.h wd_udma_drv.h wd_udma.c \ + wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h + libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c \ wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h @@ -110,6 +114,9 @@ endif libhisi_dae_la_SOURCES=drv/hisi_dae.c drv/hisi_qm_udrv.c \ hisi_qm_udrv.h +libhisi_udma_la_SOURCES=drv/hisi_udma.c drv/hisi_qm_udrv.c \ + hisi_qm_udrv.h + if WD_STATIC_DRV AM_CFLAGS += -DWD_STATIC_DRV -fPIC AM_CFLAGS += -DWD_NO_LOG @@ -124,6 +131,9 @@ libhisi_zip_la_LIBADD = -ldl libwd_crypto_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_crypto_la_DEPENDENCIES = libwd.la +libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread +libwd_udma_la_DEPENDENCIES = libwd.la + libwd_dae_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma libwd_dae_la_DEPENDENCIES = libwd.la @@ -139,6 +149,9 @@ libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la libisa_sve_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) libisa_sve_la_DEPENDENCIES = libwd.la libwd_crypto.la +libhisi_udma_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_udma_la_OBJECTS) +libhisi_udma_la_DEPENDENCIES = libwd.la libwd_udma.la + libhisi_dae_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_dae_la_OBJECTS) libhisi_dae_la_DEPENDENCIES = libwd.la libwd_dae.la @@ -160,6 +173,10 @@ libwd_crypto_la_LIBADD= -lwd -ldl -lnuma libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) -lpthread libwd_crypto_la_DEPENDENCIES= libwd.la +libwd_udma_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread +libwd_udma_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) +libwd_udma_la_DEPENDENCIES= libwd.la + libwd_dae_la_LIBADD= -lwd -ldl -lnuma -lm libwd_dae_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) libwd_dae_la_DEPENDENCIES= libwd.la @@ -184,6 +201,10 @@ libisa_sve_la_LIBADD= -lwd -lwd_crypto libisa_sve_la_LDFLAGS=$(UADK_VERSION) libisa_sve_la_DEPENDENCIES= libwd.la libwd_crypto.la +libhisi_udma_la_LIBADD= -lwd -lwd_udma +libhisi_udma_la_LDFLAGS=$(UADK_VERSION) 
+libhisi_udma_la_DEPENDENCIES= libwd.la libwd_udma.la + libhisi_dae_la_LIBADD= -lwd -lwd_dae libhisi_dae_la_LDFLAGS=$(UADK_VERSION) libhisi_dae_la_DEPENDENCIES= libwd.la libwd_dae.la diff --git a/drv/hisi_udma.c b/drv/hisi_udma.c new file mode 100644 index 00000000..57dae8cb --- /dev/null +++ b/drv/hisi_udma.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: Apache-2.0 +/* Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. */ + +#include <math.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/epoll.h> +#include <sys/eventfd.h> +#include <sys/mman.h> +#include <sys/types.h> +#include "hisi_qm_udrv.h" +#include "../include/drv/wd_udma_drv.h" + +#define BIT(nr) (1UL << (nr)) +#define UDMA_CTX_Q_NUM_DEF 1 +#define UDMA_TASK_TYPE 0x3 +#define UDMA_SQE_TYPE 0x1 +#define UDMA_ALG_TYPE 2 +/* Multi max data size is (16M -1) * 64 */ +#define UDMA_M_MAX_ADDR_SIZE 1073741760 +/* Single max data size is (16M - 1) */ +#define UDMA_S_MAX_ADDR_SIZE 16777215 +#define UDMA_MAX_ADDR_NUM 64 +#define UDMA_ADDR_NUM_SHIFT 6 +#define UDMA_MULTI_ADDR_EN BIT(14) +#define UDMA_ADDR_NUM_SHIFT 6 +#define UDMA_SVA_PREFETCH_EN BIT(15) +#define UDMA_ADDR_RESV_NUM 16 +#define UDMA_ADDR_ALIGN_SIZE 128 + +enum { + DATA_MEMCPY = 0x0, + DATA_MEMSET = 0x7, +}; + +enum { + UDMA_TASK_DONE = 0x1, + UDMA_TASK_ERROR = 0x2, +}; + +struct udma_addr { + __u64 addr; + __u64 data_size; +}; + +struct udma_addr_array { + __u64 resv_addr[UDMA_ADDR_RESV_NUM]; + struct udma_addr src_addr[UDMA_MAX_ADDR_NUM]; + struct udma_addr dst_addr[UDMA_MAX_ADDR_NUM]; +}; + +struct udma_sqe { + __u32 bd_type : 6; + __u32 resv1 : 2; + __u32 task_type : 6; + __u32 resv2 : 2; + __u32 task_type_ext : 6; + __u32 resv3 : 9; + __u32 bd_invlid : 1; + __u32 rsv4[2]; + __u32 low_tag; + __u32 hi_tag; + /* The number of bytes to be copied or filled for single address. 
*/ + __u32 data_size; + __u32 rsv5; + /* + * 0 ~ 13 bits: reserved, + * 14 bit: single address or multi addresses, + * 15 bit: sva prefetch en. + */ + __u16 dw0; + /* + * 0 ~5 bits: reserved, + * 6 ~ 13 bits: address num, + * 14 ~15 bits: reserved. + */ + __u16 dw1; + __u64 init_val; + __u32 rsv6[12]; + /* dst addr for single address task */ + __u64 dst_addr; + __u32 rsv7[2]; + /* src addr for single address task, addr array for multi addresses. */ + __u64 addr_array; + __u32 done_flag : 3; + __u32 rsv8 : 1; + __u32 ext_err_type : 12; + __u32 err_type : 8; + __u32 wtype : 8; + __u32 rsv9[3]; +}; + +struct udma_internal_addr { + struct udma_addr_array *addr_array; + __u8 *addr_status; + __u16 addr_count; + __u16 tail; +}; + +struct hisi_udma_ctx { + struct wd_ctx_config_internal config; +}; + +static int get_free_inter_addr(struct udma_internal_addr *inter_addr) +{ + __u16 addr_count = inter_addr->addr_count; + __u16 idx = inter_addr->tail; + __u16 cnt = 0; + + if (unlikely(!addr_count)) { + WD_ERR("invalid: internal addr count is 0!\n"); + return -WD_EINVAL; + } + + while (__atomic_test_and_set(&inter_addr->addr_status[idx], __ATOMIC_ACQUIRE)) { + idx = (idx + 1) % addr_count; + cnt++; + if (cnt == addr_count) + return -WD_EBUSY; + } + + inter_addr->tail = (idx + 1) % addr_count; + + return idx; +} + +static void put_inter_addr(struct udma_internal_addr *inter_addr, int idx) +{ + __atomic_clear(&inter_addr->addr_status[idx], __ATOMIC_RELEASE); +} + +static int check_udma_param(struct wd_udma_msg *msg) +{ + int i; + + if (unlikely(!msg)) { + WD_ERR("invalid: input udma msg is NULL!\n"); + return -WD_EINVAL; + } + + if (unlikely(msg->addr_num > UDMA_MAX_ADDR_NUM)) { + WD_ERR("invalid: input addr_num is more than %d!\n", UDMA_MAX_ADDR_NUM); + return -WD_EINVAL; + } + + /* + * When the single address length exceeds UDMA_S_MAX_ADDR_SIZE, + * the driver will split the address into multiple addresses and + * send them to the hardware. 
+ */ + if (msg->addr_num == 1) { + if (unlikely(msg->dst->data_size > UDMA_M_MAX_ADDR_SIZE)) { + WD_ERR("invalid: input size %lu is more than %d!\n", + msg->dst->data_size, UDMA_M_MAX_ADDR_SIZE); + return -WD_EINVAL; + } + + return WD_SUCCESS; + } + + for (i = 0; i < msg->addr_num; i++) { + if (unlikely(msg->dst[i].data_size > UDMA_S_MAX_ADDR_SIZE)) { + WD_ERR("invalid: addr %d input size %lu is more than %d!\n", + i, msg->dst[i].data_size, UDMA_S_MAX_ADDR_SIZE); + return -WD_EINVAL; + } + } + + return WD_SUCCESS; +} + +static void fill_long_size_memcpy_info(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + __u32 addr_num = 0; + __u64 count; + + for (count = 0; count < msg->src->data_size; count += UDMA_S_MAX_ADDR_SIZE) { + addr_array->src_addr[addr_num].addr = (__u64)(uintptr_t)msg->src->addr + count; + addr_array->dst_addr[addr_num].addr = (__u64)(uintptr_t)msg->dst->addr + count; + if (count + UDMA_S_MAX_ADDR_SIZE <= msg->src->data_size) { + addr_array->src_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE; + addr_array->dst_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE; + } else { + addr_array->src_addr[addr_num].data_size = msg->src->data_size - count; + addr_array->dst_addr[addr_num].data_size = msg->dst->data_size - count; + } + addr_num++; + } + sqe->dw1 |= (addr_num - 1) << UDMA_ADDR_NUM_SHIFT; +} + +static void fill_long_size_memset_info(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + __u32 addr_num = 0; + __u64 count; + + for (count = 0; count < msg->dst->data_size; count += UDMA_S_MAX_ADDR_SIZE) { + addr_array->dst_addr[addr_num].addr = (__u64)(uintptr_t)msg->dst->addr + count; + if (count + UDMA_S_MAX_ADDR_SIZE <= msg->dst->data_size) + addr_array->dst_addr[addr_num].data_size = UDMA_S_MAX_ADDR_SIZE; + else + addr_array->dst_addr[addr_num].data_size = msg->dst->data_size - count; + addr_num++; + } + + sqe->dw1 |= (addr_num - 1) << UDMA_ADDR_NUM_SHIFT; +} + +static void 
fill_mulit_memset_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + int i; + + for (i = 0; i < msg->addr_num; i++) { + addr_array->dst_addr[i].addr = (__u64)(uintptr_t)msg->dst[i].addr; + addr_array->dst_addr[i].data_size = (__u64)(uintptr_t)msg->dst[i].data_size; + } + + sqe->dw1 |= ((__u32)msg->addr_num - 1) << UDMA_ADDR_NUM_SHIFT; +} + +static void fill_multi_memcpy_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + int i; + + for (i = 0; i < msg->addr_num; i++) { + addr_array->src_addr[i].addr = (__u64)(uintptr_t)msg->src[i].addr; + addr_array->src_addr[i].data_size = (__u64)(uintptr_t)msg->src[i].data_size; + addr_array->dst_addr[i].addr = (__u64)(uintptr_t)msg->dst[i].addr; + addr_array->dst_addr[i].data_size = (__u64)(uintptr_t)msg->dst[i].data_size; + } + + sqe->dw1 |= ((__u32)msg->addr_num - 1) << UDMA_ADDR_NUM_SHIFT; +} + +static void fill_multi_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + if (msg->addr_num == 1) { + if (msg->op_type == WD_UDMA_MEMCPY) + fill_long_size_memcpy_info(sqe, msg, addr_array); + else + fill_long_size_memset_info(sqe, msg, addr_array); + } else { + if (msg->op_type == WD_UDMA_MEMCPY) + fill_multi_memcpy_addr_info(sqe, msg, addr_array); + else + fill_mulit_memset_addr_info(sqe, msg, addr_array); + } + + sqe->addr_array = (__u64)(uintptr_t)addr_array; + sqe->dw0 |= UDMA_MULTI_ADDR_EN; +} + +static void fill_single_addr_info(struct udma_sqe *sqe, struct wd_udma_msg *msg) +{ + if (msg->op_type == WD_UDMA_MEMCPY) + sqe->addr_array = (__u64)(uintptr_t)msg->src->addr; + sqe->dst_addr = (__u64)(uintptr_t)msg->dst->addr; + sqe->data_size = msg->dst->data_size; +} + +static void fill_udma_sqe_addr(struct udma_sqe *sqe, struct wd_udma_msg *msg, + struct udma_addr_array *addr_array) +{ + if (!addr_array) + fill_single_addr_info(sqe, msg); + else + fill_multi_addr_info(sqe, msg, addr_array); 
+} + +static void fill_sqe_type(struct udma_sqe *sqe, struct wd_udma_msg *msg) +{ + sqe->bd_type = UDMA_SQE_TYPE; + sqe->task_type = UDMA_TASK_TYPE; + if (msg->op_type == WD_UDMA_MEMCPY) + sqe->task_type_ext = DATA_MEMCPY; + else + sqe->task_type_ext = DATA_MEMSET; +} + +static void fill_init_value(struct udma_sqe *sqe, struct wd_udma_msg *msg) +{ + if (msg->op_type == WD_UDMA_MEMSET) + memset(&sqe->init_val, msg->value, sizeof(__u64)); +} + +static int udma_send(struct wd_alg_driver *drv, handle_t ctx, void *udma_msg) +{ + handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + struct udma_internal_addr *inter_addr = qp->priv; + struct udma_addr_array *addr_array = NULL; + struct wd_udma_msg *msg = udma_msg; + struct udma_sqe sqe = {0}; + __u16 send_cnt = 0; + int idx = 0; + int ret; + + ret = check_udma_param(msg); + if (unlikely(ret)) + return ret; + + if (msg->addr_num > 1 || msg->dst->data_size > UDMA_S_MAX_ADDR_SIZE) { + idx = get_free_inter_addr(inter_addr); + if (idx < 0) + return -WD_EBUSY; + + addr_array = &inter_addr->addr_array[idx]; + memset(addr_array, 0, sizeof(struct udma_addr_array)); + } + + fill_sqe_type(&sqe, msg); + fill_init_value(&sqe, msg); + fill_udma_sqe_addr(&sqe, msg, addr_array); + + hisi_set_msg_id(h_qp, &msg->tag); + sqe.low_tag = msg->tag; + sqe.hi_tag = (__u32)idx; + sqe.dw0 |= UDMA_SVA_PREFETCH_EN; + + ret = hisi_qm_send(h_qp, &sqe, 1, &send_cnt); + if (unlikely(ret)) { + if (ret != -WD_EBUSY) + WD_ERR("failed to send to hardware, ret = %d!\n", ret); + if (addr_array) + put_inter_addr(inter_addr, idx); + return ret; + } + + return WD_SUCCESS; +} + +static void dump_udma_msg(struct udma_sqe *sqe, struct wd_udma_msg *msg) +{ + WD_ERR("dump UDMA message after a task error occurs.\n" + "op_type:%u addr_num:%d.\n", msg->op_type, msg->addr_num); +} + +static int udma_recv(struct wd_alg_driver *drv, handle_t ctx, void *udma_msg) +{ + handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct hisi_qp 
*qp = (struct hisi_qp *)h_qp; + struct udma_internal_addr *inter_addr = qp->priv; + struct wd_udma_msg *msg = udma_msg; + struct wd_udma_msg *temp_msg = msg; + struct udma_sqe sqe = {0}; + __u16 recv_cnt = 0; + int ret; + + ret = hisi_qm_recv(h_qp, &sqe, 1, &recv_cnt); + if (ret) + return ret; + + ret = hisi_check_bd_id(h_qp, msg->tag, sqe.low_tag); + if (ret) + goto out; + + msg->tag = sqe.low_tag; + if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { + temp_msg = wd_udma_get_msg(qp->q_info.idx, msg->tag); + if (!temp_msg) { + WD_ERR("failed to get send msg! idx = %u, tag = %u.\n", + qp->q_info.idx, msg->tag); + ret = -WD_EINVAL; + goto out; + } + } + + msg->result = WD_SUCCESS; + if (sqe.done_flag != UDMA_TASK_DONE || + sqe.err_type || sqe.ext_err_type || sqe.wtype) { + WD_ERR("failed to do udma task! done=0x%x, err_type=0x%x\n" + "ext_err_type=0x%x, wtype=0x%x!\n", + (__u32)sqe.done_flag, (__u32)sqe.err_type, + (__u32)sqe.ext_err_type, (__u32)sqe.wtype); + msg->result = WD_IN_EPARA; + } + + if (unlikely(msg->result != WD_SUCCESS)) + dump_udma_msg(&sqe, temp_msg); + +out: + if (sqe.dw0 & UDMA_MULTI_ADDR_EN) + put_inter_addr(inter_addr, sqe.hi_tag); + return ret; +} + +static void udma_uninit_qp_priv(handle_t h_qp) +{ + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + struct udma_internal_addr *inter_addr; + + if (!qp) + return; + + inter_addr = (struct udma_internal_addr *)qp->priv; + if (!inter_addr) + return; + + free(inter_addr->addr_array); + free(inter_addr->addr_status); + free(inter_addr); + qp->priv = NULL; +} + +static int udma_init_qp_priv(handle_t h_qp) +{ + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + __u16 sq_depth = qp->q_info.sq_depth; + struct udma_internal_addr *inter_addr; + int ret = -WD_ENOMEM; + + inter_addr = calloc(1, sizeof(struct udma_internal_addr)); + if (!inter_addr) + return ret; + + inter_addr->addr_status = calloc(1, sizeof(__u8) * sq_depth); + if (!inter_addr->addr_status) + goto free_inter_addr; + + inter_addr->addr_array = 
aligned_alloc(UDMA_ADDR_ALIGN_SIZE, + sizeof(struct udma_addr_array) * sq_depth); + if (!inter_addr->addr_array) + goto free_addr_status; + + inter_addr->addr_count = sq_depth; + qp->priv = inter_addr; + + return WD_SUCCESS; + +free_addr_status: + free(inter_addr->addr_status); +free_inter_addr: + free(inter_addr); + + return ret; +} + +static int udma_init(struct wd_alg_driver *drv, void *conf) +{ + struct wd_ctx_config_internal *config = conf; + struct hisi_qm_priv qm_priv; + struct hisi_udma_ctx *priv; + handle_t h_qp = 0; + handle_t h_ctx; + __u32 i, j; + int ret; + + if (!config || !config->ctx_num) { + WD_ERR("invalid: udma init config is null or ctx num is 0!\n"); + return -WD_EINVAL; + } + + priv = malloc(sizeof(struct hisi_udma_ctx)); + if (!priv) + return -WD_ENOMEM; + + qm_priv.op_type = UDMA_ALG_TYPE; + qm_priv.sqe_size = sizeof(struct udma_sqe); + /* Allocate qp for each context */ + for (i = 0; i < config->ctx_num; i++) { + h_ctx = config->ctxs[i].ctx; + qm_priv.qp_mode = config->ctxs[i].ctx_mode; + /* Setting the epoll en to 0 for ASYNC ctx */ + qm_priv.epoll_en = (qm_priv.qp_mode == CTX_MODE_SYNC) ? 
+ config->epoll_en : 0; + qm_priv.idx = i; + h_qp = hisi_qm_alloc_qp(&qm_priv, h_ctx); + if (!h_qp) { + ret = -WD_ENOMEM; + goto out; + } + config->ctxs[i].sqn = qm_priv.sqn; + ret = udma_init_qp_priv(h_qp); + if (ret) + goto free_h_qp; + } + memcpy(&priv->config, config, sizeof(struct wd_ctx_config_internal)); + drv->priv = priv; + + return WD_SUCCESS; +free_h_qp: + hisi_qm_free_qp(h_qp); +out: + for (j = 0; j < i; j++) { + h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx); + udma_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } + free(priv); + return ret; +} + +static void udma_exit(struct wd_alg_driver *drv) +{ + struct wd_ctx_config_internal *config; + struct hisi_udma_ctx *priv; + handle_t h_qp; + __u32 i; + + if (!drv || !drv->priv) + return; + + priv = (struct hisi_udma_ctx *)drv->priv; + config = &priv->config; + for (i = 0; i < config->ctx_num; i++) { + h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); + udma_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } + + free(priv); + drv->priv = NULL; +} + +static int udma_get_usage(void *param) +{ + return 0; +} + +static struct wd_alg_driver udma_driver = { + .drv_name = "hisi_zip", + .alg_name = "udma", + .calc_type = UADK_ALG_HW, + .priority = 100, + .queue_num = UDMA_CTX_Q_NUM_DEF, + .op_type_num = 1, + .fallback = 0, + .init = udma_init, + .exit = udma_exit, + .send = udma_send, + .recv = udma_recv, + .get_usage = udma_get_usage, +}; + +#ifdef WD_STATIC_DRV +void hisi_udma_probe(void) +#else +static void __attribute__((constructor)) hisi_udma_probe(void) +#endif +{ + int ret; + + WD_INFO("Info: register UDMA alg drivers!\n"); + + ret = wd_alg_driver_register(&udma_driver); + if (ret && ret != -WD_ENODEV) + WD_ERR("failed to register UDMA driver, ret = %d!\n", ret); +} + +#ifdef WD_STATIC_DRV +void hisi_udma_remove(void) +#else +static void __attribute__((destructor)) hisi_udma_remove(void) +#endif +{ + WD_INFO("Info: unregister UDMA alg drivers!\n"); + + wd_alg_driver_unregister(&udma_driver); +} 
diff --git a/include/drv/wd_udma_drv.h b/include/drv/wd_udma_drv.h new file mode 100644 index 00000000..c8028f79 --- /dev/null +++ b/include/drv/wd_udma_drv.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. */ + +#ifndef __WD_UDMA_DRV_H +#define __WD_UDMA_DRV_H + +#include <asm/types.h> + +#include "../wd_udma.h" +#include "../wd_util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* udma message format */ +struct wd_udma_msg { + struct wd_udma_req req; + struct wd_data_addr *src; + struct wd_data_addr *dst; + int addr_num; + int value; + enum wd_udma_op_type op_type; + __u32 tag; /* User-defined request identifier */ + __u8 result; /* alg op error code */ +}; + +struct wd_udma_msg *wd_udma_get_msg(__u32 idx, __u32 tag); + +#ifdef __cplusplus +} +#endif + +#endif /* __WD_UDMA_DRV_H */ diff --git a/include/wd_alg.h b/include/wd_alg.h index aba855d6..441b3bef 100644 --- a/include/wd_alg.h +++ b/include/wd_alg.h @@ -204,11 +204,13 @@ void hisi_sec2_probe(void); void hisi_hpre_probe(void); void hisi_zip_probe(void); void hisi_dae_probe(void); +void hisi_udma_probe(void); void hisi_sec2_remove(void); void hisi_hpre_remove(void); void hisi_zip_remove(void); void hisi_dae_remove(void); +void hisi_udma_remove(void); #endif diff --git a/include/wd_udma.h b/include/wd_udma.h new file mode 100644 index 00000000..d8a7964e --- /dev/null +++ b/include/wd_udma.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. + */ + +#ifndef __WD_UDMA_H +#define __WD_UDMA_H + +#include <stdbool.h> + +#include "wd_alg_common.h" + +typedef void (*wd_udma_cb_t)(void *cb_param); + +/** + * wd_udma_op_type - Algorithm type of option. + */ +enum wd_udma_op_type { + WD_UDMA_MEMCPY, + WD_UDMA_MEMSET, + WD_UDMA_OP_MAX +}; + +/** + * wd_udma_sess_setup - udma session setup information. 
+ * @sched_param: Parameters of the scheduling policy, + * usually allocated according to struct sched_params. + */ +struct wd_udma_sess_setup { + void *sched_param; +}; + +/** + * wd_data_addr - addr information of UDMA. + * @addr: Indicates the start address of the operation. + * @addr_size: Maximum size of the addr, in bytes. + * @data_size: Number of bytes to be operated on. + */ +struct wd_data_addr { + void *addr; + size_t addr_size; + size_t data_size; +}; + +/** + * wd_udma_req - udma operation request. + * @src: pointer to input address. + * @dst: pointer to output address, for WD_UDMA_MEMSET, only one of src and dst can be set. + * @addr_num: Number of addresses. + * @value: Value to be written for WD_UDMA_MEMSET. + * @op_type: udma operation type. + * @cb: Callback function. + * @cb_param: Parameters of the callback function. + * @status: operation result written back by the driver. + */ +struct wd_udma_req { + struct wd_data_addr *src; + struct wd_data_addr *dst; + int addr_num; + int value; + enum wd_udma_op_type op_type; + wd_udma_cb_t cb; + void *cb_param; + int status; +}; + +/** + * wd_udma_init() - A simplified interface to initialize udma. + * To make the initialization simpler, ctx_params supports being set to NULL. + * And then the function will set them as driver's default. + * + * @alg: The algorithm users want to use. + * @sched_type: The scheduling type users want to use. + * @task_type: Task types, including soft computing, hardware and hybrid computing. + * @ctx_params: The ctxs resources users want to use. Include per operation + * type ctx numbers and business process run numa. + * + * Return 0 on success and other values on failure. + */ +int wd_udma_init(const char *alg, __u32 sched_type, + int task_type, struct wd_ctx_params *ctx_params); + +/** + * wd_udma_uninit() - Uninitialise ctx configuration and scheduler. + */ +void wd_udma_uninit(void); + +/** + * wd_udma_alloc_sess() - Allocate a wd udma session. + * @setup: Parameters to setup this session. 
+ * + * Return a session handle if successful, or 0 if failed. + */ +handle_t wd_udma_alloc_sess(struct wd_udma_sess_setup *setup); + +/** + * wd_udma_free_sess() - Free a wd udma session. + * @sess: The session to be freed. + */ +void wd_udma_free_sess(handle_t sess); + +/** + * wd_do_udma_sync() - Send a sync udma request. + * @h_sess: The session which request will be sent to. + * @req: Request. + */ +int wd_do_udma_sync(handle_t h_sess, struct wd_udma_req *req); + +/** + * wd_do_udma_async() - Send an async udma request. + * @sess: The session which request will be sent to. + * @req: Request. + */ +int wd_do_udma_async(handle_t sess, struct wd_udma_req *req); + +/** + * wd_udma_poll() - Poll finished request. + * + * This function will call poll_policy function which is registered to wd udma + * by user. + */ +int wd_udma_poll(__u32 expt, __u32 *count); + +#endif /* __WD_UDMA_H */ diff --git a/include/wd_util.h b/include/wd_util.h index 9e9d4e35..bbb18a7c 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -42,6 +42,7 @@ enum wd_type { WD_DH_TYPE, WD_ECC_TYPE, WD_AGG_TYPE, + WD_UDMA_TYPE, WD_TYPE_MAX, }; diff --git a/libwd_dae.map b/libwd_dae.map index 4c51b856..6597ff98 100644 --- a/libwd_dae.map +++ b/libwd_dae.map @@ -1,4 +1,4 @@ -UADK_CRYPTO_2.0 { +UADK_DAE_2.0 { global: wd_agg_alloc_sess; wd_agg_free_sess; @@ -17,5 +17,18 @@ global: wd_sched_rr_instance; wd_sched_rr_alloc; wd_sched_rr_release; + + wd_udma_alloc_sess; + wd_udma_free_sess; + wd_udma_init; + wd_udma_uninit; + wd_do_udma_sync; + wd_do_udma_async; + wd_udma_poll; + wd_udma_get_msg; + + wd_sched_rr_instance; + wd_sched_rr_alloc; + wd_sched_rr_release; local: *; }; diff --git a/wd_udma.c b/wd_udma.c new file mode 100644 index 00000000..5f47291e --- /dev/null +++ b/wd_udma.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#include <stdlib.h> +#include <pthread.h> +#include <limits.h> +#include "include/drv/wd_udma_drv.h" +#include "wd_udma.h" + +struct wd_udma_sess { + const char *alg_name; + wd_dev_mask_t *dev_mask; + void *priv; + void *sched_key; +}; + +static struct wd_udma_setting { + enum wd_status status; + struct wd_ctx_config_internal config; + struct wd_sched sched; + struct wd_async_msg_pool pool; + struct wd_alg_driver *driver; + void *dlhandle; + void *dlh_list; +} wd_udma_setting; + +static struct wd_init_attrs wd_udma_init_attrs; + +static void wd_udma_close_driver(void) +{ +#ifndef WD_STATIC_DRV + wd_dlclose_drv(wd_udma_setting.dlh_list); + wd_udma_setting.dlh_list = NULL; +#else + wd_release_drv(wd_udma_setting.driver); + hisi_udma_remove(); +#endif +} + +static int wd_udma_open_driver(void) +{ +#ifndef WD_STATIC_DRV + /* + * Driver lib file path could be set by env param, + * then open them by wd_dlopen_drv(); + * use NULL means dynamic query path. + */ + wd_udma_setting.dlh_list = wd_dlopen_drv(NULL); + if (!wd_udma_setting.dlh_list) { + WD_ERR("fail to open driver lib files.\n"); + return -WD_EINVAL; + } +#else + hisi_udma_probe(); +#endif + return WD_SUCCESS; +} + +void wd_udma_free_sess(handle_t sess) +{ + struct wd_udma_sess *sess_t = (struct wd_udma_sess *)sess; + + if (!sess_t) { + WD_ERR("invalid: free udma sess param NULL!\n"); + return; + } + + if (sess_t->sched_key) + free(sess_t->sched_key); + free(sess_t); +} + +handle_t wd_udma_alloc_sess(struct wd_udma_sess_setup *setup) +{ + struct wd_udma_sess *sess; + + if (!setup) { + WD_ERR("invalid: alloc udma sess setup NULL!\n"); + return (handle_t)0; + } + + sess = calloc(1, sizeof(struct wd_udma_sess)); + if (!sess) + return (handle_t)0; + + sess->alg_name = "udma"; + /* Some simple schedulers don't need scheduling parameters */ + sess->sched_key = (void *)wd_udma_setting.sched.sched_init( + wd_udma_setting.sched.h_sched_ctx, setup->sched_param); + if (WD_IS_ERR(sess->sched_key)) { + WD_ERR("failed to 
init session schedule key!\n"); + goto free_sess; + } + + return (handle_t)sess; + +free_sess: + free(sess); + return (handle_t)0; +} + +static int wd_udma_addr_check(struct wd_data_addr *data_addr) +{ + if (unlikely(!data_addr->addr)) { + WD_ERR("invalid: udma addr is NULL!\n"); + return -WD_EINVAL; + } + + if (unlikely(!data_addr->data_size || + data_addr->data_size > data_addr->addr_size)) { + WD_ERR("invalid: udma size is error, data_size %lu, addr_size is %lu!\n", + data_addr->data_size, data_addr->addr_size); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_udma_param_check(struct wd_udma_sess *sess, + struct wd_udma_req *req) +{ + struct wd_data_addr *src, *dst; + int i, ret; + + if (unlikely(!sess || !req)) { + WD_ERR("invalid: input param NULL!\n"); + return -WD_EINVAL; + } + + if (unlikely(req->addr_num <= 0)) { + WD_ERR("invalid: addr num is error %d!\n", req->addr_num); + return -WD_EINVAL; + } + + src = req->src; + dst = req->dst; + if (unlikely(req->op_type >= WD_UDMA_OP_MAX)) { + WD_ERR("invalid: op_type is error %u!\n", req->op_type); + return -WD_EINVAL; + } else if (unlikely(req->op_type == WD_UDMA_MEMCPY && (!src || !dst))) { + WD_ERR("invalid: memcpy src or dst is NULL!\n"); + return -WD_EINVAL; + } else if (unlikely(req->op_type == WD_UDMA_MEMSET && + ((!src && !dst) || (src && dst)))) { + WD_ERR("invalid: memset src and dst is error!\n"); + return -WD_EINVAL; + } + + if (req->op_type == WD_UDMA_MEMSET) + dst = !req->src ? 
req->dst : req->src; + + for (i = 0; i < req->addr_num; i++) { + if (req->op_type == WD_UDMA_MEMCPY) { + ret = wd_udma_addr_check(&src[i]); + if (unlikely(ret)) { + WD_ERR("invalid: udma memcpy src addr is error!\n"); + return -WD_EINVAL; + } + + ret = wd_udma_addr_check(&dst[i]); + if (unlikely(ret)) { + WD_ERR("invalid: udma memcpy dst addr is error!\n"); + return -WD_EINVAL; + } + + if (unlikely(dst[i].data_size != src[i].data_size)) { + WD_ERR("invalid: udma memcpy data_size is error!\n" + "src %lu, dst %lu!\n", + dst[i].data_size, src[i].data_size); + return -WD_EINVAL; + } + } else { + ret = wd_udma_addr_check(&dst[i]); + if (unlikely(ret)) { + WD_ERR("invalid: udma memset addr is error!\n"); + return -WD_EINVAL; + } + } + } + + return WD_SUCCESS; +} + +static void fill_udma_msg(struct wd_udma_msg *msg, struct wd_udma_req *req) +{ + msg->result = WD_EINVAL; + + memcpy(&msg->req, req, sizeof(*req)); + msg->op_type = req->op_type; + msg->addr_num = req->addr_num; + msg->value = req->value; + if (req->op_type == WD_UDMA_MEMSET) { + msg->dst = !req->src ? 
req->dst : req->src; + } else { + msg->src = req->src; + msg->dst = req->dst; + } +} + +int wd_do_udma_sync(handle_t h_sess, struct wd_udma_req *req) +{ + struct wd_ctx_config_internal *config = &wd_udma_setting.config; + handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx; + struct wd_udma_sess *sess_t = (struct wd_udma_sess *)h_sess; + struct wd_msg_handle msg_handle; + struct wd_ctx_internal *ctx; + struct wd_udma_msg msg = {0}; + __u32 idx; + int ret; + + ret = wd_udma_param_check(sess_t, req); + if (unlikely(ret)) + return ret; + + idx = wd_udma_setting.sched.pick_next_ctx(h_sched_ctx, + sess_t->sched_key, + CTX_MODE_SYNC); + ret = wd_check_ctx(config, CTX_MODE_SYNC, idx); + if (unlikely(ret)) + return ret; + + wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); + ctx = config->ctxs + idx; + + fill_udma_msg(&msg, req); + + msg_handle.send = wd_udma_setting.driver->send; + msg_handle.recv = wd_udma_setting.driver->recv; + pthread_spin_lock(&ctx->lock); + ret = wd_handle_msg_sync(wd_udma_setting.driver, &msg_handle, ctx->ctx, + &msg, NULL, wd_udma_setting.config.epoll_en); + pthread_spin_unlock(&ctx->lock); + if (unlikely(ret)) + return ret; + + req->status = msg.result; + + return GET_NEGATIVE(msg.result); +} + +int wd_do_udma_async(handle_t sess, struct wd_udma_req *req) +{ + struct wd_ctx_config_internal *config = &wd_udma_setting.config; + handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx; + struct wd_udma_sess *sess_t = (struct wd_udma_sess *)sess; + struct wd_udma_msg *msg = NULL; + struct wd_ctx_internal *ctx; + int ret, mid; + __u32 idx; + + ret = wd_udma_param_check(sess_t, req); + if (unlikely(ret)) + return ret; + + if (unlikely(!req->cb)) { + WD_ERR("invalid: udma input req cb is NULL!\n"); + return -WD_EINVAL; + } + + idx = wd_udma_setting.sched.pick_next_ctx(h_sched_ctx, + sess_t->sched_key, + CTX_MODE_ASYNC); + ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx); + if (unlikely(ret)) + return ret; + ctx = config->ctxs + idx; + + mid = 
wd_get_msg_from_pool(&wd_udma_setting.pool, idx, (void **)&msg); + if (unlikely(mid < 0)) { + WD_ERR("failed to get msg from pool!\n"); + return mid; + } + + fill_udma_msg(msg, req); + msg->tag = mid; + + ret = wd_alg_driver_send(wd_udma_setting.driver, ctx->ctx, msg); + if (unlikely(ret)) { + if (ret != -WD_EBUSY) + WD_ERR("failed to send udma BD, hw is err!\n"); + + goto fail_with_msg; + } + + wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); + + return WD_SUCCESS; + +fail_with_msg: + wd_put_msg_to_pool(&wd_udma_setting.pool, idx, mid); + + return ret; +} + +static int wd_udma_poll_ctx(__u32 idx, __u32 expt, __u32 *count) +{ + struct wd_ctx_config_internal *config = &wd_udma_setting.config; + struct wd_udma_msg rcv_msg = {0}; + struct wd_ctx_internal *ctx; + struct wd_udma_req *req; + struct wd_udma_msg *msg; + __u32 rcv_cnt = 0; + __u32 tmp = expt; + int ret; + + *count = 0; + + ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx); + if (ret) + return ret; + + ctx = config->ctxs + idx; + + do { + ret = wd_alg_driver_recv(wd_udma_setting.driver, ctx->ctx, &rcv_msg); + if (ret == -WD_EAGAIN) { + return ret; + } else if (unlikely(ret)) { + WD_ERR("failed to async recv, ret = %d!\n", ret); + *count = rcv_cnt; + wd_put_msg_to_pool(&wd_udma_setting.pool, idx, + rcv_msg.tag); + return ret; + } + rcv_cnt++; + msg = wd_find_msg_in_pool(&wd_udma_setting.pool, idx, rcv_msg.tag); + if (!msg) { + WD_ERR("failed to find udma msg!\n"); + return -WD_EINVAL; + } + + msg->req.status = rcv_msg.result; + req = &msg->req; + req->cb(req); + wd_put_msg_to_pool(&wd_udma_setting.pool, idx, rcv_msg.tag); + *count = rcv_cnt; + } while (--tmp); + + return ret; +} + +int wd_udma_poll(__u32 expt, __u32 *count) +{ + handle_t h_sched_ctx = wd_udma_setting.sched.h_sched_ctx; + + if (unlikely(!count || !expt)) { + WD_ERR("invalid: udma poll count is NULL or expt is 0!\n"); + return -WD_EINVAL; + } + + return wd_udma_setting.sched.poll_policy(h_sched_ctx, expt, count); +} + +static void 
wd_udma_clear_status(void) +{ + wd_alg_clear_init(&wd_udma_setting.status); +} + +static void wd_udma_alg_uninit(void) +{ + /* Uninit async request pool */ + wd_uninit_async_request_pool(&wd_udma_setting.pool); + /* Unset config, sched, driver */ + wd_clear_sched(&wd_udma_setting.sched); + wd_alg_uninit_driver(&wd_udma_setting.config, wd_udma_setting.driver); +} + +void wd_udma_uninit(void) +{ + enum wd_status status; + + wd_alg_get_init(&wd_udma_setting.status, &status); + if (status == WD_UNINIT) + return; + + wd_udma_alg_uninit(); + wd_alg_attrs_uninit(&wd_udma_init_attrs); + wd_alg_drv_unbind(wd_udma_setting.driver); + wd_udma_close_driver(); + wd_alg_clear_init(&wd_udma_setting.status); +} + +static int wd_udma_alg_init(struct wd_ctx_config *config, struct wd_sched *sched) +{ + int ret; + + ret = wd_set_epoll_en("WD_UDMA_EPOLL_EN", &wd_udma_setting.config.epoll_en); + if (ret < 0) + return ret; + + ret = wd_init_ctx_config(&wd_udma_setting.config, config); + if (ret < 0) + return ret; + + ret = wd_init_sched(&wd_udma_setting.sched, sched); + if (ret < 0) + goto out_clear_ctx_config; + + /* Allocate async pool for every ctx */ + ret = wd_init_async_request_pool(&wd_udma_setting.pool, config, WD_POOL_MAX_ENTRIES, + sizeof(struct wd_udma_msg)); + if (ret < 0) + goto out_clear_sched; + + ret = wd_alg_init_driver(&wd_udma_setting.config, wd_udma_setting.driver); + if (ret) + goto out_clear_pool; + + return WD_SUCCESS; + +out_clear_pool: + wd_uninit_async_request_pool(&wd_udma_setting.pool); +out_clear_sched: + wd_clear_sched(&wd_udma_setting.sched); +out_clear_ctx_config: + wd_clear_ctx_config(&wd_udma_setting.config); + return ret; +} + +int wd_udma_init(const char *alg, __u32 sched_type, int task_type, + struct wd_ctx_params *ctx_params) +{ + struct wd_ctx_nums udma_ctx_num[WD_UDMA_OP_MAX] = {0}; + struct wd_ctx_params udma_ctx_params = {0}; + int state, ret = -WD_EINVAL; + + pthread_atfork(NULL, NULL, wd_udma_clear_status); + + state = 
wd_alg_try_init(&wd_udma_setting.status); + if (state) + return state; + + if (!alg || sched_type >= SCHED_POLICY_BUTT || + task_type < 0 || task_type >= TASK_MAX_TYPE) { + WD_ERR("invalid: input param is wrong!\n"); + goto out_clear_init; + } + + if (strcmp(alg, "udma")) { + WD_ERR("invalid: the alg %s not support!\n", alg); + goto out_clear_init; + } + + state = wd_udma_open_driver(); + if (state) + goto out_clear_init; + + while (ret) { + memset(&wd_udma_setting.config, 0, sizeof(struct wd_ctx_config_internal)); + + /* Get alg driver and dev name */ + wd_udma_setting.driver = wd_alg_drv_bind(task_type, alg); + if (!wd_udma_setting.driver) { + WD_ERR("fail to bind a valid driver.\n"); + ret = -WD_EINVAL; + goto out_dlopen; + } + + udma_ctx_params.ctx_set_num = udma_ctx_num; + ret = wd_ctx_param_init(&udma_ctx_params, ctx_params, + wd_udma_setting.driver, WD_UDMA_TYPE, WD_UDMA_OP_MAX); + if (ret) { + if (ret == -WD_EAGAIN) { + wd_disable_drv(wd_udma_setting.driver); + wd_alg_drv_unbind(wd_udma_setting.driver); + continue; + } + goto out_driver; + } + + (void)strcpy(wd_udma_init_attrs.alg, alg); + wd_udma_init_attrs.sched_type = sched_type; + wd_udma_init_attrs.driver = wd_udma_setting.driver; + wd_udma_init_attrs.ctx_params = &udma_ctx_params; + wd_udma_init_attrs.alg_init = wd_udma_alg_init; + wd_udma_init_attrs.alg_poll_ctx = wd_udma_poll_ctx; + ret = wd_alg_attrs_init(&wd_udma_init_attrs); + if (ret) { + if (ret == -WD_ENODEV) { + wd_disable_drv(wd_udma_setting.driver); + wd_alg_drv_unbind(wd_udma_setting.driver); + wd_ctx_param_uninit(&udma_ctx_params); + continue; + } + WD_ERR("failed to init alg attrs!\n"); + goto out_params_uninit; + } + } + + wd_alg_set_init(&wd_udma_setting.status); + wd_ctx_param_uninit(&udma_ctx_params); + + return WD_SUCCESS; + +out_params_uninit: + wd_ctx_param_uninit(&udma_ctx_params); +out_driver: + wd_alg_drv_unbind(wd_udma_setting.driver); +out_dlopen: + wd_udma_close_driver(); +out_clear_init: + 
wd_alg_clear_init(&wd_udma_setting.status); + return ret; +} + +struct wd_udma_msg *wd_udma_get_msg(__u32 idx, __u32 tag) +{ + return wd_find_msg_in_pool(&wd_udma_setting.pool, idx, tag); +} diff --git a/wd_util.c b/wd_util.c index f1b27bf8..38d2d375 100644 --- a/wd_util.c +++ b/wd_util.c @@ -63,6 +63,7 @@ static const char *wd_env_name[WD_TYPE_MAX] = { "WD_DH_CTX_NUM", "WD_ECC_CTX_NUM", "WD_AGG_CTX_NUM", + "WD_UDMA_CTX_NUM", }; struct async_task { @@ -107,6 +108,7 @@ static struct acc_alg_item alg_options[] = { {"deflate", "deflate"}, {"lz77_zstd", "lz77_zstd"}, {"hashagg", "hashagg"}, + {"udma", "udma"}, {"rsa", "rsa"}, {"dh", "dh"}, -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> Add parentheses to the input variable. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- drv/hisi_comp.c | 6 +++--- drv/hisi_comp_huf.c | 2 +- v1/drv/hisi_zip_huf.c | 2 +- v1/drv/hisi_zip_udrv.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index cd558a80..98b45d71 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -69,9 +69,9 @@ #define upper_32_bits(addr) ((__u32)((uintptr_t)(addr) >> HZ_HADDR_SHIFT)) /* the min output buffer size is (input size * 1.125) */ -#define min_out_buf_size(inl) ((((__u64)inl * 9) + 7) >> 3) +#define min_out_buf_size(inl) ((((__u64)(inl) * 9) + 7) >> 3) /* the max input size is (output buffer size * 8 / 9) and align with 4 byte */ -#define max_in_data_size(outl) ((__u32)(((__u64)outl << 3) / 9) & 0xfffffffc) +#define max_in_data_size(outl) ((__u32)(((__u64)(outl) << 3) / 9) & 0xfffffffc) #define HZ_MAX_SIZE (8 * 1024 * 1024) @@ -100,7 +100,7 @@ #define CTX_WIN_LEN_MASK 0xffff #define CTX_HEAD_BIT_CNT_SHIFT 0xa #define CTX_HEAD_BIT_CNT_MASK 0xfC00 -#define WIN_LEN_ALIGN(len) ((len + 15) & ~(__u32)0x0F) +#define WIN_LEN_ALIGN(len) (((len) + 15) & ~(__u32)0x0F) enum alg_type { HW_DEFLATE = 0x1, diff --git a/drv/hisi_comp_huf.c b/drv/hisi_comp_huf.c index 890e54b4..b9c7f258 100644 --- a/drv/hisi_comp_huf.c +++ b/drv/hisi_comp_huf.c @@ -14,7 +14,7 @@ #define EMPTY_STORE_BLOCK_VAL 0xffff0000L #define BLOCK_IS_COMPLETE 1 #define BLOCK_IS_INCOMPLETE 0 -#define LEN_NLEN_CHECK(data) ((data & 0xffff) != ((data >> 16) ^ 0xffff)) +#define LEN_NLEN_CHECK(data) (((data) & 0xffff) != (((data) >> 16) ^ 0xffff)) /* Constants related to the Huffman code table */ #define LIT_LEN_7BIT_THRESHOLD 7 diff --git a/v1/drv/hisi_zip_huf.c b/v1/drv/hisi_zip_huf.c index 3ce270f4..dce9eaf6 100644 --- a/v1/drv/hisi_zip_huf.c +++ b/v1/drv/hisi_zip_huf.c @@ -14,7 +14,7 @@ #define EMPTY_STORE_BLOCK_VAL 
0xffff0000L #define HF_BLOCK_IS_COMPLETE 1 #define HF_BLOCK_IS_INCOMPLETE 0 -#define LEN_NLEN_CHECK(data) ((data & 0xffff) != ((data >> 16) ^ 0xffff)) +#define LEN_NLEN_CHECK(data) (((data) & 0xffff) != (((data) >> 16) ^ 0xffff)) /* Constants related to the Huffman code table */ #define LIT_LEN_7BITS_THRESHOLD 7 diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c index f2733ad1..ab4254e4 100644 --- a/v1/drv/hisi_zip_udrv.c +++ b/v1/drv/hisi_zip_udrv.c @@ -88,7 +88,7 @@ #define CTX_WIN_LEN_MASK 0xffff #define CTX_HEAD_BIT_CNT_SHIFT 0xa #define CTX_HEAD_BIT_CNT_MASK 0xfC00 -#define WIN_LEN_ALIGN(len) ((len + 15) & ~(__u32)0x0F) +#define WIN_LEN_ALIGN(len) (((len) + 15) & ~(__u32)0x0F) enum { BD_TYPE, -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> 1.When determining whether the output size meets the threshold requirements, only compression needs to consider the length of the head size. 2.check_store_buf will not return value less than 0. 3.msg will not be null, because it is a local variable in wd_comp. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> --- drv/hisi_comp.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 98b45d71..0c36301d 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -343,15 +343,14 @@ static int check_enable_store_buf(struct wd_comp_msg *msg, __u32 out_size, int h if (msg->stream_mode != WD_COMP_STATEFUL) return 0; - if (msg->stream_pos != WD_COMP_STREAM_NEW && out_size > SW_STOREBUF_TH) - return 0; + if (msg->stream_pos == WD_COMP_STREAM_NEW && msg->req.op_type == WD_DIR_COMPRESS && + out_size - head_size <= SW_STOREBUF_TH) + return 1; - if (msg->stream_pos == WD_COMP_STREAM_NEW && - out_size - head_size > SW_STOREBUF_TH) - return 0; + if (out_size <= SW_STOREBUF_TH) + return 1; - /* 1 mean it need store buf */ - return 1; + return 0; } static void fill_buf_size_deflate(struct hisi_zip_sqe *sqe, __u32 in_size, @@ -386,7 +385,7 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, /* * When the output buffer is smaller than the SW_STOREBUF_TH in STATEFUL, - * the internal buffer is used. + * the internal buffer is used. It requires a storage buffer when returning 1. */ ret = check_enable_store_buf(msg, out_size, head_size); if (ret) { @@ -524,7 +523,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, /* * When the output buffer is smaller than the SW_STOREBUF_TH in STATEFUL, - * the internal buffer is used. + * the internal buffer is used. It requires a storage buffer when returning 1. 
*/ ret = check_enable_store_buf(msg, out_size, head_size); if (ret) { @@ -1152,7 +1151,7 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com /* Skip hardware, if the store buffer need to be copied to output */ ret = check_store_buf(msg); if (ret) - return ret < 0 ? ret : 0; + return 0; hisi_set_msg_id(h_qp, &msg->tag); ret = fill_zip_comp_sqe(qp, msg, &sqe); @@ -1322,7 +1321,7 @@ static int hisi_zip_comp_recv(struct wd_alg_driver *drv, handle_t ctx, void *com __u16 count = 0; int ret; - if (recv_msg && recv_msg->ctx_buf) { + if (recv_msg->ctx_buf) { buf = (struct hisi_comp_buf *)(recv_msg->ctx_buf + CTX_STOREBUF_OFFSET); /* * The output has been copied from the storage buffer, -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> Supports LZ77 encoding for LZ4 without additional offset processing. The output includes literal and sequence (LitLength, MatchLength, Offset). Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> --- drv/hisi_comp.c | 310 +++++++++++++++++++++++++++++++++++++--------- include/wd_comp.h | 1 + wd_comp.c | 2 +- wd_util.c | 1 + 4 files changed, 255 insertions(+), 59 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 0c36301d..1c9f438f 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -84,6 +84,9 @@ #define OVERFLOW_DATA_SIZE 8 #define SEQ_DATA_SIZE_SHIFT 3 #define ZSTD_FREQ_DATA_SIZE 784 +#define ZSTD_MIN_OUT_SIZE 1000 +#define LZ77_MIN_OUT_SIZE 200 +#define PRICE_MIN_OUT_SIZE 4096 #define ZSTD_LIT_RESV_SIZE 16 #define REPCODE_SIZE 12 @@ -108,6 +111,8 @@ enum alg_type { HW_GZIP, HW_LZ77_ZSTD_PRICE = 0x42, HW_LZ77_ZSTD, + HW_LZ77_ONLY = 0x40, + HW_LZ77_ONLY_PRICE, }; enum hw_state { @@ -616,31 +621,30 @@ static void fill_buf_addr_lz77_zstd(struct hisi_zip_sqe *sqe, sqe->stream_ctx_addr_h = upper_32_bits(ctx_buf); } -static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, - struct wd_comp_msg *msg) +static int lz77_zstd_buf_check(struct wd_comp_msg *msg) { - struct wd_comp_req *req = &msg->req; - struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; - __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE; __u32 out_size = msg->avail_out; - void *ctx_buf = NULL; + __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE; + __u32 seq_avail_out = out_size - lits_size; - if (unlikely(!data)) { - WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n"); - return -WD_EINVAL; + if (unlikely(in_size > ZSTD_MAX_SIZE)) { + WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", in_size); + return -WD_EINVAL; } - if (unlikely(in_size > ZSTD_MAX_SIZE)) { - WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", - in_size); + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && 
msg->comp_lv < WD_COMP_L9 && + seq_avail_out <= ZSTD_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n", + out_size, ZSTD_MIN_OUT_SIZE + lits_size); return -WD_EINVAL; } - if (unlikely(out_size > HZ_MAX_SIZE)) { - WD_ERR("warning: avail_out(%u) is out of range , will set 8MB size max!\n", - out_size); - out_size = HZ_MAX_SIZE; + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 && + seq_avail_out <= PRICE_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n", + out_size, PRICE_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; } /* @@ -653,14 +657,92 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, return -WD_EINVAL; } + return 0; +} + +static int lz77_only_buf_check(struct wd_comp_msg *msg) +{ + __u32 in_size = msg->req.src_len; + __u32 out_size = msg->avail_out; + __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE; + __u32 seq_avail_out = out_size - lits_size; + + /* lits_size need to be less than 8M when use pbuffer */ + if (unlikely(lits_size > HZ_MAX_SIZE)) { + WD_ERR("invalid: in_len(%u) of lz77_only is out of range!\n", in_size); + return -WD_EINVAL; + } + + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 && + seq_avail_out <= LZ77_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n", + out_size, LZ77_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 && + seq_avail_out <= PRICE_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n", + out_size, PRICE_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + /* For lz77_only, the hardware needs 32 Bytes buffer to output the dfx information */ + if (unlikely(out_size < ZSTD_LIT_RESV_SIZE + lits_size)) { + WD_ERR("invalid: output is not enough, %u bytes are minimum!\n", + ZSTD_LIT_RESV_SIZE + 
lits_size); + return -WD_EINVAL; + } + + return 0; +} + +static int lz77_buf_check(struct wd_comp_msg *msg) +{ + enum wd_comp_alg_type alg_type = msg->alg_type; + + if (alg_type == WD_LZ77_ZSTD) + return lz77_zstd_buf_check(msg); + else if (alg_type == WD_LZ77_ONLY) + return lz77_only_buf_check(msg); + + return 0; +} + +static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg) +{ + struct wd_comp_req *req = &msg->req; + struct wd_lz77_zstd_data *data = req->priv; + __u32 in_size = msg->req.src_len; + __u32 lits_size = in_size + ZSTD_LIT_RESV_SIZE; + __u32 seq_avail_out = msg->avail_out - lits_size; + void *ctx_buf = NULL; + int ret; + + if (unlikely(!data)) { + WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n"); + return -WD_EINVAL; + } + + ret = lz77_buf_check(msg); + if (ret) + return ret; + + if (unlikely(seq_avail_out > HZ_MAX_SIZE)) { + WD_ERR("warning: sequence avail_out(%u) is out of range , will set 8MB size max!\n", + seq_avail_out); + seq_avail_out = HZ_MAX_SIZE; + } + if (msg->ctx_buf) { ctx_buf = msg->ctx_buf + RSV_OFFSET; - if (data->blk_type != COMP_BLK) + if (msg->alg_type == WD_LZ77_ZSTD && data->blk_type != COMP_BLK) memcpy(ctx_buf + CTX_HW_REPCODE_OFFSET, msg->ctx_buf + CTX_REPCODE2_OFFSET, REPCODE_SIZE); } - fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size); + fill_buf_size_lz77_zstd(sqe, in_size, lits_size, seq_avail_out); fill_buf_addr_lz77_zstd(sqe, req->src, req->dst, req->dst + lits_size, ctx_buf); @@ -685,6 +767,103 @@ static struct wd_datalist *get_seq_start_list(struct wd_comp_req *req) return cur; } +static int lz77_zstd_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) +{ + __u32 in_size = msg->req.src_len; + __u32 out_size = msg->avail_out; + __u32 seq_avail_out; + + if (unlikely(in_size > ZSTD_MAX_SIZE)) { + WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", in_size); + return -WD_EINVAL; + } + + /* + * For lz77_zstd, the hardware needs 784 Bytes buffer 
to output + * the frequency information about input data. The sequences + * and frequency data need to be written to an independent sgl + * splited from list_dst. + */ + if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) { + WD_ERR("invalid: output is not enough for literals, at least %u bytes!\n", + ZSTD_FREQ_DATA_SIZE + lits_size); + return -WD_EINVAL; + } else if (unlikely(out_size < ZSTD_FREQ_DATA_SIZE + lits_size)) { + WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n", + ZSTD_FREQ_DATA_SIZE + lits_size - out_size); + return -WD_EINVAL; + } + + seq_avail_out = out_size - lits_size; + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 && + seq_avail_out <= ZSTD_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n", + out_size, ZSTD_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 && + seq_avail_out <= PRICE_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n", + out_size, PRICE_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + return 0; +} + +static int lz77_only_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) +{ + __u32 in_size = msg->req.src_len; + __u32 out_size = msg->avail_out; + __u32 seq_avail_out; + + /* + * For lz77_only, the hardware needs 32 Bytes buffer to output + * the dfx information. The literals and sequences data need to be written + * to an independent sgl splited from list_dst. 
+ */ + if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) { + WD_ERR("invalid: output is not enough for literals, at least %u bytes!\n", + ZSTD_LIT_RESV_SIZE + lits_size); + return -WD_EINVAL; + } else if (unlikely(out_size < ZSTD_LIT_RESV_SIZE + lits_size)) { + WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n", + ZSTD_LIT_RESV_SIZE + lits_size - out_size); + return -WD_EINVAL; + } + + seq_avail_out = out_size - lits_size; + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 && + seq_avail_out <= LZ77_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum!\n", + out_size, LZ77_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv == WD_COMP_L9 && + seq_avail_out <= PRICE_MIN_OUT_SIZE)) { + WD_ERR("invalid: out_len(%u) not enough, %u bytes are minimum in price mode!\n", + out_size, PRICE_MIN_OUT_SIZE + lits_size); + return -WD_EINVAL; + } + + return 0; +} + + +static int lz77_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) +{ + enum wd_comp_alg_type alg_type = msg->alg_type; + + if (alg_type == WD_LZ77_ZSTD) + return lz77_zstd_buf_check_sgl(msg, lits_size); + else if (alg_type == WD_LZ77_ONLY) + return lz77_only_buf_check_sgl(msg, lits_size); + + return 0; +} + static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { @@ -698,12 +877,6 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, __u32 lits_size; int ret; - if (unlikely(in_size > ZSTD_MAX_SIZE)) { - WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", - in_size); - return -WD_EINVAL; - } - if (unlikely(!data)) { WD_ERR("invalid: wd_lz77_zstd_data address is NULL!\n"); return -WD_EINVAL; @@ -715,26 +888,15 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(!seq_start)) return -WD_EINVAL; + lits_size = 
hisi_qm_get_list_size(req->list_dst, seq_start); + + ret = lz77_buf_check_sgl(msg, lits_size); + if (ret) + return ret; + data->literals_start = req->list_dst; data->sequences_start = seq_start; - /* - * For lz77_zstd, the hardware needs 784 Bytes buffer to output - * the frequency information about input data. The sequences - * and frequency data need to be written to an independent sgl - * splited from list_dst. - */ - lits_size = hisi_qm_get_list_size(req->list_dst, seq_start); - if (unlikely(lits_size < in_size + ZSTD_LIT_RESV_SIZE)) { - WD_ERR("invalid: output is not enough for literals, %u bytes are minimum!\n", - ZSTD_FREQ_DATA_SIZE + lits_size); - return -WD_EINVAL; - } else if (unlikely(out_size < ZSTD_FREQ_DATA_SIZE + lits_size)) { - WD_ERR("invalid: output is not enough for sequences, at least %u bytes more!\n", - ZSTD_FREQ_DATA_SIZE + lits_size - out_size); - return -WD_EINVAL; - } - fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size); h_sgl_pool = hisi_qm_get_sglpool(h_qp); @@ -824,6 +986,15 @@ static void fill_alg_lz77_zstd(struct hisi_zip_sqe *sqe) sqe->dw9 = val; } +static void fill_alg_lz77_only(struct hisi_zip_sqe *sqe) +{ + __u32 val; + + val = sqe->dw9 & ~HZ_REQ_TYPE_MASK; + val |= HW_LZ77_ONLY; + sqe->dw9 = val; +} + static void fill_tag_v1(struct hisi_zip_sqe *sqe, __u32 tag) { sqe->dw13 = tag; @@ -841,7 +1012,7 @@ static int fill_comp_level_deflate(struct hisi_zip_sqe *sqe, enum wd_comp_level static int fill_comp_level_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_level comp_lv) { - __u32 val; + __u32 val, alg; switch (comp_lv) { case WD_COMP_L8: @@ -851,8 +1022,12 @@ static int fill_comp_level_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_leve */ break; case WD_COMP_L9: + alg = sqe->dw9 & HZ_REQ_TYPE_MASK; val = sqe->dw9 & ~HZ_REQ_TYPE_MASK; - val |= HW_LZ77_ZSTD_PRICE; + if (alg == HW_LZ77_ZSTD) + val |= HW_LZ77_ZSTD_PRICE; + else if (alg == HW_LZ77_ONLY) + val |= HW_LZ77_ONLY_PRICE; sqe->dw9 = val; break; default: 
@@ -911,18 +1086,22 @@ static void get_data_size_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_op_ty if (unlikely(!data)) return; + recv_msg->in_cons = sqe->consumed; data->lit_num = sqe->comp_data_length; data->seq_num = sqe->produced; - data->lit_length_overflow_cnt = sqe->dw31 >> LITLEN_OVERFLOW_CNT_SHIFT; - data->lit_length_overflow_pos = sqe->dw31 & LITLEN_OVERFLOW_POS_MASK; - data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) + - OVERFLOW_DATA_SIZE; - - if (ctx_buf) { - memcpy(ctx_buf + CTX_REPCODE2_OFFSET, - ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); - memcpy(ctx_buf + CTX_REPCODE1_OFFSET, - ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); + + if (recv_msg->alg_type == WD_LZ77_ZSTD) { + data->lit_length_overflow_cnt = sqe->dw31 >> LITLEN_OVERFLOW_CNT_SHIFT; + data->lit_length_overflow_pos = sqe->dw31 & LITLEN_OVERFLOW_POS_MASK; + data->freq = data->sequences_start + (data->seq_num << SEQ_DATA_SIZE_SHIFT) + + OVERFLOW_DATA_SIZE; + + if (ctx_buf) { + memcpy(ctx_buf + CTX_REPCODE2_OFFSET, + ctx_buf + CTX_REPCODE1_OFFSET, REPCODE_SIZE); + memcpy(ctx_buf + CTX_REPCODE1_OFFSET, + ctx_buf + RSV_OFFSET + CTX_HW_REPCODE_OFFSET, REPCODE_SIZE); + } } } @@ -970,6 +1149,16 @@ struct hisi_zip_sqe_ops ops[] = { { .fill_comp_level = fill_comp_level_lz77_zstd, .get_data_size = get_data_size_lz77_zstd, .get_tag = get_tag_v3, + }, { + .alg_name = "lz77_only", + .fill_buf[WD_FLAT_BUF] = fill_buf_lz77_zstd, + .fill_buf[WD_SGL_BUF] = fill_buf_lz77_zstd_sgl, + .fill_sqe_type = fill_sqe_type_v3, + .fill_alg = fill_alg_lz77_only, + .fill_tag = fill_tag_v3, + .fill_comp_level = fill_comp_level_lz77_zstd, + .get_data_size = get_data_size_lz77_zstd, + .get_tag = get_tag_v3, } }; @@ -1079,10 +1268,6 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, return -WD_EINVAL; } - ret = ops[alg_type].fill_comp_level(sqe, msg->comp_lv); - if (unlikely(ret)) - return ret; - ret = 
ops[alg_type].fill_buf[msg->req.data_fmt]((handle_t)qp, sqe, msg); if (unlikely(ret)) return ret; @@ -1091,6 +1276,10 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, ops[alg_type].fill_alg(sqe); + ret = ops[alg_type].fill_comp_level(sqe, msg->comp_lv); + if (unlikely(ret)) + return ret; + ops[alg_type].fill_tag(sqe, msg->tag); state = (msg->stream_mode == WD_COMP_STATEFUL) ? HZ_STATEFUL : @@ -1132,7 +1321,7 @@ static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); - if (alg_type == WD_LZ77_ZSTD) { + if (alg_type == WD_LZ77_ZSTD || alg_type == WD_LZ77_ONLY) { hw_sgl_out = VA_ADDR(sqe->literals_addr_h, sqe->literals_addr_l); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); @@ -1190,6 +1379,10 @@ static int get_alg_type(__u32 type) case HW_LZ77_ZSTD_PRICE: alg_type = WD_LZ77_ZSTD; break; + case HW_LZ77_ONLY: + case HW_LZ77_ONLY_PRICE: + alg_type = WD_LZ77_ONLY; + break; default: break; } @@ -1369,6 +1562,7 @@ static struct wd_alg_driver zip_alg_driver[] = { GEN_ZIP_ALG_DRIVER("deflate"), GEN_ZIP_ALG_DRIVER("lz77_zstd"), + GEN_ZIP_ALG_DRIVER("lz77_only"), }; #ifdef WD_STATIC_DRV diff --git a/include/wd_comp.h b/include/wd_comp.h index 45994ff6..0012ef6b 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -20,6 +20,7 @@ enum wd_comp_alg_type { WD_ZLIB, WD_GZIP, WD_LZ77_ZSTD, + WD_LZ77_ONLY, WD_COMP_ALG_MAX, }; diff --git a/wd_comp.c b/wd_comp.c index 647c320e..8e47a32f 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -27,7 +27,7 @@ #define cpu_to_be32(x) swap_byte(x) static const char *wd_comp_alg_name[WD_COMP_ALG_MAX] = { - "zlib", "gzip", "deflate", "lz77_zstd" + "zlib", "gzip", "deflate", "lz77_zstd", "lz77_only" }; struct wd_comp_sess { diff --git a/wd_util.c b/wd_util.c index 38d2d375..ec8bd7c6 100644 --- a/wd_util.c +++ b/wd_util.c @@ -107,6 +107,7 @@ static struct acc_alg_item alg_options[] = { {"gzip", "gzip"}, {"deflate", 
"deflate"}, {"lz77_zstd", "lz77_zstd"}, + {"lz77_only", "lz77_only"}, {"hashagg", "hashagg"}, {"udma", "udma"}, -- 2.33.0

From: Qinxin Xia <xiaqinxin@huawei.com> Abstract the acquisition of scatter-gather lists (sgl) into function to prevent repetitive code implementations. Modify some deflate functions with general attributes to be universal, allowing other alg to invoke them and enhancing code readability. Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- drv/hisi_comp.c | 147 +++++++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 70 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 1c9f438f..e979f022 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -232,6 +232,15 @@ struct hisi_zip_ctx { struct wd_ctx_config_internal config; }; +struct comp_sgl { + void *in; + void *out; + void *out_seq; + struct wd_datalist *list_src; + struct wd_datalist *list_dst; + struct wd_datalist *seq_start; +}; + static void dump_zip_msg(struct wd_comp_msg *msg) { WD_ERR("dump zip message after a task error occurs.\n"); @@ -246,11 +255,8 @@ static int buf_size_check_deflate(__u32 *in_size, __u32 *out_size) return -WD_EINVAL; } - if (unlikely(*out_size > HZ_MAX_SIZE)) { - WD_ERR("warning: avail_out(%u) is out of range, will set 8MB size max!\n", - *out_size); + if (unlikely(*out_size > HZ_MAX_SIZE)) *out_size = HZ_MAX_SIZE; - } return 0; } @@ -358,7 +364,46 @@ static int check_enable_store_buf(struct wd_comp_msg *msg, __u32 out_size, int h return 0; } -static void fill_buf_size_deflate(struct hisi_zip_sqe *sqe, __u32 in_size, +static int get_sgl_from_pool(handle_t h_qp, struct comp_sgl *c_sgl) +{ + handle_t h_sgl_pool; + + h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool!\n"); + return -WD_EINVAL; + } + + c_sgl->in = hisi_qm_get_hw_sgl(h_sgl_pool, c_sgl->list_src); + if (unlikely(!c_sgl->in)) { + WD_ERR("failed to get hw sgl in!\n"); + return -WD_ENOMEM; + } + + c_sgl->out = hisi_qm_get_hw_sgl(h_sgl_pool, c_sgl->list_dst); + if (unlikely(!c_sgl->out)) { + 
WD_ERR("failed to get hw sgl out!\n"); + goto err_free_sgl_in; + } + + if (c_sgl->seq_start) { + c_sgl->out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, c_sgl->seq_start); + if (unlikely(!c_sgl->out_seq)) { + WD_ERR("failed to get hw sgl out for sequences!\n"); + goto err_free_sgl_out; + } + } + + return 0; + +err_free_sgl_out: + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->out); +err_free_sgl_in: + hisi_qm_put_hw_sgl(h_sgl_pool, c_sgl->in); + return -WD_ENOMEM; +} + +static void fill_comp_buf_size(struct hisi_zip_sqe *sqe, __u32 in_size, __u32 out_size) { sqe->input_data_length = in_size; @@ -430,7 +475,7 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_size_deflate(sqe, in_size, out_size); + fill_comp_buf_size(sqe, in_size, out_size); if (msg->ctx_buf) ctx_buf = msg->ctx_buf + RSV_OFFSET; @@ -471,29 +516,18 @@ static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_datalist *list_src, struct wd_datalist *list_dst) { - void *hw_sgl_in, *hw_sgl_out; - handle_t h_sgl_pool; - - h_sgl_pool = hisi_qm_get_sglpool(h_qp); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool!\n"); - return -WD_EINVAL; - } + struct comp_sgl c_sgl; + int ret; - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src); - if (unlikely(!hw_sgl_in)) { - WD_ERR("failed to get hw sgl in!\n"); - return -WD_ENOMEM; - } + c_sgl.list_src = list_src; + c_sgl.list_dst = list_dst; + c_sgl.seq_start = NULL; - hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst); - if (unlikely(!hw_sgl_out)) { - WD_ERR("failed to get hw sgl out!\n"); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); - return -WD_ENOMEM; - } + ret = get_sgl_from_pool(h_qp, &c_sgl); + if (unlikely(ret)) + return ret; - fill_buf_addr_deflate(sqe, hw_sgl_in, hw_sgl_out, NULL); + fill_buf_addr_deflate(sqe, c_sgl.in, c_sgl.out, NULL); return 0; } @@ -572,7 +606,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_sgl_skip(sqe, 
src_skip, dst_skip); - fill_buf_size_deflate(sqe, in_size, out_size); + fill_comp_buf_size(sqe, in_size, out_size); return 0; } @@ -729,11 +763,8 @@ static int fill_buf_lz77_zstd(handle_t h_qp, struct hisi_zip_sqe *sqe, if (ret) return ret; - if (unlikely(seq_avail_out > HZ_MAX_SIZE)) { - WD_ERR("warning: sequence avail_out(%u) is out of range , will set 8MB size max!\n", - seq_avail_out); + if (unlikely(seq_avail_out > HZ_MAX_SIZE)) seq_avail_out = HZ_MAX_SIZE; - } if (msg->ctx_buf) { ctx_buf = msg->ctx_buf + RSV_OFFSET; @@ -867,13 +898,12 @@ static int lz77_buf_check_sgl(struct wd_comp_msg *msg, __u32 lits_size) static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, struct wd_comp_msg *msg) { - void *hw_sgl_in, *hw_sgl_out_lit, *hw_sgl_out_seq; struct wd_comp_req *req = &msg->req; struct wd_lz77_zstd_data *data = req->priv; __u32 in_size = msg->req.src_len; __u32 out_size = msg->avail_out; struct wd_datalist *seq_start; - handle_t h_sgl_pool; + struct comp_sgl c_sgl; __u32 lits_size; int ret; @@ -899,42 +929,19 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size); - h_sgl_pool = hisi_qm_get_sglpool(h_qp); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool!\n"); - return -WD_EINVAL; - } - - hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src); - if (unlikely(!hw_sgl_in)) { - WD_ERR("failed to get hw sgl in!\n"); - return -WD_ENOMEM; - } + c_sgl.list_src = req->list_src; + c_sgl.list_dst = req->list_dst; + c_sgl.seq_start = seq_start; - hw_sgl_out_lit = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst); - if (unlikely(!hw_sgl_out_lit)) { - WD_ERR("failed to get hw sgl out for literals!\n"); - ret = -WD_ENOMEM; - goto err_free_sgl_in; - } + ret = get_sgl_from_pool(h_qp, &c_sgl); + if (unlikely(ret)) + return ret; - hw_sgl_out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, seq_start); - if (unlikely(!hw_sgl_out_seq)) { - WD_ERR("failed to get hw sgl 
out for sequences!\n"); - ret = -WD_ENOMEM; - goto err_free_sgl_out_lit; - } - fill_buf_addr_lz77_zstd(sqe, hw_sgl_in, hw_sgl_out_lit, - hw_sgl_out_seq, NULL); + fill_buf_addr_lz77_zstd(sqe, c_sgl.in, c_sgl.out, + c_sgl.out_seq, NULL); return 0; - -err_free_sgl_out_lit: - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out_lit); -err_free_sgl_in: - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); - return ret; } static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) @@ -1005,7 +1012,7 @@ static void fill_tag_v3(struct hisi_zip_sqe *sqe, __u32 tag) sqe->dw26 = tag; } -static int fill_comp_level_deflate(struct hisi_zip_sqe *sqe, enum wd_comp_level comp_lv) +static int fill_comp_level(struct hisi_zip_sqe *sqe, enum wd_comp_level comp_lv) { return 0; } @@ -1038,7 +1045,7 @@ static int fill_comp_level_lz77_zstd(struct hisi_zip_sqe *sqe, enum wd_comp_leve return 0; } -static void get_data_size_deflate(struct hisi_zip_sqe *sqe, enum wd_comp_op_type op_type, +static void get_comp_data_size(struct hisi_zip_sqe *sqe, enum wd_comp_op_type op_type, struct wd_comp_msg *recv_msg) { recv_msg->in_cons = sqe->consumed; @@ -1122,22 +1129,22 @@ struct hisi_zip_sqe_ops ops[] = { { .fill_sqe_type = fill_sqe_type_v3, .fill_alg = fill_alg_deflate, .fill_tag = fill_tag_v3, - .fill_comp_level = fill_comp_level_deflate, - .get_data_size = get_data_size_deflate, + .fill_comp_level = fill_comp_level, + .get_data_size = get_comp_data_size, .get_tag = get_tag_v3, }, { .alg_name = "zlib", .fill_buf[WD_FLAT_BUF] = fill_buf_zlib, .fill_buf[WD_SGL_BUF] = fill_buf_zlib_sgl, .fill_alg = fill_alg_zlib, - .fill_comp_level = fill_comp_level_deflate, + .fill_comp_level = fill_comp_level, .get_data_size = get_data_size_zlib, }, { .alg_name = "gzip", .fill_buf[WD_FLAT_BUF] = fill_buf_gzip, .fill_buf[WD_SGL_BUF] = fill_buf_gzip_sgl, .fill_alg = fill_alg_gzip, - .fill_comp_level = fill_comp_level_deflate, + .fill_comp_level = fill_comp_level, .get_data_size = get_data_size_gzip, }, { .alg_name = "lz77_zstd", -- 2.33.0

From: Qinxin Xia <xiaqinxin@huawei.com> Support 'lz4' algorithm in hisilicon driver. Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com> --- drv/hisi_comp.c | 120 ++++++++++++++++++++++++++++++++++++++++++++-- include/wd_comp.h | 1 + 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index e979f022..001150ec 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -109,6 +109,7 @@ enum alg_type { HW_DEFLATE = 0x1, HW_ZLIB, HW_GZIP, + HW_LZ4, HW_LZ77_ZSTD_PRICE = 0x42, HW_LZ77_ZSTD, HW_LZ77_ONLY = 0x40, @@ -503,6 +504,59 @@ static int fill_buf_gzip(handle_t h_qp, struct hisi_zip_sqe *sqe, return fill_buf_deflate_generic(sqe, msg, GZIP_HEADER, GZIP_HEADER_SZ); } +static void fill_buf_addr_lz4(struct hisi_zip_sqe *sqe, void *src, void *dst) +{ + sqe->source_addr_l = lower_32_bits(src); + sqe->source_addr_h = upper_32_bits(src); + sqe->dest_addr_l = lower_32_bits(dst); + sqe->dest_addr_h = upper_32_bits(dst); +} + +static int check_lz4_msg(struct wd_comp_msg *msg, enum wd_buff_type buf_type) +{ + /* LZ4 only support for compress and block mode */ + if (unlikely(msg->req.op_type != WD_DIR_COMPRESS)) { + WD_ERR("invalid: lz4 only support compress!\n"); + return -WD_EINVAL; + } + + if (unlikely(msg->stream_mode == WD_COMP_STATEFUL)) { + WD_ERR("invalid: lz4 does not support the stream mode!\n"); + return -WD_EINVAL; + } + + if (buf_type != WD_FLAT_BUF) + return 0; + + if (unlikely(msg->req.src_len == 0 || msg->req.src_len > HZ_MAX_SIZE)) { + WD_ERR("invalid: lz4 input size can't be zero or more than 8M size max!\n"); + return -WD_EINVAL; + } + + if (unlikely(msg->avail_out > HZ_MAX_SIZE)) + msg->avail_out = HZ_MAX_SIZE; + + return 0; +} + +static int fill_buf_lz4(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg) +{ + void *src = msg->req.src; + void *dst = msg->req.dst; + int ret; + + ret = check_lz4_msg(msg, WD_FLAT_BUF); + if (unlikely(ret)) + return ret; + + fill_comp_buf_size(sqe, msg->req.src_len, 
msg->avail_out); + + fill_buf_addr_lz4(sqe, src, dst); + + return 0; +} + static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) { __u32 val; @@ -664,7 +718,7 @@ static int lz77_zstd_buf_check(struct wd_comp_msg *msg) if (unlikely(in_size > ZSTD_MAX_SIZE)) { WD_ERR("invalid: in_len(%u) of lz77_zstd is out of range!\n", in_size); - return -WD_EINVAL; + return -WD_EINVAL; } if (unlikely(msg->stream_mode == WD_COMP_STATEFUL && msg->comp_lv < WD_COMP_L9 && @@ -937,13 +991,50 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, if (unlikely(ret)) return ret; - fill_buf_addr_lz77_zstd(sqe, c_sgl.in, c_sgl.out, c_sgl.out_seq, NULL); return 0; } +static int fill_buf_addr_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_datalist *list_src, + struct wd_datalist *list_dst) +{ + struct comp_sgl c_sgl; + int ret; + + c_sgl.list_src = list_src; + c_sgl.list_dst = list_dst; + c_sgl.seq_start = NULL; + + ret = get_sgl_from_pool(h_qp, &c_sgl); + if (unlikely(ret)) + return ret; + + fill_buf_addr_lz4(sqe, c_sgl.in, c_sgl.out); + + return 0; +} + +static int fill_buf_lz4_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg) +{ + struct wd_datalist *list_src = msg->req.list_src; + struct wd_datalist *list_dst = msg->req.list_dst; + int ret; + + ret = check_lz4_msg(msg, WD_SGL_BUF); + if (unlikely(ret)) + return ret; + + fill_buf_type_sgl(sqe); + + fill_comp_buf_size(sqe, msg->req.src_len, msg->avail_out); + + return fill_buf_addr_lz4_sgl(h_qp, sqe, list_src, list_dst); +} + static void fill_sqe_type_v1(struct hisi_zip_sqe *sqe) { __u32 val; @@ -1002,6 +1093,15 @@ static void fill_alg_lz77_only(struct hisi_zip_sqe *sqe) sqe->dw9 = val; } +static void fill_alg_lz4(struct hisi_zip_sqe *sqe) +{ + __u32 val; + + val = sqe->dw9 & ~HZ_REQ_TYPE_MASK; + val |= HW_LZ4; + sqe->dw9 = val; +} + static void fill_tag_v1(struct hisi_zip_sqe *sqe, __u32 tag) { sqe->dw13 = tag; @@ -1156,6 +1256,16 @@ struct hisi_zip_sqe_ops ops[] = { { 
.fill_comp_level = fill_comp_level_lz77_zstd, .get_data_size = get_data_size_lz77_zstd, .get_tag = get_tag_v3, + }, { + .alg_name = "lz4", + .fill_buf[WD_FLAT_BUF] = fill_buf_lz4, + .fill_buf[WD_SGL_BUF] = fill_buf_lz4_sgl, + .fill_sqe_type = fill_sqe_type_v3, + .fill_alg = fill_alg_lz4, + .fill_tag = fill_tag_v3, + .fill_comp_level = fill_comp_level, + .get_data_size = get_comp_data_size, + .get_tag = get_tag_v3, }, { .alg_name = "lz77_only", .fill_buf[WD_FLAT_BUF] = fill_buf_lz77_zstd, @@ -1382,6 +1492,9 @@ static int get_alg_type(__u32 type) case HW_GZIP: alg_type = WD_GZIP; break; + case HW_LZ4: + alg_type = WD_LZ4; + break; case HW_LZ77_ZSTD: case HW_LZ77_ZSTD_PRICE: alg_type = WD_LZ77_ZSTD; @@ -1569,7 +1682,8 @@ static struct wd_alg_driver zip_alg_driver[] = { GEN_ZIP_ALG_DRIVER("deflate"), GEN_ZIP_ALG_DRIVER("lz77_zstd"), - GEN_ZIP_ALG_DRIVER("lz77_only"), + GEN_ZIP_ALG_DRIVER("lz4"), + GEN_ZIP_ALG_DRIVER("lz77_only") }; #ifdef WD_STATIC_DRV diff --git a/include/wd_comp.h b/include/wd_comp.h index 0012ef6b..8e056d1c 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -20,6 +20,7 @@ enum wd_comp_alg_type { WD_ZLIB, WD_GZIP, WD_LZ77_ZSTD, + WD_LZ4, WD_LZ77_ONLY, WD_COMP_ALG_MAX, }; -- 2.33.0

From: Qinxin Xia <xiaqinxin@huawei.com> Support the 'lz4' algorithm in wd_comp Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- wd_comp.c | 2 +- wd_util.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/wd_comp.c b/wd_comp.c index 8e47a32f..58f6e451 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -27,7 +27,7 @@ #define cpu_to_be32(x) swap_byte(x) static const char *wd_comp_alg_name[WD_COMP_ALG_MAX] = { - "zlib", "gzip", "deflate", "lz77_zstd", "lz77_only" + "zlib", "gzip", "deflate", "lz77_zstd", "lz4", "lz77_only" }; struct wd_comp_sess { diff --git a/wd_util.c b/wd_util.c index ec8bd7c6..646346a0 100644 --- a/wd_util.c +++ b/wd_util.c @@ -107,6 +107,7 @@ static struct acc_alg_item alg_options[] = { {"gzip", "gzip"}, {"deflate", "deflate"}, {"lz77_zstd", "lz77_zstd"}, + {"lz4", "lz4"}, {"lz77_only", "lz77_only"}, {"hashagg", "hashagg"}, {"udma", "udma"}, -- 2.33.0

From: Zhushuai Yin <yinzhushuai@huawei.com> When the buffer size is configured to be less than 10, gzip will core dump. The root cause is that the access to the head size is not intercepted, leading to subsequent access overflow. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- wd_comp.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/wd_comp.c b/wd_comp.c index 58f6e451..112c97b9 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -17,6 +17,8 @@ #define HW_CTX_SIZE (64 * 1024) #define STREAM_CHUNK (128 * 1024) +#define WD_ZLIB_HEADER_SZ 2 +#define WD_GZIP_HEADER_SZ 10 #define swap_byte(x) \ ((((x) & 0x000000ff) << 24) | \ @@ -522,7 +524,48 @@ static void fill_comp_msg(struct wd_comp_sess *sess, struct wd_comp_msg *msg, msg->req.last = 1; } -static int wd_comp_check_buffer(struct wd_comp_req *req) +static int wd_check_alg_buff_size(struct wd_comp_req *req, struct wd_comp_sess *sess) +{ + if (!req->dst_len) { + WD_ERR("invalid: dst_len is 0!\n"); + return -WD_EINVAL; + } + + /* + * Only the first package needs to be checked, + * the middle and last packages do not need to be checked + */ + if (sess->stream_pos != WD_COMP_STREAM_NEW) + return 0; + + if (sess->alg_type == WD_ZLIB) { + if (req->dst_len <= WD_ZLIB_HEADER_SZ && req->op_type == WD_DIR_COMPRESS) { + WD_ERR("invalid: zlib dst_len(%u) is too small!\n", req->dst_len); + return -WD_EINVAL; + } + + if (req->src_len <= WD_ZLIB_HEADER_SZ && req->op_type == WD_DIR_DECOMPRESS) { + WD_ERR("invalid: zlib src_len(%u) is too small!\n", req->src_len); + return -WD_EINVAL; + } + } + + if (sess->alg_type == WD_GZIP) { + if (req->dst_len <= WD_GZIP_HEADER_SZ && req->op_type == WD_DIR_COMPRESS) { + WD_ERR("invalid: gzip dst_len(%u) is too small!\n", req->dst_len); + return -WD_EINVAL; + } + + if (req->src_len <= WD_GZIP_HEADER_SZ && req->op_type == WD_DIR_DECOMPRESS) { + WD_ERR("invalid: gzip src_len(%u) is too 
small!\n", req->src_len); + return -WD_EINVAL; + } + } + + return 0; +} + +static int wd_comp_check_buffer(struct wd_comp_req *req, struct wd_comp_sess *sess) { if (req->data_fmt == WD_FLAT_BUF) { if (unlikely(!req->src || !req->dst)) { @@ -536,12 +579,7 @@ static int wd_comp_check_buffer(struct wd_comp_req *req) } } - if (!req->dst_len) { - WD_ERR("invalid: dst_len is 0!\n"); - return -WD_EINVAL; - } - - return 0; + return wd_check_alg_buff_size(req, sess); } static int wd_comp_check_params(struct wd_comp_sess *sess, @@ -560,7 +598,7 @@ static int wd_comp_check_params(struct wd_comp_sess *sess, return -WD_EINVAL; } - ret = wd_comp_check_buffer(req); + ret = wd_comp_check_buffer(req, sess); if (unlikely(ret)) return ret; -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> Bit length check for byte alignment and length verification has been completed earlier, so there is no need to check whether the read exceeds the length. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- drv/hisi_comp_huf.c | 9 ++------- v1/drv/hisi_zip_huf.c | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drv/hisi_comp_huf.c b/drv/hisi_comp_huf.c index b9c7f258..3684a187 100644 --- a/drv/hisi_comp_huf.c +++ b/drv/hisi_comp_huf.c @@ -96,15 +96,10 @@ static int check_store_huffman_block(struct bit_reader *br) /* go to a byte boundary */ pad = bit_len & BYTE_ALIGN_MASK; bit_len -= pad; - data = read_bits(br, pad); - if (data < 0) - return BLOCK_IS_INCOMPLETE; - - data = read_bits(br, bit_len); - if (data < 0) - return BLOCK_IS_INCOMPLETE; + br->cur_pos += pad; /* check len and nlen */ + data = read_bits(br, bit_len); if (LEN_NLEN_CHECK(data)) return -WD_EINVAL; diff --git a/v1/drv/hisi_zip_huf.c b/v1/drv/hisi_zip_huf.c index dce9eaf6..086fa9f3 100644 --- a/v1/drv/hisi_zip_huf.c +++ b/v1/drv/hisi_zip_huf.c @@ -96,15 +96,10 @@ static int check_store_huffman_block(struct bit_reader *br) /* go to a byte boundary */ pad = bits & BYTE_ALIGN_MASK; bits -= pad; - data = read_bits(br, pad); - if (data < 0) - return HF_BLOCK_IS_INCOMPLETE; - - data = read_bits(br, bits); - if (data < 0) - return HF_BLOCK_IS_INCOMPLETE; + br->cur_pos += pad; /* check len and nlen */ + data = read_bits(br, bits); if (LEN_NLEN_CHECK(data)) return -WD_EINVAL; -- 2.33.0

From: Longfang Liu <liulongfang@huawei.com> 1. resolve some code defects 2. remove log output when default parameters are used in scheduler initialization to prevent log duplication when using the uadk provider/engine. Signed-off-by: Longfang Liu <liulongfang@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- wd.c | 2 +- wd_sched.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/wd.c b/wd.c index 75a94695..c1cc282c 100644 --- a/wd.c +++ b/wd.c @@ -68,7 +68,7 @@ static void wd_parse_log_level(void) goto close_file; } - if (file_info.st_size > FILE_MAX_SIZE) { + if (file_info.st_size <= 0 || file_info.st_size > FILE_MAX_SIZE) { WD_ERR("failed to check rsyslog.conf size.\n"); goto close_file; } diff --git a/wd_sched.c b/wd_sched.c index aa6c91ec..204ed237 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -192,7 +192,8 @@ static handle_t session_sched_init(handle_t h_sched_ctx, void *sched_param) if (!param) { memset(skey, 0, sizeof(struct sched_key)); skey->numa_id = sched_ctx->numa_map[node]; - WD_INFO("session don't set scheduler parameters!\n"); + if (wd_need_debug()) + WD_DEBUG("session don't set scheduler parameters!\n"); } else if (param->numa_id < 0) { skey->type = param->type; skey->numa_id = sched_ctx->numa_map[node]; -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> If two packets are still unreceived when the task ends and resources are released, resource conflicts can easily arise in algorithms with slower processing times, such as the price model. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- uadk_tool/benchmark/zip_uadk_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index cad8016f..c09df071 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -16,7 +16,7 @@ #define MAX_POOL_LENTH_COMP 1 #define COMPRESSION_RATIO_FACTOR 0.7 #define CHUNK_SIZE (128 * 1024) -#define MAX_UNRECV_PACKET_NUM 2 +#define MAX_UNRECV_PACKET_NUM 1 struct uadk_bd { u8 *src; u8 *dst; -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> Add lz77_only for zip benchmark in v2. We could use "numactl -N 1 -m 1 uadk_tool benchmark --alg lz77_only --mode sva --prefetch --async --pktlen 32768 --seconds 30 --opt 0 --thread 32 --ctxnum 32 --winsize 1 --complevel 8" to test the perf of lz77_only. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- uadk_tool/benchmark/uadk_benchmark.c | 6 ++++++ uadk_tool/benchmark/uadk_benchmark.h | 1 + uadk_tool/benchmark/zip_uadk_benchmark.c | 16 +++++++++++----- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 16980616..90656a6d 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -70,6 +70,7 @@ static struct acc_alg_item alg_options[] = { {"gzip", "gzip", GZIP}, {"deflate", "deflate", DEFLATE}, {"lz77_zstd", "lz77_zstd", LZ77_ZSTD}, + {"lz77_only", "lz77_only", LZ77_ONLY}, {"rsa", "rsa-1024", RSA_1024}, {"rsa", "rsa-2048", RSA_2048}, {"rsa", "rsa-3072", RSA_3072}, @@ -370,6 +371,11 @@ static void parse_alg_param(struct acc_option *option) option->acctype = ZIP_TYPE; option->subtype = DEFAULT_TYPE; break; + case LZ77_ONLY: + snprintf(option->algclass, MAX_ALG_NAME, "%s", "lz77_only"); + option->acctype = ZIP_TYPE; + option->subtype = DEFAULT_TYPE; + break; case SM2_ALG: snprintf(option->algclass, MAX_ALG_NAME, "%s", "sm2"); option->acctype = HPRE_TYPE; diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 2739a0ef..cac25fd1 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -121,6 +121,7 @@ enum test_alg { GZIP, // gzip DEFLATE, // deflate LZ77_ZSTD, // lz77_zstd + LZ77_ONLY, RSA_1024, // rsa RSA_2048, RSA_3072, diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index c09df071..3d45e619 100644 --- 
a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -277,6 +277,12 @@ static int zip_uadk_param_parse(thread_data *tddata, struct acc_option *options) ZIP_TST_PRT("Zip LZ77_ZSTD just support compress!\n"); optype = WD_DIR_COMPRESS; break; + case LZ77_ONLY: + alg = WD_LZ77_ONLY; + if (optype == WD_DIR_DECOMPRESS) + ZIP_TST_PRT("Zip LZ77_ONLY just support compress!\n"); + optype = WD_DIR_COMPRESS; + break; default: ZIP_TST_PRT("failed to set zip alg\n"); return -EINVAL; @@ -1186,12 +1192,12 @@ static int zip_uadk_sync_threads(struct acc_option *options) threads_option.optype = options->optype; if (threads_option.mode == 1) {// stream mode - if (threads_option.alg == LZ77_ZSTD) + if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) uadk_zip_sync_run = zip_uadk_stm_lz77_sync_run; else uadk_zip_sync_run = zip_uadk_stm_sync_run; } else { - if (threads_option.alg == LZ77_ZSTD) + if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) uadk_zip_sync_run = zip_uadk_blk_lz77_sync_run; else uadk_zip_sync_run = zip_uadk_blk_sync_run; @@ -1243,7 +1249,7 @@ static int zip_uadk_async_threads(struct acc_option *options) return 0; } - if (threads_option.alg == LZ77_ZSTD) + if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) uadk_zip_async_run = zip_uadk_blk_lz77_async_run; else uadk_zip_async_run = zip_uadk_blk_async_run; @@ -1268,7 +1274,7 @@ static int zip_uadk_async_threads(struct acc_option *options) threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; threads_args[i].td_id = i; - if (threads_option.alg == LZ77_ZSTD) { + if (threads_option.alg == WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) { struct bd_pool *uadk_pool = &g_zip_pool.pool[i]; u32 out_len = uadk_pool->bds[0].dst_len; @@ -1331,7 +1337,7 @@ tag_free: free(threads_args[i].tag); } lz77_free: - if (threads_option.alg == LZ77_ZSTD) { + if (threads_option.alg 
== WD_LZ77_ZSTD || threads_option.alg == WD_LZ77_ONLY) { for (i = 0; i < g_thread_num; i++) { if (threads_args[i].ftuple) free(threads_args[i].ftuple); -- 2.33.0

From: Chenghai Huang <huangchenghai2@huawei.com> Add lz4 for zip benchmark in v2. We could use "numactl -N 1 -m 1 uadk_tool benchmark --alg lz4 --mode sva --prefetch --async --pktlen 32768 --seconds 30 --opt 0 --thread 32 --ctxnum 32 --winsize 1" to test the perf of lz4. Note that lz4 only supports block mode. Signed-off-by: Chenghai Huang <huangchenghai2@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- uadk_tool/benchmark/uadk_benchmark.c | 6 ++++++ uadk_tool/benchmark/uadk_benchmark.h | 1 + uadk_tool/benchmark/zip_uadk_benchmark.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 90656a6d..ce9a1f2d 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -70,6 +70,7 @@ static struct acc_alg_item alg_options[] = { {"gzip", "gzip", GZIP}, {"deflate", "deflate", DEFLATE}, {"lz77_zstd", "lz77_zstd", LZ77_ZSTD}, + {"lz4", "lz4", LZ4}, {"lz77_only", "lz77_only", LZ77_ONLY}, {"rsa", "rsa-1024", RSA_1024}, {"rsa", "rsa-2048", RSA_2048}, @@ -371,6 +372,11 @@ static void parse_alg_param(struct acc_option *option) option->acctype = ZIP_TYPE; option->subtype = DEFAULT_TYPE; break; + case LZ4: + snprintf(option->algclass, MAX_ALG_NAME, "%s", "lz4"); + option->acctype = ZIP_TYPE; + option->subtype = DEFAULT_TYPE; + break; case LZ77_ONLY: snprintf(option->algclass, MAX_ALG_NAME, "%s", "lz77_only"); option->acctype = ZIP_TYPE; diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index cac25fd1..90df2ead 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -121,6 +121,7 @@ enum test_alg { GZIP, // gzip DEFLATE, // deflate LZ77_ZSTD, // lz77_zstd + LZ4, LZ77_ONLY, RSA_1024, // rsa RSA_2048, diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index 3d45e619..7a42d9ea 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ 
b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -277,6 +277,12 @@ static int zip_uadk_param_parse(thread_data *tddata, struct acc_option *options) ZIP_TST_PRT("Zip LZ77_ZSTD just support compress!\n"); optype = WD_DIR_COMPRESS; break; + case LZ4: + alg = WD_LZ4; + if (optype == WD_DIR_DECOMPRESS) + ZIP_TST_PRT("Zip LZ4 just support compress!\n"); + optype = WD_DIR_COMPRESS; + break; case LZ77_ONLY: alg = WD_LZ77_ONLY; if (optype == WD_DIR_DECOMPRESS) -- 2.33.0

uadk_tool benchmark supports AES_256_GCM, AES_256_CBC_SHA256_HMAC, AES_128_CBC_SHA1_HMAC and SM4_CBC_SM3_HMAC algorithms. Signed-off-by: Qi Tao <taoqi10@huawei.com> --- uadk_tool/benchmark/sec_uadk_benchmark.c | 34 ++++++++++++++++++++++++ uadk_tool/benchmark/sec_wd_benchmark.c | 34 ++++++++++++++++++++++++ uadk_tool/benchmark/uadk_benchmark.c | 2 ++ uadk_tool/benchmark/uadk_benchmark.h | 2 ++ 4 files changed, 72 insertions(+) diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index 7cc3f4a4..acfd833e 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -82,10 +82,26 @@ struct aead_alg_info aead_info[] = { .index = AES_128_GCM, .name = "AES_128_GCM", .mac_len = 16, + }, { + .index = AES_256_GCM, + .name = "AES_256_GCM", + .mac_len = 16, }, { .index = AES_128_CBC_SHA256_HMAC, .name = "AES_128_CBC_SHA256_HMAC", .mac_len = 32, + }, { + .index = AES_256_CBC_SHA256_HMAC, + .name = "AES_256_CBC_SHA256_HMAC", + .mac_len = 32, + }, { + .index = AES_128_CBC_SHA1_HMAC, + .name = "AES_128_CBC_SHA1_HMAC", + .mac_len = 20, + }, { + .index = SM4_CBC_SM3_HMAC, + .name = "SM4_CBC_SM3_HMAC", + .mac_len = 32, }, { .index = SM4_128_GCM, .name = "SM4_128_GCM", @@ -459,6 +475,24 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) dalg = WD_DIGEST_SHA256; dmode = WD_DIGEST_HMAC; break; + case AES_128_CBC_SHA1_HMAC: + keysize = 16; + ivsize = 16; + mode = WD_CIPHER_CBC; + alg = WD_CIPHER_AES; + is_union = true; + dalg = WD_DIGEST_SHA1; + dmode = WD_DIGEST_HMAC; + break; + case SM4_CBC_SM3_HMAC: + keysize = 16; + ivsize = 16; + mode = WD_CIPHER_CBC; + alg = WD_CIPHER_SM4; + is_union = true; + dalg = WD_DIGEST_SM3; + dmode = WD_DIGEST_HMAC; + break; case SM4_128_CCM: keysize = 16; ivsize = 16; diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c index f066a5b9..a3d62cd3 100644 --- a/uadk_tool/benchmark/sec_wd_benchmark.c 
+++ b/uadk_tool/benchmark/sec_wd_benchmark.c @@ -79,10 +79,26 @@ static struct aead_alg_info wd_aead_info[] = { .index = AES_128_GCM, .name = "AES_128_GCM", .mac_len = 16, + }, { + .index = AES_256_GCM, + .name = "AES_256_GCM", + .mac_len = 16, }, { .index = AES_128_CBC_SHA256_HMAC, .name = "AES_128_CBC_SHA256_HMAC", .mac_len = 32, + }, { + .index = AES_256_CBC_SHA256_HMAC, + .name = "AES_256_CBC_SHA256_HMAC", + .mac_len = 32, + }, { + .index = AES_128_CBC_SHA1_HMAC, + .name = "AES_128_CBC_SHA1_HMAC", + .mac_len = 20, + }, { + .index = SM4_CBC_SM3_HMAC, + .name = "SM4_CBC_SM3_HMAC", + .mac_len = 32, }, { .index = SM4_128_GCM, .name = "SM4_128_GCM", @@ -523,6 +539,24 @@ static int sec_wd_param_parse(thread_data *tddata, struct acc_option *options) dalg = WCRYPTO_SHA256; dmode = WCRYPTO_DIGEST_HMAC; break; + case AES_128_CBC_SHA1_HMAC: + keysize = 16; + ivsize = 16; + mode = WCRYPTO_CIPHER_CBC; + alg = WCRYPTO_CIPHER_AES; + is_union = true; + dalg = WCRYPTO_SHA1; + dmode = WCRYPTO_DIGEST_HMAC; + break; + case SM4_CBC_SM3_HMAC: + keysize = 16; + ivsize = 16; + mode = WCRYPTO_CIPHER_CBC; + alg = WCRYPTO_CIPHER_SM4; + is_union = true; + dalg = WCRYPTO_SM3; + dmode = WCRYPTO_DIGEST_HMAC; + break; case SM4_128_CCM: keysize = 16; ivsize = 16; diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index ce9a1f2d..ffcf176a 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -144,6 +144,8 @@ static struct acc_alg_item alg_options[] = { {"authenc(generic,cbc(aes))", "aes-128-cbc-sha256-hmac", AES_128_CBC_SHA256_HMAC}, {"authenc(generic,cbc(aes))", "aes-192-cbc-sha256-hmac", AES_192_CBC_SHA256_HMAC}, {"authenc(generic,cbc(aes))", "aes-256-cbc-sha256-hmac", AES_256_CBC_SHA256_HMAC}, + {"authenc(generic,cbc(aes))", "aes-128-cbc-sha1-hmac", AES_128_CBC_SHA1_HMAC}, + {"authenc(generic,cbc(sm4))", "sm4-cbc-sm3-hmac", SM4_CBC_SM3_HMAC}, {"ccm(sm4)", "sm4-128-ccm", SM4_128_CCM}, {"gcm(sm4)", "sm4-128-gcm", 
SM4_128_GCM}, {"sm3", "sm3", SM3_ALG}, diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 90df2ead..145bed67 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -195,6 +195,8 @@ enum test_alg { AES_128_CBC_SHA256_HMAC, AES_192_CBC_SHA256_HMAC, AES_256_CBC_SHA256_HMAC, + AES_128_CBC_SHA1_HMAC, + SM4_CBC_SM3_HMAC, SM4_128_CCM, SM4_128_GCM, SM3_ALG, // digest -- 2.33.0

From: Zhushuai Yin <yinzhushuai@huawei.com> The file_path caused a stack overflow due to the use of 4096 bytes of stack memory, leading to insufficient stack memory and a memory overwrite. This resulted in the addresses of the func and funcargs within the async job being overwritten. Signed-off-by: Zhushuai Yin <yinzhushuai@huawei.com> Signed-off-by: Qi Tao <taoqi10@huawei.com> --- wd_util.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/wd_util.c b/wd_util.c index 646346a0..199ee608 100644 --- a/wd_util.c +++ b/wd_util.c @@ -34,6 +34,8 @@ #define WD_DRV_LIB_DIR "uadk" +#define WD_PATH_DIR_NUM 2 + struct msg_pool { /* message array allocated dynamically */ void *msgs; @@ -2164,8 +2166,7 @@ static void dladdr_empty(void) int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) { - char file_path[PATH_MAX] = {0}; - char path[PATH_MAX] = {0}; + char *path_buf, *path, *file_path; Dl_info file_info; int len, rc, i; @@ -2175,6 +2176,14 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) WD_ERR("fail to get lib file path.\n"); return -WD_EINVAL; } + + path_buf = calloc(WD_PATH_DIR_NUM, sizeof(char) * PATH_MAX); + if (!path_buf) { + WD_ERR("fail to calloc path_buf.\n"); + return -WD_ENOMEM; + } + file_path = path_buf; + path = path_buf + PATH_MAX; strncpy(file_path, file_info.dli_fname, PATH_MAX - 1); /* Clear the file path's tail file name */ @@ -2189,19 +2198,24 @@ int wd_get_lib_file_path(const char *lib_file, char *lib_path, bool is_dir) if (is_dir) { len = snprintf(lib_path, PATH_MAX, "%s/%s", file_path, WD_DRV_LIB_DIR); if (len >= PATH_MAX) - return -WD_EINVAL; + goto free_path; } else { len = snprintf(lib_path, PATH_MAX, "%s/%s/%s", file_path, WD_DRV_LIB_DIR, lib_file); if (len >= PATH_MAX) - return -WD_EINVAL; + goto free_path; } if (realpath(lib_path, path) == NULL) { WD_ERR("invalid: %s: no such file or directory!\n", path); - return -WD_EINVAL; + goto free_path; } + 
free(path_buf); return 0; + +free_path: + free(path_buf); + return -WD_EINVAL; } /** -- 2.33.0

From: Longfang Liu <liulongfang@huawei.com> In the No-SVA mode of UADK, a feature was implemented to reserve memory allocated for shared queues, which was exposed to external users through the wd_share_reserved_memory interface. After this functionality was removed from the kernel-space UACCE, the corresponding external interface in UADK must also be deleted to maintain consistency. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- v1/uacce.h | 1 - v1/wd.c | 57 ++---------------------------------------------------- v1/wd.h | 2 -- 3 files changed, 2 insertions(+), 58 deletions(-) diff --git a/v1/uacce.h b/v1/uacce.h index eef932c7..ffd576c1 100644 --- a/v1/uacce.h +++ b/v1/uacce.h @@ -79,7 +79,6 @@ enum uacce_qfrt { * Optimization method since close fd may delay */ #define WD_UACCE_CMD_PUT_Q _IO('W', 1) -#define WD_UACCE_CMD_SHARE_SVAS _IO('W', 2) #define WD_UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long) #endif diff --git a/v1/wd.c b/v1/wd.c index 13239b58..bab2712f 100644 --- a/v1/wd.c +++ b/v1/wd.c @@ -58,7 +58,6 @@ struct dev_info { int node_id; int numa_dis; int flags; - int ref; int available_instances; int iommu_type; unsigned int weight; @@ -531,7 +530,6 @@ static int get_queue_from_dev(struct wd_queue *q, const struct dev_info *dev) qinfo->iommu_type = dev->iommu_type; qinfo->dev_info = dev; qinfo->head = &qinfo->ss_list; - __atomic_clear(&qinfo->ref, __ATOMIC_RELEASE); TAILQ_INIT(&qinfo->ss_list); memcpy(qinfo->qfrs_offset, dev->qfrs_offset, sizeof(qinfo->qfrs_offset)); @@ -618,23 +616,14 @@ err_with_dev: void wd_release_queue(struct wd_queue *q) { - struct wd_ss_region_list *head; - struct q_info *qinfo, *sqinfo; + struct q_info *qinfo; if (!q || !q->qinfo) { WD_ERR("release queue parameter error!\n"); return; } - qinfo = q->qinfo; - if (__atomic_load_n(&qinfo->ref, __ATOMIC_RELAXED)) { - WD_ERR("q(%s) is busy, release fail!\n", q->capa.alg); - return; - } - head = qinfo->head; - sqinfo = container_of(head, struct q_info, ss_list); - if (sqinfo != 
qinfo) /* q_share */ - __atomic_sub_fetch(&sqinfo->ref, 1, __ATOMIC_RELAXED); + qinfo = q->qinfo; if (ioctl(qinfo->fd, WD_UACCE_CMD_PUT_Q)) WD_ERR("failed to put queue!\n"); @@ -721,48 +710,6 @@ void *wd_reserve_memory(struct wd_queue *q, size_t size) return drv_reserve_mem(q, size); } -int wd_share_reserved_memory(struct wd_queue *q, - struct wd_queue *target_q) -{ - const struct dev_info *info, *tgt_info; - struct q_info *qinfo, *tqinfo; - int ret; - - if (!q || !target_q || !q->qinfo || !target_q->qinfo) { - WD_ERR("wd share reserved memory: parameter err!\n"); - return -WD_EINVAL; - } - - qinfo = q->qinfo; - tqinfo = target_q->qinfo; - tgt_info = tqinfo->dev_info; - info = qinfo->dev_info; - - /* Just share DMA memory from 'q' in NO-IOMMU mode */ - if (qinfo->iommu_type) { - WD_ERR("IOMMU opened, not support share mem!\n"); - return -WD_EINVAL; - } - - if (qinfo->iommu_type != tqinfo->iommu_type) { - WD_ERR("IOMMU type mismatching as share mem!\n"); - return -WD_EINVAL; - } - if (info->node_id != tgt_info->node_id) - WD_ERR("Warn: the 2 queues is not at the same node!\n"); - - ret = ioctl(qinfo->fd, WD_UACCE_CMD_SHARE_SVAS, tqinfo->fd); - if (ret) { - WD_ERR("ioctl share dma memory fail!\n"); - return ret; - } - - tqinfo->head = qinfo->head; - __atomic_add_fetch(&qinfo->ref, 1, __ATOMIC_RELAXED); - - return 0; -} - int wd_get_available_dev_num(const char *algorithm) { struct wd_queue q; diff --git a/v1/wd.h b/v1/wd.h index e3effa75..0132e254 100644 --- a/v1/wd.h +++ b/v1/wd.h @@ -210,8 +210,6 @@ int wd_recv(struct wd_queue *q, void **resp); int wd_wait(struct wd_queue *q, __u16 ms); int wd_recv_sync(struct wd_queue *q, void **resp, __u16 ms); void *wd_reserve_memory(struct wd_queue *q, size_t size); -int wd_share_reserved_memory(struct wd_queue *q, - struct wd_queue *target_q); int wd_get_available_dev_num(const char *algorithm); int wd_get_node_id(struct wd_queue *q); void *wd_iova_map(struct wd_queue *q, void *va, size_t sz); -- 2.33.0

From: Longfang Liu <liulongfang@huawei.com> After removing the shared queue memory allocation interface, the UADK test tools must also eliminate the shared memory functionality. For individual memory reservations, the wd_reserve_memory interface should be used. When allocating memory for multiple queues, each queue should independently request its own reserved memory allocation. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- v1/test/hisi_hpre_test/hpre_test_tools.c | 392 ----------------------- v1/test/hisi_zip_test_sgl/wd_sched_sgl.c | 310 +++++++++--------- v1/test/test_mm/test_wd_mem.c | 8 +- v1/test/wd_sched.c | 247 +++++++------- 4 files changed, 300 insertions(+), 657 deletions(-) diff --git a/v1/test/hisi_hpre_test/hpre_test_tools.c b/v1/test/hisi_hpre_test/hpre_test_tools.c index 7f562f34..10a4ade9 100755 --- a/v1/test/hisi_hpre_test/hpre_test_tools.c +++ b/v1/test/hisi_hpre_test/hpre_test_tools.c @@ -644,317 +644,6 @@ int application_release_multiple_queue(char *dev, char *alg_type, unsigned int q printf("application_release_multiple_queue test end!\n"); return 0; } - -/*** - -***/ -int hpre_dev_queue_share(char *dev, char * share_dev, char *alg_type, unsigned long m_size) -{ - void *addr=NULL; - int ret = 0; - struct wd_queue q; - struct wd_queue target_q; - unsigned long memory_size; - - memset((void *)&q, 0, sizeof(q)); - q.capa.alg = alg_type; - snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev); - printf("queue path:%s\n", q.dev_path); - - ret = wd_request_queue(&q); - if(ret) - { - printf("wd request queue fail!\n"); - return 1; - } - printf("wd request queue success!\n"); - memory_size = m_size; - addr = wd_reserve_memory(&q, memory_size); - if(!addr) - { - wd_release_queue(&q); - printf("wd reserve memory fail!\n"); - return 1; - } - printf("wd reserve memory success!\n"); - memset(addr, 0, memory_size); - - memset((void *)&target_q, 0, sizeof(target_q)); - target_q.capa.alg = alg_type; - snprintf(target_q.dev_path, 
sizeof(target_q.dev_path), "%s", share_dev); - printf("target queue path:%s\n", target_q.dev_path); - - ret = wd_request_queue(&target_q); - if(ret) - { - wd_release_queue(&q); - printf("wd request target_q queue fail!\n"); - return 1; - } - printf("wd request target_q queue success!\n"); - //target_q队列共享q队列预留内存; - ret = wd_share_reserved_memory(&q, &target_q); - if(ret) - { - wd_release_queue(&q); - wd_release_queue(&target_q); - printf("wd target_q queue share reserved memory fail!\n"); - return 1; - } - printf("wd target_q queue share reserved memory success!\n"); - wd_release_queue(&target_q); - wd_release_queue(&q); - - return 0; -} -/*** - -***/ -int hpre_node_queue_share(char *dev, unsigned int node, unsigned int share_node, char *alg_type, unsigned long m_size) -{ - void *addr=NULL; - int ret = 0; - struct wd_queue q; - struct wd_queue target_q; - unsigned long memory_size; - - memset((void *)&q, 0, sizeof(q)); - q.capa.alg = alg_type; - snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev); - printf("queue path:%s\n", q.dev_path); - q.node_mask = node; - - ret = wd_request_queue(&q); - if(ret) - { - printf("wd request queue fail!\n"); - return 1; - } - printf("wd request queue success!\n"); - memory_size = m_size; - addr = wd_reserve_memory(&q, memory_size); - if(!addr) - { - wd_release_queue(&q); - printf("wd reserve memory fail!\n"); - return 1; - } - printf("wd reserve memory success!\n"); - memset(addr, 0, memory_size); - - memset((void *)&target_q, 0, sizeof(target_q)); - target_q.capa.alg = alg_type; - target_q.node_mask = node; - - ret = wd_request_queue(&target_q); - if(ret) - { - wd_release_queue(&q); - printf("wd request target_q queue fail!\n"); - return 1; - } - printf("wd request target_q queue success!\n"); - //target_q队列共享q队列预留内存; - ret = do_dh(&q); - if(ret) - { - printf("do dh on q fail!\n"); - return 1; - } - ret = do_dh(&target_q); - if(ret) - { - printf("do dh on target q fail!\n"); - return 1; - } - - ret = wd_share_reserved_memory(&q, 
&target_q); - - if(ret) - { - wd_release_queue(&q); - wd_release_queue(&target_q); - printf("wd target_q queue share reserved memory fail!\n"); - return 1; - } - printf("wd target_q queue share reserved memory success!\n"); - ret = do_dh(&q); - if(ret) - { - printf("do dh on share q fail!\n"); - return 1; - } - ret = do_dh(&target_q); - if(ret) - { - printf("do dh on share target q fail!\n"); - return 1; - } - - wd_release_queue(&target_q); - wd_release_queue(&q); - - return 0; -} -/*** - -***/ -int hpre_dev_queue_interact_share(char *dev, char * share_dev, char *alg_type, unsigned long m_size) -{ - void *addr=NULL; - int ret = 0; - struct wd_queue q; - struct wd_queue target_q; - unsigned long memory_size; - - memset((void *)&q, 0, sizeof(q)); - q.capa.alg = alg_type; - snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev); - printf("queue path:%s\n", q.dev_path); - - ret = wd_request_queue(&q); - if(ret) - { - printf("wd request queue fail!\n"); - return ret; - } - printf("wd request queue success!\n"); - memory_size = m_size; - addr = wd_reserve_memory(&q, memory_size); - if(!addr) - { - wd_release_queue(&q); - printf("wd reserve memory fail!\n"); - return 1; - } - printf("wd reserve memory success!\n"); - memset(addr, 0, memory_size); - - memset((void *)&target_q, 0, sizeof(target_q)); - target_q.capa.alg = alg_type; - snprintf(target_q.dev_path, sizeof(target_q.dev_path), "%s", share_dev); - printf("target queue path:%s\n", target_q.dev_path); - - ret = wd_request_queue(&target_q); - if(ret) - { - wd_release_queue(&q); - printf("wd request target_q queue fail!\n"); - return 1; - } - printf("wd request target_q queue success!\n"); - addr = wd_reserve_memory(&target_q, memory_size); - if(!addr) - { - wd_release_queue(&q); - wd_release_queue(&target_q); - printf("wd reserve memory fail!\n"); - return 1; - } - printf("wd reserve memory success!\n"); - memset(addr, 0, memory_size); - - //target_q - ret = wd_share_reserved_memory(&q, &target_q); - if(ret) - { - 
wd_release_queue(&q); - wd_release_queue(&target_q); - printf("wd target_q queue share reserved memory fail!\n"); - return 1; - } - printf("wd target_q queue share reserved memory success!\n"); - - wd_release_queue(&target_q); - wd_release_queue(&q); - - return 0; -} - -/*** - -***/ -int hpre_dev_queue_cross_proc_share(char *dev, char *alg_type, unsigned long m_size) -{ - void *addr=NULL; - int ret = 0; - pid_t pid; - struct wd_queue q; - struct wd_queue target_q; - unsigned long memory_size=0; - - pid = fork(); - if(pid < 0) - { - printf("Creation process failed, pid:%d\n",pid); - return 1; - } - else if(pid == 0) - { - printf("child process:%d\n", pid); - memset((void *)&q, 0, sizeof(q)); - q.capa.alg = alg_type; - snprintf(q.dev_path, sizeof(q.dev_path), "%s", dev); - printf("queue path:%s\n", q.dev_path); - - ret = wd_request_queue(&q); - if(ret) - { - printf("request queue fail!\n"); - exit(1); - } - printf("wd request queue success!\n"); - memory_size = m_size; - addr = wd_reserve_memory(&q, memory_size); - if(!addr) - { - wd_release_queue(&q); - printf("queue reserve memory fail!\n"); - exit(2); - } - printf("queue reserve memory success!\n"); - memset(addr, 0, memory_size); - exit(0); - } - printf("parent process:%d\n", pid); - pid_t wpid; - int status = -1; - wpid = waitpid(pid, &status, WUNTRACED | WCONTINUED); - if( wpid < 0) - { - printf("exited, status=%d\n", WEXITSTATUS(status)); - return(status); - } - - memset((void *)&target_q, 0, sizeof(target_q)); - target_q.capa.alg = alg_type; - snprintf(target_q.dev_path, sizeof(target_q.dev_path), "%s", dev); - printf("target queue path:%s\n", target_q.dev_path); - - ret = wd_request_queue(&target_q); - if(ret) - { - wd_release_queue(&q); - printf("wd request target_q queue fail!\n"); - return 1; - } - printf("wd request target_q queue success!\n"); - ret = wd_share_reserved_memory(&q, &target_q); - if(ret) - { - wd_release_queue(&target_q); - wd_release_queue(&q); - printf("wd target_q queue share reserved 
memory fail!\n"); - return 1; - } - printf("wd target_q queue share reserved memory success!\n"); - - wd_release_queue(&target_q); - wd_release_queue(&q); - - return 0; -} - /*** ***/ @@ -1696,87 +1385,6 @@ int main(int arc, char *argv[]) return 1; } } - else if(!strcmp(argv[1], "queue-share")) - { - /*** - argv[2] - 表示算法类型 - argv[3] - 表示申请队列设备 - argv[4] - 表示共享预留内存的设备 - argv[5] - 表示申请队列的预留内存大小 - ***/ - //申请单个队列,预留内存,与其它队列共享预留内存 - snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]); - snprintf(dev, sizeof(dev), "%s", argv[3]); - snprintf(share_dev, sizeof(share_dev), "%s", argv[4]); - memory_size = strtoul(argv[5], NULL, 10); - - ret = hpre_dev_queue_share(dev, share_dev, algorithm_type, memory_size); - if(0 != ret) - { - return 1; - } - } - else if(!strcmp(argv[1], "node-queue-share")) - { - /*** - argv[2] - 表示算法类型 - argv[3] - 表示申请队列设备 - argv[4] - 表示设备node - argv[5] - 表示共享内存设备node - argv[6] - 表示申请队列的预留内存大小 - ***/ - //申请单个队列,预留内存,与其它队列共享预留内存 - snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]); - snprintf(dev, sizeof(dev), "%s", argv[3]); - unsigned int node=0; - node = strtoul(argv[4], NULL, 16); - unsigned int share_node=0; - share_node = strtoul(argv[5], NULL, 16); - memory_size = strtoul(argv[6], NULL, 10); - - ret = hpre_node_queue_share(dev, node, share_node, algorithm_type, memory_size); - if(0 != ret) - { - return 1; - } - } - else if(!strcmp(argv[1], "queue-interact-share")) - { - /*** - argv[2] - 表示算法类型 - argv[3] - 表示申请队列设备 - argv[4] - 表示共享预留内存的设备 - argv[5] - 表示申请队列的预留内存大小 - ***/ - //队列预留内存后作为共享的目标队列 - snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]); - snprintf(dev, sizeof(dev), "%s", argv[3]); - snprintf(share_dev, sizeof(share_dev), "%s", argv[4]); - memory_size = strtoul(argv[5], NULL, 10); - - ret = hpre_dev_queue_interact_share(dev, share_dev, algorithm_type, memory_size); - if(0 != ret) - { - return 1; - } - } - else if(!strcmp(argv[1], "queue-cross-proc-share")) - { - /*** - argv[2] - 表示算法类型 - argv[3] - 
表示申请队列设备 - argv[4] - 表示申请队列的预留内存大小 - ***/ - //跨进程进行队列共享 - snprintf(algorithm_type, sizeof(algorithm_type), "%s", argv[2]); - snprintf(dev, sizeof(dev), "%s", argv[3]); - memory_size = strtoul(argv[4], NULL, 10); - ret = hpre_dev_queue_cross_proc_share(dev, algorithm_type, memory_size); - if(0 != ret) - { - return 1; - } - } else if(!strcmp(argv[1], "mult-thread-queue")) { /*** diff --git a/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c b/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c index 31637565..7a3be22c 100644 --- a/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c +++ b/v1/test/hisi_zip_test_sgl/wd_sched_sgl.c @@ -23,96 +23,33 @@ #define EXTRA_SIZE 4096 #define WD_WAIT_MS 1000 -static int __init_cache(struct wd_scheduler *sched, int data_fmt) +static int wd_sched_pre_uninit(struct wd_scheduler *sched, int data_fmt) { - int i; - int ret = -ENOMEM; + unsigned int flags = 0; struct q_info *qinfo; void *pool; + int i; - sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs)); - if (!sched->msgs) { - WD_ERR("calloc for sched->msgs fail!\n"); - return ret; - } - sched->stat = calloc(sched->q_num, sizeof(*sched->stat)); - if (!sched->stat) { - WD_ERR("calloc for sched->stat fail!\n"); - goto err_with_msgs; - } qinfo = sched->qs[0].qinfo; - pool = qinfo->br.usr; - for (i = 0; i < sched->msg_cache_num; i++) { - if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */ - sched->msgs[i].data_in = wd_alloc_blk(pool); - sched->msgs[i].data_out = wd_alloc_blk(pool); - if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) { - dbg("not enough data ss_region memory " - "for cache %d (bs=%d)\n", i, sched->msg_data_size); - goto err_with_stat; - } - } else { /* use sgl */ - sched->msgs[i].data_in = wd_alloc_sgl(pool, sched->msg_data_size); - sched->msgs[i].data_out = wd_alloc_sgl(pool, sched->msg_data_size); - if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) { - dbg("not enough data ss_region memory " - "for cache %d (bs=%d)\n", i, sched->msg_data_size); - goto err_with_stat; - } + flags 
= qinfo->dev_flags; + if (flags & WD_UACCE_DEV_PASID) { + if (sched->ss_region) { + free(sched->ss_region); + sched->ss_region = NULL; } + return 0; + } - if (sched->init_cache) - sched->init_cache(sched, i, data_fmt); + for (i = 0; i < sched->q_num; i++) { + wd_release_queue(&sched->qs[i]); + qinfo = sched->qs[i].qinfo; + if (data_fmt == WD_FLAT_BUF) + wd_blkpool_destroy(qinfo->br.usr); + else + wd_sglpool_destroy(qinfo->br.usr); } return 0; - -err_with_stat: - free(sched->stat); - sched->stat = NULL; -err_with_msgs: - free(sched->msgs); - sched->msgs = NULL; - return ret; -} - -static void __fini_cache(struct wd_scheduler *sched, int data_fmt) -{ - struct q_info *qinfo = sched->qs[0].qinfo; - unsigned int flags = qinfo->dev_flags; - void *pool; - int i; - - if (sched->stat) { - free(sched->stat); - sched->stat = NULL; - } - if (!(flags & WD_UACCE_DEV_PASID)) { - pool = qinfo->br.usr; - if (pool) { - if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */ - for (i = 0; i < sched->msg_cache_num; i++) { - if (sched->msgs[i].data_in) - wd_free_blk(pool, sched->msgs[i].data_in); - if (sched->msgs[i].data_out) - wd_free_blk(pool, sched->msgs[i].data_out); - } - wd_blkpool_destroy(pool); - } else { /* use sgl */ - for (i = 0; i < sched->msg_cache_num; i++) { - if (sched->msgs[i].data_in) - wd_free_sgl(pool, sched->msgs[i].data_in); - if (sched->msgs[i].data_out) - wd_free_sgl(pool, sched->msgs[i].data_out); - } - wd_sglpool_destroy(pool); - } - } - } - if (sched->msgs) { - free(sched->msgs); - sched->msgs = NULL; - } } static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt) @@ -124,14 +61,6 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt) struct wd_sglpool_setup sp; void *pool; - for (i = 0; i < sched->q_num; i++) { - ret = wd_request_queue(&sched->qs[i]); - if (ret) { - WD_ERR("fail to request queue!\n"); - goto out_with_queues; - } - } - if (!sched->ss_region_size) sched->ss_region_size = EXTRA_SIZE + /* add 1 page extra */ 
sched->msg_cache_num * (sched->msg_data_size << 0x1); @@ -145,12 +74,22 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt) ret = -ENOMEM; goto out_with_queues; } - } else { - if (data_fmt == WD_FLAT_BUF) { /* use pbuffer*/ - memset(&mm_setup, 0, sizeof(mm_setup)); - mm_setup.block_size = sched->msg_data_size; - mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */ - mm_setup.align_size = 128; + return 0; + } + + if (data_fmt == WD_FLAT_BUF) { /* use pbuffer*/ + memset(&mm_setup, 0, sizeof(mm_setup)); + mm_setup.block_size = sched->msg_data_size; + mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */ + mm_setup.align_size = 128; + for (i = 0; i < sched->q_num; i++) { + ret = wd_request_queue(&sched->qs[i]); + if (ret) { + WD_ERR("fail to request queue!\n"); + goto out_with_queues; + } + + qinfo = sched->qs[i].qinfo; pool = wd_blkpool_create(&sched->qs[0], &mm_setup); if (!pool) { WD_ERR("%s(): create pool fail!\n", __func__); @@ -162,15 +101,18 @@ static int wd_sched_preinit(struct wd_scheduler *sched, int data_fmt) qinfo->br.iova_map = (void *)wd_blk_iova_map; qinfo->br.iova_unmap = (void *)wd_blk_iova_unmap; qinfo->br.usr = pool; - } else { /* use sgl*/ - memset(&sp, 0, sizeof(sp)); - sp.buf_size = sched->msg_data_size / 10; - sp.align_size = 64; - sp.sge_num_in_sgl = 60; - sp.buf_num_in_sgl = sp.sge_num_in_sgl; - sp.sgl_num = 3 * sched->msg_cache_num; - sp.buf_num = sp.buf_num_in_sgl * sp.sgl_num + sp.sgl_num * 2; - + } + } else { /* use sgl*/ + memset(&sp, 0, sizeof(sp)); + sp.buf_size = sched->msg_data_size / 10; + sp.align_size = 64; + sp.sge_num_in_sgl = 60; + sp.buf_num_in_sgl = sp.sge_num_in_sgl; + sp.sgl_num = 3 * sched->msg_cache_num; + sp.buf_num = sp.buf_num_in_sgl * sp.sgl_num + sp.sgl_num * 2; + + for (i = 0; i < sched->q_num; i++) { + qinfo = sched->qs[i].qinfo; pool = wd_sglpool_create(&sched->qs[0], &sp); if (!pool) { WD_ERR("%s(): create pool fail!\n", __func__); @@ -189,17 +131,132 @@ static int 
wd_sched_preinit(struct wd_scheduler *sched, int data_fmt) return 0; out_with_queues: + for (j = i-1; j >= 0; j--) { + wd_release_queue(&sched->qs[j]); + qinfo = sched->qs[j].qinfo; + if (data_fmt == WD_FLAT_BUF) + wd_blkpool_destroy(qinfo->br.usr); + else + wd_sglpool_destroy(qinfo->br.usr); + } + if (flags & WD_UACCE_DEV_PASID) { if (sched->ss_region) { free(sched->ss_region); sched->ss_region = NULL; } } - for (j = i-1; j >= 0; j--) - wd_release_queue(&sched->qs[j]); + return ret; } +static void __fini_cache(struct wd_scheduler *sched, int data_fmt) +{ + struct q_info *qinfo = sched->qs[0].qinfo; + unsigned int flags = qinfo->dev_flags; + void *pool; + int i, j; + + if (sched->stat) { + free(sched->stat); + sched->stat = NULL; + } + + if (sched->msgs) { + free(sched->msgs); + sched->msgs = NULL; + } + + if (!(flags & WD_UACCE_DEV_PASID)) { + for (j = 0; j < sched->q_num; j++) { + qinfo = sched->qs[j].qinfo; + pool = qinfo->br.usr; + if (!pool) + continue; + + if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */ + for (i = 0; i < sched->msg_cache_num; i++) { + if (sched->msgs[i].data_in) + wd_free_blk(pool, sched->msgs[i].data_in); + if (sched->msgs[i].data_out) + wd_free_blk(pool, sched->msgs[i].data_out); + } + } else { /* use sgl */ + for (i = 0; i < sched->msg_cache_num; i++) { + if (sched->msgs[i].data_in) + wd_free_sgl(pool, sched->msgs[i].data_in); + if (sched->msgs[i].data_out) + wd_free_sgl(pool, sched->msgs[i].data_out); + } + } + } + } +} + +static int __init_cache(struct wd_scheduler *sched, int data_fmt) +{ + struct q_info *qinfo; + unsigned int flags; + int ret = -ENOMEM; + int i, j; + void *pool; + + sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs)); + if (!sched->msgs) { + WD_ERR("calloc for sched->msgs fail!\n"); + return ret; + } + sched->stat = calloc(sched->q_num, sizeof(*sched->stat)); + if (!sched->stat) { + WD_ERR("calloc for sched->stat fail!\n"); + goto err_with_msgs; + } + qinfo = sched->qs[0].qinfo; + pool = qinfo->br.usr; + 
flags = qinfo->dev_flags; + if ((flags & WD_UACCE_DEV_PASID)) + return 0; + + for (i = 0; i < sched->q_num; i++) { + qinfo = sched->qs[i].qinfo; + pool = qinfo->br.usr; + for (j = 0; j < sched->msg_cache_num; j++) { + if (data_fmt == WD_FLAT_BUF) { /* use pbuffer */ + sched->msgs[j].data_in = wd_alloc_blk(pool); + sched->msgs[j].data_out = wd_alloc_blk(pool); + if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) { + dbg("not enough data ss_region memory " + "for cache %d (bs=%d)\n", j, sched->msg_data_size); + goto err_with_stat; + } + } else { /* use sgl */ + sched->msgs[j].data_in = wd_alloc_sgl(pool, sched->msg_data_size); + sched->msgs[j].data_out = wd_alloc_sgl(pool, sched->msg_data_size); + if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) { + dbg("not enough data ss_region memory " + "for cache %d (bs=%d)\n", j, sched->msg_data_size); + goto err_with_stat; + } + } + + if (sched->init_cache) + sched->init_cache(sched, j, data_fmt); + } + } + + return 0; + +err_with_stat: + free(sched->stat); + sched->stat = NULL; + __fini_cache(sched, data_fmt); +err_with_msgs: + if (sched->msgs) { + free(sched->msgs); + sched->msgs = NULL; + } + return ret; +} int wd_sched_init(struct wd_scheduler *sched, int data_fmt) { @@ -211,57 +268,22 @@ int wd_sched_init(struct wd_scheduler *sched, int data_fmt) if (ret < 0) return -EINVAL; - qinfo = sched->qs[0].qinfo; - flags = qinfo->dev_flags; - if (!(flags & WD_UACCE_DEV_PASID)) { - for (k = 1; k < sched->q_num; k++) { - ret = wd_share_reserved_memory(&sched->qs[0], - &sched->qs[k]); - if (ret) { - WD_ERR("fail to share queue reserved mem!\n"); - goto out_with_queues; - } - } - } - sched->cl = sched->msg_cache_num; ret = __init_cache(sched, data_fmt); if (ret) { WD_ERR("fail to init caches!\n"); - goto out_with_queues; + wd_sched_pre_uninit(sched, data_fmt); + return -EINVAL; } return 0; - -out_with_queues: - if (flags & WD_UACCE_DEV_PASID) { - if (sched->ss_region) { - free(sched->ss_region); - sched->ss_region = 
NULL; - } - } - for (j = sched->q_num - 1; j >= 0; j--) - wd_release_queue(&sched->qs[j]); - return ret; } void wd_sched_fini(struct wd_scheduler *sched, int data_fmt) { - int i; - struct q_info *qinfo = sched->qs[0].qinfo; - unsigned int flags = qinfo->dev_flags; - __fini_cache(sched, data_fmt); - if (flags & WD_UACCE_DEV_PASID) { - if (sched->ss_region) { - free(sched->ss_region); - sched->ss_region = NULL; - } - } - - for (i = sched->q_num - 1; i >= 0; i--) - wd_release_queue(&sched->qs[i]); + wd_sched_pre_uninit(sched, data_fmt); } static int __sync_send(struct wd_scheduler *sched) @@ -350,4 +372,4 @@ int wd_sched_work(struct wd_scheduler *sched, int remained) } return sched->cl; -} \ No newline at end of file +} diff --git a/v1/test/test_mm/test_wd_mem.c b/v1/test/test_mm/test_wd_mem.c index 09824b99..e2eec60e 100644 --- a/v1/test/test_mm/test_wd_mem.c +++ b/v1/test/test_mm/test_wd_mem.c @@ -208,10 +208,10 @@ void *mmt_sys_test_thread(void *data) return NULL; } - ret = wd_share_reserved_memory(pdata->qinfo1.q, &rsa_q); + ret = wd_request_queue(&pdata->qinfo1.q); if (ret) { wd_release_queue(&rsa_q); - MMT_PRT("Proc-%d, thrd-%d:share mem on rsa queue fail!\n", + MMT_PRT("Proc-%d, thrd-%d:rsa queue fail!\n", pid, thread_id); return NULL; } @@ -226,9 +226,9 @@ void *mmt_sys_test_thread(void *data) return NULL; } - ret = wd_share_reserved_memory(pdata->qinfo2.q, &zlib_q); + ret = wd_request_queue(&pdata->qinfo2.q); if (ret) { - MMT_PRT("Proc-%d, thrd-%d:share mem on zlib queue fail!\n", + MMT_PRT("Proc-%d, thrd-%d:zlib queue fail!\n", pid, thread_id); goto fail_release; diff --git a/v1/test/wd_sched.c b/v1/test/wd_sched.c index f5e46699..ce1d2604 100644 --- a/v1/test/wd_sched.c +++ b/v1/test/wd_sched.c @@ -22,94 +22,40 @@ #define EXTRA_SIZE 4096 #define WD_WAIT_MS 1000 -static int __init_cache(struct wd_scheduler *sched) +static int wd_sched_pre_uninit(struct wd_scheduler *sched) { - int i; - int ret = -ENOMEM; + unsigned int flags = 0; struct q_info *qinfo; void 
*pool; + int i; - sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs)); - if (!sched->msgs) { - WD_ERR("calloc for sched->msgs fail!\n"); - return ret; - } - sched->stat = calloc(sched->q_num, sizeof(*sched->stat)); - if (!sched->stat) { - WD_ERR("calloc for sched->stat fail!\n"); - goto err_with_msgs; - } qinfo = sched->qs[0].qinfo; - pool = qinfo->br.usr; - for (i = 0; i < sched->msg_cache_num; i++) { - sched->msgs[i].data_in = wd_alloc_blk(pool); - sched->msgs[i].data_out = wd_alloc_blk(pool); - if (!sched->msgs[i].data_in || !sched->msgs[i].data_out) { - dbg("not enough data ss_region memory " - "for cache %d (bs=%d)\n", i, sched->msg_data_size); - goto err_with_stat; + flags = qinfo->dev_flags; + if (flags & WD_UACCE_DEV_PASID) { + if (sched->ss_region) { + free(sched->ss_region); + sched->ss_region = NULL; } + return 0; + } - if (sched->init_cache) - sched->init_cache(sched, i); + for (i = 0; i < sched->q_num; i++) { + wd_release_queue(&sched->qs[i]); + qinfo = sched->qs[i].qinfo; + wd_blkpool_destroy(qinfo->br.usr); } return 0; - -err_with_stat: - free(sched->stat); - sched->stat = NULL; -err_with_msgs: - free(sched->msgs); - sched->msgs = NULL; - return ret; -} - -static void __fini_cache(struct wd_scheduler *sched) -{ - struct q_info *qinfo = sched->qs[0].qinfo; - unsigned int flags = qinfo->dev_flags; - void *pool; - int i; - - if (sched->stat) { - free(sched->stat); - sched->stat = NULL; - } - if (!(flags & WD_UACCE_DEV_PASID)) { - pool = qinfo->br.usr; - if (pool) { - for (i = 0; i < sched->msg_cache_num; i++) { - if (sched->msgs[i].data_in) - wd_free_blk(pool, sched->msgs[i].data_in); - if (sched->msgs[i].data_out) - wd_free_blk(pool, sched->msgs[i].data_out); - } - wd_blkpool_destroy(pool); - } - } - if (sched->msgs) { - free(sched->msgs); - sched->msgs = NULL; - } } static int wd_sched_preinit(struct wd_scheduler *sched) { - int ret, i, j; + struct wd_blkpool_setup mm_setup; unsigned int flags = 0; struct q_info *qinfo; - struct 
wd_blkpool_setup mm_setup; + int ret, i, j; void *pool; - for (i = 0; i < sched->q_num; i++) { - ret = wd_request_queue(&sched->qs[i]); - if (ret) { - WD_ERR("fail to request queue!\n"); - goto out_with_queues; - } - } - if (!sched->ss_region_size) sched->ss_region_size = EXTRA_SIZE + /* add 1 page extra */ sched->msg_cache_num * (sched->msg_data_size << 0x1); @@ -120,18 +66,29 @@ static int wd_sched_preinit(struct wd_scheduler *sched) sched->ss_region = malloc(sched->ss_region_size); if (!sched->ss_region) { WD_ERR("fail to alloc sched ss region mem!\n"); + return -ENOMEM; + } + return 0; + } + + memset(&mm_setup, 0, sizeof(mm_setup)); + mm_setup.block_size = sched->msg_data_size; + mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */ + mm_setup.align_size = 128; + for (i = 0; i < sched->q_num; i++) { + ret = wd_request_queue(&sched->qs[i]); + if (ret) { + WD_ERR("fail to request queue!\n"); ret = -ENOMEM; goto out_with_queues; } - } else { - memset(&mm_setup, 0, sizeof(mm_setup)); - mm_setup.block_size = sched->msg_data_size; - mm_setup.block_num = sched->msg_cache_num << 0x1; /* in and out */ - mm_setup.align_size = 128; - pool = wd_blkpool_create(&sched->qs[0], &mm_setup); + + qinfo = sched->qs[i].qinfo; + pool = wd_blkpool_create(&sched->qs[i], &mm_setup); if (!pool) { WD_ERR("%s(): create pool fail!\n", __func__); ret = -ENOMEM; + wd_release_queue(&sched->qs[i]); goto out_with_queues; } qinfo->br.alloc = (void *)wd_alloc_blk; @@ -144,79 +101,135 @@ static int wd_sched_preinit(struct wd_scheduler *sched) return 0; out_with_queues: + for (j = i-1; j >= 0; j--) { + wd_release_queue(&sched->qs[j]); + qinfo = sched->qs[j].qinfo; + wd_blkpool_destroy(qinfo->br.usr); + } + if (flags & WD_UACCE_DEV_PASID) { if (sched->ss_region) { free(sched->ss_region); sched->ss_region = NULL; } } - for (j = i-1; j >= 0; j--) - wd_release_queue(&sched->qs[j]); + return ret; } +static void __fini_cache(struct wd_scheduler *sched) +{ + struct q_info *qinfo = 
sched->qs[0].qinfo; + unsigned int flags = qinfo->dev_flags; + void *pool; + int i, j; -int wd_sched_init(struct wd_scheduler *sched) + if (sched->stat) { + free(sched->stat); + sched->stat = NULL; + } + + if (sched->msgs) { + free(sched->msgs); + sched->msgs = NULL; + } + + if (!(flags & WD_UACCE_DEV_PASID)) { + for (j = 0; j < sched->q_num; j++) { + qinfo = sched->qs[j].qinfo; + pool = qinfo->br.usr; + if (!pool) + continue; + + for (i = 0; i < sched->msg_cache_num; i++) { + if (sched->msgs[i].data_in) + wd_free_blk(pool, sched->msgs[i].data_in); + if (sched->msgs[i].data_out) + wd_free_blk(pool, sched->msgs[i].data_out); + } + } + } +} + +static int __init_cache(struct wd_scheduler *sched) { - int ret, j, k; - unsigned int flags; struct q_info *qinfo; + unsigned int flags; + int ret = -ENOMEM; + int i, j; + void *pool; - ret = wd_sched_preinit(sched); - if (ret < 0) - return -EINVAL; + sched->msgs = calloc(sched->msg_cache_num, sizeof(*sched->msgs)); + if (!sched->msgs) { + WD_ERR("calloc for sched->msgs fail!\n"); + return ret; + } + sched->stat = calloc(sched->q_num, sizeof(*sched->stat)); + if (!sched->stat) { + WD_ERR("calloc for sched->stat fail!\n"); + goto err_with_msgs; + } qinfo = sched->qs[0].qinfo; + pool = qinfo->br.usr; flags = qinfo->dev_flags; - if (!(flags & WD_UACCE_DEV_PASID)) { - for (k = 1; k < sched->q_num; k++) { - ret = wd_share_reserved_memory(&sched->qs[0], - &sched->qs[k]); - if (ret) { - WD_ERR("fail to share queue reserved mem!\n"); - goto out_with_queues; + if ((flags & WD_UACCE_DEV_PASID)) + return 0; + + for (i = 0; i < sched->q_num; i++) { + qinfo = sched->qs[i].qinfo; + pool = qinfo->br.usr; + for (j = 0; j < sched->msg_cache_num; j++) { + sched->msgs[j].data_in = wd_alloc_blk(pool); + sched->msgs[j].data_out = wd_alloc_blk(pool); + if (!sched->msgs[j].data_in || !sched->msgs[j].data_out) { + dbg("not enough data ss_region memory " + "for cache %d (bs=%d)\n", j, sched->msg_data_size); + goto err_with_alloc; } + + if 
(sched->init_cache) + sched->init_cache(sched, j); } } - sched->cl = sched->msg_cache_num; + return 0; + +err_with_alloc: + free(sched->stat); + sched->stat = NULL; + __fini_cache(sched); +err_with_msgs: + if (sched->msgs) { + free(sched->msgs); + sched->msgs = NULL; + } + return ret; +} + +int wd_sched_init(struct wd_scheduler *sched) +{ + int ret; + ret = wd_sched_preinit(sched); + if (ret < 0) + return -EINVAL; + + sched->cl = sched->msg_cache_num; ret = __init_cache(sched); if (ret) { WD_ERR("fail to init caches!\n"); - goto out_with_queues; + wd_sched_pre_uninit(sched); + return -EINVAL; } return 0; - -out_with_queues: - if (flags & WD_UACCE_DEV_PASID) { - if (sched->ss_region) { - free(sched->ss_region); - sched->ss_region = NULL; - } - } - for (j = sched->q_num - 1; j >= 0; j--) - wd_release_queue(&sched->qs[j]); - return ret; } void wd_sched_fini(struct wd_scheduler *sched) { - int i; - struct q_info *qinfo = sched->qs[0].qinfo; - unsigned int flags = qinfo->dev_flags; - __fini_cache(sched); - if (flags & WD_UACCE_DEV_PASID) { - if (sched->ss_region) { - free(sched->ss_region); - sched->ss_region = NULL; - } - } - - for (i = sched->q_num - 1; i >= 0; i--) - wd_release_queue(&sched->qs[i]); + wd_sched_pre_uninit(sched); } static int __sync_send(struct wd_scheduler *sched) -- 2.33.0

From: Wenkai Lin <linwenkai6@hisilicon.com> Uadk will change output address information before the rehash input task, so this information needs to be restored before the next rehash task. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- drv/hisi_dae.c | 3 ++- wd_agg.c | 63 +++++++++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index 22a780dd..b171c78e 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -734,8 +734,9 @@ static void fill_hashagg_msg_task_done(struct dae_sqe *sqe, struct wd_agg_msg *m msg->out_row_count = sqe->out_raw_num; msg->output_done = sqe->output_end; } else if (sqe->task_type_ext == DAE_HASHAGG_MERGE) { - msg->out_row_count = temp_msg->row_count; msg->output_done = sqe->output_end; + if (!msg->output_done) + msg->out_row_count = temp_msg->row_count; } else { msg->in_row_count = temp_msg->row_count; } diff --git a/wd_agg.c b/wd_agg.c index 8869ab84..686e7393 100644 --- a/wd_agg.c +++ b/wd_agg.c @@ -1406,16 +1406,16 @@ static int wd_agg_set_col_size(struct wd_agg_sess *sess, struct wd_agg_req *req, return WD_SUCCESS; } -static int wd_agg_rehash_sync_inner(struct wd_agg_sess *sess, struct wd_agg_req *req) +static int wd_agg_rehash_sync_inner(struct wd_agg_sess *sess, struct wd_agg_req *in_req, + struct wd_agg_req *out_req) { + struct wd_agg_msg in_msg = {0}; struct wd_agg_msg msg = {0}; - bool output_done; int ret; - fill_request_msg_output(&msg, req, sess, true); - req->state = 0; + fill_request_msg_output(&msg, out_req, sess, true); - ret = wd_agg_sync_job(sess, req, &msg); + ret = wd_agg_sync_job(sess, out_req, &msg); if (unlikely(ret)) return ret; @@ -1423,33 +1423,26 @@ static int wd_agg_rehash_sync_inner(struct wd_agg_sess *sess, struct wd_agg_req if (unlikely(ret)) return ret; - req->real_out_row_count = msg.out_row_count; - output_done = msg.output_done; if (!msg.out_row_count) { - req->output_done = true; + out_req->output_done = true; return 
WD_SUCCESS; } - req->key_cols = req->out_key_cols; - req->agg_cols = req->out_agg_cols; - req->key_cols_num = req->out_key_cols_num; - req->agg_cols_num = req->out_agg_cols_num; - wd_agg_set_col_size(sess, req, req->real_out_row_count); - req->in_row_count = req->real_out_row_count; + out_req->real_out_row_count = msg.out_row_count; + wd_agg_set_col_size(sess, in_req, out_req->real_out_row_count); + in_req->in_row_count = out_req->real_out_row_count; - memset(&msg, 0, sizeof(struct wd_agg_msg)); - fill_request_msg_input(&msg, req, sess, true); + fill_request_msg_input(&in_msg, in_req, sess, true); - ret = wd_agg_sync_job(sess, req, &msg); + ret = wd_agg_sync_job(sess, in_req, &in_msg); if (unlikely(ret)) return ret; - ret = wd_agg_check_msg_result(msg.result); + ret = wd_agg_check_msg_result(in_msg.result); if (unlikely(ret)) return ret; - req->state = msg.result; - req->output_done = output_done; + out_req->output_done = msg.output_done; return WD_SUCCESS; } @@ -1472,9 +1465,9 @@ int wd_agg_rehash_sync(handle_t h_sess, struct wd_agg_req *req) { struct wd_agg_sess *sess = (struct wd_agg_sess *)h_sess; enum wd_agg_sess_state expected = WD_AGG_SESS_RESET; - struct wd_agg_req src_req; - __u64 cnt = 0; - __u64 max_cnt; + struct wd_dae_col_addr *cols; + struct wd_agg_req in_req; + __u64 max_cnt, key_len, agg_len, cnt = 0; int ret; ret = wd_agg_check_rehash_params(sess, req); @@ -1487,21 +1480,39 @@ int wd_agg_rehash_sync(handle_t h_sess, struct wd_agg_req *req) if (unlikely(ret)) return ret; - memcpy(&src_req, req, sizeof(struct wd_agg_req)); + memcpy(&in_req, req, sizeof(struct wd_agg_req)); + + key_len = req->out_key_cols_num * sizeof(struct wd_dae_col_addr); + agg_len = req->out_agg_cols_num * sizeof(struct wd_dae_col_addr); + cols = malloc(key_len + agg_len); + if (unlikely(!cols)) + return -WD_ENOMEM; + + /* The input task uses the address of the output task as input address. 
*/ + in_req.key_cols = cols; + in_req.agg_cols = cols + req->out_key_cols_num; + in_req.key_cols_num = req->out_key_cols_num; + in_req.agg_cols_num = req->out_agg_cols_num; + memcpy(in_req.key_cols, req->out_key_cols, key_len); + memcpy(in_req.agg_cols, req->out_agg_cols, agg_len); + max_cnt = MAX_HASH_TABLE_ROW_NUM / req->out_row_count; + while (cnt < max_cnt) { - ret = wd_agg_rehash_sync_inner(sess, &src_req); + ret = wd_agg_rehash_sync_inner(sess, &in_req, req); if (ret) { __atomic_store_n(&sess->state, WD_AGG_SESS_RESET, __ATOMIC_RELEASE); WD_ERR("failed to do agg rehash task!\n"); + free(cols); return ret; } - if (src_req.output_done) + if (req->output_done) break; cnt++; } __atomic_store_n(&sess->state, WD_AGG_SESS_INPUT, __ATOMIC_RELEASE); + free(cols); return WD_SUCCESS; } -- 2.33.0

From: Wenkai Lin <linwenkai6@hisilicon.com> UADK supports hardware acceleration for the hashjoin and gather. Hashjoin is used to construct a hash table to join two tables, gather is used to combine data of different types in multiple columns in a specified order to obtain a new column. Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com> --- Makefile.am | 8 +- drv/hisi_dae.c | 602 +--------- drv/hisi_dae.h | 229 ++++ drv/hisi_dae_common.c | 387 +++++++ drv/hisi_dae_join_gather.c | 1040 +++++++++++++++++ include/drv/wd_join_gather_drv.h | 52 + include/wd_alg.h | 2 + include/wd_dae.h | 12 + include/wd_join_gather.h | 352 ++++++ include/wd_util.h | 1 + libwd_dae.map | 19 + wd_join_gather.c | 1823 ++++++++++++++++++++++++++++++ wd_util.c | 6 +- 13 files changed, 3959 insertions(+), 574 deletions(-) create mode 100644 drv/hisi_dae.h create mode 100644 drv/hisi_dae_common.c create mode 100644 drv/hisi_dae_join_gather.c create mode 100644 include/drv/wd_join_gather_drv.h create mode 100644 include/wd_join_gather.h create mode 100644 wd_join_gather.c diff --git a/Makefile.am b/Makefile.am index df756f72..d35287cd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -37,7 +37,7 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ - include/wd_udma.h + include/wd_udma.h include/wd_join_gather.h nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h @@ -73,7 +73,7 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ libwd_udma_la_SOURCES=wd_udma.h wd_udma_drv.h wd_udma.c \ wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h -libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c \ +libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c wd_join_gather.h wd_join_gather_drv.h wd_join_gather.c \ 
wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h libwd_comp_la_SOURCES=wd_comp.c wd_comp.h wd_comp_drv.h wd_util.c wd_util.h \ @@ -111,8 +111,8 @@ libisa_sve_la_SOURCES=drv/hash_mb/hash_mb.c wd_digest_drv.h drv/hash_mb/hash_mb. drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S endif -libhisi_dae_la_SOURCES=drv/hisi_dae.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h +libhisi_dae_la_SOURCES=drv/hisi_dae.c hisi_dae.h drv/hisi_qm_udrv.c \ + hisi_qm_udrv.h drv/hisi_dae_join_gather.c drv/hisi_dae_common.c libhisi_udma_la_SOURCES=drv/hisi_udma.c drv/hisi_qm_udrv.c \ hisi_qm_udrv.h diff --git a/drv/hisi_dae.c b/drv/hisi_dae.c index b171c78e..4f4d13c6 100644 --- a/drv/hisi_dae.c +++ b/drv/hisi_dae.c @@ -1,19 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 /* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. */ -#include <math.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/epoll.h> -#include <sys/eventfd.h> -#include <sys/mman.h> -#include <sys/types.h> #include "hisi_qm_udrv.h" +#include "hisi_dae.h" #include "../include/drv/wd_agg_drv.h" -#define DAE_HASH_AGG_TYPE 2 #define DAE_EXT_SQE_SIZE 128 #define DAE_CTX_Q_NUM_DEF 1 @@ -39,37 +30,14 @@ /* align size */ #define DAE_CHAR_ALIGN_SIZE 4 -#define DAE_TABLE_ALIGN_SIZE 128 -#define DAE_ADDR_ALIGN_SIZE 128 - -/* decimal infomartion */ -#define DAE_DECIMAL_PRECISION_OFFSET 8 -#define DAE_DECIMAL128_MAX_PRECISION 38 -#define DAE_DECIMAL64_MAX_PRECISION 18 /* hash table */ -#define HASH_EXT_TABLE_INVALID_OFFSET 5 -#define HASH_EXT_TABLE_VALID 0x80 #define HASH_TABLE_HEAD_TAIL_SIZE 8 #define HASH_TABLE_EMPTY_SIZE 4 -#define HASH_TABLE_WITDH_POWER 2 -#define HASH_TABLE_MIN_WIDTH 10 -#define HASH_TABLE_MAX_WIDTH 43 -#define HASH_TABLE_OFFSET_3ROW 3 -#define HASH_TABLE_OFFSET_1ROW 1 /* hash agg operations col max num */ #define DAE_AGG_COL_ALG_MAX_NUM 2 -#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) 
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((uintptr_t)(p), (a))) - -#define BIT(nr) (1UL << (nr)) -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#define GENMASK(h, l) \ - (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) - /* DAE hardware protocol data */ enum dae_stage { DAE_HASH_AGGREGATE = 0x0, @@ -85,48 +53,6 @@ enum dae_op_type { DAE_SUM = 0x5, }; -enum dae_done_flag { - DAE_HW_TASK_NOT_PROCESS = 0x0, - DAE_HW_TASK_DONE = 0x1, - DAE_HW_TASK_ERR = 0x2, -}; - -enum dae_error_type { - DAE_TASK_SUCCESS = 0x0, - DAE_TASK_BD_ERROR_MIN = 0x1, - DAE_TASK_BD_ERROR_MAX = 0x7f, - DAE_HASH_TABLE_NEED_REHASH = 0x82, - DAE_HASH_TABLE_INVALID = 0x83, - DAE_HASHAGG_VCHAR_OVERFLOW = 0x84, - DAE_HASHAGG_RESULT_OVERFLOW = 0x85, - DAE_HASHAGG_BUS_ERROR = 0x86, - DAE_HASHAGG_VCHAR_LEN_ERROR = 0x87, -}; - -enum dae_data_type { - DAE_SINT32 = 0x0, - DAE_SINT64 = 0x2, - DAE_DECIMAL64 = 0x9, - DAE_DECIMAL128 = 0xA, - DAE_CHAR = 0xC, - DAE_VCHAR = 0xD, -}; - -enum dae_date_type_size { - SINT32_SIZE = 4, - SINT64_SIZE = 8, - DECIMAL128_SIZE = 16, - DEFAULT_VCHAR_SIZE = 30, -}; - -enum dae_table_row_size { - ROW_SIZE32 = 32, - ROW_SIZE64 = 64, - ROW_SIZE128 = 128, - ROW_SIZE256 = 256, - ROW_SIZE512 = 512, -}; - enum dae_sum_optype { DECIMAL64_TO_DECIMAL64 = 0x2, DECIMAL64_TO_DECIMAL128 = 0x3, @@ -139,99 +65,6 @@ enum dae_alg_optype { DAE_HASHAGG_MIN = 0x8, }; -enum dae_bd_type { - DAE_BD_TYPE_V1 = 0x0, - DAE_BD_TYPE_V2 = 0x1, -}; - -struct dae_sqe { - __u32 bd_type : 6; - __u32 resv1 : 2; - __u32 task_type : 6; - __u32 resv2 : 2; - __u32 task_type_ext : 6; - __u32 resv3 : 9; - __u32 bd_invlid : 1; - __u16 table_row_size; - __u16 resv4; - __u32 resv5; - __u32 low_tag; - __u32 hi_tag; - __u32 row_num; - __u32 resv6; - __u32 src_table_width : 6; - __u32 dst_table_width : 6; - __u32 resv7 : 4; - __u32 counta_vld : 1; - __u32 resv8 : 15; - /* - * high 4bits: compare mode if data type is char/vchar, - * out type if operation is sum. - * low 4bits: input value type. 
- */ - __u8 key_data_type[16]; - __u8 agg_data_type[16]; - __u32 resv9[8]; - __u32 key_col_bitmap; - __u32 agg_col_bitmap; - __u64 addr_list; - __u32 done_flag : 3; - __u32 output_end : 1; - __u32 ext_err_type : 12; - __u32 err_type : 8; - __u32 wtype : 8; - __u32 out_raw_num; - __u32 vchar_err_offset; - __u16 sum_overflow_cols; - __u16 resv10; -}; - -struct dae_ext_sqe { - /* - * If date type is char/vchar, data info fill data type size - * If data type is decimal64/decimal128, data info fill data precision - */ - __u16 key_data_info[16]; - __u16 agg_data_info[16]; - /* Aggregated output from input agg col index */ - __u64 out_from_in_idx; - /* Aggregated output from input agg col operation, sum or count */ - __u64 out_optype; - __u32 resv[12]; -}; - -struct dae_col_addr { - __u64 empty_addr; - __u64 empty_size; - __u64 value_addr; - __u64 value_size; -}; - -struct dae_table_addr { - __u64 std_table_addr; - __u64 std_table_size; - __u64 ext_table_addr; - __u64 ext_table_size; -}; - -struct dae_addr_list { - __u64 ext_sqe_addr; - __u64 ext_sqe_size; - struct dae_table_addr src_table; - struct dae_table_addr dst_table; - __u64 resv_addr[6]; - struct dae_col_addr input_addr[32]; - struct dae_col_addr output_addr[32]; -}; - -struct dae_extend_addr { - struct dae_ext_sqe *ext_sqe; - struct dae_addr_list *addr_list; - __u8 *addr_status; - __u16 addr_num; - __u16 tail; -}; - static enum dae_data_type hw_data_type_order[] = { DAE_VCHAR, DAE_CHAR, DAE_DECIMAL128, DAE_DECIMAL64, DAE_SINT64, DAE_SINT32, @@ -265,14 +98,6 @@ struct hashagg_col_data { bool is_count_all; }; -struct hash_table_data { - void *std_table; - void *ext_table; - __u64 std_table_size; - __u64 ext_table_size; - __u32 table_width; -}; - struct hashagg_ctx { struct hashagg_col_data cols_data; struct hash_table_data table_data; @@ -282,33 +107,6 @@ struct hashagg_ctx { __u16 sum_overflow_cols; }; -struct hisi_dae_ctx { - struct wd_ctx_config_internal config; -}; - -static int get_free_ext_addr(struct 
dae_extend_addr *ext_addr) -{ - __u16 addr_num = ext_addr->addr_num; - __u16 idx = ext_addr->tail; - __u16 cnt = 0; - - while (__atomic_test_and_set(&ext_addr->addr_status[idx], __ATOMIC_ACQUIRE)) { - idx = (idx + 1) % addr_num; - cnt++; - if (cnt == addr_num) - return -WD_EBUSY; - } - - ext_addr->tail = (idx + 1) % addr_num; - - return idx; -} - -static void put_ext_addr(struct dae_extend_addr *ext_addr, int idx) -{ - __atomic_clear(&ext_addr->addr_status[idx], __ATOMIC_RELEASE); -} - static void fill_hashagg_task_type(struct wd_agg_msg *msg, struct dae_sqe *sqe, __u16 hw_type) { /* @@ -671,7 +469,7 @@ static int hashagg_send(struct wd_alg_driver *drv, handle_t ctx, void *hashagg_m return WD_SUCCESS; fill_hashagg_task_type(msg, &sqe, qp->q_info.hw_type); - sqe.row_num = msg->row_count; + sqe.data_row_num = msg->row_count; idx = get_free_ext_addr(ext_addr); if (idx < 0) @@ -758,15 +556,15 @@ static void fill_hashagg_msg_task_err(struct dae_sqe *sqe, struct wd_agg_msg *ms break; case DAE_HASHAGG_VCHAR_OVERFLOW: WD_ERR("failed to do hashagg task, vchar size overflow! consumed row num: %u!\n", - sqe->vchar_err_offset); + sqe->data_row_offset); msg->result = WD_AGG_INVALID_VARCHAR; - msg->in_row_count = sqe->vchar_err_offset; + msg->in_row_count = sqe->data_row_offset; break; case DAE_HASHAGG_RESULT_OVERFLOW: msg->in_row_count = temp_msg->row_count; msg->result = WD_AGG_SUM_OVERFLOW; break; - case DAE_HASHAGG_BUS_ERROR: + case DAE_TASK_BUS_ERROR: WD_ERR("failed to do hashagg task, bus error! 
etype %u!\n", sqe->err_type); msg->result = WD_AGG_BUS_ERROR; break; @@ -966,31 +764,11 @@ static int hashagg_init_param_check(struct wd_agg_sess_setup *setup, __u16 hw_ty setup->is_count_all, hw_type); } -static __u32 hashagg_get_data_type_size(enum dae_data_type type, __u16 data_info) -{ - switch (type) { - case DAE_SINT32: - return SINT32_SIZE; - case DAE_SINT64: - case DAE_DECIMAL64: - return SINT64_SIZE; - case DAE_DECIMAL128: - return DECIMAL128_SIZE; - case DAE_CHAR: - return ALIGN(data_info, DAE_CHAR_ALIGN_SIZE); - case DAE_VCHAR: - return data_info; - default: - break; - } - - return 0; -} - static int transfer_key_col_info(struct wd_key_col_info *key_cols, struct hw_agg_data *key_data, __u32 col_num) { __u32 i; + int ret; for (i = 0; i < col_num; i++) { switch (key_cols[i].input_data_type) { @@ -1008,9 +786,15 @@ static int transfer_key_col_info(struct wd_key_col_info *key_cols, key_data[i].hw_type = DAE_CHAR; break; case WD_DAE_LONG_DECIMAL: + ret = dae_decimal_precision_check(key_cols[i].col_data_info, true); + if (ret) + return ret; key_data[i].hw_type = DAE_DECIMAL128; break; case WD_DAE_SHORT_DECIMAL: + ret = dae_decimal_precision_check(key_cols[i].col_data_info, false); + if (ret) + return ret; key_data[i].hw_type = DAE_DECIMAL64; break; case WD_DAE_LONG: @@ -1060,33 +844,6 @@ static int transfer_key_to_hw_type(struct hashagg_col_data *cols_data, return WD_SUCCESS; } -static int hashagg_decimal_precision_check(__u16 data_info, bool longdecimal) -{ - __u8 all_precision; - - /* - * low 8bits: overall precision - * high 8bits: precision of the decimal part - */ - all_precision = data_info; - if (longdecimal) { - if (all_precision > DAE_DECIMAL128_MAX_PRECISION) { - WD_ERR("invalid: longdecimal precision %u is more than support %d!\n", - all_precision, DAE_DECIMAL128_MAX_PRECISION); - return -WD_EINVAL; - } - return WD_SUCCESS; - } - - if (all_precision > DAE_DECIMAL64_MAX_PRECISION) { - WD_ERR("invalid: shortdecimal precision %u is more than support 
%d!\n", - all_precision, DAE_DECIMAL64_MAX_PRECISION); - return -WD_EINVAL; - } - - return WD_SUCCESS; -} - static int hashagg_check_sum_info(struct wd_agg_col_info *agg_col, struct hw_agg_data *user_input_data, struct hw_agg_data *user_output_data, __u32 index) @@ -1105,7 +862,7 @@ static int hashagg_check_sum_info(struct wd_agg_col_info *agg_col, break; case WD_DAE_SHORT_DECIMAL: if (agg_col->output_data_types[index] == WD_DAE_SHORT_DECIMAL) { - ret = hashagg_decimal_precision_check(agg_col->col_data_info, false); + ret = dae_decimal_precision_check(agg_col->col_data_info, false); if (ret) return ret; user_input_data->sum_outtype = DECIMAL64_TO_DECIMAL64; @@ -1113,7 +870,7 @@ static int hashagg_check_sum_info(struct wd_agg_col_info *agg_col, /* For rehash, rehash will do sum */ user_output_data->sum_outtype = DECIMAL64_TO_DECIMAL64; } else if (agg_col->output_data_types[index] == WD_DAE_LONG_DECIMAL) { - ret = hashagg_decimal_precision_check(agg_col->col_data_info, true); + ret = dae_decimal_precision_check(agg_col->col_data_info, true); if (ret) return ret; user_input_data->sum_outtype = DECIMAL64_TO_DECIMAL128; @@ -1131,7 +888,7 @@ static int hashagg_check_sum_info(struct wd_agg_col_info *agg_col, agg_col->output_data_types[index]); return -WD_EINVAL; } - ret = hashagg_decimal_precision_check(agg_col->col_data_info, true); + ret = dae_decimal_precision_check(agg_col->col_data_info, true); if (ret) return ret; user_input_data->hw_type = DAE_DECIMAL128; @@ -1167,16 +924,24 @@ static int hashagg_check_max_min_info(struct wd_agg_col_info *agg_col, struct hw_agg_data *user_input_data, struct hw_agg_data *user_output_data) { + int ret; + switch (agg_col->input_data_type) { case WD_DAE_LONG: user_input_data->hw_type = DAE_SINT64; user_output_data->hw_type = DAE_SINT64; break; case WD_DAE_SHORT_DECIMAL: + ret = dae_decimal_precision_check(agg_col->col_data_info, false); + if (ret) + return ret; user_input_data->hw_type = DAE_DECIMAL64; user_output_data->hw_type = 
DAE_DECIMAL64; break; case WD_DAE_LONG_DECIMAL: + ret = dae_decimal_precision_check(agg_col->col_data_info, true); + if (ret) + return ret; user_input_data->hw_type = DAE_DECIMAL128; user_output_data->hw_type = DAE_DECIMAL128; break; @@ -1395,12 +1160,12 @@ static int hashagg_get_table_rowsize(struct hashagg_col_data *cols_data) __u32 i; for (i = 0; i < key_num; i++) - row_count_size += hashagg_get_data_type_size(key_data[i].hw_type, - key_data[i].data_info); + row_count_size += get_data_type_size(key_data[i].hw_type, + key_data[i].data_info); for (i = 0; i < output_num; i++) - row_count_size += hashagg_get_data_type_size(output_col[i].hw_type, - output_col[i].data_info); + row_count_size += get_data_type_size(output_col[i].hw_type, + output_col[i].data_info); row_count_size += HASH_TABLE_EMPTY_SIZE; if (row_count_size < DAE_MIN_ROW_SIZE || row_count_size > DAE_MAX_ROW_SIZE) { @@ -1510,58 +1275,7 @@ free_agg_ctx: return ret; } -static void dae_uninit_qp_priv(handle_t h_qp) -{ - struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct dae_extend_addr *ext_addr = (struct dae_extend_addr *)qp->priv; - - free(ext_addr->addr_list); - free(ext_addr->addr_status); - free(ext_addr->ext_sqe); - free(ext_addr); - qp->priv = NULL; -} - -static int dae_init_qp_priv(handle_t h_qp) -{ - struct hisi_qp *qp = (struct hisi_qp *)h_qp; - __u16 sq_depth = qp->q_info.sq_depth; - struct dae_extend_addr *ext_addr; - int ret = -WD_ENOMEM; - - ext_addr = calloc(1, sizeof(struct dae_extend_addr)); - if (!ext_addr) - return ret; - - ext_addr->ext_sqe = aligned_alloc(DAE_ADDR_ALIGN_SIZE, DAE_EXT_SQE_SIZE * sq_depth); - if (!ext_addr->ext_sqe) - goto free_ext_addr; - - ext_addr->addr_status = calloc(1, sizeof(__u8) * sq_depth); - if (!ext_addr->addr_status) - goto free_ext_sqe; - - ext_addr->addr_list = aligned_alloc(DAE_ADDR_ALIGN_SIZE, - sizeof(struct dae_addr_list) * sq_depth); - if (!ext_addr->addr_list) - goto free_addr_status; - - ext_addr->addr_num = sq_depth; - qp->priv = ext_addr; - - 
return WD_SUCCESS; - -free_addr_status: - free(ext_addr->addr_status); -free_ext_sqe: - free(ext_addr->ext_sqe); -free_ext_addr: - free(ext_addr); - - return ret; -} - -static int dae_get_row_size(struct wd_alg_driver *drv, void *param) +static int agg_get_row_size(struct wd_alg_driver *drv, void *param) { struct hashagg_ctx *agg_ctx = param; @@ -1571,266 +1285,16 @@ static int dae_get_row_size(struct wd_alg_driver *drv, void *param) return agg_ctx->row_size; } -static __u32 dae_ext_table_rownum(void **ext_table, struct wd_dae_hash_table *hash_table, - __u32 row_size) -{ - __u64 tlb_size, tmp_size, row_num; - void *tmp_table; - - /* - * The first row of the extended hash table stores the hash table information, - * and the second row stores the aggregated data. The 128-bytes aligned address - * in the second row provides the optimal performance. - */ - tmp_table = PTR_ALIGN(hash_table->ext_table, DAE_TABLE_ALIGN_SIZE); - tlb_size = (__u64)hash_table->table_row_size * hash_table->ext_table_row_num; - tmp_size = (__u64)(uintptr_t)tmp_table - (__u64)(uintptr_t)hash_table->ext_table; - if (tmp_size >= tlb_size) - return 0; - - row_num = (tlb_size - tmp_size) / row_size; - if (row_size == ROW_SIZE32) { - if (tmp_size >= row_size) { - tmp_table = (__u8 *)tmp_table - row_size; - row_num += 1; - } else { - /* - * When row size is 32 bytes, the first 96 bytes are not used. - * Ensure that the address of the second row is 128 bytes aligned. - */ - if (row_num > HASH_TABLE_OFFSET_3ROW) { - tmp_table = (__u8 *)tmp_table + HASH_TABLE_OFFSET_3ROW * row_size; - row_num -= HASH_TABLE_OFFSET_3ROW; - } else { - return 0; - } - } - } else if (row_size == ROW_SIZE64) { - if (tmp_size >= row_size) { - tmp_table = (__u8 *)tmp_table - row_size; - row_num += 1; - } else { - /* - * When row size is 64 bytes, the first 64 bytes are not used. - * Ensure that the address of the second row is 128 bytes aligned. 
- */ - if (row_num > HASH_TABLE_OFFSET_1ROW) { - tmp_table = (__u8 *)tmp_table + HASH_TABLE_OFFSET_1ROW * row_size; - row_num -= HASH_TABLE_OFFSET_1ROW; - } else { - return 0; - } - } - } - - *ext_table = tmp_table; - - return row_num; -} - -static int dae_ext_table_init(struct hashagg_ctx *agg_ctx, - struct wd_dae_hash_table *hash_table, bool is_rehash) -{ - struct hash_table_data *hw_table = &agg_ctx->table_data; - __u64 ext_size = hw_table->ext_table_size; - __u32 row_size = agg_ctx->row_size; - __u64 tlb_size, row_num; - void *ext_table; - __u8 *ext_valid; - __u64 *ext_row; - - row_num = dae_ext_table_rownum(&ext_table, hash_table, row_size); - if (row_num <= 1) { - WD_ERR("invalid: after aligned, extend table row num is less than device need!\n"); - return -WD_EINVAL; - } - - tlb_size = row_num * row_size; - if (is_rehash && tlb_size <= ext_size) { - WD_ERR("invalid: rehash extend table size %llu is not longer than current %llu!\n", - tlb_size, ext_size); - return -WD_EINVAL; - } - - /* - * If table has been initialized, save the previous data - * before replacing the new table. - */ - if (is_rehash) - memcpy(&agg_ctx->rehash_table, hw_table, sizeof(struct hash_table_data)); - - /* Initialize the extend table value. */ - memset(ext_table, 0, tlb_size); - ext_valid = (__u8 *)ext_table + HASH_EXT_TABLE_INVALID_OFFSET; - *ext_valid = HASH_EXT_TABLE_VALID; - ext_row = (__u64 *)ext_table + 1; - *ext_row = row_num - 1; - - hw_table->ext_table = ext_table; - hw_table->ext_table_size = tlb_size; - - return WD_SUCCESS; -} - -static int dae_std_table_init(struct hash_table_data *hw_table, - struct wd_dae_hash_table *hash_table, __u32 row_size) -{ - __u64 tlb_size, row_num, tmp_size; - - /* - * Hash table address must be 128-bytes aligned, and the number - * of rows in a standard hash table must be a power of 2. 
- */ - hw_table->std_table = PTR_ALIGN(hash_table->std_table, DAE_TABLE_ALIGN_SIZE); - tlb_size = (__u64)hash_table->table_row_size * hash_table->std_table_row_num; - tmp_size = (__u64)(uintptr_t)hw_table->std_table - (__u64)(uintptr_t)hash_table->std_table; - if (tmp_size >= tlb_size) { - WD_ERR("invalid: after aligned, standard table size is less than 0!\n"); - return -WD_EINVAL; - } - - row_num = (tlb_size - tmp_size) / row_size; - if (!row_num) { - WD_ERR("invalid: standard table row num is 0!\n"); - return -WD_EINVAL; - } - - hw_table->table_width = (__u32)log2(row_num); - if (hw_table->table_width < HASH_TABLE_MIN_WIDTH || - hw_table->table_width > HASH_TABLE_MAX_WIDTH) { - WD_ERR("invalid: standard table width %u is out of device support range %d~%d!\n", - hw_table->table_width, HASH_TABLE_MIN_WIDTH, HASH_TABLE_MAX_WIDTH); - return -WD_EINVAL; - } - - row_num = (__u64)pow(HASH_TABLE_WITDH_POWER, hw_table->table_width); - hw_table->std_table_size = row_num * row_size; - memset(hw_table->std_table, 0, hw_table->std_table_size); - - return WD_SUCCESS; -} - -static int dae_hash_table_init(struct wd_alg_driver *drv, +static int agg_hash_table_init(struct wd_alg_driver *drv, struct wd_dae_hash_table *hash_table, void *priv) { struct hashagg_ctx *agg_ctx = priv; - struct hash_table_data *hw_table; - bool is_rehash = false; - int ret; if (!agg_ctx || !hash_table) return -WD_EINVAL; - if (!agg_ctx->row_size || agg_ctx->row_size > hash_table->table_row_size) { - WD_ERR("invalid: row size %u is error, device need %u!\n", - hash_table->table_row_size, agg_ctx->row_size); - return -WD_EINVAL; - } - - /* hash_std_table is checked by caller */ - if (!hash_table->ext_table || !hash_table->ext_table_row_num) { - WD_ERR("invalid: hash extend table is null!\n"); - return -WD_EINVAL; - } - - hw_table = &agg_ctx->table_data; - if (hw_table->std_table_size) - is_rehash = true; - - ret = dae_ext_table_init(agg_ctx, hash_table, is_rehash); - if (ret) - return ret; - - ret = 
dae_std_table_init(hw_table, hash_table, agg_ctx->row_size); - if (ret) - goto update_table; - - return WD_SUCCESS; - -update_table: - if (is_rehash) - memcpy(hw_table, &agg_ctx->rehash_table, sizeof(struct hash_table_data)); - else - memset(hw_table, 0, sizeof(struct hash_table_data)); - return ret; -} - -static int dae_init(struct wd_alg_driver *drv, void *conf) -{ - struct wd_ctx_config_internal *config = conf; - struct hisi_qm_priv qm_priv; - struct hisi_dae_ctx *priv; - handle_t h_qp = 0; - handle_t h_ctx; - __u32 i, j; - int ret; - - if (!config || !config->ctx_num) { - WD_ERR("invalid: dae init config is null or ctx num is 0!\n"); - return -WD_EINVAL; - } - - priv = malloc(sizeof(struct hisi_dae_ctx)); - if (!priv) - return -WD_ENOMEM; - - qm_priv.op_type = DAE_HASH_AGG_TYPE; - qm_priv.sqe_size = sizeof(struct dae_sqe); - /* Allocate qp for each context */ - for (i = 0; i < config->ctx_num; i++) { - h_ctx = config->ctxs[i].ctx; - qm_priv.qp_mode = config->ctxs[i].ctx_mode; - /* Setting the epoll en to 0 for ASYNC ctx */ - qm_priv.epoll_en = (qm_priv.qp_mode == CTX_MODE_SYNC) ? 
- config->epoll_en : 0; - qm_priv.idx = i; - h_qp = hisi_qm_alloc_qp(&qm_priv, h_ctx); - if (!h_qp) { - ret = -WD_ENOMEM; - goto out; - } - config->ctxs[i].sqn = qm_priv.sqn; - ret = dae_init_qp_priv(h_qp); - if (ret) - goto free_h_qp; - } - memcpy(&priv->config, config, sizeof(struct wd_ctx_config_internal)); - drv->priv = priv; - - return WD_SUCCESS; - -free_h_qp: - hisi_qm_free_qp(h_qp); -out: - for (j = 0; j < i; j++) { - h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); - } - free(priv); - return ret; -} - -static void dae_exit(struct wd_alg_driver *drv) -{ - struct wd_ctx_config_internal *config; - struct hisi_dae_ctx *priv; - handle_t h_qp; - __u32 i; - - if (!drv || !drv->priv) - return; - - priv = (struct hisi_dae_ctx *)drv->priv; - config = &priv->config; - for (i = 0; i < config->ctx_num; i++) { - h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); - dae_uninit_qp_priv(h_qp); - hisi_qm_free_qp(h_qp); - } - - free(priv); - drv->priv = NULL; + return dae_hash_table_init(&agg_ctx->table_data, &agg_ctx->rehash_table, + hash_table, agg_ctx->row_size); } static int dae_get_usage(void *param) @@ -1845,8 +1309,8 @@ static int dae_get_extend_ops(void *ops) if (!agg_ops) return -WD_EINVAL; - agg_ops->get_row_size = dae_get_row_size; - agg_ops->hash_table_init = dae_hash_table_init; + agg_ops->get_row_size = agg_get_row_size; + agg_ops->hash_table_init = agg_hash_table_init; agg_ops->sess_init = hashagg_sess_priv_init; agg_ops->sess_uninit = hashagg_sess_priv_uninit; diff --git a/drv/hisi_dae.h b/drv/hisi_dae.h new file mode 100644 index 00000000..12648138 --- /dev/null +++ b/drv/hisi_dae.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#ifndef __HDAE_DRV_H__ +#define __HDAE_DRV_H__ + +#include <stdbool.h> +#include <stddef.h> +#include <pthread.h> +#include <linux/types.h> + +#include "config.h" +#include "wd_alg.h" +#include "wd_dae.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DAE_SQC_ALG_TYPE 2 +#define DAE_EXT_SQE_SIZE 128 + +/* align size */ +#define DAE_TABLE_ALIGN_SIZE 128 +#define DAE_ADDR_ALIGN_SIZE 128 +#define DAE_CHAR_ALIGN_SIZE 4 + +/* decimal infomartion */ +#define DAE_DECIMAL_PRECISION_OFFSET 8 +#define DAE_DECIMAL128_MAX_PRECISION 38 +#define DAE_DECIMAL64_MAX_PRECISION 18 + +/* hash table */ +#define HASH_EXT_TABLE_INVALID_OFFSET 5 +#define HASH_EXT_TABLE_VALID 0x80 +#define HASH_TABLE_HEAD_TAIL_SIZE 8 +#define HASH_TABLE_EMPTY_SIZE 4 +#define HASH_TABLE_WITDH_POWER 2 +#define HASH_TABLE_MIN_WIDTH 10 +#define HASH_TABLE_MAX_WIDTH 43 +#define HASH_TABLE_OFFSET_3ROW 3 +#define HASH_TABLE_OFFSET_1ROW 1 + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((uintptr_t)(p), (a))) + +#define BIT(nr) (1UL << (nr)) +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +/* DAE hardware protocol data */ +enum dae_done_flag { + DAE_HW_TASK_NOT_PROCESS = 0x0, + DAE_HW_TASK_DONE = 0x1, + DAE_HW_TASK_ERR = 0x2, +}; + +enum dae_error_type { + DAE_TASK_SUCCESS = 0x0, + DAE_TASK_BD_ERROR_MIN = 0x1, + DAE_TASK_BD_ERROR_MAX = 0x7f, + DAE_HASH_TABLE_NEED_REHASH = 0x82, + DAE_HASH_TABLE_INVALID = 0x83, + DAE_HASHAGG_VCHAR_OVERFLOW = 0x84, + DAE_HASHAGG_RESULT_OVERFLOW = 0x85, + DAE_TASK_BUS_ERROR = 0x86, + DAE_HASHAGG_VCHAR_LEN_ERROR = 0x87, +}; + +enum dae_data_type { + DAE_SINT32 = 0x0, + DAE_SINT64 = 0x2, + DAE_DECIMAL64 = 0x9, + DAE_DECIMAL128 = 0xA, + DAE_CHAR = 0xC, + DAE_VCHAR = 0xD, +}; + +enum dae_date_type_size { + SINT32_SIZE = 4, + SINT64_SIZE = 8, + DECIMAL128_SIZE = 16, + DEFAULT_VCHAR_SIZE = 
30, +}; + +enum dae_table_row_size { + ROW_SIZE32 = 32, + ROW_SIZE64 = 64, + ROW_SIZE128 = 128, + ROW_SIZE256 = 256, + ROW_SIZE512 = 512, +}; + +enum dae_bd_type { + DAE_BD_TYPE_V1 = 0x0, + DAE_BD_TYPE_V2 = 0x1, +}; + +struct dae_sqe { + __u32 bd_type : 6; + __u32 resv1 : 2; + __u32 task_type : 6; + __u32 resv2 : 2; + __u32 task_type_ext : 6; + __u32 resv3 : 9; + __u32 bd_invlid : 1; + __u16 table_row_size; + __u16 resv4; + __u32 batch_num; + __u32 low_tag; + __u32 hi_tag; + __u32 data_row_num; + __u32 init_row_num; + __u32 src_table_width : 6; + __u32 dst_table_width : 6; + __u32 key_out_en : 1; + __u32 break_point_en : 1; + __u32 multi_batch_en : 1; + __u32 sva_prefetch_en : 1; + __u32 counta_vld : 1; + __u32 index_num : 5; + __u32 resv5 : 8; + __u32 index_batch_type : 1; + __u32 resv6 : 1; + __u8 key_data_type[16]; + __u8 agg_data_type[16]; + __u32 resv9[6]; + __u64 addr_ext; + __u16 key_col_bitmap; + __u16 has_empty; + __u32 agg_col_bitmap; + __u64 addr_list; + __u32 done_flag : 3; + __u32 output_end : 1; + __u32 ext_err_type : 12; + __u32 err_type : 8; + __u32 wtype : 8; + __u32 out_raw_num; + __u32 data_row_offset; + __u16 sum_overflow_cols; + __u16 resv10; +}; + +struct dae_ext_sqe { + /* + * If date type is char/vchar, data info fill data type size + * If data type is decimal64/decimal128, data info fill data precision + */ + __u16 key_data_info[16]; + __u16 agg_data_info[16]; + /* Aggregated output from input agg col index */ + __u64 out_from_in_idx; + /* Aggregated output from input agg col operation, sum or count */ + __u64 out_optype; + __u32 resv[12]; +}; + +struct dae_col_addr { + __u64 empty_addr; + __u64 empty_size; + __u64 value_addr; + __u64 value_size; +}; + +struct dae_table_addr { + __u64 std_table_addr; + __u64 std_table_size; + __u64 ext_table_addr; + __u64 ext_table_size; +}; + +struct dae_probe_info_addr { + __u64 batch_num_index; + __u64 batch_addr_index; + __u64 probe_index_addr; + __u64 resv1; + __u64 break_point_addr; + __u64 resv2; +}; 
+ +struct dae_addr_list { + __u64 ext_sqe_addr; + __u64 ext_sqe_size; + struct dae_table_addr src_table; + struct dae_table_addr dst_table; + struct dae_probe_info_addr probe_info; + struct dae_col_addr input_addr[32]; + struct dae_col_addr output_addr[32]; +}; + +struct dae_extend_addr { + struct dae_ext_sqe *ext_sqe; + struct dae_addr_list *addr_list; + __u8 *addr_status; + __u16 addr_num; + __u16 tail; +}; + +struct hash_table_data { + void *std_table; + void *ext_table; + __u64 std_table_size; + __u64 ext_table_size; + __u32 table_width; +}; + +struct hisi_dae_ctx { + struct wd_ctx_config_internal config; +}; + +void dae_exit(struct wd_alg_driver *drv); +int dae_init(struct wd_alg_driver *drv, void *conf); +int dae_hash_table_init(struct hash_table_data *hw_table, + struct hash_table_data *rehash_table, + struct wd_dae_hash_table *hash_table, + __u32 row_size); +int get_free_ext_addr(struct dae_extend_addr *ext_addr); +void put_ext_addr(struct dae_extend_addr *ext_addr, int idx); +__u32 get_data_type_size(enum dae_data_type type, __u16 data_info); +int dae_decimal_precision_check(__u16 data_info, bool longdecimal); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drv/hisi_dae_common.c b/drv/hisi_dae_common.c new file mode 100644 index 00000000..43b53e0f --- /dev/null +++ b/drv/hisi_dae_common.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: Apache-2.0 +/* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. 
*/ + +#include <math.h> +#include "hisi_qm_udrv.h" +#include "hisi_dae.h" + +int dae_decimal_precision_check(__u16 data_info, bool longdecimal) +{ + __u8 all_precision; + + /* + * low 8bits: overall precision + * high 8bits: precision of the decimal part + */ + all_precision = data_info; + if (longdecimal) { + if (all_precision > DAE_DECIMAL128_MAX_PRECISION) { + WD_ERR("invalid: longdecimal precision %u is more than support %d!\n", + all_precision, DAE_DECIMAL128_MAX_PRECISION); + return -WD_EINVAL; + } + return WD_SUCCESS; + } + + if (all_precision > DAE_DECIMAL64_MAX_PRECISION) { + WD_ERR("invalid: shortdecimal precision %u is more than support %d!\n", + all_precision, DAE_DECIMAL64_MAX_PRECISION); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +__u32 get_data_type_size(enum dae_data_type type, __u16 data_info) +{ + switch (type) { + case DAE_SINT32: + return SINT32_SIZE; + case DAE_SINT64: + case DAE_DECIMAL64: + return SINT64_SIZE; + case DAE_DECIMAL128: + return DECIMAL128_SIZE; + case DAE_CHAR: + return ALIGN(data_info, DAE_CHAR_ALIGN_SIZE); + case DAE_VCHAR: + return data_info; + default: + break; + } + + return 0; +} + +/* The caller ensures that the address pointer or num is not null. 
*/ +int get_free_ext_addr(struct dae_extend_addr *ext_addr) +{ + __u16 addr_num = ext_addr->addr_num; + __u16 idx = ext_addr->tail; + __u16 cnt = 0; + + while (__atomic_test_and_set(&ext_addr->addr_status[idx], __ATOMIC_ACQUIRE)) { + idx = (idx + 1) % addr_num; + cnt++; + if (cnt == addr_num) + return -WD_EBUSY; + } + + ext_addr->tail = (idx + 1) % addr_num; + + return idx; +} + +void put_ext_addr(struct dae_extend_addr *ext_addr, int idx) +{ + __atomic_clear(&ext_addr->addr_status[idx], __ATOMIC_RELEASE); +} + +static void dae_uninit_qp_priv(handle_t h_qp) +{ + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + struct dae_extend_addr *ext_addr = (struct dae_extend_addr *)qp->priv; + + free(ext_addr->addr_list); + free(ext_addr->addr_status); + free(ext_addr->ext_sqe); + free(ext_addr); + qp->priv = NULL; +} + +static int dae_init_qp_priv(handle_t h_qp) +{ + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + __u16 sq_depth = qp->q_info.sq_depth; + struct dae_extend_addr *ext_addr; + int ret = -WD_ENOMEM; + + ext_addr = calloc(1, sizeof(struct dae_extend_addr)); + if (!ext_addr) + return ret; + + ext_addr->ext_sqe = aligned_alloc(DAE_ADDR_ALIGN_SIZE, DAE_EXT_SQE_SIZE * sq_depth); + if (!ext_addr->ext_sqe) + goto free_ext_addr; + + ext_addr->addr_status = calloc(1, sizeof(__u8) * sq_depth); + if (!ext_addr->addr_status) + goto free_ext_sqe; + + ext_addr->addr_list = aligned_alloc(DAE_ADDR_ALIGN_SIZE, + sizeof(struct dae_addr_list) * sq_depth); + if (!ext_addr->addr_list) + goto free_addr_status; + + ext_addr->addr_num = sq_depth; + qp->priv = ext_addr; + + return WD_SUCCESS; + +free_addr_status: + free(ext_addr->addr_status); +free_ext_sqe: + free(ext_addr->ext_sqe); +free_ext_addr: + free(ext_addr); + + return ret; +} + +static __u32 dae_ext_table_rownum(void **ext_table, struct wd_dae_hash_table *hash_table, + __u32 row_size) +{ + __u64 tlb_size, tmp_size, row_num; + void *tmp_table; + + /* + * The first row of the extended hash table stores the hash table information, + * 
and the second row stores the aggregated data. The 128-bytes aligned address + * in the second row provides the optimal performance. + */ + tmp_table = PTR_ALIGN(hash_table->ext_table, DAE_TABLE_ALIGN_SIZE); + tlb_size = (__u64)hash_table->table_row_size * hash_table->ext_table_row_num; + tmp_size = (__u64)(uintptr_t)tmp_table - (__u64)(uintptr_t)hash_table->ext_table; + if (tmp_size >= tlb_size) + return 0; + + row_num = (tlb_size - tmp_size) / row_size; + if (row_size == ROW_SIZE32) { + if (tmp_size >= row_size) { + tmp_table = (__u8 *)tmp_table - row_size; + row_num += 1; + } else { + /* + * When row size is 32 bytes, the first 96 bytes are not used. + * Ensure that the address of the second row is 128 bytes aligned. + */ + if (row_num > HASH_TABLE_OFFSET_3ROW) { + tmp_table = (__u8 *)tmp_table + HASH_TABLE_OFFSET_3ROW * row_size; + row_num -= HASH_TABLE_OFFSET_3ROW; + } else { + return 0; + } + } + } else if (row_size == ROW_SIZE64) { + if (tmp_size >= row_size) { + tmp_table = (__u8 *)tmp_table - row_size; + row_num += 1; + } else { + /* + * When row size is 64 bytes, the first 64 bytes are not used. + * Ensure that the address of the second row is 128 bytes aligned. 
+ */ + if (row_num > HASH_TABLE_OFFSET_1ROW) { + tmp_table = (__u8 *)tmp_table + HASH_TABLE_OFFSET_1ROW * row_size; + row_num -= HASH_TABLE_OFFSET_1ROW; + } else { + return 0; + } + } + } + + *ext_table = tmp_table; + + return row_num; +} + +static int dae_ext_table_init(struct hash_table_data *hw_table, + struct hash_table_data *rehash_table, + struct wd_dae_hash_table *hash_table, + __u32 row_size, bool is_rehash) +{ + __u64 ext_size = hw_table->ext_table_size; + __u64 tlb_size, row_num; + void *ext_table; + __u8 *ext_valid; + __u64 *ext_row; + + row_num = dae_ext_table_rownum(&ext_table, hash_table, row_size); + if (row_num <= 1) { + WD_ERR("invalid: after aligned, extend table row num is less than device need!\n"); + return -WD_EINVAL; + } + + tlb_size = row_num * row_size; + if (is_rehash && tlb_size <= ext_size) { + WD_ERR("invalid: rehash extend table size %llu is not longer than current %llu!\n", + tlb_size, ext_size); + return -WD_EINVAL; + } + + /* + * If table has been initialized, save the previous data + * before replacing the new table. + */ + if (is_rehash) + memcpy(rehash_table, hw_table, sizeof(struct hash_table_data)); + + /* Initialize the extend table value. */ + memset(ext_table, 0, tlb_size); + ext_valid = (__u8 *)ext_table + HASH_EXT_TABLE_INVALID_OFFSET; + *ext_valid = HASH_EXT_TABLE_VALID; + ext_row = (__u64 *)ext_table + 1; + *ext_row = row_num - 1; + + hw_table->ext_table = ext_table; + hw_table->ext_table_size = tlb_size; + + return WD_SUCCESS; +} + +static int dae_std_table_init(struct hash_table_data *hw_table, + struct wd_dae_hash_table *hash_table, __u32 row_size) +{ + __u64 tlb_size, row_num, tmp_size; + + /* + * Hash table address must be 128-bytes aligned, and the number + * of rows in a standard hash table must be a power of 2. 
+ */ + hw_table->std_table = PTR_ALIGN(hash_table->std_table, DAE_TABLE_ALIGN_SIZE); + tlb_size = (__u64)hash_table->table_row_size * hash_table->std_table_row_num; + tmp_size = (__u64)(uintptr_t)hw_table->std_table - (__u64)(uintptr_t)hash_table->std_table; + if (tmp_size >= tlb_size) { + WD_ERR("invalid: after aligned, standard table size is less than 0!\n"); + return -WD_EINVAL; + } + + row_num = (tlb_size - tmp_size) / row_size; + if (!row_num) { + WD_ERR("invalid: standard table row num is 0!\n"); + return -WD_EINVAL; + } + + hw_table->table_width = (__u32)log2(row_num); + if (hw_table->table_width < HASH_TABLE_MIN_WIDTH || + hw_table->table_width > HASH_TABLE_MAX_WIDTH) { + WD_ERR("invalid: standard table width %u is out of device support range %d~%d!\n", + hw_table->table_width, HASH_TABLE_MIN_WIDTH, HASH_TABLE_MAX_WIDTH); + return -WD_EINVAL; + } + + row_num = (__u64)pow(HASH_TABLE_WITDH_POWER, hw_table->table_width); + hw_table->std_table_size = row_num * row_size; + memset(hw_table->std_table, 0, hw_table->std_table_size); + + return WD_SUCCESS; +} + +int dae_hash_table_init(struct hash_table_data *hw_table, + struct hash_table_data *rehash_table, + struct wd_dae_hash_table *hash_table, + __u32 row_size) +{ + bool is_rehash = false; + int ret; + + if (!row_size || row_size > hash_table->table_row_size) { + WD_ERR("invalid: row size %u is error, device need %u!\n", + hash_table->table_row_size, row_size); + return -WD_EINVAL; + } + + /* hash_std_table is checked by caller */ + if (!hash_table->ext_table || !hash_table->ext_table_row_num) { + WD_ERR("invalid: hash extend table is null!\n"); + return -WD_EINVAL; + } + + if (hw_table->std_table_size) + is_rehash = true; + + ret = dae_ext_table_init(hw_table, rehash_table, hash_table, row_size, is_rehash); + if (ret) + return ret; + + ret = dae_std_table_init(hw_table, hash_table, row_size); + if (ret) + goto update_table; + + return WD_SUCCESS; + +update_table: + if (is_rehash) + memcpy(hw_table, 
rehash_table, sizeof(struct hash_table_data)); + else + memset(hw_table, 0, sizeof(struct hash_table_data)); + return ret; +} + +int dae_init(struct wd_alg_driver *drv, void *conf) +{ + struct wd_ctx_config_internal *config = conf; + struct hisi_qm_priv qm_priv; + struct hisi_dae_ctx *priv; + handle_t h_qp = 0; + handle_t h_ctx; + __u32 i, j; + int ret; + + if (!config || !config->ctx_num) { + WD_ERR("invalid: dae init config is null or ctx num is 0!\n"); + return -WD_EINVAL; + } + + priv = malloc(sizeof(struct hisi_dae_ctx)); + if (!priv) + return -WD_ENOMEM; + + qm_priv.op_type = DAE_SQC_ALG_TYPE; + qm_priv.sqe_size = sizeof(struct dae_sqe); + /* Allocate qp for each context */ + for (i = 0; i < config->ctx_num; i++) { + h_ctx = config->ctxs[i].ctx; + qm_priv.qp_mode = config->ctxs[i].ctx_mode; + /* Setting the epoll en to 0 for ASYNC ctx */ + qm_priv.epoll_en = (qm_priv.qp_mode == CTX_MODE_SYNC) ? + config->epoll_en : 0; + qm_priv.idx = i; + h_qp = hisi_qm_alloc_qp(&qm_priv, h_ctx); + if (!h_qp) { + ret = -WD_ENOMEM; + goto out; + } + config->ctxs[i].sqn = qm_priv.sqn; + ret = dae_init_qp_priv(h_qp); + if (ret) + goto free_h_qp; + } + memcpy(&priv->config, config, sizeof(struct wd_ctx_config_internal)); + drv->priv = priv; + + return WD_SUCCESS; + +free_h_qp: + hisi_qm_free_qp(h_qp); +out: + for (j = 0; j < i; j++) { + h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[j].ctx); + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } + free(priv); + return ret; +} + +void dae_exit(struct wd_alg_driver *drv) +{ + struct wd_ctx_config_internal *config; + struct hisi_dae_ctx *priv; + handle_t h_qp; + __u32 i; + + if (!drv || !drv->priv) + return; + + priv = (struct hisi_dae_ctx *)drv->priv; + config = &priv->config; + for (i = 0; i < config->ctx_num; i++) { + h_qp = (handle_t)wd_ctx_get_priv(config->ctxs[i].ctx); + dae_uninit_qp_priv(h_qp); + hisi_qm_free_qp(h_qp); + } + + free(priv); + drv->priv = NULL; +} diff --git a/drv/hisi_dae_join_gather.c 
b/drv/hisi_dae_join_gather.c
new file mode 100644
index 00000000..db965d35
--- /dev/null
+++ b/drv/hisi_dae_join_gather.c
@@ -0,0 +1,1040 @@
// SPDX-License-Identifier: Apache-2.0
/* Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. */

#include "hisi_qm_udrv.h"
#include "hisi_dae.h"
#include "../include/drv/wd_join_gather_drv.h"

/* Size in bytes of the extended SQE carrying per-column descriptors. */
#define DAE_EXT_SQE_SIZE		128
#define DAE_CTX_Q_NUM_DEF		1

/* column information */
#define DAE_MAX_KEY_COLS		9
#define DAE_MAX_CHAR_SIZE		32
#define DAE_MAX_ROW_SIZE		512
/* NOTE(review): "NUN" looks like a typo for "NUM"; kept for consistency. */
#define DAE_JOIN_MAX_ROW_NUN		50000
#define DAE_JOIN_MAX_BATCH_NUM		2800
#define DAE_MAX_TABLE_NUM		16
#define BUILD_INDEX_ROW_SIZE		8
#define PROBE_INDEX_ROW_SIZE		4

/* align size */
#define DAE_KEY_ALIGN_SIZE		8
#define DAE_BREAKPOINT_SIZE		81920
/* Batch-addr indexes store two entries per output row, hence shift by 1. */
#define DAE_ADDR_INDEX_SHIFT		1

/* hash table */
#define HASH_TABLE_HEAD_TAIL_SIZE	8
#define HASH_TABLE_INDEX_NUM		1
#define HASH_TABLE_MAX_INDEX_NUM	15
#define HASH_TABLE_INDEX_SIZE		12
#define HASH_TABLE_EMPTY_SIZE		4
#define GATHER_ROW_BATCH_EMPTY_SIZE	2

/* DAE hardware protocol data */
enum dae_join_stage {
	DAE_JOIN_BUILD_HASH = 0x0,
	DAE_JOIN_REHASH = 0x6,
	DAE_JOIN_PROBE = 0x7,
};

enum dae_gather_stage {
	DAE_GATHER_CONVERT = 0x0,
	DAE_GATHER_COMPLETE = 0x7,
};

enum dae_task_type {
	DAE_HASH_JOIN = 0x1,
	DAE_GATHER = 0x2,
};

/*
 * Hardware-preferred ordering of column data types; user columns are
 * reordered to this sequence before being handed to the device.
 */
static enum dae_data_type hw_data_type_order[] = {
	DAE_VCHAR, DAE_CHAR, DAE_DECIMAL128,
	DAE_DECIMAL64, DAE_SINT64, DAE_SINT32,
};

/* One user column translated into hardware terms. */
struct hw_join_gather_data {
	enum dae_data_type hw_type;
	__u32 optype;
	/* index of this column in the user's original column array */
	__u32 usr_col_idx;
	__u16 data_info;
};

/* All column layout data derived from the session setup. */
struct join_gather_col_data {
	struct hw_join_gather_data key_data[DAE_MAX_KEY_COLS];
	struct hw_join_gather_data gather_data[DAE_MAX_TABLE_NUM][DAE_MAX_KEY_COLS];

	__u32 key_num;
	__u32 gather_table_num;
	__u32 gather_cols_num[DAE_MAX_TABLE_NUM];
	/* per-table bitmap: bit j set if column j may contain empty data */
	__u16 has_empty[DAE_MAX_TABLE_NUM];
	__u8 index_num;
};

/* Per-session driver context, stored as the session's private data. */
struct join_gather_ctx {
	struct join_gather_col_data cols_data;
	struct hash_table_data table_data;
	struct hash_table_data rehash_table;
	pthread_spinlock_t lock;
	__u32 hash_table_row_size;
	__u32 batch_row_size[DAE_MAX_TABLE_NUM];
};

/* Fill the per-operation SQE control fields (task type, row counts, flags). */
static void fill_join_gather_misc_field(struct wd_join_gather_msg *msg,
					struct dae_sqe *sqe)
{
	struct join_gather_ctx *ctx = msg->priv;
	struct join_gather_col_data *cols_data = &ctx->cols_data;

	sqe->sva_prefetch_en = true;

	switch (msg->op_type) {
	case WD_JOIN_BUILD_HASH:
		sqe->task_type = DAE_HASH_JOIN;
		sqe->task_type_ext = DAE_JOIN_BUILD_HASH;
		sqe->data_row_num = msg->req.input_row_num;
		sqe->batch_num = msg->req.join_req.build_batch_index;
		sqe->init_row_num = msg->req.join_req.batch_row_offset;
		sqe->index_num = cols_data->index_num;
		break;
	case WD_JOIN_PROBE:
		sqe->task_type = DAE_HASH_JOIN;
		sqe->task_type_ext = DAE_JOIN_PROBE;
		sqe->data_row_num = msg->req.output_row_num;
		sqe->batch_num = msg->req.input_row_num;
		sqe->init_row_num = msg->req.join_req.batch_row_offset;
		sqe->index_num = cols_data->index_num;
		sqe->key_out_en = msg->key_out_en;
		/* a nonzero row offset means we resume from a breakpoint */
		sqe->break_point_en = sqe->init_row_num ? true : false;
		sqe->index_batch_type = msg->index_type;
		break;
	case WD_JOIN_REHASH:
		sqe->task_type = DAE_HASH_JOIN;
		sqe->task_type_ext = DAE_JOIN_REHASH;
		sqe->data_row_num = msg->req.output_row_num;
		sqe->index_num = cols_data->index_num;
		break;
	case WD_GATHER_CONVERT:
		sqe->task_type = DAE_GATHER;
		sqe->task_type_ext = DAE_GATHER_CONVERT;
		sqe->data_row_num = msg->req.input_row_num;
		break;
	case WD_GATHER_COMPLETE:
		sqe->task_type = DAE_GATHER;
		sqe->task_type_ext = DAE_GATHER_COMPLETE;
		sqe->multi_batch_en = msg->multi_batch_en;
		sqe->index_batch_type = msg->index_type;
		sqe->data_row_num = msg->req.output_row_num;
		break;
	default:
		break;
	}
}

/*
 * Select the source/destination hash tables for the join stage and write
 * their addresses and sizes into the SQE address list.
 * BUILD writes into the session table, REHASH copies old -> new, PROBE reads.
 */
static void fill_join_table_data(struct dae_sqe *sqe, struct dae_addr_list *addr_list,
				 struct wd_join_gather_msg *msg, struct join_gather_ctx *ctx)
{
	struct dae_table_addr *hw_table_src = &addr_list->src_table;
	struct dae_table_addr *hw_table_dst = &addr_list->dst_table;
	struct hash_table_data *table_data_src, *table_data_dst;

	switch (msg->op_type) {
	case WD_JOIN_BUILD_HASH:
		table_data_src = NULL;
		table_data_dst = &ctx->table_data;
		break;
	case WD_JOIN_REHASH:
		table_data_src = &ctx->rehash_table;
		table_data_dst = &ctx->table_data;
		break;
	case WD_JOIN_PROBE:
		table_data_src = &ctx->table_data;
		table_data_dst = NULL;
		break;
	default:
		return;
	}

	sqe->table_row_size = ctx->hash_table_row_size;

	if (table_data_src) {
		sqe->src_table_width = table_data_src->table_width;
		hw_table_src->std_table_addr = (__u64)(uintptr_t)table_data_src->std_table;
		hw_table_src->std_table_size = table_data_src->std_table_size;
		hw_table_src->ext_table_addr = (__u64)(uintptr_t)table_data_src->ext_table;
		hw_table_src->ext_table_size = table_data_src->ext_table_size;
	}

	if (table_data_dst) {
		sqe->dst_table_width = table_data_dst->table_width;
		hw_table_dst->std_table_addr = (__u64)(uintptr_t)table_data_dst->std_table;
		hw_table_dst->std_table_size = table_data_dst->std_table_size;
		hw_table_dst->ext_table_addr = (__u64)(uintptr_t)table_data_dst->ext_table;
		hw_table_dst->ext_table_size = table_data_dst->ext_table_size;
	}
}

/*
 * Map the user's key columns (reordered at session setup) into the SQE,
 * plus, for PROBE, the index/breakpoint outputs and optional key outputs.
 */
static void fill_join_key_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe,
			       struct dae_addr_list *addr_list,
			       struct wd_join_gather_msg *msg, struct join_gather_ctx *ctx)
{
	struct dae_probe_info_addr *info = &addr_list->probe_info;
	struct hw_join_gather_data *key_data = ctx->cols_data.key_data;
	struct wd_dae_col_addr *usr_key, *out_usr_key = NULL;
	struct dae_col_addr *hw_key, *out_hw_key = NULL;
	struct wd_join_req *req = &msg->req.join_req;
	struct wd_probe_out_info *output = &req->probe_output;
	__u16 usr_col_idx;
	__u64 offset;
	__u32 i;

	sqe->key_col_bitmap = GENMASK(msg->key_cols_num - 1, 0);

	for (i = 0; i < msg->key_cols_num; i++) {
		sqe->key_data_type[i] = key_data[i].hw_type;
		ext_sqe->key_data_info[i] = key_data[i].data_info;
	}

	switch (msg->op_type) {
	case WD_JOIN_BUILD_HASH:
		usr_key = req->key_cols;
		hw_key = addr_list->input_addr;
		if (msg->index_type == WD_BATCH_ADDR_INDEX)
			sqe->addr_ext = (__u64)(uintptr_t)req->build_batch_addr.addr;
		break;
	case WD_JOIN_PROBE:
		usr_key = req->key_cols;
		hw_key = addr_list->input_addr;
		if (msg->key_out_en) {
			out_usr_key = output->key_cols;
			out_hw_key = addr_list->output_addr;
		}

		info->batch_num_index = (__u64)(uintptr_t)output->build_index.addr;
		info->probe_index_addr = (__u64)(uintptr_t)output->probe_index.addr;
		info->break_point_addr = (__u64)(uintptr_t)output->breakpoint.addr;

		if (msg->index_type == WD_BATCH_ADDR_INDEX) {
			/* second half of the build-index buffer holds addresses */
			offset = (__u64)output->build_index.row_size * output->build_index.row_num;
			offset = offset >> DAE_ADDR_INDEX_SHIFT;
			info->batch_addr_index = info->batch_num_index + offset;
		}
		break;
	default:
		return;
	}

	for (i = 0; i < msg->key_cols_num; i++) {
		usr_col_idx = key_data[i].usr_col_idx;
		hw_key[i].empty_addr = (__u64)(uintptr_t)usr_key[usr_col_idx].empty;
		hw_key[i].empty_size = usr_key[usr_col_idx].empty_size;
		hw_key[i].value_addr = (__u64)(uintptr_t)usr_key[usr_col_idx].value;
		hw_key[i].value_size = usr_key[usr_col_idx].value_size;

		if (!out_usr_key)
			continue;
		out_hw_key[i].empty_addr = (__u64)(uintptr_t)out_usr_key[usr_col_idx].empty;
		out_hw_key[i].empty_size = out_usr_key[usr_col_idx].empty_size;
		/* The hardware does not output the empty data, set the data by software. */
		memset(out_usr_key[usr_col_idx].empty, 0, out_usr_key[usr_col_idx].empty_size);

		out_hw_key[i].value_addr = (__u64)(uintptr_t)out_usr_key[usr_col_idx].value;
		out_hw_key[i].value_size = out_usr_key[usr_col_idx].value_size;
	}
}

/*
 * Map the gather table's data columns and row-batch addresses into the SQE.
 * CONVERT packs columns into a row batch; COMPLETE unpacks rows back into
 * columns, optionally driven by a multi-batch index.
 */
static void fill_gather_col_data(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe,
				 struct dae_addr_list *addr_list,
				 struct wd_join_gather_msg *msg, struct join_gather_ctx *ctx)
{
	struct dae_probe_info_addr *info = &addr_list->probe_info;
	struct join_gather_col_data *cols_data = &ctx->cols_data;
	struct wd_gather_req *gather_req = &msg->req.gather_req;
	__u32 table_index = gather_req->table_index;
	struct hw_join_gather_data *gather_data = cols_data->gather_data[table_index];
	__u16 cols_num = cols_data->gather_cols_num[table_index];
	struct wd_dae_col_addr *usr_data;
	struct dae_col_addr *hw_data;
	__u16 usr_col_idx;
	void **batch_addr;
	__u64 offset;
	__u32 i;

	sqe->key_col_bitmap = GENMASK(cols_num - 1, 0);
	sqe->has_empty = cols_data->has_empty[table_index];
	sqe->table_row_size = ctx->batch_row_size[table_index];

	usr_data = gather_req->data_cols;
	batch_addr = gather_req->row_batchs.batch_addr;

	switch (msg->op_type) {
	case WD_GATHER_CONVERT:
		hw_data = addr_list->input_addr;
		/* Single batch tasks use the first element of the array. */
		addr_list->dst_table.std_table_addr = (__u64)(uintptr_t)batch_addr[0];
		break;
	case WD_GATHER_COMPLETE:
		hw_data = addr_list->output_addr;
		if (!msg->multi_batch_en) {
			info->probe_index_addr = (__u64)(uintptr_t)gather_req->index.addr;
			addr_list->src_table.std_table_addr = (__u64)(uintptr_t)batch_addr[0];
			break;
		}

		info->batch_num_index = (__u64)(uintptr_t)gather_req->index.addr;
		if (msg->index_type == WD_BATCH_ADDR_INDEX) {
			offset = (__u64)gather_req->index.row_size * gather_req->index.row_num;
			offset = offset >> DAE_ADDR_INDEX_SHIFT;
			info->batch_addr_index = info->batch_num_index + offset;
		} else {
			/*
			 * NOTE(review): batch-number indexing passes the whole
			 * batch_addr array pointer (not batch_addr[0]) - the
			 * device presumably walks the address list; confirm
			 * against the hardware spec.
			 */
			addr_list->src_table.std_table_addr = (__u64)(uintptr_t)batch_addr;
		}
		break;
	default:
		return;
	}

	for (i = 0; i < cols_num; i++) {
		sqe->key_data_type[i] = gather_data[i].hw_type;
		ext_sqe->key_data_info[i] = gather_data[i].data_info;

		usr_col_idx = gather_data[i].usr_col_idx;
		hw_data[i].empty_addr = (__u64)(uintptr_t)usr_data[usr_col_idx].empty;
		hw_data[i].empty_size = usr_data[usr_col_idx].empty_size;
		hw_data[i].value_addr = (__u64)(uintptr_t)usr_data[usr_col_idx].value;
		hw_data[i].value_size = usr_data[usr_col_idx].value_size;
	}
}

/* Reset the extended SQE and address list, then link them into the SQE. */
static void fill_join_gather_ext_addr(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe,
				      struct dae_addr_list *addr_list)
{
	memset(ext_sqe, 0, DAE_EXT_SQE_SIZE);
	memset(addr_list, 0, sizeof(struct dae_addr_list));
	sqe->addr_list = (__u64)(uintptr_t)addr_list;
	addr_list->ext_sqe_addr = (__u64)(uintptr_t)ext_sqe;
	addr_list->ext_sqe_size = DAE_EXT_SQE_SIZE;
}

/* Dispatch to the per-operation SQE fill helpers. */
static void fill_join_gather_info(struct dae_sqe *sqe, struct dae_ext_sqe *ext_sqe,
				  struct dae_addr_list *addr_list,
				  struct wd_join_gather_msg *msg)
{
	struct join_gather_ctx *ctx = (struct join_gather_ctx *)msg->priv;

	fill_join_gather_ext_addr(sqe, ext_sqe, addr_list);
	sqe->bd_type = DAE_BD_TYPE_V2;

	switch (msg->op_type) {
	case WD_JOIN_BUILD_HASH:
	case WD_JOIN_PROBE:
	case WD_JOIN_REHASH:
+ fill_join_table_data(sqe, addr_list, msg, ctx); + fill_join_key_data(sqe, ext_sqe, addr_list, msg, ctx); + break; + case WD_GATHER_CONVERT: + case WD_GATHER_COMPLETE: + fill_gather_col_data(sqe, ext_sqe, addr_list, msg, ctx); + break; + default: + break; + } +} + +static int check_join_gather_param(struct wd_join_gather_msg *msg) +{ + struct wd_probe_out_info *output; + struct wd_gather_req *greq; + __u64 row_num, size; + + if (!msg) { + WD_ERR("invalid: input join gather msg is NULL!\n"); + return -WD_EINVAL; + } + + output = &msg->req.join_req.probe_output; + greq = &msg->req.gather_req; + + switch (msg->op_type) { + case WD_JOIN_BUILD_HASH: + if (msg->req.input_row_num > DAE_JOIN_MAX_ROW_NUN) { + WD_ERR("invalid: build table row count %u is more than %d!\n", + msg->req.input_row_num, DAE_JOIN_MAX_ROW_NUN); + return -WD_EINVAL; + } + if (msg->index_type == WD_BATCH_NUMBER_INDEX) { + if (msg->req.join_req.build_batch_index >= DAE_JOIN_MAX_BATCH_NUM) { + WD_ERR("invalid: input join batch index is more than %d!\n", + DAE_JOIN_MAX_BATCH_NUM - 1); + return -WD_EINVAL; + } + } else { + if (!msg->req.join_req.build_batch_addr.addr || + !msg->req.join_req.build_batch_addr.row_num || + !msg->req.join_req.build_batch_addr.row_size) { + WD_ERR("invalid: input join build batch addr is NULL!\n"); + return -WD_EINVAL; + } + } + break; + case WD_JOIN_PROBE: + size = (__u64)output->breakpoint.row_size * output->breakpoint.row_num; + if (!output->breakpoint.addr || size < DAE_BREAKPOINT_SIZE) { + WD_ERR("invalid probe breakpoint size: %llu\n", size); + return -WD_EINVAL; + } + if (msg->index_type == WD_BATCH_ADDR_INDEX) { + row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; + if (output->build_index.row_num < row_num) { + WD_ERR("build index row number is less than: %llu\n", + row_num); + return -WD_EINVAL; + } + } + + if (output->probe_index.row_size != PROBE_INDEX_ROW_SIZE || + output->build_index.row_size != BUILD_INDEX_ROW_SIZE) { + WD_ERR("build and probe index row 
size need be %d, %d!\n", + BUILD_INDEX_ROW_SIZE, PROBE_INDEX_ROW_SIZE); + return -WD_EINVAL; + } + break; + case WD_JOIN_REHASH: + case WD_GATHER_CONVERT: + break; + case WD_GATHER_COMPLETE: + if (!msg->multi_batch_en) { + if (greq->index.row_size != PROBE_INDEX_ROW_SIZE) { + WD_ERR("invalid: probe index row size need be %d!\n", + PROBE_INDEX_ROW_SIZE); + return -WD_EINVAL; + } + break; + } + + if (greq->index.row_size != BUILD_INDEX_ROW_SIZE) { + WD_ERR("invalid: build index row size need be %d!\n", + BUILD_INDEX_ROW_SIZE); + return -WD_EINVAL; + } + if (msg->index_type == WD_BATCH_NUMBER_INDEX) { + if (greq->row_batchs.batch_num > DAE_JOIN_MAX_BATCH_NUM) { + WD_ERR("invalid: gather row batch num is more than %d!\n", + DAE_JOIN_MAX_BATCH_NUM); + return -WD_EINVAL; + } + } else { + row_num = msg->req.output_row_num << DAE_ADDR_INDEX_SHIFT; + if (greq->index.row_num < row_num) { + WD_ERR("build index row number is less than: %llu\n", + row_num); + return -WD_EINVAL; + } + } + break; + default: + break; + } + + return WD_SUCCESS; +} + +static int join_gather_send(struct wd_alg_driver *drv, handle_t ctx, void *send_msg) +{ + handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + struct dae_extend_addr *ext_addr = qp->priv; + struct wd_join_gather_msg *msg = send_msg; + struct dae_addr_list *addr_list; + struct dae_ext_sqe *ext_sqe; + struct dae_sqe sqe = {0}; + __u16 send_cnt = 0; + int ret, idx; + + ret = check_join_gather_param(msg); + if (ret) + return ret; + + fill_join_gather_misc_field(msg, &sqe); + + idx = get_free_ext_addr(ext_addr); + if (idx < 0) + return -WD_EBUSY; + addr_list = &ext_addr->addr_list[idx]; + ext_sqe = &ext_addr->ext_sqe[idx]; + + fill_join_gather_info(&sqe, ext_sqe, addr_list, msg); + + hisi_set_msg_id(h_qp, &msg->tag); + sqe.low_tag = msg->tag; + sqe.hi_tag = idx; + + ret = hisi_qm_send(h_qp, &sqe, 1, &send_cnt); + if (ret) { + if (ret != -WD_EBUSY) + WD_ERR("failed to send to hardware, ret = 
%d!\n", ret); + put_ext_addr(ext_addr, idx); + return ret; + } + + return WD_SUCCESS; +} + +static void fill_join_gather_task_done(struct dae_sqe *sqe, struct wd_join_gather_msg *msg) +{ + if (sqe->task_type == DAE_HASH_JOIN) { + if (sqe->task_type_ext == DAE_JOIN_PROBE) { + msg->consumed_row_num = sqe->data_row_offset; + msg->produced_row_num = sqe->out_raw_num; + msg->output_done = sqe->output_end; + } else if (sqe->task_type_ext == DAE_JOIN_REHASH) { + msg->output_done = sqe->output_end; + } + } +} + +static void fill_join_gather_task_err(struct dae_sqe *sqe, struct wd_join_gather_msg *msg) +{ + switch (sqe->err_type) { + case DAE_TASK_BD_ERROR_MIN ... DAE_TASK_BD_ERROR_MAX: + WD_ERR("failed to do join gather task, bd error=0x%x!\n", sqe->err_type); + msg->result = WD_JOIN_GATHER_PARSE_ERROR; + break; + case DAE_HASH_TABLE_NEED_REHASH: + msg->result = WD_JOIN_GATHER_NEED_REHASH; + break; + case DAE_HASH_TABLE_INVALID: + msg->result = WD_JOIN_GATHER_INVALID_HASH_TABLE; + break; + case DAE_TASK_BUS_ERROR: + WD_ERR("failed to do join gather task, bus error %u!\n", sqe->err_type); + msg->result = WD_JOIN_GATHER_BUS_ERROR; + break; + default: + WD_ERR("failed to do dae task! 
done_flag=0x%x, etype=0x%x, ext_type = 0x%x!\n", + (__u32)sqe->done_flag, (__u32)sqe->err_type, (__u32)sqe->ext_err_type); + msg->result = WD_JOIN_GATHER_PARSE_ERROR; + break; + } + + if (sqe->task_type == DAE_HASH_JOIN && sqe->task_type_ext == DAE_JOIN_PROBE) { + msg->produced_row_num = sqe->out_raw_num; + msg->consumed_row_num = sqe->data_row_offset; + msg->output_done = sqe->output_end; + } +} + +static int join_gather_recv(struct wd_alg_driver *drv, handle_t hctx, void *recv_msg) +{ + handle_t h_qp = (handle_t)wd_ctx_get_priv(hctx); + struct hisi_qp *qp = (struct hisi_qp *)h_qp; + struct dae_extend_addr *ext_addr = qp->priv; + struct wd_join_gather_msg *msg = recv_msg; + struct wd_join_gather_msg *send_msg; + struct dae_sqe sqe = {0}; + __u16 recv_cnt = 0; + int ret; + + ret = hisi_qm_recv(h_qp, &sqe, 1, &recv_cnt); + if (ret) + return ret; + + ret = hisi_check_bd_id(h_qp, msg->tag, sqe.low_tag); + if (ret) + goto out; + + msg->tag = sqe.low_tag; + if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { + send_msg = wd_join_gather_get_msg(qp->q_info.idx, msg->tag); + if (!send_msg) { + msg->result = WD_JOIN_GATHER_IN_EPARA; + WD_ERR("failed to get send msg! 
idx = %u, tag = %u.\n",
			       qp->q_info.idx, msg->tag);
			ret = -WD_EINVAL;
			goto out;
		}
	}

	msg->result = WD_JOIN_GATHER_TASK_DONE;
	msg->consumed_row_num = 0;

	if (likely(sqe.done_flag == DAE_HW_TASK_DONE)) {
		fill_join_gather_task_done(&sqe, msg);
	} else if (sqe.done_flag == DAE_HW_TASK_ERR) {
		fill_join_gather_task_err(&sqe, msg);
	} else {
		msg->result = WD_JOIN_GATHER_PARSE_ERROR;
		WD_ERR("failed to do join gather task, hardware doesn't process the task!\n");
	}

out:
	/* release the ext-addr slot reserved by join_gather_send() */
	put_ext_addr(ext_addr, sqe.hi_tag);
	return ret;
}

/*
 * Validate join key columns at session setup: column count limit,
 * decimal precision, and rejection of char/varchar keys.
 */
static int join_check_params(struct wd_join_gather_col_info *key_info, __u32 cols_num)
{
	__u32 i;
	int ret;

	if (cols_num > DAE_MAX_KEY_COLS) {
		WD_ERR("invalid: join key cols num %u is more than device support %d!\n",
		       cols_num, DAE_MAX_KEY_COLS);
		return -WD_EINVAL;
	}

	for (i = 0; i < cols_num; i++) {
		switch (key_info[i].data_type) {
		case WD_DAE_SHORT_DECIMAL:
			ret = dae_decimal_precision_check(key_info[i].data_info, false);
			if (ret)
				return ret;
			break;
		case WD_DAE_LONG_DECIMAL:
			ret = dae_decimal_precision_check(key_info[i].data_info, true);
			if (ret)
				return ret;
			break;
		case WD_DAE_CHAR:
		case WD_DAE_VARCHAR:
			WD_ERR("invalid: key col %u, char or varchar isn't supported!\n", i);
			return -WD_EINVAL;
		default:
			break;
		}
	}

	return WD_SUCCESS;
}

/*
 * Validate gather tables at session setup: table/column count limits,
 * decimal precision, char size bound, and rejection of varchar columns.
 */
static int gather_check_params(struct wd_join_gather_sess_setup *setup)
{
	struct wd_gather_table_info *table = setup->gather_tables;
	struct wd_join_gather_col_info *col;
	__u32 i, j;
	int ret;

	if (setup->gather_table_num > DAE_MAX_TABLE_NUM) {
		WD_ERR("invalid: gather table num %u is more than device support %d!\n",
		       setup->gather_table_num, DAE_MAX_TABLE_NUM);
		return -WD_EINVAL;
	}

	for (i = 0; i < setup->gather_table_num; i++) {
		col = table[i].cols;
		if (table[i].cols_num > DAE_MAX_KEY_COLS) {
			WD_ERR("invalid: gather cols num %u is more than device support %d!\n",
			       table[i].cols_num, DAE_MAX_KEY_COLS);
			return -WD_EINVAL;
		}
		for (j = 0; j < table[i].cols_num; j++) {
			switch (col[j].data_type) {
			case WD_DAE_SHORT_DECIMAL:
				ret = dae_decimal_precision_check(col[j].data_info, false);
				if (ret)
					return ret;
				break;
			case WD_DAE_LONG_DECIMAL:
				ret = dae_decimal_precision_check(col[j].data_info, true);
				if (ret)
					return ret;
				break;
			case WD_DAE_CHAR:
				/* NOTE(review): data_info == 0 is not rejected here */
				if (col[j].data_info > DAE_MAX_CHAR_SIZE) {
					WD_ERR("gather col %u, char size isn't supported!\n", j);
					return -WD_EINVAL;
				}
				break;
			case WD_DAE_VARCHAR:
				WD_ERR("invalid: gather col %u, varchar isn't supported!\n", j);
				return -WD_EINVAL;
			default:
				break;
			}
		}
	}

	return WD_SUCCESS;
}

/* Run the setup checks required by the session's algorithm. */
static int join_gather_param_check(struct wd_join_gather_sess_setup *setup)
{
	int ret;

	switch (setup->alg) {
	case WD_JOIN:
		return join_check_params(setup->join_table.build_key_cols,
					 setup->join_table.build_key_cols_num);
	case WD_GATHER:
		return gather_check_params(setup);
	case WD_JOIN_GATHER:
		ret = join_check_params(setup->join_table.build_key_cols,
					setup->join_table.build_key_cols_num);
		if (ret)
			return ret;

		return gather_check_params(setup);
	default:
		return -WD_EINVAL;
	}
}

/* Translate user column types to hardware types, in user order. */
static int transfer_col_info(struct wd_join_gather_col_info *cols,
			     struct hw_join_gather_data *data, __u32 col_num)
{
	__u32 i;

	for (i = 0; i < col_num; i++) {
		switch (cols[i].data_type) {
		case WD_DAE_CHAR:
			data[i].hw_type = DAE_CHAR;
			data[i].data_info = cols[i].data_info;
			break;
		case WD_DAE_LONG_DECIMAL:
			data[i].hw_type = DAE_DECIMAL128;
			break;
		case WD_DAE_SHORT_DECIMAL:
			data[i].hw_type = DAE_DECIMAL64;
			break;
		case WD_DAE_LONG:
			data[i].hw_type = DAE_SINT64;
			break;
		case WD_DAE_INT:
		case WD_DAE_DATE:
			data[i].hw_type = DAE_SINT32;
			break;
		default:
			return -WD_EINVAL;
		}
	}

	return WD_SUCCESS;
}

/*
 * Translate and reorder user columns into hw_data following the
 * hardware-preferred type order; usr_col_idx records the original index.
 */
static int transfer_cols_to_hw_type(struct wd_join_gather_col_info *cols,
				    struct hw_join_gather_data *hw_data, __u32 cols_num)
{
	struct hw_join_gather_data tmp_data[DAE_MAX_KEY_COLS] = {0};
	__u32 type_num = ARRAY_SIZE(hw_data_type_order);
	__u32 i, j, k = 0;
	int ret;

	ret = transfer_col_info(cols, tmp_data, cols_num);
	if (ret)
		return ret;

	/* stable selection sort by hardware type order */
	for (i = 0; i < type_num; i++) {
		for (j = 0; j < cols_num; j++) {
			if (hw_data_type_order[i] != tmp_data[j].hw_type)
				continue;
			hw_data[k].usr_col_idx = j;
			hw_data[k].hw_type = tmp_data[j].hw_type;
			hw_data[k++].data_info = tmp_data[j].data_info;
		}
	}

	return WD_SUCCESS;
}

/* Translate every gather table's columns and build the has_empty bitmaps. */
static int transfer_data_to_hw_type(struct join_gather_col_data *cols_data,
				    struct wd_join_gather_sess_setup *setup)
{
	struct wd_gather_table_info *tables = setup->gather_tables;
	struct wd_join_gather_col_info *gather_cols;
	struct hw_join_gather_data *hw_data;
	__u32 n, j;
	int ret;

	for (n = 0; n < setup->gather_table_num; n++) {
		gather_cols = tables[n].cols;
		hw_data = cols_data->gather_data[n];
		ret = transfer_cols_to_hw_type(gather_cols, hw_data, tables[n].cols_num);
		if (ret)
			return ret;

		cols_data->gather_cols_num[n] = tables[n].cols_num;
		for (j = 0; j < tables[n].cols_num; j++)
			if (gather_cols[j].has_empty)
				cols_data->has_empty[n] |= (1 << j);
	}

	return WD_SUCCESS;
}

/* Translate the build-side join key columns. */
static int transfer_key_to_hw_type(struct join_gather_col_data *cols_data,
				   struct wd_join_gather_sess_setup *setup)
{
	struct wd_join_gather_col_info *key_cols = setup->join_table.build_key_cols;
	struct hw_join_gather_data *hw_key_data = cols_data->key_data;
	__u32 cols_num = setup->join_table.build_key_cols_num;
	int ret;

	ret = transfer_cols_to_hw_type(key_cols, hw_key_data, cols_num);
	if (ret)
		return ret;

	cols_data->key_num = cols_num;

	return WD_SUCCESS;
}

/*
 * Compute the hash table row size: key widths + head/tail + index slots,
 * rounded up to the next supported hardware row size.
 * Returns a ROW_SIZE* value, or -WD_EINVAL on bad index num / oversize row.
 */
static int join_get_table_rowsize(struct join_gather_col_data *cols_data,
				  struct wd_join_gather_sess_setup *setup)
{
	struct hw_join_gather_data *key_data = cols_data->key_data;
	__u32 key_num = cols_data->key_num;
	__u64 row_count_size = 0;
	__u32 i;

	cols_data->index_num = setup->join_table.hash_table_index_num;

	if (cols_data->index_num > HASH_TABLE_MAX_INDEX_NUM) {
		WD_ERR("invalid: hash table index num is not supported!\n");
		return -WD_EINVAL;
	} else if (!cols_data->index_num) {
		WD_INFO("Hash table index num is not set, set to default: 1!\n");
		cols_data->index_num = HASH_TABLE_INDEX_NUM;
	}

	/* With a restriction on the col number, the sum lengths will not overflow. */
	for (i = 0; i < key_num; i++)
		row_count_size += get_data_type_size(key_data[i].hw_type, 0);

	row_count_size = ALIGN(row_count_size, DAE_KEY_ALIGN_SIZE);
	row_count_size += HASH_TABLE_HEAD_TAIL_SIZE +
			  cols_data->index_num * HASH_TABLE_INDEX_SIZE;
	if (row_count_size > DAE_MAX_ROW_SIZE) {
		WD_ERR("invalid: hash table row size %llu, hash_table_index_num %u!\n",
		       row_count_size, cols_data->index_num);
		return -WD_EINVAL;
	}

	if (row_count_size <= ROW_SIZE32)
		return ROW_SIZE32;

	if (row_count_size <= ROW_SIZE64)
		return ROW_SIZE64;

	if (row_count_size <= ROW_SIZE128)
		return ROW_SIZE128;

	if (row_count_size <= ROW_SIZE256)
		return ROW_SIZE256;

	return ROW_SIZE512;
}

/* Compute each gather table's packed row-batch row size. */
static void gather_get_batch_rowsize(struct join_gather_col_data *cols_data,
				     struct wd_join_gather_sess_setup *setup,
				     __u32 *batch_row_size)
{
	struct wd_gather_table_info *tables = setup->gather_tables;
	struct hw_join_gather_data *gather_data;
	__u32 row_count_size = 0;
	__u32 n, i;

	cols_data->gather_table_num = setup->gather_table_num;
	for (n = 0; n < setup->gather_table_num; n++) {
		row_count_size = 0;
		gather_data = cols_data->gather_data[n];

		/* With a restriction on the col number, the sum length will not overflow.
*/ + for (i = 0; i < tables[n].cols_num; i++) + row_count_size += get_data_type_size(gather_data[i].hw_type, + gather_data[i].data_info); + + batch_row_size[n] = row_count_size + GATHER_ROW_BATCH_EMPTY_SIZE; + } +} + +static int join_gather_fill_ctx(struct join_gather_ctx *ctx, + struct wd_join_gather_sess_setup *setup) +{ + struct join_gather_col_data *cols_data = &ctx->cols_data; + int ret; + + if (setup->alg != WD_GATHER) { + ret = transfer_key_to_hw_type(cols_data, setup); + if (ret) + return ret; + + ret = join_get_table_rowsize(cols_data, setup); + if (ret < 0) + return -WD_EINVAL; + ctx->hash_table_row_size = ret; + } + + if (setup->alg != WD_JOIN) { + ret = transfer_data_to_hw_type(cols_data, setup); + if (ret) + return ret; + + gather_get_batch_rowsize(cols_data, setup, ctx->batch_row_size); + } + + return WD_SUCCESS; +} + +static void join_gather_sess_priv_uninit(struct wd_alg_driver *drv, void *priv) +{ + struct join_gather_ctx *ctx = priv; + + if (!ctx) { + WD_ERR("invalid: dae sess uninit priv is NULL!\n"); + return; + } + + pthread_spin_destroy(&ctx->lock); + free(ctx); +} + +static int join_gather_sess_priv_init(struct wd_alg_driver *drv, + struct wd_join_gather_sess_setup *setup, void **priv) +{ + struct join_gather_ctx *ctx; + int ret; + + if (!drv || !drv->priv) { + WD_ERR("invalid: dae drv is NULL!\n"); + return -WD_EINVAL; + } + + if (!setup || !priv) { + WD_ERR("invalid: dae sess priv is NULL!\n"); + return -WD_EINVAL; + } + + ret = join_gather_param_check(setup); + if (ret) + return -WD_EINVAL; + + ctx = calloc(1, sizeof(struct join_gather_ctx)); + if (!ctx) + return -WD_ENOMEM; + + ret = join_gather_fill_ctx(ctx, setup); + if (ret) + goto free_ctx; + + ret = pthread_spin_init(&ctx->lock, PTHREAD_PROCESS_SHARED); + if (ret) + goto free_ctx; + + *priv = ctx; + + return WD_SUCCESS; + +free_ctx: + free(ctx); + return ret; +} + +static int join_get_table_row_size(struct wd_alg_driver *drv, void *param) +{ + struct join_gather_ctx *ctx = param; + + 
	if (!ctx)
		return -WD_EINVAL;

	return ctx->hash_table_row_size;
}

/*
 * Extend-ops hook: copy up to DAE_MAX_TABLE_NUM gather batch row sizes
 * (size is in bytes) into the caller's array.
 */
static int gather_get_batch_row_size(struct wd_alg_driver *drv, void *param,
				     __u32 *row_size, __u32 size)
{
	struct join_gather_ctx *ctx = param;

	if (!ctx)
		return -WD_EINVAL;

	if (!size || size > DAE_MAX_TABLE_NUM * sizeof(__u32))
		return -WD_EINVAL;

	memcpy(row_size, ctx->batch_row_size, size);

	return 0;
}

/* Extend-ops hook: bind the user-provided hash table memory to the session. */
static int join_hash_table_init(struct wd_alg_driver *drv,
				struct wd_dae_hash_table *table, void *priv)
{
	struct join_gather_ctx *ctx = priv;

	if (!ctx || !table)
		return -WD_EINVAL;

	return dae_hash_table_init(&ctx->table_data, &ctx->rehash_table,
				   table, ctx->hash_table_row_size);
}

/* Export the driver's extended operations to the wd_join_gather layer. */
static int join_gather_get_extend_ops(void *ops)
{
	struct wd_join_gather_ops *join_gather_ops = (struct wd_join_gather_ops *)ops;

	if (!join_gather_ops)
		return -WD_EINVAL;

	join_gather_ops->get_table_row_size = join_get_table_row_size;
	join_gather_ops->get_batch_row_size = gather_get_batch_row_size;
	join_gather_ops->hash_table_init = join_hash_table_init;
	join_gather_ops->sess_init = join_gather_sess_priv_init;
	join_gather_ops->sess_uninit = join_gather_sess_priv_uninit;

	return WD_SUCCESS;
}

/*
 * NOTE(review): drv_name is "hisi_zip" - presumably the DAE engine is
 * exposed through the zip device, matching the other DAE drivers; confirm.
 */
#define GEN_JOIN_GATHER_DRIVER(dae_alg_name) \
{\
	.drv_name = "hisi_zip",\
	.alg_name = (dae_alg_name),\
	.calc_type = UADK_ALG_HW,\
	.priority = 100,\
	.queue_num = DAE_CTX_Q_NUM_DEF,\
	.op_type_num = 1,\
	.fallback = 0,\
	.init = dae_init,\
	.exit = dae_exit,\
	.send = join_gather_send,\
	.recv = join_gather_recv,\
	.get_extend_ops = join_gather_get_extend_ops,\
}

static struct wd_alg_driver join_gather_driver[] = {
	GEN_JOIN_GATHER_DRIVER("hashjoin"),
	GEN_JOIN_GATHER_DRIVER("gather"),
	GEN_JOIN_GATHER_DRIVER("join-gather"),
};

/* Register all join/gather drivers (constructor in shared builds). */
#ifdef WD_STATIC_DRV
void hisi_dae_join_gather_probe(void)
#else
static void __attribute__((constructor)) hisi_dae_join_gather_probe(void)
#endif
{
	__u32 alg_num = ARRAY_SIZE(join_gather_driver);
	int ret;
	__u32 i;

	WD_INFO("Info: register DAE hashjoin and gather alg drivers!\n");
	for (i = 0; i < alg_num; i++) {
		ret = wd_alg_driver_register(&join_gather_driver[i]);
		if (ret && ret != -WD_ENODEV)
			WD_ERR("Error: register %s failed!\n",
			       join_gather_driver[i].alg_name);
	}
}

/* Unregister all join/gather drivers (destructor in shared builds). */
#ifdef WD_STATIC_DRV
void hisi_dae_join_gather_remove(void)
#else
static void __attribute__((destructor)) hisi_dae_join_gather_remove(void)
#endif
{
	__u32 alg_num = ARRAY_SIZE(join_gather_driver);
	__u32 i;

	WD_INFO("Info: unregister DAE alg drivers!\n");
	for (i = 0; i < alg_num; i++)
		wd_alg_driver_unregister(&join_gather_driver[i]);
}
diff --git a/include/drv/wd_join_gather_drv.h b/include/drv/wd_join_gather_drv.h
new file mode 100644
index 00000000..80fb9322
--- /dev/null
+++ b/include/drv/wd_join_gather_drv.h
@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: Apache-2.0 */
/*
 * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved.
 */

#ifndef __WD_JOIN_GATHER_DRV_H
#define __WD_JOIN_GATHER_DRV_H

#include <asm/types.h>
#include "wd_join_gather.h"
#include "wd_util.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Internal message exchanged between the wd_join_gather layer and drivers. */
struct wd_join_gather_msg {
	__u32 tag;
	__u32 key_cols_num;
	__u32 result;
	__u32 input_row_num;
	__u32 output_row_num;
	__u32 consumed_row_num;
	__u32 produced_row_num;
	enum wd_join_gather_op_type op_type;
	enum multi_batch_index_type index_type;
	bool output_done;
	bool key_out_en;
	bool multi_batch_en;
	struct wd_join_gather_req req;
	struct wd_dae_hash_table hash_table;
	void *priv;
};

/* Extended driver operations resolved through get_extend_ops(). */
struct wd_join_gather_ops {
	int (*get_table_row_size)(struct wd_alg_driver *drv, void *priv);
	int (*get_batch_row_size)(struct wd_alg_driver *drv, void *priv,
				  __u32 *batch_row_size, __u32 size);
	int (*sess_init)(struct wd_alg_driver *drv,
			 struct wd_join_gather_sess_setup *setup, void **priv);
	void (*sess_uninit)(struct wd_alg_driver *drv, void *priv);
	int
(*hash_table_init)(struct wd_alg_driver *drv,
			   struct wd_dae_hash_table *hash_table, void *priv);
};

/* Look up the async send msg matching (ctx idx, tag); NULL if not found. */
struct wd_join_gather_msg *wd_join_gather_get_msg(__u32 idx, __u32 tag);

#ifdef __cplusplus
}
#endif

#endif /* __WD_JOIN_GATHER_DRV_H */
diff --git a/include/wd_alg.h b/include/wd_alg.h
index 441b3bef..2fc350af 100644
--- a/include/wd_alg.h
+++ b/include/wd_alg.h
@@ -205,12 +205,14 @@ void hisi_hpre_probe(void);
 void hisi_zip_probe(void);
 void hisi_dae_probe(void);
 void hisi_udma_probe(void);
+void hisi_dae_join_gather_probe(void);
 
 void hisi_sec2_remove(void);
 void hisi_hpre_remove(void);
 void hisi_zip_remove(void);
 void hisi_dae_remove(void);
 void hisi_udma_remove(void);
+void hisi_dae_join_gather_remove(void);
 
 #endif
diff --git a/include/wd_dae.h b/include/wd_dae.h
index aa9f966c..64f17dc4 100644
--- a/include/wd_dae.h
+++ b/include/wd_dae.h
@@ -57,6 +57,18 @@ struct wd_dae_col_addr {
 	__u64 offset_size;
 };
 
/**
 * wd_dae_row_addr - information of row memory.
 * @addr: The start address of row memory.
 * @row_size: Memory size occupied by a row.
 * @row_num: Total number of rows.
 */
struct wd_dae_row_addr {
	void *addr;
	__u32 row_size;
	__u32 row_num;
};

 /**
  * wd_dae_hash_table - Hash table information of DAE.
  * @std_table: Address of standard hash table.
diff --git a/include/wd_join_gather.h b/include/wd_join_gather.h
new file mode 100644
index 00000000..4962ee35
--- /dev/null
+++ b/include/wd_join_gather.h
@@ -0,0 +1,352 @@
/* SPDX-License-Identifier: Apache-2.0 */
/*
 * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved.
 */

#ifndef __WD_JOIN_GATHER_H
#define __WD_JOIN_GATHER_H

#include <dlfcn.h>
#include <asm/types.h>
#include "wd_dae.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Algorithm selected for a session: join only, gather only, or both. */
enum wd_join_gather_alg {
	WD_JOIN,
	WD_GATHER,
	WD_JOIN_GATHER,
	WD_JOIN_GATHER_ALG_MAX,
};

/**
 * wd_join_gather_op_type - operation type for hash join and gather.
 */
enum wd_join_gather_op_type {
	WD_JOIN_BUILD_HASH,
	WD_JOIN_PROBE,
	WD_JOIN_REHASH,
	WD_GATHER_CONVERT,
	WD_GATHER_COMPLETE,
	WD_JOIN_GATHER_OP_TYPE_MAX,
};

/**
 * wd_join_gather_task_error_type - hash join and gather task error type.
 */
enum wd_join_gather_task_error_type {
	WD_JOIN_GATHER_TASK_DONE,
	WD_JOIN_GATHER_IN_EPARA,
	WD_JOIN_GATHER_NEED_REHASH,
	WD_JOIN_GATHER_INVALID_HASH_TABLE,
	WD_JOIN_GATHER_PARSE_ERROR,
	WD_JOIN_GATHER_BUS_ERROR,
};

/* How multi-batch rows are indexed: by batch number or by batch address. */
enum multi_batch_index_type {
	WD_BATCH_NUMBER_INDEX,
	WD_BATCH_ADDR_INDEX,
	WD_BATCH_INDEX_TYPE_MAX,
};

/**
 * wd_join_gather_col_info - column information.
 * @data_type: column data type.
 * @data_info: For CHAR, it is size of data, at least 1B.
 * For DECIMAL, it is precision of data, high 8 bit: decimal part precision,
 * low 8 bit: the whole data precision.
 * @has_empty: indicates whether the column contains empty data.
 */
struct wd_join_gather_col_info {
	enum wd_dae_data_type data_type;
	__u16 data_info;
	bool has_empty;
};

/**
 * wd_gather_table_info - gather table information.
 * @cols: Information of gather table columns.
 * @cols_num: Number of gather table columns.
 * @is_multi_batch: indicates single or multi batch task.
 */
struct wd_gather_table_info {
	struct wd_join_gather_col_info *cols;
	__u32 cols_num;
	bool is_multi_batch;
};

/**
 * wd_join_table_info - join table information.
 * @build_key_cols: Information of build table key columns.
 * @probe_key_cols: Information of probe table key columns.
 * @build_key_cols_num: Number of build table key columns.
 * @probe_key_cols_num: Number of probe table key columns.
 * @key_output_enable: Indicates whether output key columns.
 * @hash_table_index_num: Number of original rows can be stored
 * in each row of a hash table.
 */
struct wd_join_table_info {
	struct wd_join_gather_col_info *build_key_cols;
	struct wd_join_gather_col_info *probe_key_cols;
	__u32 build_key_cols_num;
	__u32 probe_key_cols_num;
	bool key_output_enable;
	__u32 hash_table_index_num;
};

/**
 * wd_join_gather_sess_setup - Hash join and gather session setup information.
 * @join_table: Information of join table.
 * @gather_tables: Information of gather table.
 * @gather_table_num: Number of gather table.
 * @alg: Alg for this session.
 * @index_type: Indicates the index type, 0 for batch number and row number,
 * 1 for batch address and row number.
 * @charset_info: Charset information
 * @sched_param: Parameters of the scheduling policy,
 * usually allocated according to struct sched_params.
 */
struct wd_join_gather_sess_setup {
	struct wd_join_table_info join_table;
	struct wd_gather_table_info *gather_tables;
	__u32 gather_table_num;

	enum wd_join_gather_alg alg;
	enum multi_batch_index_type index_type;
	struct wd_dae_charset charset_info;
	void *sched_param;
};

struct wd_join_gather_req;
typedef void *wd_join_gather_cb_t(struct wd_join_gather_req *req, void *cb_param);

/**
 * wd_probe_out_info - Hash join probe output info.
 * @build_index: address information of multi batch index.
 * @probe_index: address information of single batch index.
 * @breakpoint: address information of probe breakpoint.
 * @key_cols: address information of output key columns.
 * @key_cols_num: number of output key columns.
 */
struct wd_probe_out_info {
	struct wd_dae_row_addr build_index;
	struct wd_dae_row_addr probe_index;
	struct wd_dae_row_addr breakpoint;
	struct wd_dae_col_addr *key_cols;
	__u32 key_cols_num;
};

/**
 * wd_join_req - Hash join request.
 * @build_batch_addr: Row-storaged batch address, the batch is used to store build
 * table data cols in row format. This field is only used for batch addr index.
 *
 * @probe_output: The information for hash join probe stage.
 * @key_cols: key columns from build table or probe table.
 * @key_cols_num: key columns number.
 * @batch_row_offset: Indicates the start row number of the input column.
 * @build_batch_index: build table batch index, start from 0.
 */
struct wd_join_req {
	struct wd_dae_row_addr build_batch_addr;
	struct wd_probe_out_info probe_output;
	struct wd_dae_col_addr *key_cols;
	__u32 key_cols_num;
	__u32 batch_row_offset;
	__u32 build_batch_index;
};

/**
 * wd_row_batch_info - Information of some row-storaged batchs.
 * @batch_addr: Addr list of row batchs.
 * @batch_row_size: Row size of each row batch.
 * @batch_row_num: Row number of each row batch.
 * @batch_num: Total number of row batchs.
 */
struct wd_row_batch_info {
	void **batch_addr;
	__u32 *batch_row_size;
	__u32 *batch_row_num;
	__u32 batch_num;
};

/**
 * wd_gather_req - Hash join and gather operation request.
 * @index: address information of multi batch index or single batch index.
 * @row_batchs: address information of row batchs.
 * @data_cols: data columns from gather table.
 * @data_cols_num: columns number from gather table.
 * @table_index: The table index from the session's gather_tables to do tasks.
 */
struct wd_gather_req {
	struct wd_dae_row_addr index;
	struct wd_row_batch_info row_batchs;
	struct wd_dae_col_addr *data_cols;
	__u32 data_cols_num;
	__u32 table_index;
};

/**
 * wd_join_gather_req - Hash join and gather operation request.
 * @op_type: The operation type for hash join or gather task.
 * @join_req: The request for hash join.
 * @gather_req: The request for gather.
 * @input_row_num: Row count of input column.
 * @output_row_num: Expected row count of output column.
 * @consumed_row_num: Row count of input data that has been processed.
 * @produced_row_num: Real row count of output column.
 * @cb: Callback function for the asynchronous mode.
+ * @cb_param: Parameters of the callback function. + * @state: Error information written back by the hardware. + * @output_done: For rehash, it indicates whether all data in hash table has been output. + * For probe task, it indicates whether all data of one probe batch has been processed. + * @priv: Private data from user(reserved). + */ +struct wd_join_gather_req { + /* user fill-in fields */ + enum wd_join_gather_op_type op_type; + struct wd_join_req join_req; + struct wd_gather_req gather_req; + __u32 input_row_num; + __u32 output_row_num; + wd_join_gather_cb_t *cb; + void *cb_param; + void *priv; + + /* uadk driver writeback fields */ + enum wd_join_gather_task_error_type state; + __u32 consumed_row_num; + __u32 produced_row_num; + bool output_done; +}; + +/** + * wd_join_gather_init() - A simplified interface to initialize uadk. + * Users just need to describe the deployment of business scenarios. + * Then the initialization will request appropriate + * resources to support the business scenarios. + * To make the initialization simpler, ctx_params supports being set to NULL. + * And then the function will set them as driver's default. + * + * @alg: Supported algorithms: hashjoin, gather, join-gather. + * @sched_type: The scheduling type users want to use. + * @task_type: Task types, including soft computing, hardware and hybrid computing. + * @ctx_params: The ctxs resources users want to use. Include per operation + * type ctx numbers and business process run numa. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_gather_init(char *alg, __u32 sched_type, int task_type, + struct wd_ctx_params *ctx_params); + +/** + * wd_join_gather_uninit() - Uninitialise ctx configuration and scheduler. + */ +void wd_join_gather_uninit(void); + +/** + * wd_join_gather_alloc_sess() - Allocate a hash join or gather session + * @setup: Parameters to setup this session. + * + * Return 0 if fail and others if succeed. 
+ */ +handle_t wd_join_gather_alloc_sess(struct wd_join_gather_sess_setup *setup); + +/** + * wd_join_gather_free_sess() - Free a hash join or gather session + * @sess: The session that needs to be freed. + */ +void wd_join_gather_free_sess(handle_t h_sess); + +/** + * wd_join_set_hash_table() - Set hash table to the wd session + * @sess: Session to be initialized. + * @info: Hash table information to set. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_set_hash_table(handle_t h_sess, struct wd_dae_hash_table *info); + +/** + * wd_join_build_hash_sync()/wd_join_build_hash_async() - Build the hash table. + * @sess: Wd session + * @req: Operational data. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_build_hash_sync(handle_t h_sess, struct wd_join_gather_req *req); +int wd_join_build_hash_async(handle_t h_sess, struct wd_join_gather_req *req); + +/** + * wd_join_probe_sync()/wd_join_probe_async() - Probe and output the index or key. + * @sess: Wd session + * @req: Operational data. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_probe_sync(handle_t h_sess, struct wd_join_gather_req *req); +int wd_join_probe_async(handle_t h_sess, struct wd_join_gather_req *req); + +/** + * wd_gather_convert_sync()/wd_gather_convert_async() - Convert a column batch to a row batch. + * @sess: Wd session + * @req: Operational data. + * + * Return 0 if succeed and others if fail. + */ +int wd_gather_convert_sync(handle_t h_sess, struct wd_join_gather_req *req); +int wd_gather_convert_async(handle_t h_sess, struct wd_join_gather_req *req); + +/** + * wd_gather_complete_sync()/wd_gather_complete_async() - map the index with a row batch + * and output the result to a column batch. + * @sess: Wd session + * @req: Operational data. + * + * Return 0 if succeed and others if fail. 
+ */ +int wd_gather_complete_sync(handle_t h_sess, struct wd_join_gather_req *req); +int wd_gather_complete_async(handle_t h_sess, struct wd_join_gather_req *req); + +/** + * wd_join_rehash_sync - Rehash operation, only the synchronous mode is supported. + * @sess: Wd hash join session + * @req: Operational data. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_rehash_sync(handle_t h_sess, struct wd_join_gather_req *req); + +/** + * wd_join_gather_poll() - Poll finished request. + * This function will call poll_policy function which is registered to wd + * by user. + * + * Return 0 if succeed and others if fail. + */ +int wd_join_gather_poll(__u32 expt, __u32 *count); + +/** + * wd_join_get_table_rowsize - Get the hash table's row size. + * @h_sess: Wd session handler. + * + * Return negative value if fail and others if succeed. + */ +int wd_join_get_table_rowsize(handle_t h_sess); + +/** + * wd_gather_get_batch_rowsize - Get the batch row size. + * @h_sess: Wd session handler. + * @table_index: The table index from the session's gather_tables. + * + * Return negative value if fail and others if succeed. 
+ */ +int wd_gather_get_batch_rowsize(handle_t h_sess, __u8 table_index); + +#ifdef __cplusplus +} +#endif + +#endif /* __WD_JOIN_GATHER_H */ diff --git a/include/wd_util.h b/include/wd_util.h index bbb18a7c..4a5204de 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -43,6 +43,7 @@ enum wd_type { WD_ECC_TYPE, WD_AGG_TYPE, WD_UDMA_TYPE, + WD_JOIN_GATHER_TYPE, WD_TYPE_MAX, }; diff --git a/libwd_dae.map b/libwd_dae.map index 6597ff98..f3b06337 100644 --- a/libwd_dae.map +++ b/libwd_dae.map @@ -1,5 +1,24 @@ UADK_DAE_2.0 { global: + wd_join_gather_alloc_sess; + wd_join_gather_free_sess; + wd_join_get_table_rowsize; + wd_gather_get_batch_rowsize; + wd_join_set_hash_table; + wd_join_gather_init; + wd_join_gather_uninit; + wd_join_build_hash_sync; + wd_join_build_hash_async; + wd_join_probe_sync; + wd_join_probe_async; + wd_join_rehash_sync; + wd_join_gather_get_msg; + wd_join_gather_poll; + wd_gather_convert_sync; + wd_gather_complete_sync; + wd_gather_convert_async; + wd_gather_complete_async; + wd_agg_alloc_sess; wd_agg_free_sess; wd_agg_get_table_rowsize; diff --git a/wd_join_gather.c b/wd_join_gather.c new file mode 100644 index 00000000..616d039a --- /dev/null +++ b/wd_join_gather.c @@ -0,0 +1,1823 @@ +/* SPDX-License-Identifier: Apache-2.0 */ +/* + * Copyright 2025 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#include <stdlib.h> +#include <pthread.h> +#include <sched.h> +#include <limits.h> +#include "include/drv/wd_join_gather_drv.h" +#include "wd_join_gather.h" + +#define DECIMAL_PRECISION_OFFSET 8 +#define DAE_INT_SIZE 4 +#define DAE_LONG_SIZE 8 +#define DAE_LONG_DECIMAL_SIZE 16 + +/* Sum of the max row number of standard and external hash table */ +#define MAX_HASH_TABLE_ROW_NUM 0x1FFFFFFFE + +enum wd_join_sess_state { + WD_JOIN_SESS_UNINIT, /* Uninit session */ + WD_JOIN_SESS_INIT, /* Hash table has been set */ + WD_JOIN_SESS_BUILD_HASH, /* Input stage has started */ + WD_JOIN_SESS_PREPARE_REHASH, /* New hash table has been set */ + WD_JOIN_SESS_REHASH, /* Rehash stage has started */ + WD_JOIN_SESS_PROBE, /* Output stage has started */ +}; + +struct wd_join_gather_setting { + enum wd_status status; + struct wd_ctx_config_internal config; + struct wd_sched sched; + struct wd_async_msg_pool pool; + struct wd_alg_driver *driver; + void *priv; + void *dlhandle; + void *dlh_list; +}; + +struct wd_join_cols_conf { + struct wd_join_gather_col_info *cols; + __u64 *data_size; + __u32 cols_num; + bool key_output_enable; +}; + +struct wd_gather_tables_conf { + struct wd_gather_table_info *tables; + __u32 *batch_row_size; + __u64 **data_size; + __u32 table_num; +}; + +struct wd_join_gather_sess { + enum multi_batch_index_type index_type; + enum wd_join_sess_state state; + enum wd_join_gather_alg alg; + struct wd_join_gather_ops ops; + struct wd_join_cols_conf join_conf; + struct wd_gather_tables_conf gather_conf; + struct wd_dae_hash_table hash_table; + wd_dev_mask_t *dev_mask; + void *sched_key; + void *priv; +}; + +static const char *wd_join_gather_alg[WD_JOIN_GATHER_ALG_MAX] = { + "hashjoin", "gather", "join-gather" +}; + +static struct wd_init_attrs wd_join_gather_init_attrs; +static struct wd_join_gather_setting wd_join_gather_setting; +static int wd_join_gather_poll_ctx(__u32 idx, __u32 expt, __u32 *count); + +static void wd_join_gather_close_driver(void) +{ 
+#ifndef WD_STATIC_DRV + wd_dlclose_drv(wd_join_gather_setting.dlh_list); +#else + wd_release_drv(wd_join_gather_setting.driver); + hisi_dae_join_gather_remove(); +#endif +} + +static int wd_join_gather_open_driver(void) +{ +#ifndef WD_STATIC_DRV + /* + * Driver lib file path could be set by env param, + * then open them by wd_dlopen_drv(). + * Using NULL means the path is queried dynamically. + */ + wd_join_gather_setting.dlh_list = wd_dlopen_drv(NULL); + if (!wd_join_gather_setting.dlh_list) { + WD_ERR("fail to open driver lib files.\n"); + return -WD_EINVAL; + } +#else + hisi_dae_join_gather_probe(); +#endif + return WD_SUCCESS; +} + +static bool wd_join_gather_check_inner(void) +{ + struct uacce_dev_list *list; + + list = wd_get_accel_list("hashjoin"); + if (!list) + goto out; + wd_free_list_accels(list); + + list = wd_get_accel_list("gather"); + if (!list) + goto out; + wd_free_list_accels(list); + + return true; +out: + WD_ERR("invalid: the device cannot support hashjoin and gather!\n"); + return false; +} + +static bool wd_join_gather_alg_check(const char *alg_name) +{ + __u32 i; + + /* Check for the virtual algorithms */ + if (!strcmp(alg_name, "join-gather")) + return wd_join_gather_check_inner(); + + for (i = 0; i < WD_JOIN_GATHER_ALG_MAX; i++) { + /* Some algorithms do not support all modes */ + if (!wd_join_gather_alg[i] || !strlen(wd_join_gather_alg[i])) + continue; + if (!strcmp(alg_name, wd_join_gather_alg[i])) + return true; + } + + return false; +} + +static int check_col_data_info(enum wd_dae_data_type type, __u16 col_data_info) +{ + __u8 all_precision, decimal_precision; + + switch (type) { + case WD_DAE_DATE: + case WD_DAE_INT: + case WD_DAE_LONG: + case WD_DAE_VARCHAR: + break; + case WD_DAE_SHORT_DECIMAL: + case WD_DAE_LONG_DECIMAL: + /* High 8 bit: decimal part precision, low 8 bit: the whole data precision */ + all_precision = col_data_info; + decimal_precision = col_data_info >> DECIMAL_PRECISION_OFFSET; + if (!all_precision || decimal_precision > 
all_precision) { + WD_ERR("failed to check data precision, all: %u, decimal: %u!\n", + all_precision, decimal_precision); + return -WD_EINVAL; + } + break; + case WD_DAE_CHAR: + if (!col_data_info) { + WD_ERR("invalid: char length is zero!\n"); + return -WD_EINVAL; + } + break; + default: + WD_ERR("invalid: data type %u is not supported!\n", type); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int get_data_type_size(enum wd_dae_data_type type, __u16 col_data_info, + __u64 *col, __u32 idx) +{ + switch (type) { + case WD_DAE_DATE: + case WD_DAE_INT: + col[idx] = DAE_INT_SIZE; + break; + case WD_DAE_LONG: + case WD_DAE_SHORT_DECIMAL: + col[idx] = DAE_LONG_SIZE; + break; + case WD_DAE_LONG_DECIMAL: + col[idx] = DAE_LONG_DECIMAL_SIZE; + break; + case WD_DAE_CHAR: + col[idx] = col_data_info; + break; + case WD_DAE_VARCHAR: + col[idx] = 0; + break; + default: + return -WD_EINVAL; + } + return WD_SUCCESS; +} + +static int check_key_cols_info(struct wd_join_gather_sess_setup *setup) +{ + struct wd_join_table_info *table = &setup->join_table; + struct wd_join_gather_col_info *build = table->build_key_cols; + __u32 i; + int ret; + + if (table->build_key_cols_num != table->probe_key_cols_num) { + WD_ERR("invalid: build key_cols_num: %u, probe key_cols_num: %u!\n", + table->build_key_cols_num, table->probe_key_cols_num); + return -WD_EINVAL; + } + + ret = memcmp(table->build_key_cols, table->probe_key_cols, + table->build_key_cols_num * sizeof(struct wd_join_gather_col_info)); + if (ret) { + WD_ERR("invalid: build and probe table key infomation is not same!\n"); + return -WD_EINVAL; + } + + for (i = 0; i < table->build_key_cols_num; i++) { + if (!build[i].has_empty) { + WD_ERR("invalid: key col has no empty data! col: %u\n", i); + return -WD_EINVAL; + } + ret = check_col_data_info(build[i].data_type, build[i].data_info); + if (ret) { + WD_ERR("failed to check key col data info! 
col: %u\n", i); + return ret; + } + } + + return WD_SUCCESS; +} + +static int wd_join_check_params(struct wd_join_gather_sess_setup *setup) +{ + struct wd_join_table_info *table = &setup->join_table; + + if (!table->build_key_cols_num || !table->build_key_cols) { + WD_ERR("invalid: build key cols is NULL or key_cols_num is 0!\n"); + return -WD_EINVAL; + } + + if (!table->probe_key_cols_num || !table->probe_key_cols) { + WD_ERR("invalid: probe key cols is NULL or key_cols_num is 0!\n"); + return -WD_EINVAL; + } + + if (setup->index_type >= WD_BATCH_INDEX_TYPE_MAX) { + WD_ERR("failed to check batch index type!\n"); + return -WD_EINVAL; + } + + if (check_key_cols_info(setup)) { + WD_ERR("failed to check join setup key cols info!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_gather_check_params(struct wd_join_gather_sess_setup *setup) +{ + struct wd_gather_table_info *table = setup->gather_tables; + struct wd_join_gather_col_info *col; + __u32 i, j; + int ret; + + if (!setup->gather_tables || !setup->gather_table_num) { + WD_ERR("invalid: gather table is NULL, table num: %u\n", setup->gather_table_num); + return -WD_EINVAL; + } + + if (setup->index_type >= WD_BATCH_INDEX_TYPE_MAX) { + WD_ERR("failed to check gather batch index type!\n"); + return -WD_EINVAL; + } + + for (i = 0; i < setup->gather_table_num; i++) { + if (!table[i].cols || !table[i].cols_num) { + WD_ERR("failed to check gather table cols, num: %u\n", table[i].cols_num); + return -WD_EINVAL; + } + col = table[i].cols; + for (j = 0; j < table[i].cols_num; j++) { + ret = check_col_data_info(col[j].data_type, col[j].data_info); + if (ret) { + WD_ERR("failed to check gather info! 
col: %u, table: %u\n", j, i); + return ret; + } + } + } + + return WD_SUCCESS; +} + +static int wd_join_gather_check_params(struct wd_join_gather_sess_setup *setup) +{ + int ret; + + if (!setup) { + WD_ERR("invalid: hashjoin or gather sess setup is NULL!\n"); + return -WD_EINVAL; + } + + switch (setup->alg) { + case WD_JOIN: + return wd_join_check_params(setup); + case WD_GATHER: + return wd_gather_check_params(setup); + case WD_JOIN_GATHER: + ret = wd_join_check_params(setup); + if (ret) + return ret; + + return wd_gather_check_params(setup); + default: + WD_ERR("invalid: hashjoin sess setup alg is wrong!\n"); + return -WD_EINVAL; + } +} + +static void sess_data_size_uninit(struct wd_join_gather_sess *sess) +{ + __u32 i; + + if (sess->join_conf.cols) + free(sess->join_conf.cols); + + if (sess->gather_conf.tables) { + for (i = 0; i < sess->gather_conf.table_num; i++) + free(sess->gather_conf.data_size[i]); + + free(sess->gather_conf.tables); + } +} + +static int sess_data_size_init(struct wd_join_gather_sess *sess, + struct wd_join_gather_sess_setup *setup) +{ + struct wd_gather_table_info *gtable = setup->gather_tables; + struct wd_join_table_info *jtable = &setup->join_table; + struct wd_join_gather_col_info *key = jtable->build_key_cols; + __u64 key_size, key_data_size, gather_size, gather_data_size; + __u32 i, j; + + __atomic_store_n(&sess->state, WD_JOIN_SESS_UNINIT, __ATOMIC_RELEASE); + + if (setup->alg != WD_GATHER) { + key_size = jtable->build_key_cols_num * sizeof(struct wd_join_gather_col_info); + key_data_size = jtable->build_key_cols_num * sizeof(__u64); + sess->join_conf.cols = malloc(key_size + key_data_size); + if (!sess->join_conf.cols) + return -WD_ENOMEM; + memcpy(sess->join_conf.cols, key, key_size); + + sess->join_conf.data_size = (void *)sess->join_conf.cols + key_size; + for (i = 0; i < jtable->build_key_cols_num; i++) + (void)get_data_type_size(key[i].data_type, key[i].data_info, + sess->join_conf.data_size, i); + sess->join_conf.cols_num = 
jtable->build_key_cols_num; + + if (setup->alg == WD_JOIN) + return WD_SUCCESS; + } + + gather_size = setup->gather_table_num * sizeof(struct wd_gather_table_info); + gather_data_size = setup->gather_table_num * sizeof(__u64 *); + sess->gather_conf.tables = malloc(gather_size + gather_data_size); + if (!sess->gather_conf.tables) + goto free_join; + memcpy(sess->gather_conf.tables, gtable, gather_size); + + sess->gather_conf.data_size = (void *)sess->gather_conf.tables + gather_size; + for (i = 0; i < setup->gather_table_num; i++) { + sess->gather_conf.data_size[i] = malloc(gtable[i].cols_num * sizeof(__u64)); + if (!sess->gather_conf.data_size[i]) + goto free_gather; + } + + for (i = 0; i < setup->gather_table_num; i++) + for (j = 0; j < gtable[i].cols_num; j++) + (void)get_data_type_size(gtable[i].cols[j].data_type, + gtable[i].cols[j].data_info, + sess->gather_conf.data_size[i], j); + sess->gather_conf.table_num = setup->gather_table_num; + + return WD_SUCCESS; + +free_gather: + for (j = 0; j < i; j++) + free(sess->gather_conf.data_size[j]); + free(sess->gather_conf.tables); +free_join: + if (setup->alg != WD_GATHER) + free(sess->join_conf.cols); + return -WD_ENOMEM; +} + +static void wd_join_gather_uninit_sess(struct wd_join_gather_sess *sess) +{ + if (sess->gather_conf.batch_row_size) + free(sess->gather_conf.batch_row_size); + + if (sess->ops.sess_uninit) + sess->ops.sess_uninit(wd_join_gather_setting.driver, sess->priv); +} + +static int wd_join_gather_init_sess(struct wd_join_gather_sess *sess, + struct wd_join_gather_sess_setup *setup) +{ + struct wd_alg_driver *drv = wd_join_gather_setting.driver; + __u32 array_size; + int ret; + + if (sess->ops.sess_init) { + if (!sess->ops.sess_uninit) { + WD_ERR("failed to get session uninit ops!\n"); + return -WD_EINVAL; + } + ret = sess->ops.sess_init(drv, setup, &sess->priv); + if (ret) { + WD_ERR("failed to init session priv!\n"); + return ret; + } + } + + if (sess->ops.get_table_row_size && setup->alg != WD_GATHER) 
{ + ret = sess->ops.get_table_row_size(drv, sess->priv); + if (ret <= 0) { + WD_ERR("failed to get hash table row size: %d!\n", ret); + goto uninit; + } + sess->hash_table.table_row_size = ret; + } + + if (sess->ops.get_batch_row_size && setup->alg != WD_JOIN) { + array_size = setup->gather_table_num * sizeof(__u32); + sess->gather_conf.batch_row_size = malloc(array_size); + if (!sess->gather_conf.batch_row_size) + goto uninit; + + ret = sess->ops.get_batch_row_size(drv, sess->priv, + sess->gather_conf.batch_row_size, + array_size); + if (ret) { + WD_ERR("failed to get batch table row size!\n"); + goto free_batch; + } + } + + return WD_SUCCESS; + +free_batch: + free(sess->gather_conf.batch_row_size); +uninit: + if (sess->ops.sess_uninit) + sess->ops.sess_uninit(drv, sess->priv); + return -WD_EINVAL; +} + +handle_t wd_join_gather_alloc_sess(struct wd_join_gather_sess_setup *setup) +{ + struct wd_join_gather_sess *sess; + int ret; + + ret = wd_join_gather_check_params(setup); + if (ret) + return (handle_t)0; + + sess = malloc(sizeof(struct wd_join_gather_sess)); + if (!sess) { + WD_ERR("failed to alloc join gather session memory!\n"); + return (handle_t)0; + } + memset(sess, 0, sizeof(struct wd_join_gather_sess)); + + sess->alg = setup->alg; + sess->index_type = setup->index_type; + sess->join_conf.key_output_enable = setup->join_table.key_output_enable; + + ret = wd_drv_alg_support(wd_join_gather_alg[sess->alg], wd_join_gather_setting.driver); + if (!ret) { + WD_ERR("failed to check driver alg: %s!\n", wd_join_gather_alg[sess->alg]); + goto free_sess; + } + + /* Some simple scheduler don't need scheduling parameters */ + sess->sched_key = (void *)wd_join_gather_setting.sched.sched_init( + wd_join_gather_setting.sched.h_sched_ctx, setup->sched_param); + if (WD_IS_ERR(sess->sched_key)) { + WD_ERR("failed to init join_gather session schedule key!\n"); + goto free_sess; + } + + if (wd_join_gather_setting.driver->get_extend_ops) { + ret = 
wd_join_gather_setting.driver->get_extend_ops(&sess->ops); + if (ret) { + WD_ERR("failed to get join gather extend ops!\n"); + goto free_key; + } + } + + ret = wd_join_gather_init_sess(sess, setup); + if (ret) + goto free_key; + + ret = sess_data_size_init(sess, setup); + if (ret) { + WD_ERR("failed to init join gather session data size!\n"); + goto uninit_sess; + } + + return (handle_t)sess; + +uninit_sess: + wd_join_gather_uninit_sess(sess); +free_key: + free(sess->sched_key); +free_sess: + free(sess); + return (handle_t)0; +} + +void wd_join_gather_free_sess(handle_t h_sess) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + + if (!sess) { + WD_ERR("invalid: join gather input sess is NULL!\n"); + return; + } + + sess_data_size_uninit(sess); + + wd_join_gather_uninit_sess(sess); + + if (sess->sched_key) + free(sess->sched_key); + + free(sess); +} + +int wd_gather_get_batch_rowsize(handle_t h_sess, __u8 table_index) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + + if (!sess || !sess->gather_conf.batch_row_size) { + WD_ERR("invalid: gather sess or batch_row_size is NULL!\n"); + return -WD_EINVAL; + } + + if (table_index >= sess->gather_conf.table_num) { + WD_ERR("invalid: gather table index(%u) is larger than %u!\n", + table_index, sess->gather_conf.table_num); + return -WD_EINVAL; + } + + return sess->gather_conf.batch_row_size[table_index]; +} + +int wd_join_get_table_rowsize(handle_t h_sess) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + + if (!sess) { + WD_ERR("invalid: hashjoin input sess is NULL!\n"); + return -WD_EINVAL; + } + + if (sess->alg != WD_JOIN && sess->alg != WD_JOIN_GATHER) { + WD_ERR("invalid: the session is not used for hashjoin!\n"); + return -WD_EINVAL; + } + + if (!sess->hash_table.table_row_size) { + WD_ERR("invalid: hashjoin sess hash table row size is 0!\n"); + return -WD_EINVAL; + } + + return sess->hash_table.table_row_size; +} + +static int 
wd_join_init_sess_state(struct wd_join_gather_sess *sess, + enum wd_join_sess_state *expected) +{ + enum wd_join_sess_state next; + int ret; + + if (sess->hash_table.std_table) { + *expected = WD_JOIN_SESS_BUILD_HASH; + next = WD_JOIN_SESS_PREPARE_REHASH; + } else { + *expected = WD_JOIN_SESS_UNINIT; + next = WD_JOIN_SESS_INIT; + } + + ret = __atomic_compare_exchange_n(&sess->state, expected, next, + false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + if (!ret) { + WD_ERR("invalid: join sess state is %u!\n", *expected); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +int wd_join_set_hash_table(handle_t h_sess, struct wd_dae_hash_table *info) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected; + int ret; + + if (!sess || !info) { + WD_ERR("invalid: hashjoin sess or hash table is NULL!\n"); + return -WD_EINVAL; + } + + if (sess->alg != WD_JOIN && sess->alg != WD_JOIN_GATHER) { + WD_ERR("invalid: the session is not used for hashjoin!\n"); + return -WD_EINVAL; + } + + ret = wd_join_init_sess_state(sess, &expected); + if (ret) + return ret; + + if (info->table_row_size != sess->hash_table.table_row_size) { + WD_ERR("invalid: hash table row size is not equal, expt: %u, real: %u!\n", + sess->hash_table.table_row_size, info->table_row_size); + ret = -WD_EINVAL; + goto out; + } + + if (!info->std_table) { + WD_ERR("invalid: standard hash table is NULL!\n"); + ret = -WD_EINVAL; + goto out; + } + + if (info->std_table_row_num < sess->hash_table.std_table_row_num) { + WD_ERR("invalid: standard hash table is too small, expt: %u, real: %u!\n", + sess->hash_table.std_table_row_num, info->std_table_row_num); + ret = -WD_EINVAL; + goto out; + } + + if (!info->ext_table_row_num || !info->ext_table) + WD_INFO("info: extern hash table is NULL!\n"); + + if (sess->ops.hash_table_init) { + ret = sess->ops.hash_table_init(wd_join_gather_setting.driver, + info, sess->priv); + if (ret) + goto out; + } + + 
memcpy(&sess->hash_table, info, sizeof(struct wd_dae_hash_table)); + + return WD_SUCCESS; + +out: + __atomic_store_n(&sess->state, expected, __ATOMIC_RELEASE); + return ret; +} + +static void wd_join_gather_clear_status(void) +{ + wd_alg_clear_init(&wd_join_gather_setting.status); +} + +static int wd_join_gather_alg_init(struct wd_ctx_config *config, struct wd_sched *sched) +{ + int ret; + + ret = wd_set_epoll_en("WD_JOIN_GATHER_EPOLL_EN", &wd_join_gather_setting.config.epoll_en); + if (ret < 0) + return ret; + + ret = wd_init_ctx_config(&wd_join_gather_setting.config, config); + if (ret < 0) + return ret; + + ret = wd_init_sched(&wd_join_gather_setting.sched, sched); + if (ret < 0) + goto out_clear_ctx_config; + + /* Allocate async pool for every ctx */ + ret = wd_init_async_request_pool(&wd_join_gather_setting.pool, config, WD_POOL_MAX_ENTRIES, + sizeof(struct wd_join_gather_msg)); + if (ret < 0) + goto out_clear_sched; + + ret = wd_alg_init_driver(&wd_join_gather_setting.config, wd_join_gather_setting.driver); + if (ret) + goto out_clear_pool; + + return WD_SUCCESS; + +out_clear_pool: + wd_uninit_async_request_pool(&wd_join_gather_setting.pool); +out_clear_sched: + wd_clear_sched(&wd_join_gather_setting.sched); +out_clear_ctx_config: + wd_clear_ctx_config(&wd_join_gather_setting.config); + return ret; +} + +static int wd_join_gather_alg_uninit(void) +{ + enum wd_status status; + + wd_alg_get_init(&wd_join_gather_setting.status, &status); + if (status == WD_UNINIT) + return -WD_EINVAL; + + /* Uninit async request pool */ + wd_uninit_async_request_pool(&wd_join_gather_setting.pool); + + /* Unset config, sched, driver */ + wd_clear_sched(&wd_join_gather_setting.sched); + + wd_alg_uninit_driver(&wd_join_gather_setting.config, wd_join_gather_setting.driver); + + return WD_SUCCESS; +} + +int wd_join_gather_init(char *alg, __u32 sched_type, int task_type, + struct wd_ctx_params *ctx_params) +{ + struct wd_ctx_params join_gather_ctx_params = {0}; + struct wd_ctx_nums 
join_gather_ctx_num = {0}; + int ret = -WD_EINVAL; + int state; + bool flag; + + pthread_atfork(NULL, NULL, wd_join_gather_clear_status); + + state = wd_alg_try_init(&wd_join_gather_setting.status); + if (state) + return state; + + if (!alg || sched_type >= SCHED_POLICY_BUTT || + task_type < 0 || task_type >= TASK_MAX_TYPE) { + WD_ERR("invalid: join_gathe init input param is wrong!\n"); + goto out_uninit; + } + + flag = wd_join_gather_alg_check(alg); + if (!flag) { + WD_ERR("invalid: alg: %s is unsupported!\n", alg); + goto out_uninit; + } + + state = wd_join_gather_open_driver(); + if (state) + goto out_uninit; + + while (ret != 0) { + memset(&wd_join_gather_setting.config, 0, sizeof(struct wd_ctx_config_internal)); + + /* Get alg driver and dev name */ + wd_join_gather_setting.driver = wd_alg_drv_bind(task_type, alg); + if (!wd_join_gather_setting.driver) { + WD_ERR("failed to bind %s driver.\n", alg); + goto out_dlopen; + } + + join_gather_ctx_params.ctx_set_num = &join_gather_ctx_num; + ret = wd_ctx_param_init(&join_gather_ctx_params, ctx_params, + wd_join_gather_setting.driver, + WD_JOIN_GATHER_TYPE, 1); + if (ret) { + if (ret == -WD_EAGAIN) { + wd_disable_drv(wd_join_gather_setting.driver); + wd_alg_drv_unbind(wd_join_gather_setting.driver); + continue; + } + goto out_driver; + } + + (void)strcpy(wd_join_gather_init_attrs.alg, alg); + wd_join_gather_init_attrs.sched_type = sched_type; + wd_join_gather_init_attrs.driver = wd_join_gather_setting.driver; + wd_join_gather_init_attrs.ctx_params = &join_gather_ctx_params; + wd_join_gather_init_attrs.alg_init = wd_join_gather_alg_init; + wd_join_gather_init_attrs.alg_poll_ctx = wd_join_gather_poll_ctx; + ret = wd_alg_attrs_init(&wd_join_gather_init_attrs); + if (ret) { + if (ret == -WD_ENODEV) { + wd_disable_drv(wd_join_gather_setting.driver); + wd_alg_drv_unbind(wd_join_gather_setting.driver); + wd_ctx_param_uninit(&join_gather_ctx_params); + continue; + } + WD_ERR("fail to init alg attrs.\n"); + goto 
out_params_uninit; + } + } + + wd_alg_set_init(&wd_join_gather_setting.status); + wd_ctx_param_uninit(&join_gather_ctx_params); + + return WD_SUCCESS; + +out_params_uninit: + wd_ctx_param_uninit(&join_gather_ctx_params); +out_driver: + wd_alg_drv_unbind(wd_join_gather_setting.driver); +out_dlopen: + wd_join_gather_close_driver(); +out_uninit: + wd_alg_clear_init(&wd_join_gather_setting.status); + return ret; +} + +void wd_join_gather_uninit(void) +{ + int ret; + + ret = wd_join_gather_alg_uninit(); + if (ret) + return; + + wd_alg_attrs_uninit(&wd_join_gather_init_attrs); + wd_alg_drv_unbind(wd_join_gather_setting.driver); + wd_join_gather_close_driver(); + wd_join_gather_setting.dlh_list = NULL; + wd_alg_clear_init(&wd_join_gather_setting.status); +} + +static void fill_build_hash_msg(struct wd_join_gather_msg *msg, + struct wd_join_gather_sess *sess) +{ + msg->index_type = sess->index_type; + msg->key_cols_num = sess->join_conf.cols_num; +} + +static void fill_probe_msg(struct wd_join_gather_msg *msg, + struct wd_join_gather_sess *sess) +{ + msg->key_cols_num = sess->join_conf.cols_num; + msg->index_type = sess->index_type; + msg->key_out_en = sess->join_conf.key_output_enable; +} + +static void fill_rehash_msg(struct wd_join_gather_msg *msg, + struct wd_join_gather_sess *sess) +{ + msg->key_cols_num = sess->join_conf.cols_num; +} + +static void fill_complete_msg(struct wd_join_gather_msg *msg, + struct wd_join_gather_sess *sess) +{ + __u32 table_index = msg->req.gather_req.table_index; + + msg->index_type = sess->index_type; + msg->multi_batch_en = sess->gather_conf.tables[table_index].is_multi_batch; +} + + +static void fill_join_gather_msg(struct wd_join_gather_msg *msg, struct wd_join_gather_req *req, + struct wd_join_gather_sess *sess) +{ + memcpy(&msg->req, req, sizeof(struct wd_join_gather_req)); + msg->priv = sess->priv; + msg->op_type = req->op_type; + + switch (req->op_type) { + case WD_JOIN_BUILD_HASH: + fill_build_hash_msg(msg, sess); + break; + case 
WD_JOIN_PROBE: + fill_probe_msg(msg, sess); + break; + case WD_JOIN_REHASH: + fill_rehash_msg(msg, sess); + break; + case WD_GATHER_CONVERT: + break; + case WD_GATHER_COMPLETE: + fill_complete_msg(msg, sess); + break; + default: + break; + } +} + +static int wd_join_gather_check_common(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, + __u8 mode, bool is_join) +{ + if (!sess) { + WD_ERR("invalid: join or gather session is NULL!\n"); + return -WD_EINVAL; + } + + if (!req) { + WD_ERR("invalid: join input req is NULL!\n"); + return -WD_EINVAL; + } + + if (mode == CTX_MODE_ASYNC && !req->cb) { + WD_ERR("invalid: join gather req cb is NULL!\n"); + return -WD_EINVAL; + } + + switch (sess->alg) { + case WD_JOIN: + if (!is_join || !sess->join_conf.data_size) { + WD_ERR("invalid: join session data size is NULL!\n"); + return -WD_EINVAL; + } + break; + case WD_GATHER: + if (is_join || !sess->gather_conf.data_size) { + WD_ERR("invalid: gather session data size is NULL!\n"); + return -WD_EINVAL; + } + break; + case WD_JOIN_GATHER: + if (mode == CTX_MODE_ASYNC) { + WD_ERR("join-gather session does not support the async mode!\n"); + return -WD_EINVAL; + } + + if (!sess->join_conf.data_size || !sess->gather_conf.data_size) { + WD_ERR("invalid: join or gather session data size is NULL!\n"); + return -WD_EINVAL; + } + break; + default: + WD_ERR("invalid: session alg is not supported!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int check_in_col_addr(struct wd_dae_col_addr *col, __u32 row_count, + enum wd_dae_data_type type, __u64 data_size) +{ + if (!col->empty || col->empty_size != row_count * sizeof(col->empty[0])) { + WD_ERR("failed to check input empty col, size: %llu!\n", col->empty_size); + return -WD_EINVAL; + } + + if (!col->value || col->value_size != row_count * data_size) { + WD_ERR("failed to check input value col size: %llu!\n", col->value_size); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int 
check_out_col_addr(struct wd_dae_col_addr *col, __u32 row_count, + enum wd_dae_data_type type, __u64 data_size) +{ + if (!col->empty || col->empty_size < row_count * sizeof(col->empty[0])) { + WD_ERR("failed to check output empty col, size: %llu!\n", col->empty_size); + return -WD_EINVAL; + } + + if (!col->value || col->value_size < row_count * data_size) { + WD_ERR("failed to check output value col size: %llu!\n", col->value_size); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int check_key_col_addr(struct wd_dae_col_addr *cols, __u32 cols_num, + struct wd_join_gather_sess *sess, __u32 row_count, bool is_input) +{ + int (*func)(struct wd_dae_col_addr *col, __u32 row_count, + enum wd_dae_data_type type, __u64 data_size); + __u32 i; + int ret; + + func = is_input ? check_in_col_addr : check_out_col_addr; + + for (i = 0; i < cols_num; i++) { + ret = func(cols + i, row_count, sess->join_conf.cols[i].data_type, + sess->join_conf.data_size[i]); + if (ret) { + WD_ERR("failed to check req key col! col idx: %u\n", i); + return ret; + } + } + + return WD_SUCCESS; +} + +static int check_data_col_addr(struct wd_gather_req *req, struct wd_join_gather_sess *sess, + __u32 row_count, bool is_input) +{ + struct wd_gather_table_info *table = &sess->gather_conf.tables[req->table_index]; + __u64 *data_size = sess->gather_conf.data_size[req->table_index]; + int (*func)(struct wd_dae_col_addr *col, __u32 row_count, + enum wd_dae_data_type type, __u64 data_size); + __u32 i; + int ret; + + if (!data_size) { + WD_ERR("invalid: gather session data size is NULL!\n"); + return -WD_EINVAL; + } + + if (!row_count) { + WD_ERR("invalid: gather data row number is 0!\n"); + return -WD_EINVAL; + } + + func = is_input ? check_in_col_addr : check_out_col_addr; + + for (i = 0; i < req->data_cols_num; i++) { + ret = func(&req->data_cols[i], row_count, table->cols[i].data_type, + data_size[i]); + if (ret) { + WD_ERR("failed to check req data col! 
col idx: %u\n", i); + return ret; + } + } + + return WD_SUCCESS; +} + +static int check_probe_out_addr(struct wd_probe_out_info *output, + struct wd_join_gather_sess *sess, __u32 row_num) +{ + if (!output->build_index.addr || !output->build_index.row_size) { + WD_ERR("probe multi index is not set!\n"); + return -WD_EINVAL; + } + + if (!output->probe_index.addr || !output->probe_index.row_size) { + WD_ERR("probe single index is not set!\n"); + return -WD_EINVAL; + } + + if (output->build_index.row_num < row_num || output->probe_index.row_num < row_num) { + WD_ERR("build: %u, probe: %u, row num is less than output row_num: %u!\n", + output->build_index.row_num, output->probe_index.row_num, row_num); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_join_common_check_req(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_join_req *join_req = &req->join_req; + int ret; + + if (join_req->key_cols_num != sess->join_conf.cols_num) { + WD_ERR("invalid: join table key_cols_num is not equal!\n"); + return -WD_EINVAL; + } + + if (!join_req->key_cols) { + WD_ERR("invalid: join table key_cols is NULL!\n"); + return -WD_EINVAL; + } + + if (!req->input_row_num) { + WD_ERR("invalid: join table input row number is zero!\n"); + return -WD_EINVAL; + } + + ret = check_key_col_addr(join_req->key_cols, join_req->key_cols_num, sess, + req->input_row_num, true); + if (ret) { + WD_ERR("failed to check join table key cols addr!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_build_hash_check_params(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, __u8 mode) +{ + int ret; + + ret = wd_join_gather_check_common(sess, req, mode, true); + if (ret) + return ret; + + if (req->op_type != WD_JOIN_BUILD_HASH) { + WD_ERR("failed to check req op_type for build hash task!\n"); + return -WD_EINVAL; + } + + ret = wd_join_common_check_req(sess, req); + if (ret) + WD_ERR("failed to check join req for build hash 
task!\n"); + + return ret; +} + +static int wd_join_probe_check_req(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_join_req *jreq = &req->join_req; + struct wd_probe_out_info *probe_output = &jreq->probe_output; + int ret; + + if (req->op_type != WD_JOIN_PROBE) { + WD_ERR("failed to check req op_type for probe task!\n"); + return -WD_EINVAL; + } + + ret = wd_join_common_check_req(sess, req); + if (ret) { + WD_ERR("failed to check join req for probe task!\n"); + return ret; + } + + if (!req->output_row_num) { + WD_ERR("probe output row number is zero!\n"); + return -WD_EINVAL; + } + + if (sess->join_conf.key_output_enable) { + if (probe_output->key_cols_num != sess->join_conf.cols_num || + !probe_output->key_cols) { + WD_ERR("invalid: probe out key_cols_num is not equal!\n"); + return -WD_EINVAL; + } + ret = check_key_col_addr(probe_output->key_cols, probe_output->key_cols_num, + sess, req->output_row_num, false); + if (ret) { + WD_ERR("failed to check probe output key cols addr!\n"); + return -WD_EINVAL; + } + } + + ret = check_probe_out_addr(probe_output, sess, req->output_row_num); + if (ret) { + WD_ERR("failed to check probe output addr!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_join_probe_check_params(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, __u8 mode) +{ + int ret; + + ret = wd_join_gather_check_common(sess, req, mode, true); + if (ret) + return ret; + + return wd_join_probe_check_req(sess, req); +} + +static int wd_join_rehash_check_params(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + int ret; + + ret = wd_join_gather_check_common(sess, req, CTX_MODE_SYNC, true); + if (ret) + return ret; + + if (req->op_type != WD_JOIN_REHASH) { + WD_ERR("failed to check req op_type for rehash task!\n"); + return -WD_EINVAL; + } + + if (!req->output_row_num) { + WD_ERR("invalid: req output_row_num is 0 for join rehash!\n"); + return -WD_EINVAL; + } + + 
return WD_SUCCESS; +} + +static int wd_join_gather_sync_job(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, + struct wd_join_gather_msg *msg) +{ + struct wd_join_gather_setting *setting = &wd_join_gather_setting; + struct wd_ctx_config_internal *config = &setting->config; + struct wd_msg_handle msg_handle; + struct wd_ctx_internal *ctx; + __u32 idx; + int ret; + + memset(msg, 0, sizeof(struct wd_join_gather_msg)); + fill_join_gather_msg(msg, req, sess); + req->state = 0; + + idx = setting->sched.pick_next_ctx(setting->sched.h_sched_ctx, + sess->sched_key, CTX_MODE_SYNC); + ret = wd_check_ctx(config, CTX_MODE_SYNC, idx); + if (ret) + return ret; + + wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); + ctx = config->ctxs + idx; + + msg_handle.send = setting->driver->send; + msg_handle.recv = setting->driver->recv; + + pthread_spin_lock(&ctx->lock); + ret = wd_handle_msg_sync(setting->driver, &msg_handle, ctx->ctx, + msg, NULL, config->epoll_en); + pthread_spin_unlock(&ctx->lock); + + return ret; +} + +static int wd_build_hash_try_init(struct wd_join_gather_sess *sess, + enum wd_join_sess_state *expected) +{ + enum wd_join_sess_state state; + + (void)__atomic_compare_exchange_n(&sess->state, expected, WD_JOIN_SESS_BUILD_HASH, + false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + state = __atomic_load_n(&sess->state, __ATOMIC_RELAXED); + if (state != WD_JOIN_SESS_BUILD_HASH) { + WD_ERR("failed to set join sess state: %u!\n", state); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_join_gather_check_result(__u32 result) +{ + switch (result) { + case WD_JOIN_GATHER_TASK_DONE: + return WD_SUCCESS; + case WD_JOIN_GATHER_IN_EPARA: + case WD_JOIN_GATHER_NEED_REHASH: + case WD_JOIN_GATHER_INVALID_HASH_TABLE: + case WD_JOIN_GATHER_PARSE_ERROR: + case WD_JOIN_GATHER_BUS_ERROR: + WD_ERR("failed to check join gather message state: %u!\n", result); + return -WD_EIO; + default: + return -WD_EINVAL; + } +} + +int wd_join_build_hash_sync(handle_t h_sess, 
struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected = WD_JOIN_SESS_INIT; + struct wd_join_gather_msg msg; + int ret; + + ret = wd_build_hash_check_params(sess, req, CTX_MODE_SYNC); + if (unlikely(ret)) { + WD_ERR("failed to check hashjoin build hash params!\n"); + return ret; + } + + ret = wd_build_hash_try_init(sess, &expected); + if (unlikely(ret)) + return ret; + + ret = wd_join_gather_sync_job(sess, req, &msg); + if (unlikely(ret)) { + if (expected == WD_JOIN_SESS_INIT) + __atomic_store_n(&sess->state, expected, __ATOMIC_RELEASE); + WD_ERR("failed to do hashjoin build hash sync job!\n"); + return ret; + } + + req->consumed_row_num = msg.consumed_row_num; + req->state = msg.result; + + return WD_SUCCESS; +} + +static int wd_join_gather_async_job(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_join_gather_setting *setting = &wd_join_gather_setting; + struct wd_ctx_config_internal *config = &setting->config; + struct wd_join_gather_msg *msg; + struct wd_ctx_internal *ctx; + int msg_id, ret; + __u32 idx; + + idx = setting->sched.pick_next_ctx(setting->sched.h_sched_ctx, + sess->sched_key, CTX_MODE_ASYNC); + ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx); + if (ret) + return ret; + + ctx = config->ctxs + idx; + msg_id = wd_get_msg_from_pool(&setting->pool, idx, (void **)&msg); + if (msg_id < 0) { + WD_ERR("failed to get join gather msg from pool!\n"); + return msg_id; + } + + fill_join_gather_msg(msg, req, sess); + msg->tag = msg_id; + ret = wd_alg_driver_send(setting->driver, ctx->ctx, msg); + if (ret < 0) { + if (ret != -WD_EBUSY) + WD_ERR("wd join gather async send err!\n"); + + goto fail_with_msg; + } + + wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); + + return WD_SUCCESS; + +fail_with_msg: + wd_put_msg_to_pool(&setting->pool, idx, msg->tag); + return ret; +} + +int wd_join_build_hash_async(handle_t h_sess, struct wd_join_gather_req 
*req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected = WD_JOIN_SESS_INIT; + int ret; + + ret = wd_build_hash_check_params(sess, req, CTX_MODE_ASYNC); + if (unlikely(ret)) { + WD_ERR("failed to check build hash async params!\n"); + return ret; + } + + ret = wd_build_hash_try_init(sess, &expected); + if (unlikely(ret)) + return ret; + + ret = wd_join_gather_async_job(sess, req); + if (unlikely(ret)) { + if (expected == WD_JOIN_SESS_INIT) + __atomic_store_n(&sess->state, expected, __ATOMIC_RELEASE); + WD_ERR("failed to do join build hash async job!\n"); + } + + return ret; +} + +static int wd_join_probe_try_init(struct wd_join_gather_sess *sess, + enum wd_join_sess_state *expected) +{ + enum wd_join_sess_state state; + + (void)__atomic_compare_exchange_n(&sess->state, expected, WD_JOIN_SESS_PROBE, + false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + state = __atomic_load_n(&sess->state, __ATOMIC_RELAXED); + if (state != WD_JOIN_SESS_PROBE) { + WD_ERR("failed to set join sess state: %u!\n", state); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +int wd_join_probe_sync(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected = WD_JOIN_SESS_BUILD_HASH; + struct wd_join_gather_msg msg; + int ret; + + ret = wd_join_probe_check_params(sess, req, CTX_MODE_SYNC); + if (unlikely(ret)) { + WD_ERR("failed to check join probe params!\n"); + return ret; + } + + ret = wd_join_probe_try_init(sess, &expected); + if (unlikely(ret)) + return ret; + + ret = wd_join_gather_sync_job(sess, req, &msg); + if (unlikely(ret)) { + if (expected == WD_JOIN_SESS_BUILD_HASH) + __atomic_store_n(&sess->state, expected, __ATOMIC_RELEASE); + WD_ERR("failed to do join probe sync job!\n"); + return ret; + } + + req->consumed_row_num = msg.consumed_row_num; + req->produced_row_num = msg.produced_row_num; + req->output_done = 
msg.output_done; + req->state = msg.result; + + return WD_SUCCESS; +} + +int wd_join_probe_async(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected = WD_JOIN_SESS_BUILD_HASH; + int ret; + + ret = wd_join_probe_check_params(sess, req, CTX_MODE_ASYNC); + if (unlikely(ret)) { + WD_ERR("failed to check join probe params!\n"); + return ret; + } + + ret = wd_join_probe_try_init(sess, &expected); + if (unlikely(ret)) + return ret; + + ret = wd_join_gather_async_job(sess, req); + if (unlikely(ret)) { + if (expected == WD_JOIN_SESS_BUILD_HASH) + __atomic_store_n(&sess->state, expected, __ATOMIC_RELEASE); + WD_ERR("failed to do join probe async job!\n"); + } + + return ret; +} + +static int wd_join_rehash_sync_inner(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_join_gather_msg msg = {0}; + int ret; + + ret = wd_join_gather_sync_job(sess, req, &msg); + if (ret) + return ret; + + ret = wd_join_gather_check_result(msg.result); + if (ret) + return ret; + + req->output_done = msg.output_done; + + return WD_SUCCESS; +} + +static int wd_join_rehash_try_init(struct wd_join_gather_sess *sess, + enum wd_join_sess_state *expected) +{ + int ret; + + ret = __atomic_compare_exchange_n(&sess->state, expected, WD_JOIN_SESS_REHASH, + false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + if (!ret) { + WD_ERR("invalid: join rehash sess state is %u!\n", *expected); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +int wd_join_rehash_sync(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + enum wd_join_sess_state expected = WD_JOIN_SESS_PREPARE_REHASH; + __u64 max_cnt, cnt = 0; + int ret; + + ret = wd_join_rehash_check_params(sess, req); + if (unlikely(ret)) { + WD_ERR("failed to check join rehash params!\n"); + return ret; + } + + ret = wd_join_rehash_try_init(sess, &expected); 
+ if (unlikely(ret)) + return ret; + + max_cnt = MAX_HASH_TABLE_ROW_NUM / req->output_row_num; + while (cnt < max_cnt) { + ret = wd_join_rehash_sync_inner(sess, req); + if (unlikely(ret)) { + __atomic_store_n(&sess->state, WD_JOIN_SESS_PREPARE_REHASH, + __ATOMIC_RELEASE); + WD_ERR("failed to do join rehash task!\n"); + return ret; + } + if (req->output_done) + break; + cnt++; + } + + __atomic_store_n(&sess->state, WD_JOIN_SESS_BUILD_HASH, __ATOMIC_RELEASE); + return WD_SUCCESS; +} + +static int wd_gather_common_check_req(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_gather_req *gather_req = &req->gather_req; + struct wd_gather_table_info *tables; + __u32 table_index; + + if (!sess->gather_conf.tables) { + WD_ERR("invalid: session gather tables is NULL!\n"); + return -WD_EINVAL; + } + tables = sess->gather_conf.tables; + table_index = gather_req->table_index; + + if (table_index >= sess->gather_conf.table_num) { + WD_ERR("invalid: gather table index(%u) is too big!\n", table_index); + return -WD_EINVAL; + } + + if (gather_req->data_cols_num != tables[table_index].cols_num) { + WD_ERR("invalid: gather table data_cols_num is not equal!\n"); + return -WD_EINVAL; + } + + if (!gather_req->data_cols) { + WD_ERR("invalid: gather table data_cols is NULL!\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_gather_convert_check_req(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_gather_req *gather_req = &req->gather_req; + __u32 expt_size, table_index; + int ret; + + if (req->op_type != WD_GATHER_CONVERT) { + WD_ERR("failed to check req op_type for gather convert task!\n"); + return -WD_EINVAL; + } + + ret = wd_gather_common_check_req(sess, req); + if (ret) + return ret; + + table_index = gather_req->table_index; + + ret = check_data_col_addr(gather_req, sess, req->input_row_num, true); + if (ret) { + WD_ERR("failed to check gather convert data cols addr!\n"); + return -WD_EINVAL; + } 
+ + if (gather_req->row_batchs.batch_num != 1 || !gather_req->row_batchs.batch_addr || + !gather_req->row_batchs.batch_addr[0]) { + WD_ERR("invalid: gather convert only support one batch!\n"); + return -WD_EINVAL; + } + + if (!gather_req->row_batchs.batch_row_num || !gather_req->row_batchs.batch_row_size) { + WD_ERR("invalid: gather convert batchs row_num or row_size is NULL!\n"); + return -WD_EINVAL; + } + + expt_size = sess->gather_conf.batch_row_size[table_index]; + if (gather_req->row_batchs.batch_row_num[0] != req->input_row_num || + gather_req->row_batchs.batch_row_size[0] != expt_size) { + WD_ERR("invalid: gather convert row batchs, row_size: %u, row_num: %u\n", + gather_req->row_batchs.batch_row_size[0], + gather_req->row_batchs.batch_row_num[0]); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +static int wd_gather_complete_check_req(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req) +{ + struct wd_gather_req *gather_req = &req->gather_req; + struct wd_gather_table_info *tables; + struct wd_dae_row_addr *index_addr; + __u32 table_index, expt_size, i; + int ret; + + if (req->op_type != WD_GATHER_COMPLETE) { + WD_ERR("failed to check req op_type for gather complete task!\n"); + return -WD_EINVAL; + } + + ret = wd_gather_common_check_req(sess, req); + if (ret) + return ret; + + tables = sess->gather_conf.tables; + table_index = gather_req->table_index; + + ret = check_data_col_addr(gather_req, sess, req->output_row_num, false); + if (ret) { + WD_ERR("failed to check gather complete data cols addr!\n"); + return -WD_EINVAL; + } + + index_addr = &gather_req->index; + if (!index_addr->addr || index_addr->row_num < req->output_row_num) { + WD_ERR("invalid: gather index is NULL or index row number is small!\n"); + return -WD_EINVAL; + } + + /* The row batch information is stored to index, no need to check. 
*/ + if (sess->index_type == WD_BATCH_ADDR_INDEX && tables[table_index].is_multi_batch) + return WD_SUCCESS; + + if (!gather_req->row_batchs.batch_num || !gather_req->row_batchs.batch_addr) { + WD_ERR("invalid: gather row batch is NULL or batch addr number is 0!\n"); + return -WD_EINVAL; + } + + if (!gather_req->row_batchs.batch_row_num || !gather_req->row_batchs.batch_row_size) { + WD_ERR("invalid: gather row batch row_num or row_size is NULL!\n"); + return -WD_EINVAL; + } + + if (!tables[table_index].is_multi_batch) { + if (gather_req->row_batchs.batch_num != 1) { + WD_ERR("invalid: single gather row batch addr num should be 1!\n"); + return -WD_EINVAL; + } + } + + for (i = 0; i < gather_req->row_batchs.batch_num; i++) { + if (!gather_req->row_batchs.batch_addr[i] || + !gather_req->row_batchs.batch_row_num[i]) { + WD_ERR("invalid: row batch addr or row_num is null! idx: %u\n", i); + return -WD_EINVAL; + } + expt_size = sess->gather_conf.batch_row_size[table_index]; + if (gather_req->row_batchs.batch_row_size[i] != expt_size) { + WD_ERR("invalid row batch row_size: %u, batch idx: %u\n", + gather_req->row_batchs.batch_row_size[i], i); + return -WD_EINVAL; + } + } + + return WD_SUCCESS; +} + +static int wd_gather_convert_check_params(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, __u8 mode) +{ + int ret; + + ret = wd_join_gather_check_common(sess, req, mode, false); + if (ret) + return ret; + + return wd_gather_convert_check_req(sess, req); +} + +static int wd_gather_complete_check_params(struct wd_join_gather_sess *sess, + struct wd_join_gather_req *req, __u8 mode) +{ + int ret; + + ret = wd_join_gather_check_common(sess, req, mode, false); + if (ret) + return ret; + + return wd_gather_complete_check_req(sess, req); +} + +int wd_gather_convert_sync(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + struct wd_join_gather_msg msg; + int ret; + + ret = 
wd_gather_convert_check_params(sess, req, CTX_MODE_SYNC); + if (unlikely(ret)) { + WD_ERR("failed to check gather convert params!\n"); + return ret; + } + + ret = wd_join_gather_sync_job(sess, req, &msg); + if (unlikely(ret)) { + WD_ERR("failed to do gather convert sync job!\n"); + return ret; + } + + req->consumed_row_num = msg.consumed_row_num; + req->state = msg.result; + + return WD_SUCCESS; +} + +int wd_gather_convert_async(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + int ret; + + ret = wd_gather_convert_check_params(sess, req, CTX_MODE_ASYNC); + if (unlikely(ret)) { + WD_ERR("failed to check gather convert async params!\n"); + return ret; + } + + ret = wd_join_gather_async_job(sess, req); + if (unlikely(ret)) + WD_ERR("failed to do gather convert async job!\n"); + + return ret; +} + +int wd_gather_complete_sync(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + struct wd_join_gather_msg msg; + int ret; + + ret = wd_gather_complete_check_params(sess, req, CTX_MODE_SYNC); + if (unlikely(ret)) { + WD_ERR("failed to check gather complete params!\n"); + return ret; + } + + ret = wd_join_gather_sync_job(sess, req, &msg); + if (unlikely(ret)) { + WD_ERR("failed to do gather complete sync job!\n"); + return ret; + } + + req->produced_row_num = msg.produced_row_num; + req->state = msg.result; + + return WD_SUCCESS; +} + +int wd_gather_complete_async(handle_t h_sess, struct wd_join_gather_req *req) +{ + struct wd_join_gather_sess *sess = (struct wd_join_gather_sess *)h_sess; + int ret; + + ret = wd_gather_complete_check_params(sess, req, CTX_MODE_ASYNC); + if (unlikely(ret)) { + WD_ERR("failed to check gather complete params!\n"); + return ret; + } + + ret = wd_join_gather_async_job(sess, req); + if (unlikely(ret)) + WD_ERR("failed to do gather complete async job!\n"); + + return ret; +} + +struct 
wd_join_gather_msg *wd_join_gather_get_msg(__u32 idx, __u32 tag) +{ + return wd_find_msg_in_pool(&wd_join_gather_setting.pool, idx, tag); +} + +static int wd_join_gather_poll_ctx(__u32 idx, __u32 expt, __u32 *count) +{ + struct wd_ctx_config_internal *config = &wd_join_gather_setting.config; + struct wd_join_gather_msg resp_msg = {0}; + struct wd_join_gather_msg *msg; + struct wd_ctx_internal *ctx; + struct wd_join_gather_req *req; + __u64 recv_count = 0; + __u32 tmp = expt; + int ret; + + *count = 0; + + ret = wd_check_ctx(config, CTX_MODE_ASYNC, idx); + if (unlikely(ret)) + return ret; + + ctx = config->ctxs + idx; + + do { + ret = wd_alg_driver_recv(wd_join_gather_setting.driver, ctx->ctx, &resp_msg); + if (ret == -WD_EAGAIN) { + return ret; + } else if (ret < 0) { + WD_ERR("wd join_gather recv hw err!\n"); + return ret; + } + recv_count++; + msg = wd_find_msg_in_pool(&wd_join_gather_setting.pool, idx, resp_msg.tag); + if (!msg) { + WD_ERR("failed to get join gather msg from pool!\n"); + return -WD_EINVAL; + } + + msg->req.state = resp_msg.result; + msg->req.consumed_row_num = resp_msg.consumed_row_num; + msg->req.produced_row_num = resp_msg.produced_row_num; + msg->req.output_done = resp_msg.output_done; + req = &msg->req; + + req->cb(req, req->cb_param); + /* Free msg cache to msg_pool */ + wd_put_msg_to_pool(&wd_join_gather_setting.pool, idx, resp_msg.tag); + *count = recv_count; + } while (--tmp); + + return ret; +} + +int wd_join_gather_poll(__u32 expt, __u32 *count) +{ + handle_t h_ctx = wd_join_gather_setting.sched.h_sched_ctx; + struct wd_sched *sched = &wd_join_gather_setting.sched; + + if (!expt || !count) { + WD_ERR("invalid: join gather poll input param is NULL!\n"); + return -WD_EINVAL; + } + + return sched->poll_policy(h_ctx, expt, count); +} diff --git a/wd_util.c b/wd_util.c index 199ee608..e8a29344 100644 --- a/wd_util.c +++ b/wd_util.c @@ -66,6 +66,7 @@ static const char *wd_env_name[WD_TYPE_MAX] = { "WD_ECC_CTX_NUM", "WD_AGG_CTX_NUM", 
"WD_UDMA_CTX_NUM", + "WD_JOIN_GATHER_CTX_NUM", }; struct async_task { @@ -113,6 +114,9 @@ static struct acc_alg_item alg_options[] = { {"lz77_only", "lz77_only"}, {"hashagg", "hashagg"}, {"udma", "udma"}, + {"hashjoin", "hashjoin"}, + {"gather", "gather"}, + {"join-gather", "hashjoin"}, {"rsa", "rsa"}, {"dh", "dh"}, @@ -2608,7 +2612,7 @@ static int wd_alg_ctx_init(struct wd_init_attrs *attrs) list = wd_get_accel_list(attrs->alg); if (!list) { - WD_ERR("failed to get devices!\n"); + WD_ERR("failed to get devices for alg: %s\n", attrs->alg); return -WD_ENODEV; } -- 2.33.0
participants (1)
-
Qi Tao