From: Weili Qian <qianweili@huawei.com> The internal asynchronous polling interface is not used in any scenario, remove it. Upstream: YES Feature or Bugfix:Bugfix AR:AR20230706877890 DTS:DTS2026042322013 Signed-off-by: Weili Qian <qianweili@huawei.com> --- include/wd_aead.h | 3 +- include/wd_cipher.h | 3 +- include/wd_comp.h | 3 +- include/wd_digest.h | 3 +- include/wd_ecc.h | 3 +- include/wd_rsa.h | 3 +- include/wd_util.h | 37 ---- wd_aead.c | 24 +-- wd_cipher.c | 24 +-- wd_comp.c | 28 +-- wd_dh.c | 24 +-- wd_digest.c | 24 +-- wd_ecc.c | 24 +-- wd_rsa.c | 24 +-- wd_util.c | 418 +------------------------------------------- 15 files changed, 70 insertions(+), 575 deletions(-) diff --git a/include/wd_aead.h b/include/wd_aead.h index 4b5095f..3585c86 100644 --- a/include/wd_aead.h +++ b/include/wd_aead.h @@ -254,7 +254,8 @@ void wd_aead_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. * * If the current algorithm library does not require the type parameter, * the type parameter is invalid. The function returns 0 to indicate that diff --git a/include/wd_cipher.h b/include/wd_cipher.h index a6f8be1..383d315 100644 --- a/include/wd_cipher.h +++ b/include/wd_cipher.h @@ -218,7 +218,8 @@ void wd_cipher_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. */ int wd_cipher_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable); diff --git a/include/wd_comp.h b/include/wd_comp.h index 8579f93..890799a 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -252,7 +252,8 @@ void wd_comp_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. * * If the current algorithm library does not require the type parameter, * the type parameter is invalid. The function returns 0 to indicate that diff --git a/include/wd_digest.h b/include/wd_digest.h index 42a95db..410c7f9 100644 --- a/include/wd_digest.h +++ b/include/wd_digest.h @@ -278,7 +278,8 @@ void wd_digest_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. */ int wd_digest_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable); diff --git a/include/wd_ecc.h b/include/wd_ecc.h index 18c1c0d..d868951 100644 --- a/include/wd_ecc.h +++ b/include/wd_ecc.h @@ -548,7 +548,8 @@ void wd_ecc_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. 
*/ int wd_ecc_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable); diff --git a/include/wd_rsa.h b/include/wd_rsa.h index 9c91432..b0ead0a 100644 --- a/include/wd_rsa.h +++ b/include/wd_rsa.h @@ -239,7 +239,8 @@ void wd_rsa_ctx_num_uninit(void); * @type: operation type. * @mode: 0: sync mode, 1: async mode * @num: return ctx num. - * @is_enable return enable inner poll flag. + * @is_enable: return enable inner poll flag, inner poll is not + * supported, and is_enable will always be 0. */ int wd_rsa_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable); diff --git a/include/wd_util.h b/include/wd_util.h index 42270d9..c24c554 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -78,16 +78,10 @@ struct wd_env_config_per_numa { /* Resource begin */ struct uacce_dev *dev; int dev_num; - /* This can be made statically currently */ - unsigned long async_poll_num; - void *async_task_queue_array; }; struct wd_env_config { struct wd_env_config_per_numa *config_per_numa; - /* Let's make it as a gobal config, not per numa */ - bool enable_internal_poll; - /* resource config */ struct wd_sched *sched; bool internal_sched; @@ -273,28 +267,6 @@ int wd_check_datalist(struct wd_datalist *head, __u64 size); */ int wd_parse_ctx_num(struct wd_env_config *config, const char *s); -/* - * wd_parse_async_poll_en() - Parse async polling thread related environment - * variable and store it. - * @config: Pointer of wd_env_config which is used to store environment - * variable information. - * @s: Related environment variable string. - * - * More information, please see docs/wd_environment_variable. - */ -int wd_parse_async_poll_en(struct wd_env_config *config, const char *s); - -/* - * wd_parse_async_poll_num() - Parse async polling thread related environment - * variable and store it. - * @config: Pointer of wd_env_config which is used to store environment - * variable information. - * @s: Related environment variable string. - * - * More information, please see docs/wd_environment_variable. - */ -int wd_parse_async_poll_num(struct wd_env_config *config, const char *s); - /* * wd_alg_env_init() - Init wd algorithm environment variable configurations. * This is a help function which can be used by specific @@ -323,15 +295,6 @@ int wd_alg_env_init(struct wd_env_config *env_config, void wd_alg_env_uninit(struct wd_env_config *env_config, const struct wd_alg_ops *ops); -/* - * wd_add_task_to_async_queue() - Add an async request to its related async - * task queue. - * @config: Pointer of wd_env_config which is used to store environment - * variable information. - * @idx: Index of ctx in config. - */ -int wd_add_task_to_async_queue(struct wd_env_config *config, __u32 idx); - /* * dump_env_info() - dump wd algorithm ctx info. 
* @config: Pointer of wd_env_config which is used to store environment diff --git a/wd_aead.c b/wd_aead.c index 748bf95..3e993a1 100644 --- a/wd_aead.c +++ b/wd_aead.c @@ -957,9 +957,6 @@ int wd_do_aead_async(handle_t h_sess, struct wd_aead_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_aead_env_config, idx); - if (ret) - goto fail_with_msg; return 0; @@ -1039,15 +1036,10 @@ int wd_aead_poll(__u32 expt, __u32 *count) return sched->poll_policy(h_ctx, expt, count); } -static const struct wd_config_variable table[] = { - { .name = "WD_AEAD_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_AEAD_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const struct wd_config_variable table = { + .name = "WD_AEAD_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_aead_ops = { @@ -1062,8 +1054,8 @@ int wd_aead_env_init(struct wd_sched *sched) { wd_aead_env_config.sched = sched; - return wd_alg_env_init(&wd_aead_env_config, table, - &wd_aead_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_aead_env_config, &table, + &wd_aead_ops, 1, NULL); } void wd_aead_env_uninit(void) @@ -1080,8 +1072,8 @@ int wd_aead_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_aead_env_config, table, - &wd_aead_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_aead_env_config, &table, + &wd_aead_ops, 1, &ctx_attr); } void wd_aead_ctx_num_uninit(void) diff --git a/wd_cipher.c b/wd_cipher.c index abedfb3..a4d6c63 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -800,9 +800,6 @@ int wd_do_cipher_async(handle_t h_sess, struct wd_cipher_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_cipher_env_config, idx); - if (ret) - goto fail_with_msg; return 0; @@ -883,15 +880,10 @@ int wd_cipher_poll(__u32 expt, __u32 *count) return sched->poll_policy(h_ctx, expt, count); } -static const struct wd_config_variable table[] = { - { .name = "WD_CIPHER_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_CIPHER_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const struct wd_config_variable table = { + .name = "WD_CIPHER_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_cipher_ops = { @@ -906,8 +898,8 @@ int wd_cipher_env_init(struct wd_sched *sched) { wd_cipher_env_config.sched = sched; - return wd_alg_env_init(&wd_cipher_env_config, table, - &wd_cipher_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_cipher_env_config, &table, + &wd_cipher_ops, 1, NULL); } void wd_cipher_env_uninit(void) @@ -924,8 +916,8 @@ int wd_cipher_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_cipher_env_config, table, - &wd_cipher_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_cipher_env_config, &table, + &wd_cipher_ops, 1, &ctx_attr); } void wd_cipher_ctx_num_uninit(void) diff --git a/wd_comp.c b/wd_comp.c index 247eadd..84fb079 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -880,9 +880,6 @@ int wd_do_comp_async(handle_t h_sess, struct wd_comp_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_comp_env_config, idx); - if (unlikely(ret)) - goto fail_with_msg; return 0; @@ -908,19 +905,10 @@ 
int wd_comp_poll(__u32 expt, __u32 *count) return sched->poll_policy(h_sched_ctx, expt, count); } -static const struct wd_config_variable table[] = { - { .name = "WD_COMP_CTX_NUM", - .def_val = "sync-comp:1@0,sync-decomp:1@0,async-comp:1@0,async-decomp:1@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_COMP_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - }, - { .name = "WD_COMP_ASYNC_POLL_NUM", - .def_val = "1@0", - .parse_fn = wd_parse_async_poll_num - } +static const struct wd_config_variable table = { + .name = "WD_COMP_CTX_NUM", + .def_val = "sync-comp:1@0,sync-decomp:1@0,async-comp:1@0,async-decomp:1@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_comp_ops = { @@ -935,8 +923,8 @@ int wd_comp_env_init(struct wd_sched *sched) { wd_comp_env_config.sched = sched; - return wd_alg_env_init(&wd_comp_env_config, table, - &wd_comp_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_comp_env_config, &table, + &wd_comp_ops, 1, NULL); } void wd_comp_env_uninit(void) @@ -958,8 +946,8 @@ int wd_comp_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_comp_env_config, table, - &wd_comp_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_comp_env_config, &table, + &wd_comp_ops, 1, &ctx_attr); } void wd_comp_ctx_num_uninit(void) diff --git a/wd_dh.c b/wd_dh.c index 3395060..7837114 100644 --- a/wd_dh.c +++ b/wd_dh.c @@ -441,9 +441,6 @@ int wd_do_dh_async(handle_t sess, struct wd_dh_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_dh_env_config, idx); - if (ret) - goto fail_with_msg; return WD_SUCCESS; @@ -654,15 +651,10 @@ void wd_dh_free_sess(handle_t sess) free(sess_t); } -static const struct wd_config_variable table[] = { - { .name = "WD_DH_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_DH_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const struct wd_config_variable table = { + .name = "WD_DH_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_dh_ops = { @@ -677,8 +669,8 @@ int wd_dh_env_init(struct wd_sched *sched) { wd_dh_env_config.sched = sched; - return wd_alg_env_init(&wd_dh_env_config, table, - &wd_dh_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_dh_env_config, &table, + &wd_dh_ops, 1, NULL); } void wd_dh_env_uninit(void) @@ -695,8 +687,8 @@ int wd_dh_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_dh_env_config, table, - &wd_dh_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_dh_env_config, &table, + &wd_dh_ops, 1, &ctx_attr); } void wd_dh_ctx_num_uninit(void) diff --git a/wd_digest.c b/wd_digest.c index 8e5bf94..bd3dd05 100644 --- a/wd_digest.c +++ b/wd_digest.c @@ -746,9 +746,6 @@ int wd_do_digest_async(handle_t h_sess, struct wd_digest_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_digest_env_config, idx); - if (ret) - goto fail_with_msg; return 0; @@ -830,15 +827,10 @@ int wd_digest_poll(__u32 expt, __u32 *count) return sched->poll_policy(h_ctx, expt, count); } -static const struct wd_config_variable table[] = { - { .name = "WD_DIGEST_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_DIGEST_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const 
struct wd_config_variable table = { + .name = "WD_DIGEST_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_digest_ops = { @@ -853,8 +845,8 @@ int wd_digest_env_init(struct wd_sched *sched) { wd_digest_env_config.sched = sched; - return wd_alg_env_init(&wd_digest_env_config, table, - &wd_digest_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_digest_env_config, &table, + &wd_digest_ops, 1, NULL); } void wd_digest_env_uninit(void) @@ -871,8 +863,8 @@ int wd_digest_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_digest_env_config, table, - &wd_digest_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_digest_env_config, &table, + &wd_digest_ops, 1, &ctx_attr); } void wd_digest_ctx_num_uninit(void) diff --git a/wd_ecc.c b/wd_ecc.c index c31495f..4c77a04 100644 --- a/wd_ecc.c +++ b/wd_ecc.c @@ -2319,9 +2319,6 @@ int wd_do_ecc_async(handle_t sess, struct wd_ecc_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_ecc_env_config, idx); - if (ret) - goto fail_with_msg; return WD_SUCCESS; @@ -2401,15 +2398,10 @@ int wd_ecc_poll(__u32 expt, __u32 *count) return wd_ecc_setting.sched.poll_policy(h_sched_sess, expt, count); } -static const struct wd_config_variable table[] = { - { .name = "WD_ECC_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_ECC_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const struct wd_config_variable table = { + .name = "WD_ECC_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_ecc_ops = { @@ -2424,8 +2416,8 @@ int wd_ecc_env_init(struct wd_sched *sched) { wd_ecc_env_config.sched = sched; - return wd_alg_env_init(&wd_ecc_env_config, table, - &wd_ecc_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_ecc_env_config, &table, + &wd_ecc_ops, 1, NULL); } void wd_ecc_env_uninit(void) @@ -2442,8 +2434,8 @@ int wd_ecc_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_ecc_env_config, table, - &wd_ecc_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_ecc_env_config, &table, + &wd_ecc_ops, 1, &ctx_attr); } void wd_ecc_ctx_num_uninit(void) diff --git a/wd_rsa.c b/wd_rsa.c index c020514..2242fed 100644 --- a/wd_rsa.c +++ b/wd_rsa.c @@ -502,9 +502,6 @@ int wd_do_rsa_async(handle_t sess, struct wd_rsa_req *req) } wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); - ret = wd_add_task_to_async_queue(&wd_rsa_env_config, idx); - if (ret) - goto fail_with_msg; return WD_SUCCESS; @@ -1266,15 +1263,10 @@ void wd_rsa_get_prikey(handle_t sess, struct wd_rsa_prikey **prikey) *prikey = ((struct wd_rsa_sess *)sess)->prikey; } -static const struct wd_config_variable table[] = { - { .name = "WD_RSA_CTX_NUM", - .def_val = "sync:2@0,async:2@0", - .parse_fn = wd_parse_ctx_num - }, - { .name = "WD_RSA_ASYNC_POLL_EN", - .def_val = "0", - .parse_fn = wd_parse_async_poll_en - } +static const struct wd_config_variable table = { + .name = "WD_RSA_CTX_NUM", + .def_val = "sync:2@0,async:2@0", + .parse_fn = wd_parse_ctx_num }; static const struct wd_alg_ops wd_rsa_ops = { @@ -1289,8 +1281,8 @@ int wd_rsa_env_init(struct wd_sched *sched) { wd_rsa_env_config.sched = sched; - return wd_alg_env_init(&wd_rsa_env_config, table, - &wd_rsa_ops, ARRAY_SIZE(table), NULL); + return wd_alg_env_init(&wd_rsa_env_config, 
&table, + &wd_rsa_ops, 1, NULL); } void wd_rsa_env_uninit(void) @@ -1307,8 +1299,8 @@ int wd_rsa_ctx_num_init(__u32 node, __u32 type, __u32 num, __u8 mode) if (ret) return ret; - return wd_alg_env_init(&wd_rsa_env_config, table, - &wd_rsa_ops, ARRAY_SIZE(table), &ctx_attr); + return wd_alg_env_init(&wd_rsa_env_config, &table, + &wd_rsa_ops, 1, &ctx_attr); } void wd_rsa_ctx_num_uninit(void) diff --git a/wd_util.c b/wd_util.c index f97b558..c174d7c 100644 --- a/wd_util.c +++ b/wd_util.c @@ -8,7 +8,6 @@ #include <dirent.h> #include <dlfcn.h> #include <pthread.h> -#include <semaphore.h> #include <string.h> #include <ctype.h> #include "wd_sched.h" @@ -72,27 +71,6 @@ static const char *wd_env_name[WD_TYPE_MAX] = { "WD_JOIN_GATHER_CTX_NUM", }; -struct async_task { - __u32 idx; -}; - -struct async_task_queue { - struct async_task *head; - int depth; - /* the producer offset of task queue */ - int prod; - /* the consumer offset of task queue */ - int cons; - int cur_task; - int left_task; - int end; - sem_t empty_sem; - sem_t full_sem; - pthread_mutex_t lock; - pthread_t tid; - int (*alg_poll_ctx)(__u32, __u32, __u32 *); -}; - struct drv_lib_list { void *dlhandle; struct drv_lib_list *next; @@ -784,17 +762,6 @@ static int str_to_bool(const char *s, bool *target) return 0; } -int wd_parse_async_poll_en(struct wd_env_config *config, const char *s) -{ - int ret; - - ret = str_to_bool(s, &config->enable_internal_poll); - if (ret) - WD_ERR("failed to parse async poll enable flag(%s)!\n", s); - - return ret; -} - static int parse_num_on_numa(const char *s, int *num, int *node) { char *sep, *start, *left; @@ -1036,41 +1003,6 @@ int wd_parse_ctx_num(struct wd_env_config *config, const char *s) return parse_ctx_num(config, s); } -int wd_parse_async_poll_num(struct wd_env_config *config, const char *s) -{ - struct wd_env_config_per_numa *config_numa; - char *left, *section, *start; - int node, poll_num, ret; - - if (!config->enable_internal_poll) { - WD_ERR("internal poll not enabled, skip parse poll number!\n"); - return 0; - } - - start = strdup(s); - if (!start) - return -ENOMEM; - - left = start; - while ((section = strsep(&left, ","))) { - ret = parse_num_on_numa(section, &poll_num, &node); - if (ret) - goto out; - config_numa = wd_get_config_numa(config, node); - if (!config_numa) { - ret = -WD_EINVAL; - goto out; - } - config_numa->async_poll_num = poll_num; - } - - free(start); - return 0; -out: - free(start); - return ret; -} - static int wd_parse_env(struct wd_env_config *config) { const struct wd_config_variable *var; @@ -1119,8 +1051,6 @@ static int wd_parse_ctx_attr(struct wd_env_config *env_config, /* Use default sched and disable internal poll */ env_config->sched = NULL; - env_config->enable_internal_poll = 0; - config_numa->async_poll_num = 0; return 0; } @@ -1348,21 +1278,17 @@ static int wd_init_sched_config(struct wd_env_config *config, { struct wd_env_config_per_numa *config_numa; int i, j, ret, max_node, type_num; - void *func = NULL; type_num = config->op_type_num; max_node = numa_max_node() + 1; if (max_node <= 0) return -WD_EINVAL; - if (!config->enable_internal_poll) - func = alg_poll_ctx; - config->internal_sched = false; if (!config->sched) { WD_ERR("no sched is specified, alloc a default sched!\n"); config->sched = wd_sched_rr_alloc(SCHED_POLICY_RR, type_num, - max_node, func); + max_node, alg_poll_ctx); if (!config->sched) return -WD_ENOMEM; @@ -1389,339 +1315,6 @@ err_release_sched: return ret; } -static struct async_task_queue *find_async_queue(struct wd_env_config *config, - 
__u32 idx) -{ - struct wd_env_config_per_numa *config_numa; - struct wd_ctx_range **ctx_table; - struct async_task_queue *head; - unsigned long offset = 0; - __u32 i, num = 0; - - FOREACH_NUMA(i, config, config_numa) { - num += config_numa->sync_ctx_num + config_numa->async_ctx_num; - if (idx < num) - break; - } - - if (i == config->numa_num) { - WD_ERR("failed to find a proper numa node!\n"); - return NULL; - } - - if (!config_numa->async_poll_num) { - WD_ERR("invalid: async_poll_num of numa is zero!\n"); - return NULL; - } - - ctx_table = config_numa->ctx_table; - for (i = 0; i < config_numa->op_type_num; i++) { - if (idx <= ctx_table[CTX_MODE_ASYNC][i].end && - idx >= ctx_table[CTX_MODE_ASYNC][i].begin) { - offset = (idx - ctx_table[CTX_MODE_ASYNC][i].begin) % - config_numa->async_poll_num; - break; - } - } - - if (i == config_numa->op_type_num) { - WD_ERR("failed to find async queue for ctx: idx %u!\n", idx); - return NULL; - } - - head = (struct async_task_queue *)config_numa->async_task_queue_array; - - return head + offset; -} - -int wd_add_task_to_async_queue(struct wd_env_config *config, __u32 idx) -{ - struct async_task_queue *task_queue; - struct async_task *task; - int curr_prod, ret; - - if (!config->enable_internal_poll) - return 0; - - task_queue = find_async_queue(config, idx); - if (!task_queue) - return -WD_EINVAL; - - ret = sem_wait(&task_queue->empty_sem); - if (ret) { - WD_ERR("failed to wait empty_sem!\n"); - return ret; - } - - pthread_mutex_lock(&task_queue->lock); - - /* get an available async task and fill ctx idx */ - curr_prod = task_queue->prod; - task = task_queue->head + curr_prod; - task->idx = idx; - - /* update global information of task queue */ - task_queue->prod = (curr_prod + 1) % task_queue->depth; - task_queue->cur_task++; - task_queue->left_task--; - - pthread_mutex_unlock(&task_queue->lock); - - ret = sem_post(&task_queue->full_sem); - if (ret) { - WD_ERR("failed to post full_sem!\n"); - goto err_out; - } - - return 0; - -err_out: - pthread_mutex_lock(&task_queue->lock); - task_queue->left_task++; - task_queue->cur_task--; - task_queue->prod = curr_prod; - pthread_mutex_unlock(&task_queue->lock); - sem_post(&task_queue->empty_sem); - - return ret; -} - -static void *async_poll_process_func(void *args) -{ - struct async_task_queue *task_queue = args; - struct async_task *head, *task; - __u32 count; - int cons, ret; - - while (1) { - if (sem_wait(&task_queue->full_sem)) { - if (errno == EINTR) { - continue; - } - } - if (__atomic_load_n(&task_queue->end, __ATOMIC_ACQUIRE)) { - __atomic_store_n(&task_queue->end, 0, __ATOMIC_RELEASE); - goto out; - } - - pthread_mutex_lock(&task_queue->lock); - - /* async sending message isn't submitted yet */ - if (task_queue->cons == task_queue->prod) { - pthread_mutex_unlock(&task_queue->lock); - sem_post(&task_queue->full_sem); - continue; - } - - cons = task_queue->cons; - head = task_queue->head; - task = head + cons; - - task_queue->cons = (cons + 1) % task_queue->depth; - task_queue->cur_task--; - task_queue->left_task++; - - pthread_mutex_unlock(&task_queue->lock); - - ret = task_queue->alg_poll_ctx(task->idx, 1, &count); - if (ret < 0) { - pthread_mutex_lock(&task_queue->lock); - task_queue->cons = cons; - task_queue->cur_task++; - task_queue->left_task--; - pthread_mutex_unlock(&task_queue->lock); - if (ret == -WD_EAGAIN) { - sem_post(&task_queue->full_sem); - continue; - } else - goto out; - } - - if (sem_post(&task_queue->empty_sem)) - goto out; - } -out: - return NULL; -} - -static int 
wd_init_one_task_queue(struct async_task_queue *task_queue, - void *alg_poll_ctx) - -{ - struct async_task *head; - pthread_t thread_id; - pthread_attr_t attr; - int depth, ret; - - task_queue->depth = depth = WD_ASYNC_DEF_QUEUE_DEPTH; - - head = calloc(task_queue->depth, sizeof(*head)); - if (!head) - return -WD_ENOMEM; - - task_queue->head = head; - task_queue->left_task = depth; - task_queue->alg_poll_ctx = alg_poll_ctx; - - if (sem_init(&task_queue->empty_sem, 0, depth)) { - WD_ERR("failed to init empty_sem!\n"); - goto err_free_head; - } - - if (sem_init(&task_queue->full_sem, 0, 0)) { - WD_ERR("failed to init full_sem!\n"); - goto err_uninit_empty_sem; - } - - if (pthread_mutex_init(&task_queue->lock, NULL)) { - WD_ERR("failed to init task queue's mutex lock!\n"); - goto err_uninit_full_sem; - } - - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - task_queue->tid = 0; - if (pthread_create(&thread_id, &attr, async_poll_process_func, - task_queue)) { - WD_ERR("failed to create poll thread!\n"); - goto err_destory_mutex; - } - - task_queue->tid = thread_id; - pthread_attr_destroy(&attr); - - return 0; - -err_destory_mutex: - pthread_attr_destroy(&attr); - pthread_mutex_destroy(&task_queue->lock); -err_uninit_full_sem: - sem_destroy(&task_queue->full_sem); -err_uninit_empty_sem: - sem_destroy(&task_queue->empty_sem); -err_free_head: - free(head); - ret = -errno; - return ret; -} - -static void wd_uninit_one_task_queue(struct async_task_queue *task_queue) -{ - /* - * If there's no async task, async_poll_process_func() is sleeping - * on task_queue->full_sem. It'll cause that threads could not - * be end and memory leak. - */ - sem_post(&task_queue->full_sem); - __atomic_store_n(&task_queue->end, 1, __ATOMIC_RELEASE); - while (__atomic_load_n(&task_queue->end, __ATOMIC_ACQUIRE)) - sched_yield(); - - pthread_mutex_destroy(&task_queue->lock); - sem_destroy(&task_queue->full_sem); - sem_destroy(&task_queue->empty_sem); - free(task_queue->head); - task_queue->head = NULL; -} - -static int wd_init_async_polling_thread_per_numa(struct wd_env_config *config, - struct wd_env_config_per_numa *config_numa, - void *alg_poll_ctx) -{ - struct async_task_queue *task_queue, *queue_head; - int i, j, ret; - double num; - - if (!config_numa->async_ctx_num) - return 0; - - if (!config_numa->async_poll_num) { - WD_ERR("invalid async poll num (%lu) is set.\n", - config_numa->async_poll_num); - WD_ERR("change to default value: %d\n", WD_ASYNC_DEF_POLL_NUM); - config_numa->async_poll_num = WD_ASYNC_DEF_POLL_NUM; - } - - num = MIN(config_numa->async_poll_num, config_numa->async_ctx_num); - - /* make max task queues as the number of async ctxs */ - queue_head = calloc(config_numa->async_ctx_num, sizeof(*queue_head)); - if (!queue_head) - return -WD_ENOMEM; - - task_queue = queue_head; - for (i = 0; i < num; task_queue++, i++) { - ret = wd_init_one_task_queue(task_queue, alg_poll_ctx); - if (ret) { - for (j = 0; j < i; task_queue++, j++) - wd_uninit_one_task_queue(task_queue); - free(queue_head); - return ret; - } - } - - config_numa->async_task_queue_array = (void *)queue_head; - - return 0; -} - -static void wd_uninit_async_polling_thread_per_numa(struct wd_env_config *cfg, - struct wd_env_config_per_numa *config_numa) -{ - struct async_task_queue *task_queue, *head; - double num; - int i; - - if (!config_numa || !config_numa->async_task_queue_array) - return; - - head = config_numa->async_task_queue_array; - task_queue = head; - num = MIN(config_numa->async_poll_num, 
config_numa->async_ctx_num); - - for (i = 0; i < num; task_queue++, i++) - wd_uninit_one_task_queue(task_queue); - free(head); - config_numa->async_task_queue_array = NULL; -} - -static int wd_init_async_polling_thread(struct wd_env_config *config, - void *alg_poll_ctx) -{ - struct wd_env_config_per_numa *config_numa; - int i, ret; - - if (!config->enable_internal_poll) - return 0; - - FOREACH_NUMA(i, config, config_numa) { - ret = wd_init_async_polling_thread_per_numa(config, config_numa, - alg_poll_ctx); - if (ret) - goto out; - } - - return 0; - -out: - FOREACH_NUMA(i, config, config_numa) - wd_uninit_async_polling_thread_per_numa(config, config_numa); - - return ret; -} - -static void wd_uninit_async_polling_thread(struct wd_env_config *config) -{ - struct wd_env_config_per_numa *config_numa; - int i; - - if (!config->enable_internal_poll) - return; - - FOREACH_NUMA(i, config, config_numa) - wd_uninit_async_polling_thread_per_numa(config, config_numa); -} - static int wd_init_resource(struct wd_env_config *config, const struct wd_alg_ops *ops) { @@ -1739,14 +1332,8 @@ static int wd_init_resource(struct wd_env_config *config, if (ret) goto err_uninit_sched; - ret = wd_init_async_polling_thread(config, ops->alg_poll_ctx); - if (ret) - goto err_uninit_alg; - return 0; -err_uninit_alg: - ops->alg_uninit(); err_uninit_sched: wd_uninit_sched_config(config); err_uninit_ctx: @@ -1757,7 +1344,6 @@ err_uninit_ctx: static void wd_uninit_resource(struct wd_env_config *config, const struct wd_alg_ops *ops) { - wd_uninit_async_polling_thread(config); ops->alg_uninit(); wd_uninit_sched_config(config); wd_free_ctx(config); @@ -1811,7 +1397,7 @@ int wd_alg_get_env_param(struct wd_env_config *env_config, return -WD_EINVAL; } - *is_enable = env_config->enable_internal_poll; + *is_enable = 0; config_numa = wd_get_config_numa(env_config, attr.node); if (!config_numa) -- 2.43.0
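Note on usage after this change: with the internal polling thread removed, an application that submits requests asynchronously is expected to reap completions itself through the per-algorithm poll interface (for example wd_cipher_poll(), which this series leaves unchanged), and wd_*_get_env_param() now always reports is_enable as 0. The sketch below illustrates that caller-driven pattern for the cipher API. It is illustrative only; session creation, request setup, the retry bound and the busy/timeout return codes are assumptions rather than part of this series.

	/*
	 * Illustrative sketch, not part of the patch: submit one asynchronous
	 * cipher request and poll for its completion from the caller's own
	 * context, since the library no longer runs an internal polling thread.
	 * h_sess and req are assumed to be a valid session and a fully prepared
	 * request; treating -EAGAIN as "nothing received yet" and the retry
	 * bound are assumptions made only for this example.
	 */
	#include <errno.h>
	#include "wd_cipher.h"

	static int submit_and_reap_one(handle_t h_sess, struct wd_cipher_req *req)
	{
		int tries = 10000;	/* arbitrary bound for the sketch */
		__u32 recv = 0;
		int ret;

		ret = wd_do_cipher_async(h_sess, req);
		if (ret)
			return ret;

		/* Drive completion ourselves: poll until the request is reaped. */
		while (recv < 1 && tries--) {
			__u32 cnt = 0;

			ret = wd_cipher_poll(1, &cnt);
			if (ret < 0 && ret != -EAGAIN)
				return ret;
			recv += cnt;
		}

		return recv ? 0 : -ETIMEDOUT;
	}
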
From: Longfang Liu <liulongfang@huawei.com> In the original scheduling domain partitioning scheme of the scheduler, a static three-dimensional array-based partitioning approach was used. This method lacks scalability, is tightly coupled with service devices, has fixed memory allocation, and results in memory wastage. To address these issues, an improved solution has been developed. The new approach employs a hash bucket scheme combined with linked lists, enabling optimized processing within the scheduling domain. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- include/wd_alg_common.h | 4 +- wd_sched.c | 2699 ++++++++++++++++++++------------------- 2 files changed, 1417 insertions(+), 1286 deletions(-) diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index 54b02e2..ac50cb2 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -190,9 +190,7 @@ struct wd_sched { void *sched_key, const int sched_mode); int (*poll_policy)(handle_t h_sched_ctx, __u32 expect, __u32 *count); - void (*set_param)(handle_t h_sched_ctx, - void *sched_key, - void *sched_param); + void (*set_param)(handle_t h_sched_ctx, void *sched_key, void *sched_param); handle_t h_sched_ctx; }; diff --git a/wd_sched.c b/wd_sched.c index 073108c..edc893c 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -1,778 +1,1005 @@ // SPDX-License-Identifier: Apache-2.0 /* - * Copyright 2020-2021 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2020-2026 Huawei Technologies Co.,Ltd. All rights reserved. * Copyright 2020-2021 Linaro ltd. + * + * Scheduler: Simplified Pure Hash Table with Dynamic Context Expansion + * + * Key improvements: + * - Single global hash table with (region_id, mode, op_type, prop) dimensions + * - Segment list for non-contiguous ctx ranges + * - Dual-domain queues for session key. + * - Dynamic ctx expansion in HUNGRY mode based on load threshold + * - Packet reception is handled through the active queues in the session key. 
+ * - Simplified sched_init: only allocate one sync + one async ctx + * - Removed redundant wd_sched_info layer */ #define _GNU_SOURCE #include <stdlib.h> #include <stdbool.h> +#include <string.h> #include <sched.h> #include <numa.h> +#include <limits.h> +#include <pthread.h> +#include <stdatomic.h> #include "wd_sched.h" -#define MAX_POLL_TIMES 1000 +#define MAX_POLL_TIMES 1000 +#define HUNGRY_LOAD_THRESHOLD 256 +#define SKEY_CTX_MAX_NUM 16 +#define SKEY_MAX_THREAD_NUM 64 +#define SKEY_LOAD_UPDATE_INTERVAL 128 +#define MAX_NUMA_NODES (NUMA_NUM_NODES >> 5) + +/* ============================================================================ + * Hash Table Configuration + * ============================================================================ + */ +#define WD_SCHED_MAX_BUCKETS 512 +#define WD_SCHED_MIN_BUCKETS 32 +#define WD_SCHED_LOAD_FACTOR 0.75f +#define HASH_PRIME1 73 +#define HASH_PRIME2 13 +#define HASH_PRIME3 7 +#define HASH_PRIME4 11 + +/* ============================================================================ + * Scheduling Region Mode + * ============================================================================ + */ enum sched_region_mode { SCHED_MODE_SYNC = 0, SCHED_MODE_ASYNC = 1, SCHED_MODE_BUTT }; -struct wd_sched_balancer { - int switch_slice; - __u32 hw_task_num; - __u32 sw_task_num; - __u32 hw_dfx_num; - __u32 sw_dfx_num; +/* ============================================================================ + * Segment List for Domain Index Organization + * ============================================================================ + */ + +/** + * wd_sched_ctx_segment - Contiguous segment of ctx indices in domain + * @begin: Start index of this segment + * @end: End index of this segment (inclusive) + * @next: Pointer to next segment in the linked list + * + * Supports non-contiguous ctx ranges via segment list. + */ +struct wd_sched_ctx_segment { + __u32 begin; + __u32 end; + struct wd_sched_ctx_segment *next; }; -/* - * sched_key - The key if schedule region. - * @numa_id: The schedule numa region id. - * @mode: Sync mode:0, async_mode:1 - * @type: Service type , the value must smaller than type_num. - * @sync_ctxid: alloc ctx id for sync mode - * @async_ctxid: alloc ctx id for async mode +/* ============================================================================ + * Session key domain cache processing. 
+ * ============================================================================ */ -struct sched_key { - int numa_id; - __u8 type; - __u8 mode; - __u32 dev_id; - __u8 ctx_prop; - __u16 is_stream; - __u16 prio_mode; - __u32 pkt_size; - __u16 sync_ctxid[UADK_CTX_MAX]; - __u16 async_ctxid[UADK_CTX_MAX]; - __u16 def_sync_ctxid; - __u16 def_async_ctxid; - struct wd_sched_balancer balancer; +/** + * wd_sched_domain_idx_cache - Simplified fixed array cache for skey domains + * + * Design principles: + * - Fixed array for cache-friendly memory layout + * - Atomic operations for lock-free load tracking + * - Simple RR and load balancing strategies + * - Maximum 16 queues per thread (typical usage) + */ +struct wd_sched_domain_idx_cache { + /* === Queue index array === */ + __u32 idx_list[SKEY_CTX_MAX_NUM]; /* Array of ctx indices */ + atomic_uint load_values[SKEY_CTX_MAX_NUM]; /* Atomic load counters */ + __u32 valid_count; /* Number of valid queues */ + + /* === Scheduling state === */ + atomic_uint rr_ptr; /* Round-robin pointer */ + atomic_uint min_load_idx; /* Cached min load index */ + atomic_uint op_counter; /* Operation counter for updates */ + + /* === Configuration === */ + __u32 update_interval; /* Min load update interval */ + __u8 policy; /* Scheduling policy */ + + /* === Synchronization === */ + pthread_mutex_t cache_lock; /* Lock for structure modifications */ }; -#define LOOP_SWITH_STEP 1 -#define LOOP_SWITH_SLICE 10 -#define UADK_SWITH_PKT_SZ 2048 -/* - * struct sched_ctx_range - define one ctx pos. - * @begin: the start pos in ctxs of config. - * @end: the end pos in ctxx of config. - * @last: the last one which be distributed. - * @valid: the region used flag. - * @lock: lock the currentscheduling region. +/** + * wd_sched_ctx_domain - Scheduling domain with four dimensions + * @region_id: Region identifier (numa_id or device_id) + * @mode: Context mode (SYNC/ASYNC) + * @op_type: Operation type + * @prop: Property (e.g., device type: HW, CE, SOFT) + * @segments: Linked list of context ranges + * @segment_count: Number of segments + * @total_ctx_count: Total contexts across all segments + * @current_segment: Current segment pointer for round-robin + * @current_pos: Current position within segment + * @valid: Domain validity flag + * @lock: Synchronization spinlock */ -struct sched_ctx_region { - __u32 begin; - __u32 end; - __u32 last; +struct wd_sched_ctx_domain { + int region_id; + __u8 mode; + __u32 op_type; + __u8 prop; + + struct wd_sched_ctx_segment *segments; + __u32 segment_count; + __u32 total_ctx_count; + + struct wd_sched_ctx_segment *current_segment; + __u32 current_pos; bool valid; + pthread_mutex_t lock; }; -/* - * wd_sched_info - define the context of the scheduler. - * @ctx_region: define the map for the comp ctxs, using for quickly search. - * the x range: two(sync and async), the y range: - * two(e.g. comp and uncomp) the map[x][y]'s value is the ctx - * begin and end pos. - * @valid: the region used flag. 
- * @region_type: the region's property - * @next_info: next scheduling domain +/** + * wd_sched_domain_hash_node - Hash table collision chain node */ -struct wd_sched_info { - struct sched_ctx_region *ctx_region[SCHED_MODE_BUTT]; // default as HW ctxs - bool valid; - int region_type; - struct wd_sched_info *next_info; +struct wd_sched_domain_hash_node { + struct wd_sched_ctx_domain domain; + struct wd_sched_domain_hash_node *next; }; -#define MAX_SKEY_REGION_NUM 64 -struct dev_region_map { - __u32 dev_id; - __u32 region_id; +/** + * wd_sched_domain_hash_table - Pure dynamic hash table for scheduling domains + * @buckets: Hash table bucket array + * @bucket_size: Number of buckets + * @entry_count: Total entries in table + * @max_chain_length: Maximum chain length for statistics + * @lock: Read-write lock for concurrent access + */ +struct wd_sched_domain_hash_table { + struct wd_sched_domain_hash_node **buckets; + __u32 bucket_size; + __u32 entry_count; + __u32 max_chain_length; + + pthread_mutex_t lock; }; -/* - * The default value for NUMA_NUM_NODES is 2048, - * but in reality, most systems will not have such a large NUMA; - * they will typically have fewer than 64 nodes. +/* ============================================================================ + * Dual-Domain Structure for Session Key + * ============================================================================ */ -#define MAX_NUMA_NODES (NUMA_NUM_NODES >> 5) -/* - * wd_sched_ctx - define the context of the scheduler. - * @policy: define the policy of the scheduler. - * @numa_num: the max numa numbers of the scheduler. - * @type_num: the max operation types of the scheduler. - * @poll_func: the task's poll operation function. - * @numa_map: a map of cpus to devices. - * @sched_info: the context of the scheduler. 
+/** + * wd_sched_key_domain - Session domain with min-heap + * @idx_cache: Index cache with min-heap for load-based selection + * @lock: Synchronization spinlock + * @expanded_count: Track how many times ctx has been expanded + */ +struct wd_sched_key_domain { + struct wd_sched_domain_idx_cache idx_cache; + pthread_mutex_t lock; + __u32 expanded_count; +}; + +/** + * wd_sched_key - Session-level scheduling key + * @region_id: Region identifier + * @type: Operation type + * @mode: Current mode (SYNC/ASYNC) + * @dev_id: Device identifier (for SCHED_POLICY_DEV) + * @ctx_prop: Context property + * @is_stream: Stream mode flag + * @prio_mode: Priority mode + * @pkt_size: Current packet size + * @sync_domain: Min-heap domain for sync contexts + * @async_domain: Min-heap domain for async contexts + * @lock: Synchronization spinlock + */ +struct wd_sched_key { + int region_id; + __u8 type; + __u8 mode; + __u32 dev_id; + __u8 ctx_prop; + __u16 is_stream; + __u16 prio_mode; + __u32 pkt_size; + + struct wd_sched_key_domain sync_domain; + struct wd_sched_key_domain async_domain; + + pthread_mutex_t lock; +}; + +/** + * wd_sched_ctx - Main scheduler context + * @policy: Scheduling policy type + * @type_num: Number of operation types + * @mode_num: Number of modes (SYNC/ASYNC) + * @region_num: Number of regions (numa or devices) + * @poll_func: Poll function for receiving responses + * @domain_hash_table: Global hash table for all domains + * @skey_num: Number of active session keys + * @skey_lock: Lock for skey array + * @skey: Array of session keys + * @poll_tid: Thread IDs for polling */ struct wd_sched_ctx { __u32 policy; __u32 type_num; - __u16 numa_num; - __u16 dev_num; + __u32 mode_num; + __u16 region_num; + user_poll_func poll_func; - int numa_map[MAX_NUMA_NODES]; + struct wd_sched_domain_hash_table *domain_hash_table; __u32 skey_num; pthread_mutex_t skey_lock; - struct sched_key *skey[MAX_SKEY_REGION_NUM]; // supports up to 64 threads region - __u32 poll_tid[MAX_SKEY_REGION_NUM]; - struct wd_sched_balancer balancer; - - struct dev_region_map dev_id_map[DEVICE_REGION_MAX]; - struct wd_sched_info sched_info[MAX_NUMA_NODES]; + struct wd_sched_key *skey[SKEY_MAX_THREAD_NUM]; + __u32 poll_tid[SKEY_MAX_THREAD_NUM]; }; -#define nop() asm volatile("nop") -static void delay_us(int ustime) +/* ============================================================================ + * Hash Table Core Operations + * ============================================================================ + */ + +static bool wd_sched_is_prime(__u32 n) { - int cycle = 2600; // for 2.6GHz CPU - int i, j; + __u32 i; - for (i = 0; i < ustime; i++) { - for (j = 0; j < cycle; j++) - nop(); + if (n <= 1) + return false; + if (n <= 3) + return true; + if (n % 2 == 0 || n % 3 == 0) + return false; + + for (i = 5; i * i <= n; i += 6) { + if (n % i == 0 || n % (i + 2) == 0) + return false; } - usleep(1); + + return true; } -static void sched_skey_param_init(struct wd_sched_ctx *sched_ctx, struct sched_key *skey) +static __u32 wd_sched_find_prime(__u32 n) { - __u32 i; - - pthread_mutex_lock(&sched_ctx->skey_lock); - for (i = 0; i < MAX_SKEY_REGION_NUM; i++) { - if (sched_ctx->skey[i] == NULL) { - sched_ctx->skey[i] = skey; - sched_ctx->skey_num++; - pthread_mutex_unlock(&sched_ctx->skey_lock); - WD_ERR("success: get valid skey node[%u]!\n", i); - return; - } - } - pthread_mutex_unlock(&sched_ctx->skey_lock); - WD_ERR("invalid: skey node number is too much!\n"); + while (!wd_sched_is_prime(n)) + n++; + return n; } -static struct sched_key 
*sched_get_poll_skey(struct wd_sched_ctx *sched_ctx) +static __u32 wd_sched_compute_bucket_size(__u32 estimated_entries) { - __u32 tid = pthread_self(); - __u16 i, tidx = 0; + __u32 target_size; - /* Delay processing within 17us is performed */ - delay_us(tid % 17); - /* Set mapping relationship between the recv tid and the send skey id */ - for (i = 0; i < sched_ctx->skey_num; i++) { - if (sched_ctx->poll_tid[i] == tid) { - //WD_ERR("poll tid ---> skey id:<%u, %u>!\n", i, tid); - tidx = i; - break; - } else if (sched_ctx->poll_tid[i] == 0) { - pthread_mutex_lock(&sched_ctx->skey_lock); - if (sched_ctx->poll_tid[i] == 0) { - //WD_ERR("poll tid<%u> <---> skey id:<%u>!\n", i, tid); - sched_ctx->poll_tid[i] = tid; - tidx = i; - } else { - pthread_mutex_unlock(&sched_ctx->skey_lock); - return NULL; - } - pthread_mutex_unlock(&sched_ctx->skey_lock); - break; - } - } + target_size = (estimated_entries * 4) / 3; - return sched_ctx->skey[tidx]; + if (target_size < WD_SCHED_MIN_BUCKETS) + target_size = WD_SCHED_MIN_BUCKETS; + if (target_size > WD_SCHED_MAX_BUCKETS) + target_size = WD_SCHED_MAX_BUCKETS; + + return wd_sched_find_prime(target_size); } -static bool sched_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +/** + * wd_sched_hash_compute - Compute hash value for four-dimensional domain key + * @region_id: Region identifier + * @mode: Context mode + * @op_type: Operation type + * @prop: Property + * @bucket_size: Hash table bucket count + * + * Combines four dimensions using prime number multipliers. + */ +static inline __u32 wd_sched_hash_compute(int region_id, __u8 mode, + __u32 op_type, __u8 prop, __u32 bucket_size) { - if (key->numa_id >= sched_ctx->numa_num || key->mode >= SCHED_MODE_BUTT || - key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's numa: %d, mode: %u, type: %u!\n", - key->numa_id, key->mode, key->type); - return false; - } + __u32 hash; - return true; + hash = (region_id * HASH_PRIME1) + (mode * HASH_PRIME2) + + (op_type * HASH_PRIME3) + (prop * HASH_PRIME4); + return hash % bucket_size; } -/* - * sched_get_ctx_range - Get ctx range from ctx_map by the wd comp arg +static inline bool wd_sched_domain_key_match( + int region_id1, __u8 mode1, __u32 op_type1, __u8 prop1, + int region_id2, __u8 mode2, __u32 op_type2, __u8 prop2) +{ + return (region_id1 == region_id2 && mode1 == mode2 && + op_type1 == op_type2 && prop1 == prop2); +} + +/** + * wd_sched_hash_table_create - Create hash table + * @estimated_entries: Estimated number of entries + * + * Returns: Initialized hash table or NULL on error */ -static struct sched_ctx_region *sched_get_ctx_range(struct wd_sched_ctx *sched_ctx, - const struct sched_key *key) +static struct wd_sched_domain_hash_table * +wd_sched_hash_table_create(__u32 estimated_entries) { - struct wd_sched_info *sched_info; - int numa_id; + struct wd_sched_domain_hash_table *table; + __u32 bucket_size; + int ret; + + table = calloc(1, sizeof(*table)); + if (!table) + return NULL; + + bucket_size = wd_sched_compute_bucket_size(estimated_entries); + WD_DEBUG("Hash table: estimated_entries=%u, bucket_size=%u\n", + estimated_entries, bucket_size); - sched_info = sched_ctx->sched_info; - if (key->numa_id >= 0 && - sched_info[key->numa_id].ctx_region[key->mode][key->type].valid) - return &sched_info[key->numa_id].ctx_region[key->mode][key->type]; + table->buckets = calloc(bucket_size, sizeof(*table->buckets)); + if (!table->buckets) { + free(table); + return NULL; + } - /* If the key->numa_id is not exist, we should scan for a 
region */ - for (numa_id = 0; numa_id < sched_ctx->numa_num; numa_id++) { - if (sched_info[numa_id].ctx_region[key->mode][key->type].valid) - return &sched_info[numa_id].ctx_region[key->mode][key->type]; + table->bucket_size = bucket_size; + table->entry_count = 0; + table->max_chain_length = 0; + + ret = pthread_mutex_init(&table->lock, NULL); + if (ret) { + free(table->buckets); + free(table); + return NULL; } - return NULL; + return table; } -/* - * sched_get_next_pos_rr - Get next resource pos by RR schedule. - * The second para is reserved for future. - */ -static __u32 sched_get_next_pos_rr(struct sched_ctx_region *region, void *para) +static void wd_sched_hash_table_destroy(struct wd_sched_domain_hash_table *table) { - __u32 pos; - - pthread_mutex_lock(®ion->lock); + struct wd_sched_domain_hash_node *node, *next; + __u32 i; - pos = region->last; + if (!table) + return; - if (pos < region->end) - region->last++; - else - region->last = region->begin; + for (i = 0; i < table->bucket_size; i++) { + node = table->buckets[i]; + while (node) { + next = node->next; + + /* Release segment linked list */ + struct wd_sched_ctx_segment *seg = node->domain.segments; + while (seg) { + struct wd_sched_ctx_segment *next_seg = seg->next; + free(seg); + seg = next_seg; + } - pthread_mutex_unlock(®ion->lock); + pthread_mutex_destroy(&node->domain.lock); + free(node); + node = next; + } + } - return pos; + pthread_mutex_destroy(&table->lock); + free(table->buckets); + free(table); } -/* - * session_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. - * - * The user must init the schedule info through wd_sched_rr_instance - */ -static __u32 session_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, - const int sched_mode) +static struct wd_sched_ctx_domain * +wd_sched_hash_table_lookup(struct wd_sched_domain_hash_table *table, + int region_id, __u8 mode, __u32 op_type, __u8 prop) { - struct sched_ctx_region *region = NULL; - bool ret; + struct wd_sched_domain_hash_node *node; + struct wd_sched_ctx_domain *domain = NULL; + __u32 hash_idx; + __u8 node_idx = 0; - key->mode = sched_mode; - ret = sched_key_valid(sched_ctx, key); - if (!ret) - return INVALID_POS; + if (!table) + return NULL; - region = sched_get_ctx_range(sched_ctx, key); - if (!region) - return INVALID_POS; + hash_idx = wd_sched_hash_compute(region_id, mode, op_type, prop, table->bucket_size); + + pthread_mutex_lock(&table->lock); + node = table->buckets[hash_idx]; + while (node) { + if (wd_sched_domain_key_match( + node->domain.region_id, node->domain.mode, node->domain.op_type, + node->domain.prop, region_id, mode, op_type, prop)) { + domain = &node->domain; + break; + } + node = node->next; + node_idx++; + } + pthread_mutex_unlock(&table->lock); + WD_DEBUG("Get domain hash_idx: %u ------node idx: %u.\n", hash_idx, node_idx); - return sched_get_next_pos_rr(region, NULL); + return domain; } -static handle_t session_sched_init(handle_t h_sched_ctx, void *sched_param) +static struct wd_sched_ctx_domain * +wd_sched_hash_table_insert(struct wd_sched_domain_hash_table *table, + int region_id, __u8 mode, __u32 op_type, __u8 prop) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - struct sched_key *skey; - unsigned int node; + struct wd_sched_domain_hash_node *node, 
*new_node; + struct wd_sched_ctx_domain *existing; + __u32 chain_length; + __u32 hash_idx; + int ret; - if (getcpu(NULL, &node)) { - WD_ERR("failed to get node, errno %d!\n", errno); - return (handle_t)(-errno); - } - if (node == (unsigned int)NUMA_NO_NODE) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); - } + if (!table) + return NULL; - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); - return (handle_t)(-WD_EINVAL); - } + existing = wd_sched_hash_table_lookup(table, region_id, mode, op_type, prop); + if (existing) + return existing; - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); + hash_idx = wd_sched_hash_compute(region_id, mode, op_type, prop, table->bucket_size); + WD_DEBUG("Instance domain hash_idx: %u\n", hash_idx); + + pthread_mutex_lock(&table->lock); + /* Alloc and initialize new domain */ + new_node = calloc(1, sizeof(*new_node)); + if (!new_node) { + pthread_mutex_unlock(&table->lock); + return NULL; } - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - skey->numa_id = sched_ctx->numa_map[node]; - if (wd_need_debug()) - WD_DEBUG("session don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - skey->numa_id = sched_ctx->numa_map[node]; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; + /* Initialize new domain */ + new_node->domain.region_id = region_id; + new_node->domain.mode = mode; + new_node->domain.op_type = op_type; + new_node->domain.prop = prop; + new_node->domain.segments = NULL; + new_node->domain.segment_count = 0; + new_node->domain.total_ctx_count = 0; + new_node->domain.current_segment = NULL; + new_node->domain.current_pos = 0; + new_node->domain.valid = false; + + ret = pthread_mutex_init(&new_node->domain.lock, NULL); + if (ret) { + pthread_mutex_unlock(&table->lock); + free(new_node); + return NULL; } - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; + new_node->next = table->buckets[hash_idx]; + table->buckets[hash_idx] = new_node; - skey->sync_ctxid[0] = session_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = session_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; + table->entry_count++; + if (new_node->next) { + chain_length++; + if (chain_length > table->max_chain_length) + table->max_chain_length = chain_length; } - return (handle_t)skey; + pthread_mutex_unlock(&table->lock); -out: - free(skey); - return (handle_t)(-WD_EINVAL); + WD_DEBUG("Created new domain: region=%d, mode=%u, op_type=%u, prop=%u\n", + region_id, mode, op_type, prop); + + return &new_node->domain; } -/* - * session_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
+/* ============================================================================ + * Segment List Operations + * ============================================================================ + */ + +/** + * wd_sched_domain_add_segment - Add context range segment to domain + * @domain: Target domain + * @begin: Start context index + * @end: End context index (inclusive) * - * The user must init the schedule info through session_sched_init + * Supports non-contiguous context ranges via segment list. */ -static __u32 session_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) +static int wd_sched_domain_add_segment(struct wd_sched_ctx_domain *domain, + __u32 begin, __u32 end) { - struct sched_key *key = (struct sched_key *)sched_key; + struct wd_sched_ctx_segment *seg, *new_seg; - if (unlikely(!h_sched_ctx || !key)) { - WD_ERR("invalid: sched ctx or key is NULL!\n"); - return INVALID_POS; - } + if (!domain || begin > end) + return -WD_EINVAL; - /* return in do task */ - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[0]; - return key->async_ctxid[0]; -} + new_seg = calloc(1, sizeof(*new_seg)); + if (!new_seg) + return -WD_ENOMEM; -static int session_poll_region(struct wd_sched_ctx *sched_ctx, __u32 begin, - __u32 end, __u32 expect, __u32 *count) -{ - __u32 poll_num = 0; - __u32 i; - int ret; + new_seg->begin = begin; + new_seg->end = end; + new_seg->next = NULL; - /* i is the pos of sched_ctxs, the max is end */ - for (i = begin; i <= end; i++) { - /* - * RR schedule, one time poll one package, - * poll_num is always not more than one here. - */ - ret = sched_ctx->poll_func(i, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; - *count += poll_num; - if (*count == expect) - break; + pthread_mutex_lock(&domain->lock); + + /* Append to segment list tail */ + if (!domain->segments) { + domain->segments = new_seg; + } else { + seg = domain->segments; + while (seg->next) + seg = seg->next; + seg->next = new_seg; } + domain->segment_count++; + domain->total_ctx_count += (end - begin + 1); + + /* Initialize polling state */ + if (!domain->current_segment) + domain->current_segment = domain->segments; + + pthread_mutex_unlock(&domain->lock); + + WD_DEBUG("Added segment to domain: begin=%u, end=%u, total_count=%u\n", + begin, end, domain->total_ctx_count); + return 0; } -static int session_poll_policy_rr(struct wd_sched_ctx *sched_ctx, int numa_id, - __u32 expect, __u32 *count) +/** + * wd_sched_domain_get_next_rr - Get next context via round-robin from domain + * @domain: Source domain + * + * Returns: Next context index in round-robin order + * Time complexity: O(1) + */ +static __u32 wd_sched_domain_get_next_rr(struct wd_sched_ctx_domain *domain) { - struct sched_ctx_region **region = sched_ctx->sched_info[numa_id].ctx_region; - __u32 begin, end; - __u32 i; - int ret; + __u32 pos, next_pos; - for (i = 0; i < sched_ctx->type_num; i++) { - if (!region[SCHED_MODE_ASYNC][i].valid) - continue; + if (!domain || !domain->segments || domain->total_ctx_count == 0) + return INVALID_POS; - begin = region[SCHED_MODE_ASYNC][i].begin; - end = region[SCHED_MODE_ASYNC][i].end; - ret = session_poll_region(sched_ctx, begin, end, expect, count); - if (unlikely(ret)) - return ret; + pthread_mutex_lock(&domain->lock); + if (!domain->current_segment) + domain->current_segment = domain->segments; + + pos = domain->current_pos; + next_pos = pos + 1; + /* Move to next position */ + if (next_pos <= domain->current_segment->end) { + 
domain->current_pos = next_pos; + } else { + /* Move to next segment */ + if (domain->current_segment->next) { + domain->current_segment = domain->current_segment->next; + domain->current_pos = domain->current_segment->begin; + next_pos = domain->current_segment->begin; + } else { + /* Loop back to beginning */ + domain->current_segment = domain->segments; + next_pos = domain->segments->begin; + } + domain->current_pos = next_pos; } - return 0; + pthread_mutex_unlock(&domain->lock); + WD_DEBUG("Get next ctx: pos=%u, current_pos=%u\n", pos, domain->current_pos); + + return pos; } -/* - * session_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. - * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. +/* ============================================================================ + * SKey Domain Cache Management Functions + * ============================================================================ + */ +/** + * wd_sched_skey_cache_init - Initialize skey domain cache + * @cache: Pointer to cache structure + * @policy: Scheduling policy * @sched_type: Scheduling policy type (cannot modify per API contract) + * + * Initialize fixed array cache with invalid positions and zero loads. */ -static int session_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +static int wd_sched_skey_cache_init(struct wd_sched_domain_idx_cache *cache, __u8 policy) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct wd_sched_info *sched_info; - __u32 loop_time = 0; - __u32 last_count = 0; - __u16 i, region_mum; - int ret; + int i; - if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { - WD_ERR("invalid: sched ctx or poll_func is NULL or count is zero!\n"); + if (!cache) { + WD_ERR("Invalid cache pointer\n"); return -WD_EINVAL; } - if (unlikely(sched_ctx->numa_num > MAX_NUMA_NODES)) { - WD_ERR("invalid: ctx's numa number is %u!\n", sched_ctx->numa_num); - return -WD_EINVAL; + /* Initialize array with invalid positions */ + for (i = 0; i < SKEY_CTX_MAX_NUM; i++) { + cache->idx_list[i] = INVALID_POS; + atomic_store(&cache->load_values[i], 0); } - sched_info = sched_ctx->sched_info; - if (sched_ctx->policy == SCHED_POLICY_DEV) - region_mum = sched_ctx->dev_num; - else - region_mum = sched_ctx->numa_num; - - /* - * Try different region's ctx if we can't receive any - * package last time, it is more efficient. In most - * bad situation, poll ends after MAX_POLL_TIMES loop. - */ - while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < region_mum;) { - /* If current numa is not valid, find next. */ - if (!sched_info[i].valid) { - i++; - continue; - } - - last_count = *count; - ret = session_poll_policy_rr(sched_ctx, i, expect, count); - if (unlikely(ret)) - return ret; + /* Initialize atomic counters */ + atomic_store(&cache->rr_ptr, 0); + atomic_store(&cache->min_load_idx, 0); + atomic_store(&cache->op_counter, 0); - if (expect == *count) - return 0; + /* Set configuration */ + cache->valid_count = 0; + cache->update_interval = SKEY_LOAD_UPDATE_INTERVAL; + cache->policy = policy; - /* - * If no package is received, find next numa, - * otherwise, keep receiving packets at this node. 
- */ - if (last_count == *count) - i++; - } + /* Initialize structure lock */ + if (pthread_mutex_init(&cache->cache_lock, NULL)) { + WD_ERR("Failed to init cache lock\n"); + return -WD_EINVAL; } - return 0; + return WD_SUCCESS; } - -static handle_t sched_none_init(handle_t h_sched_ctx, void *sched_param) +/** + * wd_sched_skey_cache_uninit - Cleanup skey domain cache + * @cache: Pointer to cache structure + * + * Release resources and reset cache state. + */ +static void wd_sched_skey_cache_uninit(struct wd_sched_domain_idx_cache *cache) { - return (handle_t)0; -} + if (!cache) + return; -static __u32 sched_none_pick_next_ctx(handle_t sched_ctx, - void *sched_key, const int sched_mode) -{ - return 0; -} + pthread_mutex_destroy(&cache->cache_lock); -static int sched_none_poll_policy(handle_t h_sched_ctx, - __u32 expect, __u32 *count) + /* Reset cache state */ + for (int i = 0; i < SKEY_CTX_MAX_NUM; i++) { + cache->idx_list[i] = INVALID_POS; + atomic_store(&cache->load_values[i], 0); + } + + cache->valid_count = 0; +} +/** + * wd_sched_skey_add_ctx - Add ctx to skey domain cache + * @cache: Pointer to cache structure + * @ctx_id: Context ID to add + * + * Add ctx to next available position in fixed array. + * Returns 0 on success, negative error code on failure. + */ +static int wd_sched_skey_add_ctx(struct wd_sched_domain_idx_cache *cache, __u32 ctx_id) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - __u32 loop_times = MAX_POLL_TIMES + expect; - __u32 poll_num = 0; - int ret; + __u32 i; - if (!sched_ctx || !sched_ctx->poll_func) { - WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + if (!cache || ctx_id == INVALID_POS) { + WD_ERR("Invalid parameters\n"); return -WD_EINVAL; } - while (loop_times > 0) { - /* Default use ctx 0 */ - loop_times--; - ret = sched_ctx->poll_func(0, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; + pthread_mutex_lock(&cache->cache_lock); + /* Check if cache is full */ + if (cache->valid_count >= SKEY_CTX_MAX_NUM) { + pthread_mutex_unlock(&cache->cache_lock); + WD_ERR("SKey cache full, cannot add more queues\n"); + return -WD_EINVAL; + } - *count += poll_num; - if (*count == expect) - break; + /* Check for duplicate ctx_id */ + for (i = 0; i < cache->valid_count; i++) { + if (cache->idx_list[i] == ctx_id) { + WD_ERR("Context %u already exists in skey cache at position %d\n", ctx_id, i); + pthread_mutex_unlock(&cache->cache_lock); + return -WD_EEXIST; + } } - return 0; -} + /* Update min load index if as the new ctx */ + atomic_store(&cache->min_load_idx, ctx_id); -static handle_t sched_single_init(handle_t h_sched_ctx, void *sched_param) -{ - return (handle_t)0; -} + /* Add to next available position */ + cache->idx_list[cache->valid_count] = ctx_id; + atomic_store(&cache->load_values[cache->valid_count], 0); + cache->valid_count++; + pthread_mutex_unlock(&cache->cache_lock); -static __u32 sched_single_pick_next_ctx(handle_t sched_ctx, - void *sched_key, const int sched_mode) -{ -#define CTX_ASYNC 1 -#define CTX_SYNC 0 + WD_DEBUG("Added ctx %u to skey cache at position %u, list valid_count: %u\n", + ctx_id, cache->valid_count - 1, cache->valid_count); - if (sched_mode) - return CTX_ASYNC; - else - return CTX_SYNC; + return WD_SUCCESS; } -static int sched_single_poll_policy(handle_t h_sched_ctx, - __u32 expect, __u32 *count) +/** + * wd_sched_skey_remove_ctx - Remove ctx from skey domain cache + * @cache: Pointer to cache structure + * @ctx_id: Context ID to remove + * + * Remove 
ctx by shifting array elements to maintain continuity. + * Returns 0 on success, negative error code if not found. + */ +static int wd_sched_skey_remove_ctx(struct wd_sched_domain_idx_cache *cache, + __u32 ctx_id) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - __u32 loop_times = MAX_POLL_TIMES + expect; - __u32 poll_num = 0; - int ret; + int i, found = 0; + __u32 current_min; - if (!sched_ctx || !sched_ctx->poll_func) { - WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + if (!cache) { + WD_ERR("Invalid cache pointer\n"); return -WD_EINVAL; } - while (loop_times > 0) { - /* Default async mode use ctx 1 */ - loop_times--; - ret = sched_ctx->poll_func(1, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; - - *count += poll_num; - if (*count == expect) + pthread_mutex_lock(&cache->cache_lock); + /* Find and remove the ctx */ + for (i = 0; i < cache->valid_count; i++) { + if (cache->idx_list[i] == ctx_id) { + found = 1; break; + } } - return 0; -} - -static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) -{ - bool found = false; - int i; - - if (key->mode >= SCHED_MODE_BUTT || key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's device id: %u, mode: %u, type: %u!\n", - key->dev_id, key->mode, key->type); - return false; + if (!found) { + WD_ERR("Context %u not found in skey cache\n", ctx_id); + pthread_mutex_unlock(&cache->cache_lock); + return -WD_ENODEV; } - for (i = 0; i < sched_ctx->dev_num; i++) { - if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { - found = true; - break; - } + /* Shift remaining elements to fill the gap */ + for (; i < cache->valid_count - 1; i++) { + cache->idx_list[i] = cache->idx_list[i + 1]; + atomic_store(&cache->load_values[i], + atomic_load(&cache->load_values[i + 1])); } - if (!found) { - WD_ERR("invalid: dev_id %u is not registered!\n", key->dev_id); - return false; + /* Clear last position */ + cache->idx_list[cache->valid_count - 1] = INVALID_POS; + atomic_store(&cache->load_values[cache->valid_count - 1], 0); + cache->valid_count--; + + /* Reset pointers if cache becomes empty */ + if (cache->valid_count == 0) { + atomic_store(&cache->rr_ptr, 0); + atomic_store(&cache->min_load_idx, 0); + } else { + /* Adjust min load index if necessary */ + current_min = atomic_load(&cache->min_load_idx); + if (current_min >= cache->valid_count) + atomic_store(&cache->min_load_idx, 0); } + pthread_mutex_unlock(&cache->cache_lock); + WD_DEBUG("Removed ctx %u from skey cache, valid_count: %u\n", + ctx_id, cache->valid_count); - return true; + return WD_SUCCESS; } -/* - * sched_dev_get_region - Get ctx region from ctx_map by the wd comp arg +/** + * wd_sched_update_min_load - Update cached min load index + * @cache: Pointer to cache structure + * + * Scan valid queues to find the one with minimum load. + * Called periodically to avoid frequent scanning. 
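
A minimal sketch of the fixed-array bookkeeping behind wd_sched_skey_add_ctx() and wd_sched_skey_remove_ctx(): duplicates are rejected, and removal shifts the tail down so the valid prefix stays contiguous. Locking and the per-slot load counters are left out, and the names are illustrative only.

#include <stdio.h>

#define CACHE_MAX   8
#define INVALID_IDX 0xFFFFFFFFU

struct idx_cache {
    unsigned int idx_list[CACHE_MAX];
    unsigned int valid_count;
};

static int cache_add(struct idx_cache *c, unsigned int id)
{
    unsigned int i;

    if (c->valid_count >= CACHE_MAX)
        return -1;
    for (i = 0; i < c->valid_count; i++)
        if (c->idx_list[i] == id)
            return -1;  /* duplicate */
    c->idx_list[c->valid_count++] = id;
    return 0;
}

static int cache_remove(struct idx_cache *c, unsigned int id)
{
    unsigned int i;

    for (i = 0; i < c->valid_count; i++)
        if (c->idx_list[i] == id)
            break;
    if (i == c->valid_count)
        return -1;  /* not found */
    /* Shift the tail down so the valid prefix stays contiguous. */
    for (; i + 1 < c->valid_count; i++)
        c->idx_list[i] = c->idx_list[i + 1];
    c->idx_list[--c->valid_count] = INVALID_IDX;
    return 0;
}

int main(void)
{
    struct idx_cache c = { .valid_count = 0 };
    unsigned int i;

    cache_add(&c, 4);
    cache_add(&c, 7);
    cache_add(&c, 9);
    cache_remove(&c, 7);
    for (i = 0; i < c.valid_count; i++)
        printf("%u ", c.idx_list[i]);   /* prints: 4 9 */
    printf("\n");
    return 0;
}
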
*/ -static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ctx, - const struct sched_key *key) +static void wd_sched_update_min_load(struct wd_sched_domain_idx_cache *cache) { - struct wd_sched_info *sched_info; - int i, region_id; - - for (i = 0; i < sched_ctx->dev_num; i++) { - if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { - region_id = sched_ctx->dev_id_map[i].region_id; - sched_info = &sched_ctx->sched_info[region_id]; - if (sched_info->ctx_region[key->mode][key->type].valid) - return &sched_info->ctx_region[key->mode][key->type]; + __u32 min_load = UINT_MAX; + __u32 min_idx = 0; + __u32 i, load; + + if (cache->valid_count == 0) + return; + + /* Simple linear scan - efficient for small arrays */ + for (i = 0; i < cache->valid_count; i++) { + load = atomic_load(&cache->load_values[i]); + if (load < min_load) { + min_load = load; + min_idx = i; } } - /* - * If the scheduling domain of dev_id does not exist, - * taskes operations cannot be executed using queues from other devices; - * otherwise, an SMMU error will occur. - */ - return NULL; + atomic_store(&cache->min_load_idx, min_idx); } -/* - * session_dev_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. - * - * The user must init the schedule info through wd_sched_rr_instance +/** + * wd_sched_skey_pick_next - Pick next ctx from skey domain cache + * @cache: Pointer to cache structure + * + * Select next ctx based on scheduling policy: + * - RR: Simple round-robin selection + * - LOAD_BALANCE: Choose ctx with minimum load + * + * Returns selected ctx index, or INVALID_POS if no valid ctx. */ -static __u32 session_dev_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, - const int sched_mode) +static __u32 wd_sched_skey_pick_next(struct wd_sched_domain_idx_cache *cache, __u32 *ctx_idx) { - struct sched_ctx_region *region = NULL; - bool ret; + __u32 selected_idx; + __u32 op_count; - key->mode = sched_mode; - ret = sched_dev_key_valid(sched_ctx, key); - if (!ret) + if (!cache || cache->valid_count == 0) { return INVALID_POS; + } - region = sched_dev_get_region(sched_ctx, key); - if (!region) + switch (cache->policy) { + case SCHED_POLICY_RR: + case SCHED_POLICY_NONE: + case SCHED_POLICY_SINGLE: + case SCHED_POLICY_DEV: + case SCHED_POLICY_LOOP: + case SCHED_POLICY_INSTR: + /* Round-robin: atomic increment and modulo */ + selected_idx = atomic_fetch_add(&cache->rr_ptr, 1) % cache->valid_count; + break; + case SCHED_POLICY_HUNGRY: + /* Update min load periodically */ + op_count = atomic_fetch_add(&cache->op_counter, 1); + if (op_count % cache->update_interval == 0) + wd_sched_update_min_load(cache); + + /* Load balancing: use cached min load index */ + selected_idx = atomic_load(&cache->min_load_idx); + break; + default: + WD_ERR("Unknown scheduling policy: %d\n", cache->policy); + selected_idx = INVALID_POS; + break; + } + + /* Ensure index is within valid range */ + if (selected_idx >= cache->valid_count) return INVALID_POS; - return sched_get_next_pos_rr(region, NULL); + *ctx_idx = selected_idx; + return cache->idx_list[selected_idx]; } -static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +/** + * wd_sched_skey_update_load - Update load for a specific ctx + * @cache: Pointer to cache structure + * @ctx_idx: Context index in list + * @delta: Load delta (positive for send, negative for 
receive) + * + * Atomically update load counter for the specified ctx. + * Returns 0 on success, negative error code if ctx not found. + */ +static int wd_sched_skey_update_load(struct wd_sched_domain_idx_cache *cache, + __u32 ctx_idx, int delta) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - struct sched_key *skey; - unsigned int node; + /* Atomic update without locking */ + if (delta > 0) + atomic_fetch_add(&cache->load_values[ctx_idx], delta); + else + atomic_fetch_sub(&cache->load_values[ctx_idx], -delta); + return WD_SUCCESS; +} - if (getcpu(NULL, &node)) { - WD_ERR("failed to get numa node, errno %d!\n", errno); - return (handle_t)(-errno); - } - if (node == (unsigned int)NUMA_NO_NODE) { - WD_ERR("invalid: failed to get numa node for dev sched init!\n"); - return (handle_t)(-WD_EINVAL); - } +/* ============================================================================ + * Session Key Domain Initialization + * ============================================================================ + */ - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); - return (handle_t)(-WD_EINVAL); - } +/** + * wd_sched_skey_domain_init - Initialize session domain with min-heap + * @key_domain: Target key domain + * @ctx_idx: context indices idx + * @policy: current session's policy + * + * Initializes dual-domain structure for session. + */ +static int wd_sched_skey_domain_init(struct wd_sched_key_domain *key_domain, + __u32 ctx_idx, __u8 policy) +{ + int ret; - if (!param) { - WD_DEBUG("no-sva session don't set scheduler parameters!\n"); - return (handle_t)(-WD_EINVAL); - } + if (!key_domain) + return -WD_EINVAL; - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } + ret = wd_sched_skey_cache_init(&key_domain->idx_cache, policy); + if (ret) + return ret; - skey->type = param->type; - skey->dev_id = param->dev_id; + ret = wd_sched_skey_add_ctx(&key_domain->idx_cache, ctx_idx); + if (ret) + goto init_err; - skey->sync_ctxid[0] = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; + ret = pthread_mutex_init(&key_domain->lock, NULL); + if (ret) { + goto add_ctx_err; } - return (handle_t)skey; + key_domain->expanded_count = 0; -out: - free(skey); - return (handle_t)(-WD_EINVAL); + return 0; + +add_ctx_err: + wd_sched_skey_remove_ctx(&key_domain->idx_cache, ctx_idx); +init_err: + wd_sched_skey_cache_uninit(&key_domain->idx_cache); + return ret; } -static struct sched_ctx_region *loop_get_near_region( - struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +/** + * wd_sched_skey_domain_destroy - Release session domain resources + */ +static void wd_sched_skey_domain_destroy(struct wd_sched_key_domain *key_domain) { - struct wd_sched_info *sched_info, *demon_info; - int numa_id; - - /* If the key->numa_id is not exist, we should scan for a valid region */ - for (numa_id = 0; numa_id < sched_ctx->numa_num; numa_id++) { - sched_info = sched_ctx->sched_info + numa_id; - if (sched_info->valid) { - demon_info = sched_info; - while (demon_info) { - if (demon_info->valid) - return &demon_info->ctx_region[key->mode][key->type]; - demon_info = 
demon_info->next_info; - } - } - } + if (!key_domain) + return; - return NULL; + pthread_mutex_destroy(&key_domain->lock); + wd_sched_skey_cache_uninit(&key_domain->idx_cache); } -/* - * loop_get_ctx_range - Get ctx range from ctx_map by the wd comp arg +/** + * wd_sched_poll_skey - Poll contexts for scheduler session + * @sched_ctx: Scheduler context + * @skey: Session key + * @expect: Expected number of responses + * @count: Actual response count (output) + * + * Polls all contexts in session domains and updates load values. */ -static struct sched_ctx_region *loop_get_ctx_range( - struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +static int wd_sched_poll_skey(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey, + __u32 expect, __u32 *count) { - struct wd_sched_info *sched_region, *sched_info; - int ctx_prop = key->ctx_prop; + __u32 sum_poll_num = 0; + __u32 current_load; + __u32 poll_num; + __u32 idx, i; + int ret; + + /* Poll async domain contexts */ + for (i = 0; i < skey->async_domain.idx_cache.valid_count; i++) { + idx = skey->async_domain.idx_cache.idx_list[i]; + + poll_num = 0; + ret = sched_ctx->poll_func(idx, expect, &poll_num); + if ((ret < 0) && (ret != -EAGAIN)) + return ret; - if (key->numa_id < 0) - return loop_get_near_region(sched_ctx, key); + if (poll_num > 0) + sum_poll_num += poll_num; - sched_region = sched_ctx->sched_info; - sched_info = sched_region + key->numa_id; - while (sched_info) { - if (sched_info->valid && ctx_prop == sched_info->region_type && - sched_info->ctx_region[key->mode][key->type].valid) - return &sched_info->ctx_region[key->mode][key->type]; - sched_info = sched_info->next_info; + /* Update load value for this context */ + if (skey->async_domain.idx_cache.policy == SCHED_POLICY_HUNGRY) + wd_sched_skey_update_load(&skey->async_domain.idx_cache, i, poll_num); } + *count = sum_poll_num; - WD_ERR("failed to get valid sched region!\n"); - return NULL; + return 0; } -/* - * loop_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
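
A minimal sketch of the aggregation loop in wd_sched_poll_skey() above: every cached ctx is polled once, -EAGAIN simply means that ctx had nothing ready, and only a real error aborts the walk. poll_one() is a hypothetical stand-in for the registered poll_func callback.

#include <errno.h>
#include <stdio.h>

static int poll_one(unsigned int idx, unsigned int expect, unsigned int *cnt)
{
    /* Pretend only ctx 2 has finished requests. */
    if (idx != 2)
        return -EAGAIN;
    *cnt = expect < 3 ? expect : 3;
    return 0;
}

static int poll_all(const unsigned int *ctxs, unsigned int n,
                    unsigned int expect, unsigned int *count)
{
    unsigned int i, got, sum = 0;
    int ret;

    for (i = 0; i < n; i++) {
        got = 0;
        ret = poll_one(ctxs[i], expect, &got);
        if (ret < 0 && ret != -EAGAIN)
            return ret; /* real error: stop */
        sum += got;     /* -EAGAIN: nothing ready on this ctx, keep going */
    }
    *count = sum;
    return 0;
}

int main(void)
{
    unsigned int ctxs[] = { 0, 2, 5 };
    unsigned int count = 0;

    poll_all(ctxs, 3, 8, &count);
    printf("polled %u responses\n", count); /* polled 3 responses */
    return 0;
}
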
- * - * The user must init the schedule info through wd_sched_rr_instance +/* ============================================================================ + * Utility Functions + * ============================================================================ */ -static __u32 loop_sched_init_ctx(struct wd_sched_ctx *sched_ctx, - struct sched_key *key, const int sched_mode) + +#define nop() asm volatile("nop") +static void delay_us(int ustime) { - struct sched_ctx_region *region = NULL; - bool ret; + int cycle = 2600; + int i, j; - key->mode = sched_mode; - ret = sched_key_valid(sched_ctx, key); - if (!ret) - return INVALID_POS; + for (i = 0; i < ustime; i++) { + for (j = 0; j < cycle; j++) + nop(); + } + usleep(1); +} - region = loop_get_ctx_range(sched_ctx, key); - if (!region) - return INVALID_POS; +static void sched_skey_param_init(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey) +{ + __u32 i; - return sched_get_next_pos_rr(region, NULL); + pthread_mutex_lock(&sched_ctx->skey_lock); + for (i = 0; i < SKEY_MAX_THREAD_NUM; i++) { + if (sched_ctx->skey[i] == NULL) { + sched_ctx->skey[i] = skey; + sched_ctx->skey_num++; + pthread_mutex_unlock(&sched_ctx->skey_lock); + WD_ERR("success: get valid skey node[%u]!\n", i); + return; + } + } + pthread_mutex_unlock(&sched_ctx->skey_lock); + WD_ERR("invalid: skey node number is too much!\n"); } -static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) +static handle_t sched_session_common_init(struct wd_sched_ctx *sched_ctx, + struct sched_params *param) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - int ctx_prop; + struct wd_sched_key *skey; + unsigned int node; + int ret = 0; - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + if (getcpu(NULL, &node)) { + WD_ERR("failed to get node, errno %d!\n", errno); + return (handle_t)(-errno); } if (!sched_ctx) { @@ -780,217 +1007,265 @@ static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); + skey = malloc(sizeof(struct wd_sched_key)); if (!skey) { WD_ERR("failed to alloc memory for session sched key!\n"); return (handle_t)(-WD_ENOMEM); } + memset(skey, 0, sizeof(struct wd_sched_key)); if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_HW; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; + skey->region_id = node; + if (wd_need_debug()) + WD_DEBUG("session don't set scheduler parameters!\n"); } else { + if (param->numa_id >= 0) { + skey->region_id = param->numa_id; + } else { + skey->region_id = node; + } skey->type = param->type; - skey->numa_id = param->numa_id; skey->ctx_prop = param->ctx_prop; } - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; - - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = 
loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; - } - WD_ERR("sync_ctxid is: %u; async_ctxid is: %u!\n", skey->sync_ctxid[0], skey->async_ctxid[0]); - ctx_prop = skey->ctx_prop; - skey->ctx_prop = UADK_CTX_CE_INS; - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - skey->ctx_prop = ctx_prop; - if (skey->sync_ctxid[UADK_CTX_CE_INS] == INVALID_POS && - skey->async_ctxid[UADK_CTX_CE_INS] == INVALID_POS) { - WD_ERR("failed to get valid CE sync_ctxid or async_ctxid!\n"); - skey->sync_ctxid[UADK_CTX_CE_INS] = skey->sync_ctxid[0]; - skey->async_ctxid[UADK_CTX_CE_INS] = skey->async_ctxid[0]; - } - - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); - return (handle_t)skey; out: free(skey); - return (handle_t)(-WD_EINVAL); + return (handle_t)(-WD_EINVAL); } -static __u32 wd_sched_special_pick(struct sched_key *key, int sched_mode) +static struct wd_sched_key *sched_get_poll_skey(struct wd_sched_ctx *sched_ctx) { - if (key->is_stream) { - if (key->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_CE_INS]; - else - return key->async_ctxid[UADK_CTX_CE_INS]; - } else { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_HW]; - else - return key->async_ctxid[UADK_CTX_HW]; + __u32 ctx_num = sched_ctx->skey_num; + __u32 tid = pthread_self(); + __u16 tpos, start_pos; + __u16 i, tidx = 0; + + /* Randomize the initial query position. */ + start_pos = tid % ctx_num; + + for (i = 0; i < ctx_num; i++) { + /* Each thread re-determines the starting traversal position based on its tid. */ + tpos = (start_pos + i) % ctx_num; + if (sched_ctx->poll_tid[tpos] == tid) { + tidx = tpos; + break; + } else if (sched_ctx->poll_tid[tpos] == 0) { + pthread_mutex_lock(&sched_ctx->skey_lock); + if (sched_ctx->poll_tid[tpos] == 0) { + sched_ctx->poll_tid[tpos] = tid; + tidx = tpos; + } else { + pthread_mutex_unlock(&sched_ctx->skey_lock); + return NULL; + } + pthread_mutex_unlock(&sched_ctx->skey_lock); + break; } } - return INVALID_POS; + return sched_ctx->skey[tidx]; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
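
A minimal sketch of the two selection policies implemented by wd_sched_skey_pick_next() and wd_sched_update_min_load() above: round-robin picks by an atomic counter modulo the slot count, while the load-based policy reads a cached minimum that is rescanned only every N picks. Types, names and the interval value are simplified stand-ins.

#include <stdatomic.h>
#include <stdio.h>

#define CTX_MAX              4
#define LOAD_UPDATE_INTERVAL 16

struct pick_cache {
    unsigned int idx_list[CTX_MAX];
    atomic_uint load[CTX_MAX];
    atomic_uint rr_ptr;
    atomic_uint min_idx;
    atomic_uint op_counter;
    unsigned int valid_count;
};

/* Linear rescan of per-slot loads; cheap for a small fixed array. */
static void update_min_load(struct pick_cache *c)
{
    unsigned int i, cur, min = ~0U, min_i = 0;

    for (i = 0; i < c->valid_count; i++) {
        cur = atomic_load(&c->load[i]);
        if (cur < min) {
            min = cur;
            min_i = i;
        }
    }
    atomic_store(&c->min_idx, min_i);
}

/* Round-robin: the position comes from an atomic counter modulo the count. */
static unsigned int pick_rr(struct pick_cache *c)
{
    return c->idx_list[atomic_fetch_add(&c->rr_ptr, 1) % c->valid_count];
}

/* Load-based: use the cached minimum, rescan it only every N picks. */
static unsigned int pick_min_load(struct pick_cache *c)
{
    if (atomic_fetch_add(&c->op_counter, 1) % LOAD_UPDATE_INTERVAL == 0)
        update_min_load(c);
    return c->idx_list[atomic_load(&c->min_idx)];
}

int main(void)
{
    struct pick_cache c = { .idx_list = { 3, 5, 7 }, .valid_count = 3 };
    unsigned int i;

    atomic_store(&c.load[0], 9);    /* pretend ctx 3 is busy */
    for (i = 0; i < 4; i++)
        printf("%u ", pick_rr(&c)); /* prints: 3 5 7 3 */
    printf("\npick_min_load: %u\n", pick_min_load(&c)); /* 5, skips busy ctx 3 */
    return 0;
}
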
+/** + * session_sched_init_ctx - Pre-fetch single context from domain for session + * @sched_ctx: Scheduler context + * @region_id: Region identifier + * @op_type: Operation type + * @prop: Property + * @sched_mode: Mode (SYNC/ASYNC) * - * The user must init the schedule info through session_sched_init + * Returns: Context index from domain */ -static __u32 loop_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) +static __u32 session_sched_init_ctx(struct wd_sched_ctx *sched_ctx, + int region_id, __u32 op_type, __u8 prop, + const int sched_mode) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *key = (struct sched_key *)sched_key; - struct wd_sched_balancer *balancer = &sched_ctx->balancer; + struct wd_sched_ctx_domain *domain = NULL; - if (unlikely(!h_sched_ctx || !key)) { - WD_ERR("invalid: sched ctx or key is NULL!\n"); + if (region_id >= sched_ctx->region_num || sched_mode >= SCHED_MODE_BUTT || + op_type >= sched_ctx->type_num || prop >= UADK_CTX_MAX) { + WD_ERR("invalid: region: %d, mode: %d, type: %u!, prop: %u\n", + region_id, sched_mode, op_type, prop); return INVALID_POS; } - if (key->sync_ctxid[UADK_CTX_HW] == INVALID_POS || - key->async_ctxid[UADK_CTX_HW] == INVALID_POS) - return session_sched_pick_next_ctx(h_sched_ctx, sched_key, sched_mode); + if (!sched_ctx->domain_hash_table) + return INVALID_POS; - if (key->is_stream) - return wd_sched_special_pick(key, sched_mode); + domain = wd_sched_hash_table_lookup(sched_ctx->domain_hash_table, + region_id, sched_mode, op_type, prop); + if (!domain || !domain->valid) + return INVALID_POS; + + return wd_sched_domain_get_next_rr(domain); +} + +/** + * session_sched_domain_destroy - Destroy session domains + * @skey: Session key to destroy domains for + * + * Releases all resources associated with session domains. + */ +static void session_sched_domain_destroy(struct wd_sched_key *skey) +{ + if (!skey) + return; + + /* Destroy both sync and async domains */ + wd_sched_skey_domain_destroy(&skey->sync_domain); + wd_sched_skey_domain_destroy(&skey->async_domain); + + WD_DEBUG("Destroyed session domains for skey\n"); +} + +/** + * session_sched_domain_init - Initialize session domains with sync/async contexts + * @sched_ctx: Scheduler context + * @skey: Session key to initialize + * + * Pre-fetches sync and async contexts and initializes corresponding domains. + * Returns: 0 on success, negative error code on failure. 
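
A minimal sketch of the per-thread slot claim used by sched_get_poll_skey() above: each poller starts at tid % n, reuses a slot it already owns, and otherwise claims the first free one with a re-check under the lock. The integer tid and owner array are simplifications of the pthread_self()/poll_tid[] bookkeeping in the patch.

#include <pthread.h>
#include <stdio.h>

#define SLOT_NUM 4

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long slot_owner[SLOT_NUM];  /* 0 means free */

static int claim_slot(unsigned long tid)
{
    unsigned int start = tid % SLOT_NUM;
    unsigned int i, pos;

    for (i = 0; i < SLOT_NUM; i++) {
        pos = (start + i) % SLOT_NUM;
        if (slot_owner[pos] == tid)
            return pos;     /* already ours */
        if (slot_owner[pos] == 0) {
            pthread_mutex_lock(&slot_lock);
            /* Re-check under the lock before claiming. */
            if (slot_owner[pos] == 0)
                slot_owner[pos] = tid;
            pthread_mutex_unlock(&slot_lock);
            if (slot_owner[pos] == tid)
                return pos;
        }
    }
    return -1;  /* all slots taken by other threads */
}

int main(void)
{
    printf("thread 7 -> slot %d\n", claim_slot(7));  /* claims slot 3 */
    printf("thread 7 -> slot %d\n", claim_slot(7));  /* reuses slot 3 */
    printf("thread 3 -> slot %d\n", claim_slot(3));  /* slot 3 busy -> slot 0 */
    return 0;
}
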
+ */ +static int session_sched_domain_init(struct wd_sched_ctx *sched_ctx, + struct wd_sched_key *skey) +{ + __u32 sync_ctx, async_ctx; - // Small packets go directly through instruction acceleration - if (key->pkt_size != 0 && key->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_CE_INS]; - else - return key->async_ctxid[UADK_CTX_CE_INS]; + if (!sched_ctx || !skey) { + WD_ERR("invalid: sched_ctx or skey is NULL!\n"); + return -WD_EINVAL; } - if (sched_mode == CTX_MODE_SYNC) { - if (balancer->switch_slice == LOOP_SWITH_SLICE) { - balancer->switch_slice = 0; - balancer->hw_dfx_num += LOOP_SWITH_STEP; - /* run in HW */ - key->def_sync_ctxid = key->sync_ctxid[UADK_CTX_HW]; - } else { - balancer->switch_slice++; - /* run in soft CE */ - balancer->sw_dfx_num += LOOP_SWITH_STEP; - key->def_sync_ctxid = key->sync_ctxid[UADK_CTX_CE_INS]; + /* Pre-fetch one sync context */ + sync_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, + skey->ctx_prop, SCHED_MODE_SYNC); + + /* Pre-fetch one async context */ + async_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, + skey->ctx_prop, SCHED_MODE_ASYNC); + + if (sync_ctx == INVALID_POS && async_ctx == INVALID_POS) { + WD_ERR("failed to get valid sync_ctx or async_ctx!\n"); + return -WD_EINVAL; + } + + WD_DEBUG("Got ctx index sync_ctx=%u, async_ctx=%u\n", sync_ctx, async_ctx); + + /* Initialize sync domain if context is valid */ + if (sync_ctx != INVALID_POS) { + if (wd_sched_skey_domain_init(&skey->sync_domain, sync_ctx, sched_ctx->policy) != 0) { + WD_ERR("failed to init sync domain!\n"); + return -WD_EINVAL; } - return key->def_sync_ctxid; } - // Async mode - if (balancer->hw_task_num > balancer->sw_task_num) { - /* run in soft CE */ - balancer->sw_task_num += LOOP_SWITH_STEP >> 1; - key->def_async_ctxid = key->async_ctxid[UADK_CTX_CE_INS]; - } else { - /* run in HW */ - balancer->hw_task_num += LOOP_SWITH_STEP >> 2; - key->def_async_ctxid = key->async_ctxid[UADK_CTX_HW]; + + /* Initialize async domain if context is valid */ + if (async_ctx != INVALID_POS) { + if (wd_sched_skey_domain_init(&skey->async_domain, async_ctx, sched_ctx->policy) != 0) { + WD_ERR("failed to init async domain!\n"); + /* Cleanup sync domain if async domain init failed */ + if (sync_ctx != INVALID_POS) + wd_sched_skey_domain_destroy(&skey->sync_domain); + return -WD_EINVAL; + } } - return key->def_async_ctxid; + return 0; } -static int loop_poll_policy_rr(struct wd_sched_ctx *sched_ctx, int numa_id, - __u32 expect, __u32 *count) +/* ============================================================================ + * Scheduler Policy Functions + * ============================================================================ + */ +/** + * round_robin_sched_init - Initialize session with single sync and async ctx + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Allocates session key and pre-fetches one sync and one async context. 
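
A minimal sketch of the dual sync/async setup in session_sched_domain_init(): either context may be unavailable, the call fails only when both are, and a failure on the second domain unwinds the first. Types and helpers are illustrative stand-ins.

#include <stdio.h>

#define INVALID_IDX 0xFFFFFFFFU

struct mode_domain { unsigned int ctx; int ready; };
struct session { struct mode_domain sync, async; };

static int domain_init(struct mode_domain *d, unsigned int ctx)
{
    d->ctx = ctx;
    d->ready = 1;
    return 0;
}

static void domain_destroy(struct mode_domain *d)
{
    d->ready = 0;
}

static int session_init(struct session *s, unsigned int sync_ctx, unsigned int async_ctx)
{
    if (sync_ctx == INVALID_IDX && async_ctx == INVALID_IDX)
        return -1;  /* nothing usable at all */

    if (sync_ctx != INVALID_IDX && domain_init(&s->sync, sync_ctx))
        return -1;

    if (async_ctx != INVALID_IDX && domain_init(&s->async, async_ctx)) {
        /* Unwind the sync domain so the caller sees a clean failure. */
        if (sync_ctx != INVALID_IDX)
            domain_destroy(&s->sync);
        return -1;
    }
    return 0;
}

int main(void)
{
    struct session s = { 0 };

    printf("init: %d (sync-only is acceptable)\n",
           session_init(&s, 3, INVALID_IDX));  /* init: 0 */
    return 0;
}
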
+ */ +static handle_t round_robin_sched_init(handle_t h_sched_ctx, void *sched_param) { - struct wd_sched_balancer *balancer = &sched_ctx->balancer; - struct wd_sched_info *sched_info, *cur_info, *pnext_info; - struct sched_ctx_region **region; - __u32 begin, end; - __u32 i; - int ret; + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; - sched_info = sched_ctx->sched_info; - cur_info = sched_info + numa_id; - pnext_info = cur_info; - while (pnext_info) { - if (!pnext_info->valid) { - pnext_info = pnext_info->next_info; - continue; - } + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } - region = pnext_info->ctx_region; - for (i = 0; i < sched_ctx->type_num; i++) { - if (!region[SCHED_MODE_ASYNC][i].valid) - continue; - - begin = region[SCHED_MODE_ASYNC][i].begin; - end = region[SCHED_MODE_ASYNC][i].end; - // WD_ERR("session_poll_policy_rr numa: %d, from %u ---> %u!\n", numa_id, begin, end); - ret = session_poll_region(sched_ctx, begin, end, expect, count); - if (unlikely(ret)) - return ret; - } + skey = (struct wd_sched_key *)hskey; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } - /* run in HW */ - if (pnext_info->region_type == UADK_CTX_HW) { - if (balancer->hw_task_num > *count) - balancer->hw_task_num -= *count; - else - balancer->hw_task_num = 0; - balancer->hw_dfx_num += *count; - } else { - if (balancer->sw_task_num > *count) - balancer->sw_task_num -= *count; - else - balancer->sw_task_num = 0; - balancer->sw_dfx_num += *count; - } + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized RR scheduler with sync and async domains\n"); + + return hskey; +} + +/** + * round_robin_pick_next_ctx - Pick context with load-based selection + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) + * + * Returns: Context index with minimum load + * Time complexity: O(1) + */ +static __u32 round_robin_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, + const int sched_mode) +{ + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, ctx_idx; + __u32 new_ctx; - pnext_info = pnext_info->next_info; + if (unlikely(!h_sched_ctx || !skey)) { + WD_ERR("invalid: sched ctx or key is NULL!\n"); + return INVALID_POS; } - return 0; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; + } else { + domain = &skey->async_domain; + } + + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; + + return min_ctx; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. 
+/** + * round_robin_poll_policy - Poll policy for session scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. + * Returns: Status code */ -static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +static int round_robin_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct wd_sched_info *sched_info; - __u32 loop_time = 0; - __u32 last_count = 0; - __u16 i; + __u32 skey_num = sched_ctx->skey_num; + struct wd_sched_key *skey; + __u32 tid = pthread_self(); + __u16 i, tpos, start_pos; + __u32 poll_num, sum_count = 0; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -998,251 +1273,220 @@ static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *cou return -WD_EINVAL; } - if (unlikely(sched_ctx->numa_num > MAX_NUMA_NODES)) { - WD_ERR("invalid: ctx's numa number is %u!\n", sched_ctx->numa_num); + /* Randomize the initial query position. */ + start_pos = tid % skey_num; + /* Query the queues on each skey separately. */ + for (i = 0; i < skey_num; i++) { + tpos = (start_pos + i) % skey_num; + skey = sched_ctx->skey[tpos]; + ret = wd_sched_poll_skey(sched_ctx, skey, expect, &poll_num); + if (unlikely(ret)) + return ret; + + sum_count += poll_num; + } + *count = sum_count; + + return 0; +} + +static handle_t sched_none_init(handle_t h_sched_ctx, void *sched_param) +{ + return (handle_t)0; +} + +static __u32 sched_none_pick_next_ctx(handle_t sched_ctx, + void *sched_key, const int sched_mode) +{ + return 0; +} + +static int sched_none_poll_policy(handle_t h_sched_ctx, + __u32 expect, __u32 *count) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + __u32 loop_times = MAX_POLL_TIMES + expect; + __u32 poll_num = 0; + int ret; + + if (!sched_ctx || !sched_ctx->poll_func) { + WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); return -WD_EINVAL; } - sched_info = sched_ctx->sched_info; + while (loop_times > 0) { + loop_times--; + ret = sched_ctx->poll_func(0, 1, &poll_num); + if ((ret < 0) && (ret != -EAGAIN)) + return ret; + else if (ret == -EAGAIN) + continue; + + *count += poll_num; + if (*count == expect) + break; + } + + return 0; +} - /* - * Try different numa's ctx if we can't receive any - * package last time, it is more efficient. In most - * bad situation, poll ends after MAX_POLL_TIMES loop. - */ - while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < sched_ctx->numa_num;) { - /* If current numa is not valid, find next. 
*/ - if (!sched_info[i].valid) { - i++; - continue; - } +static handle_t sched_single_init(handle_t h_sched_ctx, void *sched_param) +{ + return (handle_t)0; +} - last_count = *count; - ret = loop_poll_policy_rr(sched_ctx, i, expect, count); - if (unlikely(ret)) - return ret; +static __u32 sched_single_pick_next_ctx(handle_t sched_ctx, + void *sched_key, const int sched_mode) +{ + if (sched_mode) + return 1; + else + return 0; +} + +static int sched_single_poll_policy(handle_t h_sched_ctx, + __u32 expect, __u32 *count) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + __u32 loop_times = MAX_POLL_TIMES + expect; + __u32 poll_num = 0; + int ret; + + if (!sched_ctx || !sched_ctx->poll_func) { + WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + return -WD_EINVAL; + } - if (expect == *count) - return 0; + while (loop_times > 0) { + loop_times--; + ret = sched_ctx->poll_func(1, 1, &poll_num); + if ((ret < 0) && (ret != -EAGAIN)) + return ret; + else if (ret == -EAGAIN) + continue; - /* - * If no package is received, find next numa, - * otherwise, keep receiving packets at this node. - */ - if (last_count == *count) - i++; - } + *count += poll_num; + if (*count == expect) + break; } return 0; } +/** + * skey_sched_init - Initialize hungry scheduler session with dynamic expansion + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Pre-fetches one sync and one async context, supports dynamic expansion. + */ static handle_t skey_sched_init(handle_t h_sched_ctx, void *sched_param) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - int ctx_prop; - - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + struct wd_sched_key *skey; + __u32 req_ctx_num = 0; + handle_t hskey; + int i, ret = 0; + __u8 def_prop; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + def_prop = skey->ctx_prop; + /* Init and get ctx for every ctx mode */ + for (i = 0; i < UADK_CTX_MAX; i++) { + skey->ctx_prop = i; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("Can't to request prop=%d type ctx!\n", i); + continue; + } + /* Request two Pre_fetch queues each time. 
*/ + req_ctx_num += 2; } - - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); + if (!req_ctx_num) { + free(skey); return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } - - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_HW; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; - skey->ctx_prop = param->ctx_prop; - } - - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - memset(&skey->balancer, 0x0, sizeof(struct wd_sched_balancer)); - skey->numa_id = 0; - - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; - } - WD_ERR("sync_ctxid is: %u; async_ctxid is: %u!\n", skey->sync_ctxid[0], skey->async_ctxid[0]); - ctx_prop = skey->ctx_prop; - skey->ctx_prop = UADK_CTX_CE_INS; - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - skey->ctx_prop = ctx_prop; - if (skey->sync_ctxid[1] == INVALID_POS && skey->async_ctxid[1] == INVALID_POS) { - WD_ERR("failed to get valid CE sync_ctxid or async_ctxid!\n"); - skey->sync_ctxid[1] = skey->sync_ctxid[0]; - skey->async_ctxid[1] = skey->async_ctxid[0]; - } - + /* Restore the initialization prop settings. */ + skey->ctx_prop = def_prop; sched_skey_param_init(sched_ctx, skey); - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_CE_INS]; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_CE_INS]; - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); - - return (handle_t)skey; + WD_INFO("initialized Hungry scheduler with sync and async domains\n"); -out: - free(skey); - return (handle_t)(-WD_EINVAL); + return hskey; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
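
The dynamic expansion mentioned for the hungry scheduler happens at pick time (see skey_sched_pick_next_ctx below): when even the least loaded cached ctx is past a threshold, one more ctx is pulled from the domain and added to the session cache. A minimal sketch with illustrative names and an illustrative threshold:

#include <stdio.h>

#define CACHE_MAX      8
#define LOAD_THRESHOLD 64
#define INVALID_IDX    0xFFFFFFFFU

struct cache {
    unsigned int idx[CACHE_MAX];
    unsigned int load[CACHE_MAX];
    unsigned int count;
};

/* Stand-in for grabbing a fresh ctx index from the shared domain. */
static unsigned int domain_alloc_ctx(void)
{
    static unsigned int next = 100;
    return next++;
}

static unsigned int pick_with_expansion(struct cache *c)
{
    unsigned int i, min_i = 0, fresh;

    for (i = 1; i < c->count; i++)
        if (c->load[i] < c->load[min_i])
            min_i = i;

    c->load[min_i]++;   /* account for the task about to be sent */
    if (c->load[min_i] <= LOAD_THRESHOLD || c->count >= CACHE_MAX)
        return c->idx[min_i];

    /* Everything is saturated: expand the session's ctx set. */
    fresh = domain_alloc_ctx();
    if (fresh == INVALID_IDX)
        return c->idx[min_i];
    c->idx[c->count] = fresh;
    c->load[c->count] = 0;
    c->count++;
    return fresh;
}

int main(void)
{
    struct cache c = { .idx = { 10 }, .load = { 70 }, .count = 1 };

    printf("picked ctx %u, cache now holds %u ctxs\n",
           pick_with_expansion(&c), c.count);   /* expands to a new ctx */
    return 0;
}
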
+/** + * skey_sched_pick_next_ctx - Pick context from hungry scheduler with load awareness + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) * - * The user must init the schedule info through session_sched_init + * Returns: Context with minimum load, or expands if threshold exceeded + * Time complexity: O(1) for selection, O(n) if expansion needed */ static __u32 skey_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) + const int sched_mode) { - struct sched_key *skey = (struct sched_key *)sched_key; + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, min_load, ctx_idx; + __u32 new_ctx; if (unlikely(!h_sched_ctx || !skey)) { WD_ERR("invalid: sched ctx or key is NULL!\n"); return INVALID_POS; } - if (skey->sync_ctxid[UADK_CTX_HW] == INVALID_POS || - skey->async_ctxid[UADK_CTX_HW] == INVALID_POS) - return session_sched_pick_next_ctx(h_sched_ctx, sched_key, sched_mode); - - if (skey->is_stream) - return wd_sched_special_pick(skey, sched_mode); - - // Small packets go directly through instruction acceleration - if (skey->pkt_size != 0 && skey->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return skey->sync_ctxid[UADK_CTX_CE_INS]; - else - return skey->async_ctxid[UADK_CTX_CE_INS]; - } - - // Async mode - if (sched_mode == CTX_MODE_ASYNC) { - if (skey->balancer.hw_task_num > (1024 + skey->balancer.sw_task_num >> 1)) { - /* run in soft CE */ - skey->balancer.sw_task_num += LOOP_SWITH_STEP; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_CE_INS]; - } else { - /* run in HW */ - skey->balancer.hw_task_num += LOOP_SWITH_STEP; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_HW]; - } - return skey->def_async_ctxid; - } - - if (skey->balancer.switch_slice >= LOOP_SWITH_SLICE) { - skey->balancer.switch_slice = 0; - skey->balancer.hw_dfx_num += LOOP_SWITH_STEP >> 1; - /* run in HW */ - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_HW]; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; } else { - skey->balancer.switch_slice++; - skey->balancer.sw_dfx_num += LOOP_SWITH_STEP >> 2; - /* run in soft CE */ - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_CE_INS]; + domain = &skey->async_domain; } - return skey->def_sync_ctxid; -} - -static int skey_poll_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *skey, - __u32 expect, __u32 *count) -{ - __u32 hw_num = 0; - __u32 sw_num = 0; - __u32 poll_num; - int i, ret; - - /* - * Collect hardware messages first, multi-threading performance is better; - * Collect software packets first, single-thread performance is better - */ - for (i = UADK_CTX_MAX - 1; i >= 0; i--) { - if (skey->async_ctxid[i] == INVALID_POS) - continue; - - poll_num = 0; - ret = sched_ctx->poll_func(skey->async_ctxid[i], expect, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (poll_num == 0) - continue; - - if (i == 0) - hw_num += poll_num; - else - sw_num += poll_num; - } + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; - *count = *count + hw_num + sw_num; - if (hw_num > 0) { - if (skey->balancer.hw_task_num > hw_num) - skey->balancer.hw_task_num -= hw_num; - else - skey->balancer.hw_task_num = 0; - 
skey->balancer.hw_dfx_num += hw_num; - } - if (sw_num > 0) { - if (skey->balancer.sw_task_num > sw_num) - skey->balancer.sw_task_num -= sw_num; - else - skey->balancer.sw_task_num = 0; - skey->balancer.sw_dfx_num += sw_num; + /* Update load value for send one task */ + wd_sched_skey_update_load(&skey->async_domain.idx_cache, ctx_idx, 1); + min_load = domain->idx_cache.load_values[ctx_idx]; + + /* Check if we need to expand context pool */ + if (min_load > HUNGRY_LOAD_THRESHOLD) { + /* Try to allocate new context from domain */ + new_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, skey->ctx_prop, sched_mode); + if (new_ctx != INVALID_POS) { + if (wd_sched_skey_add_ctx(&domain->idx_cache, new_ctx) == 0) { + domain->expanded_count++; + min_ctx = new_ctx; + } + } } - return 0; + return min_ctx; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. +/** + * skey_sched_poll_policy - Poll policy for hungry scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. + * Returns: Status code */ static int skey_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *skey; + struct wd_sched_key *skey; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -1254,131 +1498,163 @@ static int skey_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *cou if (!skey) return -WD_EAGAIN; - ret = skey_poll_ctx(sched_ctx, skey, expect, count); + ret = wd_sched_poll_skey(sched_ctx, skey, expect, count); if (unlikely(ret)) return ret; return 0; } -static handle_t instr_sched_init(handle_t h_sched_ctx, void *sched_param) +/** + * loop_sched_init - Initialize loop scheduler session with single ctx per mode + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Pre-fetches one sync and one async context. + */ +static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + struct wd_sched_key *skey; + __u32 req_ctx_num = 0; + handle_t hskey; + int i, ret = 0; + __u8 def_prop; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + def_prop = skey->ctx_prop; + /* Init and get ctx for every ctx mode */ + for (i = 0; i < UADK_CTX_MAX; i++) { + skey->ctx_prop = i; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("Can't to request prop=%d type ctx!\n", i); + continue; + } + /* Request two Pre_fetch queues each time. 
*/ + req_ctx_num += 2; } - - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); + if (!req_ctx_num) { + free(skey); return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } + /* Restore the initialization prop settings. */ + skey->ctx_prop = def_prop; + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized Loop scheduler with sync and async domains\n"); + return (handle_t)skey; +} - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_CE_INS; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; - skey->ctx_prop = param->ctx_prop; - } +/** + * loop_sched_pick_next_ctx - Pick context for loop scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) + * + * Returns: Context index with minimum load + * Time complexity: O(1) + */ +static __u32 loop_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, + const int sched_mode) +{ + return round_robin_pick_next_ctx(h_sched_ctx, sched_key, sched_mode); +} + +static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +{ + return round_robin_poll_policy(h_sched_ctx, expect, count); +} + +static handle_t instr_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); + skey = (struct wd_sched_key *)hskey; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } sched_skey_param_init(sched_ctx, skey); - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); return (handle_t)skey; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
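
A minimal sketch of the "try every ctx property, keep whatever succeeds" loop shared by skey_sched_init() and loop_sched_init(): a property with no usable ctxs is simply skipped, and session init fails only when no property could be set up. The property names and failure condition below are placeholders.

#include <stdio.h>

enum ctx_prop { PROP_HW, PROP_CE_INS, PROP_SOFT, PROP_MAX };

static int init_prop_domain(int prop)
{
    return prop == PROP_SOFT ? -1 : 0;  /* pretend SOFT has no ctxs */
}

static int init_all_props(void)
{
    unsigned int ok = 0;
    int prop;

    for (prop = 0; prop < PROP_MAX; prop++) {
        if (init_prop_domain(prop)) {
            printf("prop %d unavailable, skipping\n", prop);
            continue;
        }
        ok++;
    }
    return ok ? 0 : -1; /* fail only if nothing was set up */
}

int main(void)
{
    printf("init_all_props: %d\n", init_all_props());   /* 0 */
    return 0;
}
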
- * - * The user must init the schedule info through session_sched_init - */ static __u32 instr_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) + const int sched_mode) { - struct sched_key *key = (struct sched_key *)sched_key; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, ctx_idx; + __u32 new_ctx; - //if (unlikely(!h_sched_ctx || !key)) { - // WD_ERR("invalid: sched ctx or key is NULL!\n"); - // return INVALID_POS; - //} + if (unlikely(!h_sched_ctx || !skey)) { + WD_ERR("invalid: sched ctx or key is NULL!\n"); + return INVALID_POS; + } - key->balancer.sw_dfx_num++; - if (sched_mode == CTX_MODE_SYNC) { - /* run in soft CE */ - return key->sync_ctxid[UADK_CTX_CE_INS]; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; + } else { + domain = &skey->async_domain; } - // Async mode - /* run in soft CE */ - return key->async_ctxid[UADK_CTX_CE_INS]; + + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; + + return min_ctx; } -static int instr_poll_policy_rr(struct wd_sched_ctx *sched_ctx, struct sched_key *skey, - __u32 expect, __u32 *count) +static int instr_poll_policy_rr(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey, + __u32 expect, __u32 *count) { __u32 recv_cnt, ctx_id; int ret; - //WD_ERR("success: sched skey num: %u!\n", i); recv_cnt = 0; - ctx_id = skey->async_ctxid[UADK_CTX_CE_INS]; + ctx_id = skey->async_domain.idx_cache.idx_list[0]; ret = sched_ctx->poll_func(ctx_id, expect, &recv_cnt); if ((ret < 0) && (ret != -EAGAIN)) return ret; *count += recv_cnt; - //WD_ERR("success: sched recv task num: %u!\n", *count); return 0; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. +/** + * instr_sched_poll_policy - Poll policy for instruction scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. 
+ * Returns: Status code */ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *skey; + struct wd_sched_key *skey; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -1386,7 +1662,6 @@ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *co return -WD_EINVAL; } - /* First poll the skey is NULL */ skey = sched_get_poll_skey(sched_ctx); if (!skey) return -WD_EAGAIN; @@ -1398,30 +1673,59 @@ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *co return ret; } +static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + skey->type = param->type; + + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } + + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized Dev RR scheduler with sync and async domains\n"); + return (handle_t)skey; +} + +/** + * wd_sched_set_param - Set scheduler parameters + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + */ static void wd_sched_set_param(handle_t h_sched_ctx, void *sched_key, void *sched_param) { struct wd_sched_params *params = (struct wd_sched_params *)sched_param; - struct sched_key *skey = (struct sched_key *)sched_key; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; skey->pkt_size = params->pkt_size; skey->is_stream = params->data_mode; skey->prio_mode = params->prio_mode; } -static void none_set_param(handle_t h_sched_ctx, - void *sched_key, void *sched_param) -{ - return; -} - static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { { .name = "RR scheduler", .sched_policy = SCHED_POLICY_RR, - .sched_init = session_sched_init, - .pick_next_ctx = session_sched_pick_next_ctx, - .poll_policy = session_sched_poll_policy, + .sched_init = round_robin_sched_init, + .pick_next_ctx = round_robin_pick_next_ctx, + .poll_policy = round_robin_poll_policy, .set_param = wd_sched_set_param, }, { .name = "None scheduler", @@ -1441,8 +1745,8 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .name = "Device RR scheduler", .sched_policy = SCHED_POLICY_DEV, .sched_init = session_dev_sched_init, - .pick_next_ctx = session_sched_pick_next_ctx, - .poll_policy = session_sched_poll_policy, + .pick_next_ctx = round_robin_pick_next_ctx, + .poll_policy = round_robin_poll_policy, .set_param = wd_sched_set_param, }, { .name = "Loop scheduler", @@ -1451,6 +1755,7 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .pick_next_ctx = loop_sched_pick_next_ctx, .poll_policy = loop_sched_poll_policy, .set_param = wd_sched_set_param, + }, { .name = "Hungry scheduler", .sched_policy = SCHED_POLICY_HUNGRY, @@ -1468,128 +1773,37 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { }, }; -static int wd_sched_get_nearby_numa_id(struct wd_sched_info *sched_info, int 
node, int numa_num) -{ - int dis = INT32_MAX; - int valid_id = -1; - int i, tmp; - - for (i = 0; i < numa_num; i++) { - if (sched_info[i].valid) { - tmp = numa_distance(node, i); - if (dis > tmp) { - valid_id = i; - dis = tmp; - } - } - } - - return valid_id; -} - -static void wd_sched_map_cpus_to_dev(struct wd_sched_ctx *sched_ctx) -{ - struct wd_sched_info *sched_info = sched_ctx->sched_info; - int i, numa_num = sched_ctx->numa_num; - int *numa_map = sched_ctx->numa_map; - - for (i = 0; i < numa_num; i++) { - if (sched_info[i].valid) - numa_map[i] = i; - else - numa_map[i] = wd_sched_get_nearby_numa_id(sched_info, i, numa_num); - } -} - -static int wd_instance_dev_region(struct wd_sched_ctx *sched_ctx, - struct sched_params *param) +static int numa_num_check(__u16 region_num) { - struct wd_sched_info *sched_info; - __u32 region_idx = INVALID_POS; - __u8 type, mode; - __u32 dev_id; - int i; - - dev_id = param->dev_id; - type = param->type; - mode = param->mode; - - /* Check whether dev_id has already been registered. */ - for (i = 0; i < sched_ctx->dev_num; i++) { - if (sched_ctx->dev_id_map[i].dev_id == dev_id) { - region_idx = sched_ctx->dev_id_map[i].region_id; - break; - } - } - - /* If not registered, allocate a new region. */ - if (region_idx == INVALID_POS) { - if (sched_ctx->dev_num >= DEVICE_REGION_MAX) { - WD_ERR("too many devices registered!\n"); - return -WD_EINVAL; - } - - region_idx = sched_ctx->dev_num; - sched_ctx->dev_id_map[region_idx].dev_id = dev_id; - sched_ctx->dev_id_map[region_idx].region_id = region_idx; - sched_ctx->dev_num++; - - sched_info = &sched_ctx->sched_info[region_idx]; - } else { - sched_info = &sched_ctx->sched_info[region_idx]; - } + int max_node; - /* Check whether the mode and type have already been registered. 
*/ - if (sched_info->ctx_region[mode][type].valid) { - WD_INFO("device %u mode %u type %u already registered\n", - dev_id, mode, type); - return WD_SUCCESS; + max_node = numa_max_node() + 1; + if (max_node <= 0) { + WD_ERR("invalid: numa max node is %d!\n", max_node); + return -WD_EINVAL; } - /* Initialize the scheduling region for this mode and type */ - sched_info->ctx_region[mode][type].begin = param->begin; - sched_info->ctx_region[mode][type].end = param->end; - sched_info->ctx_region[mode][type].last = param->begin; - sched_info->ctx_region[mode][type].valid = true; - sched_info->valid = true; - - pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); - - return WD_SUCCESS; -} - -static int wd_sched_region_instance(struct wd_sched_info *sched_info, - struct sched_params *param) -{ - struct wd_sched_info *next_info; - __u8 type, mode; - - type = param->type; - mode = param->mode; - next_info = sched_info; - while (next_info) { - if (next_info->region_type == param->ctx_prop) { - next_info->ctx_region[mode][type].begin = param->begin; - next_info->ctx_region[mode][type].end = param->end; - next_info->ctx_region[mode][type].last = param->begin; - next_info->ctx_region[mode][type].valid = true; - next_info->valid = true; - pthread_mutex_init(&next_info->ctx_region[mode][type].lock, NULL); - WD_ERR("instance numa<%d>, property<%d>, mode<%u>, type<%u> ctx: begin: %u ----> end: %u!\n", - param->numa_id, param->ctx_prop, mode, type, param->begin, param->end); - return 0; - } - next_info = next_info->next_info; + if (!region_num || region_num > max_node) { + WD_ERR("invalid: region number is %u!\n", region_num); + return -WD_EINVAL; } - return -WD_EINVAL; + return 0; } +/** + * wd_sched_rr_instance - External API for scheduling region instance + * @sched: Scheduler (cannot modify per API contract) + * @param: Scheduling parameters (cannot modify per API contract) + * + * Creates scheduling region for given parameters. 
+ */ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *param) { struct wd_sched_ctx *sched_ctx = NULL; - __u8 type, mode; - int numa_id, ret; + struct wd_sched_ctx_domain *domain; + __u8 mode; + int ret; if (!sched || !sched->h_sched_ctx || !param) { WD_ERR("invalid: sched or sched_params is NULL!\n"); @@ -1601,92 +1815,61 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - numa_id = param->numa_id; - type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; - if (sched_ctx->numa_num > 0 && (numa_id >= sched_ctx->numa_num || - numa_id < 0)) { - WD_ERR("invalid: sched_ctx's numa_id is %d, numa_num is %u!\n", - numa_id, sched_ctx->numa_num); + if (param->numa_id >= sched_ctx->region_num || param->numa_id < 0) { + WD_ERR("invalid: region_id is %d, region_num is %u!\n", + param->numa_id, sched_ctx->region_num); return -WD_EINVAL; } - if (type >= sched_ctx->type_num) { - WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", - type, sched_ctx->type_num); + if (param->type >= sched_ctx->type_num) { + WD_ERR("invalid: type is %u, type_num is %u!\n", + param->type, sched_ctx->type_num); return -WD_EINVAL; } if (mode >= SCHED_MODE_BUTT) { - WD_ERR("invalid: sched_ctx's mode is %u, mode_num is %d!\n", - mode, SCHED_MODE_BUTT); + WD_ERR("invalid: mode is %u, mode_num is %u!\n", + mode, sched_ctx->mode_num); return -WD_EINVAL; } - if (sched_ctx->policy == SCHED_POLICY_DEV) - return wd_instance_dev_region(sched_ctx, param); - - /* For older tools, the default setting is of the HW type. */ if (param->ctx_prop < 0 || param->ctx_prop > UADK_CTX_SOFT) param->ctx_prop = UADK_CTX_HW; - if (!sched_ctx->sched_info[numa_id].ctx_region[mode]) { - WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", - numa_id, mode); - return -WD_EINVAL; - } + /* Insert or get domain from hash table using four dimensions */ + domain = wd_sched_hash_table_insert(sched_ctx->domain_hash_table, + param->numa_id, mode, param->type, + param->ctx_prop); + if (!domain) + return -WD_ENOMEM; - ret = wd_sched_region_instance(&sched_ctx->sched_info[numa_id], param); + /* Add context range as new segment */ + ret = wd_sched_domain_add_segment(domain, param->begin, param->end); if (ret) { - WD_ERR("failed to instance ctx_region!\n"); + WD_ERR("failed to add segment to domain!\n"); return ret; } + domain->valid = true; - wd_sched_map_cpus_to_dev(sched_ctx); + WD_ERR("instance: region=%d, mode=%u, type=%u, prop=%d, begin=%u, end=%u\n", + param->numa_id, mode, param->type, param->ctx_prop, + param->begin, param->end); return WD_SUCCESS; } -static void wd_sched_region_release(struct wd_sched_ctx *sched_ctx) -{ - struct wd_sched_info *sched_info, *next_info, *cur_info; - int i, j, region_num; - - sched_info = sched_ctx->sched_info; - if (!sched_info) - return; - - /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ - if (sched_ctx->policy == SCHED_POLICY_DEV) - region_num = DEVICE_REGION_MAX; - else - region_num = sched_ctx->numa_num; - - for (i = 0; i < region_num; i++) { - cur_info = &sched_info[i]; - while (cur_info) { - next_info = cur_info->next_info; - for (j = 0; j < SCHED_MODE_BUTT; j++) { - if (cur_info->ctx_region[j]) { - free(cur_info->ctx_region[j]); - cur_info->ctx_region[j] = NULL; - } - } - /* First info region is alloced by sched ctx */ - if (cur_info->region_type != UADK_CTX_HW) - free(cur_info); - cur_info = next_info; - } - } -} - +/** + * wd_sched_rr_release - External API for scheduler 
release + * @sched: Scheduler to release (cannot modify per API contract) + * + * Releases all scheduler resources. + */ void wd_sched_rr_release(struct wd_sched *sched) { struct wd_sched_ctx *sched_ctx; - __u32 hw_dfx_num = 0; - __u32 sw_dfx_num = 0; __u32 i; if (!sched) @@ -1696,21 +1879,22 @@ void wd_sched_rr_release(struct wd_sched *sched) if (!sched_ctx) goto ctx_out; + /* Release all session keys */ for (i = 0; i < sched_ctx->skey_num; i++) { if (sched_ctx->skey[i] != NULL) { - hw_dfx_num += sched_ctx->skey[i]->balancer.hw_dfx_num; - sw_dfx_num += sched_ctx->skey[i]->balancer.sw_dfx_num; + wd_sched_skey_domain_destroy(&sched_ctx->skey[i]->sync_domain); + wd_sched_skey_domain_destroy(&sched_ctx->skey[i]->async_domain); } sched_ctx->skey[i] = NULL; } - hw_dfx_num += sched_ctx->balancer.hw_dfx_num; - sw_dfx_num += sched_ctx->balancer.sw_dfx_num; sched_ctx->skey_num = 0; - /* Release sched dfx info */ - WD_ERR("scheduler balance hw task num: %u, sw task num: %u\n", - hw_dfx_num, sw_dfx_num); - wd_sched_region_release(sched_ctx); + /* Release hash table */ + if (sched_ctx->domain_hash_table) { + wd_sched_hash_table_destroy(sched_ctx->domain_hash_table); + sched_ctx->domain_hash_table = NULL; + } + free(sched_ctx); ctx_out: @@ -1718,79 +1902,22 @@ ctx_out: return; } -static int numa_num_check(__u16 numa_num) -{ - int max_node; - - max_node = numa_max_node() + 1; - if (max_node <= 0) { - WD_ERR("invalid: numa max node is %d!\n", max_node); - return -WD_EINVAL; - } - - if (!numa_num || numa_num > max_node) { - WD_ERR("invalid: numa number is %u!\n", numa_num); - return -WD_EINVAL; - } - - return 0; -} - -static int wd_sched_region_init(struct wd_sched_ctx *sched_ctx, - __u8 type_num, __u16 numa_num) -{ - struct wd_sched_info *sched_info = sched_ctx->sched_info; - struct wd_sched_info *cur_info; - int i, j, k; - - for (i = 0; i < MAX_SKEY_REGION_NUM; i++) { - sched_ctx->skey[i] = NULL; - sched_ctx->poll_tid[i] = 0; - } - pthread_mutex_init(&sched_ctx->skey_lock, NULL); - sched_ctx->skey_num = 0; - memset(&sched_ctx->balancer, 0x0, sizeof(struct wd_sched_balancer)); - - for (i = 0; i < numa_num; i++) { - /* Init sched_info next list */ - cur_info = &sched_info[i]; - for (j = 0; j < UADK_CTX_MAX; j++) { - for (k = 0; k < SCHED_MODE_BUTT; k++) { - cur_info->ctx_region[k] = - calloc(1, sizeof(struct sched_ctx_region) * type_num); - if (!cur_info->ctx_region[k]) - goto sched_err; - } - cur_info->valid = false; - cur_info->region_type = j; - - /* The last node point to NULL */ - if (j == UADK_CTX_MAX - 1) { - cur_info->next_info = NULL; - break; - } - cur_info->next_info = calloc(1, sizeof(*cur_info)); - if (!cur_info) - goto sched_err; - cur_info = cur_info->next_info; - } - } - - return 0; - -sched_err: - wd_sched_region_release(sched_ctx); - - return -WD_EINVAL; -} - +/** + * wd_sched_rr_alloc - External API for scheduler allocation + * @sched_type: Scheduling policy type (cannot modify per API contract) + * @type_num: Number of operation types (cannot modify per API contract) + * @region_num: Number of regions (cannot modify per API contract) + * @func: Poll function (cannot modify per API contract) + * + * Allocates and initializes scheduler with single global hash table. 
+ */ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, - __u16 numa_num, user_poll_func func) + __u16 region_num, user_poll_func func) { struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int region_num; - int i, ret; + __u32 estimated_entries; + __u32 i; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -1804,48 +1931,51 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, return NULL; } - if (sched_type == SCHED_POLICY_DEV) - region_num = DEVICE_REGION_MAX; - else - region_num = numa_num; - - sched_ctx = calloc(1, sizeof(struct wd_sched_ctx) + - sizeof(struct wd_sched_info) * region_num); + sched_ctx = calloc(1, sizeof(struct wd_sched_ctx)); if (!sched_ctx) { WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; } - /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + /* Cache dimension parameters */ + sched_ctx->type_num = type_num; + sched_ctx->mode_num = SCHED_MODE_BUTT; + sched_ctx->region_num = region_num; + sched_ctx->policy = sched_type; + + if (sched_type == SCHED_POLICY_NONE || sched_type == SCHED_POLICY_SINGLE) { + /* Simple schedulers don't need hash table */ + goto simple_ok; + } + if (sched_type == SCHED_POLICY_DEV) { - sched_ctx->numa_num = 0; - sched_ctx->dev_num = 0; - for (i = 0; i < DEVICE_REGION_MAX; i++) { - sched_ctx->dev_id_map[i].dev_id = INVALID_POS; - sched_ctx->dev_id_map[i].region_id = INVALID_POS; - } + /* Device mode: region_num is actually device count */ + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; } else { - sched_ctx->numa_num = numa_num; - sched_ctx->dev_num = 0; - if (numa_num_check(sched_ctx->numa_num)) + /* NUMA mode: validate region_num */ + if (numa_num_check(region_num)) goto err_out; + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; } - sched->h_sched_ctx = (handle_t)sched_ctx; - if (sched_type == SCHED_POLICY_NONE || - sched_type == SCHED_POLICY_SINGLE) - goto simple_ok; - - ret = wd_sched_region_init(sched_ctx, type_num, numa_num); - if (ret) + /* Create single global hash table */ + sched_ctx->domain_hash_table = wd_sched_hash_table_create(estimated_entries); + if (!sched_ctx->domain_hash_table) { + WD_ERR("failed to create hash table!\n"); goto ctx_out; + } simple_ok: sched_ctx->poll_func = func; - sched_ctx->policy = sched_type; - sched_ctx->type_num = type_num; - memset(sched_ctx->numa_map, -1, sizeof(int) * MAX_NUMA_NODES); + for (i = 0; i < SKEY_MAX_THREAD_NUM; i++) { + sched_ctx->skey[i] = NULL; + sched_ctx->poll_tid[i] = 0; + } + pthread_mutex_init(&sched_ctx->skey_lock, NULL); + sched_ctx->skey_num = 0; + + sched->h_sched_ctx = (handle_t)sched_ctx; sched->sched_init = sched_table[sched_type].sched_init; sched->pick_next_ctx = sched_table[sched_type].pick_next_ctx; sched->poll_policy = sched_table[sched_type].poll_policy; @@ -1853,6 +1983,9 @@ simple_ok: sched->name = sched_table[sched_type].name; sched->set_param = sched_table[sched_type].set_param; + WD_INFO("Scheduler %s allocated: type_num=%u, region_num=%u, mode_num=%d\n", + sched->name, type_num, region_num, SCHED_MODE_BUTT); + return sched; ctx_out: -- 2.43.0
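
For context, a minimal usage sketch of the reworked scheduler API follows. It allocates a round-robin scheduler, registers one sync-mode hardware ctx range via wd_sched_rr_instance(), and releases the scheduler. The sched_params fields (numa_id, type, mode, begin, end, ctx_prop) follow the structures used in the hunks above; the type_num/region_num values, the ctx range, and the choice of wd_cipher_poll_ctx as the poll callback are illustrative placeholders, not values mandated by the patch.

#include "wd_sched.h"
#include "wd_cipher.h"

/* Hypothetical setup: one operation type, one region,
 * ctxs 0..3 used for synchronous hardware requests.
 * Assumes the corresponding ctxs were configured beforehand. */
static int setup_sched_example(struct wd_sched **out)
{
	struct sched_params param = {0};
	struct wd_sched *sched;
	int ret;

	/* region_num replaces numa_num after this refactor */
	sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1 /* type_num */,
				  1 /* region_num */, wd_cipher_poll_ctx);
	if (!sched)
		return -1;

	param.numa_id = 0;		/* region index */
	param.type = 0;			/* operation type */
	param.mode = 0;			/* 0: sync, 1: async */
	param.begin = 0;		/* first ctx index of the range */
	param.end = 3;			/* last ctx index of the range */
	param.ctx_prop = UADK_CTX_HW;	/* hardware ctx property */

	/* Inserts (or finds) the matching domain in the global hash
	 * table and adds [begin, end] as a new segment. */
	ret = wd_sched_rr_instance(sched, &param);
	if (ret) {
		wd_sched_rr_release(sched);
		return ret;
	}

	*out = sched;
	return 0;
}
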
From: Longfang Liu <liulongfang@huawei.com> To complete the overall framework update design for UADK, I need to retain only the cipher algorithm and hisi_sec driver while disabling the APIs of all other modules. After debugging this module successfully, I will gradually adapt the remaining modules one by one to accelerate the solution's debugging process. Additionally, the corresponding modifications need to be made in `build/specs/uadk.spec` to disable the copying operations of other testing tools, retaining only `uadk_tool`. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- BK_Makefile | 225 +++++++ Makefile.am | 145 +---- drv/hisi_sec.c | 8 + uadk_tool/BK_Makefile | 63 ++ uadk_tool/Makefile.am | 27 +- uadk_tool/benchmark/sec_uadk_benchmark.c | 784 +---------------------- uadk_tool/benchmark/uadk_benchmark.c | 30 - uadk_tool/uadk_tool.c | 13 +- 8 files changed, 330 insertions(+), 965 deletions(-) create mode 100644 BK_Makefile create mode 100644 uadk_tool/BK_Makefile diff --git a/BK_Makefile b/BK_Makefile new file mode 100644 index 0000000..89f949a --- /dev/null +++ b/BK_Makefile @@ -0,0 +1,225 @@ +ACLOCAL_AMFLAGS = -I m4 -I./include +AUTOMAKE_OPTIONS = foreign subdir-objects +AM_CFLAGS=-std=gnu11 -Wall -Werror -Wextra -Wno-unused-parameter -Wfloat-equal \ + -fno-common -fno-strict-aliasing -I$(top_srcdir)/include +AM_CFLAGS+=-fPIC -fPIE -pie -fstack-protector-strong \ + -g +AM_CFLAGS += -Wall -Wuninitialized -Wno-error -Wno-error=format -Wundef \ +-Wunused -Wdate-time -Wfloat-equal -Wshadow -Wvla -Wdisabled-optimization \ +-Wempty-body -Wignored-qualifiers -Wimplicit-fallthrough=3 -Wtype-limits \ +-Wshift-negative-value -Wswitch-default -Wframe-larger-than=8192 -Wshift-overflow=2 \ +-Wwrite-strings -Wmissing-format-attribute -Wformat-nonliteral -Wduplicated-cond \ +-Wtrampolines -Wlogical-op -Wsuggest-attribute=format -Wduplicated-branches \ +-Wmissing-include-dirs -Wformat-signedness -Wmissing-declarations -Wreturn-local-addr \ +-Wredundant-decls -Wfloat-conversion -Wmissing-prototypes -Wstrict-prototypes + +CLEANFILES = + +if WITH_LOG_FILE +AM_CFLAGS+=-DWITH_LOG_FILE=\"$(with_log_file)\" +endif # WITH_LOG_FILE + +# libtool version is {current}:{revision}:{age} with -version-info +# libNAME.so.{x}.{y}.{z} +# But {current}:{revision}:{age} doesn't equal to {x}.{y}.{z} +# x = current - age +# y = age +# z = revision +# +# libtool version is {major}:{minor}:{revision} with -version-number +# libNAME.so.{x}.{y}.{z} +# x = major +# y = minor +# z = revision +MAJOR = 2 +MINOR = 8 +REVISION = 0 +UADK_VERSION = -version-number ${MAJOR}:${MINOR}:${REVISION} + +DAY = 1 +MONTH = Dec +YEAR = 2024 +AM_CFLAGS+= -DUADK_VERSION_NUMBER="\"UADK version: ${MAJOR}.${MINOR}.${REVISION}\"" +AM_CFLAGS+= -DUADK_RELEASED_TIME="\"Released ${MONTH} ${DAY}, ${YEAR}\"" + +pkginclude_HEADERS = include/wd.h include/wd_internal.h include/wd_cipher.h include/wd_aead.h \ + include/wd_comp.h include/wd_dh.h include/wd_digest.h \ + include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ + include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ + include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ + include/wd_udma.h include/wd_join_gather.h \ + include/wd_bmm.h + +nobase_pkginclude_HEADERS=v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ + v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h v1/wd_ecc.h v1/wd_comp.h + +lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la libwd_udma.la + +uadk_driversdir=$(libdir)/uadk +uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ 
+ libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la + +libwd_la_SOURCES=wd.c wd_mempool.c wd_bmm.c wd_bmm.h wd.h wd_alg.c wd_alg.h \ + v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ + v1/wd_rsa.c v1/wd_rsa.h \ + v1/wd_aead.c v1/wd_aead.h \ + v1/wd_dh.c v1/wd_dh.h \ + v1/wd_comp.c v1/wd_comp.h \ + v1/wd_cipher.c v1/wd_cipher.h \ + v1/wd_digest.c v1/wd_digest.h \ + v1/wd_util.c v1/wd_util.h \ + v1/wd_bmm.c v1/wd_bmm.h \ + v1/wd_ecc.c v1/wd_ecc.h \ + v1/wd_sgl.c v1/wd_sgl.h \ + aes.h sm4.h galois.h \ + lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ + v1/drv/hisi_qm_udrv.c v1/drv/hisi_qm_udrv.h \ + v1/drv/hisi_zip_udrv.c v1/drv/hisi_zip_udrv.h \ + v1/drv/hisi_zip_huf.c v1/drv/hisi_zip_huf.h \ + v1/drv/hisi_hpre_udrv.c v1/drv/hisi_hpre_udrv.h \ + v1/drv/hisi_sec_udrv.c v1/drv/hisi_sec_udrv.h + +libwd_udma_la_SOURCES=wd_udma.h wd_udma_drv.h wd_udma.c \ + wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h + +libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c wd_join_gather.h wd_join_gather_drv.h wd_join_gather.c \ + wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h + +libwd_comp_la_SOURCES=wd_comp.c wd_comp.h wd_comp_drv.h wd_util.c wd_util.h \ + wd_sched.c wd_sched.h wd.c wd.h wd_zlibwrapper.c + +libhisi_zip_la_SOURCES=drv/hisi_comp.c hisi_comp.h drv/hisi_qm_udrv.c \ + drv/hisi_comp_huf.c drv/hisi_comp_huf.h \ + hisi_qm_udrv.h wd_comp_drv.h + +libwd_crypto_la_SOURCES=wd_cipher.c wd_cipher.h wd_cipher_drv.h \ + wd_aead.c wd_aead.h wd_aead_drv.h \ + wd_rsa.c wd_rsa.h wd_rsa_drv.h \ + wd_dh.c wd_dh.h wd_dh_drv.h \ + wd_ecc.c wd_ecc.h wd_ecc_drv.h \ + wd_digest.c wd_digest.h wd_digest_drv.h \ + wd_util.c wd_util.h \ + wd_sched.c wd_sched.h \ + wd.c wd.h + +libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ + lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ + hisi_qm_udrv.h wd_cipher_drv.h wd_aead_drv.h aes.h sm4.h galois.h + +libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ + hisi_qm_udrv.h + +libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \ + drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h wd_util.c wd_util.h + +libisa_sve_la_SOURCES=drv/hash_mb/hash_mb.c wd_digest_drv.h drv/hash_mb/hash_mb.h \ + drv/hash_mb/sm3_sve_common.S drv/hash_mb/sm3_mb_asimd_x1.S \ + drv/hash_mb/sm3_mb_asimd_x4.S drv/hash_mb/sm3_mb_sve.S \ + drv/hash_mb/md5_sve_common.S drv/hash_mb/md5_mb_asimd_x1.S \ + drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S + +libhisi_dae_la_SOURCES=drv/hisi_dae.c hisi_dae.h drv/hisi_qm_udrv.c \ + hisi_qm_udrv.h drv/hisi_dae_join_gather.c drv/hisi_dae_common.c + +libhisi_udma_la_SOURCES=drv/hisi_udma.c drv/hisi_qm_udrv.c \ + hisi_qm_udrv.h + +if WD_STATIC_DRV +AM_CFLAGS += -DWD_STATIC_DRV -fPIC +AM_CFLAGS += -DWD_NO_LOG + +libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma -lpthread + +libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lpthread -lnuma +libwd_comp_la_DEPENDENCIES = libwd.la + +libhisi_zip_la_LIBADD = -ldl -lpthread + +libwd_crypto_la_LIBADD = -lwd -ldl -lnuma -lm -lpthread +libwd_crypto_la_DEPENDENCIES = libwd.la + +libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread +libwd_udma_la_DEPENDENCIES = libwd.la + +libwd_dae_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread +libwd_dae_la_DEPENDENCIES = libwd.la + +libhisi_sec_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) +libhisi_sec_la_DEPENDENCIES = libwd.la libwd_crypto.la + +libhisi_hpre_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) +libhisi_hpre_la_DEPENDENCIES = libwd.la 
libwd_crypto.la + +libisa_ce_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) +libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la + +libisa_sve_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) +libisa_sve_la_DEPENDENCIES = libwd.la libwd_crypto.la + +libhisi_udma_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_udma_la_OBJECTS) +libhisi_udma_la_DEPENDENCIES = libwd.la libwd_udma.la + +libhisi_dae_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_dae_la_OBJECTS) +libhisi_dae_la_DEPENDENCIES = libwd.la libwd_dae.la + +else +UADK_WD_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd.map +UADK_CRYPTO_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_crypto.map +UADK_DAE_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_dae.map +UADK_COMP_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_comp.map +UADK_V1_SYMBOL= -Wl,--version-script,$(top_srcdir)/v1/libwd.map + +libwd_la_LDFLAGS=$(UADK_VERSION) $(UADK_WD_SYMBOL) $(UADK_V1_SYMBOL) +libwd_la_LIBADD= -lnuma -lrt + +libwd_comp_la_LIBADD= -lwd -ldl -lpthread -lnuma +libwd_comp_la_LDFLAGS=$(UADK_VERSION) $(UADK_COMP_SYMBOL) +libwd_comp_la_DEPENDENCIES= libwd.la + +libwd_crypto_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread +libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) +libwd_crypto_la_DEPENDENCIES= libwd.la + +libwd_udma_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread +libwd_udma_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) +libwd_udma_la_DEPENDENCIES= libwd.la + +libwd_dae_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread +libwd_dae_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) +libwd_dae_la_DEPENDENCIES= libwd.la + +libhisi_zip_la_LIBADD= -lwd -ldl -lwd_comp +libhisi_zip_la_LDFLAGS=$(UADK_VERSION) +libhisi_zip_la_DEPENDENCIES= libwd.la libwd_comp.la + +libhisi_sec_la_LIBADD= -lwd -lwd_crypto +libhisi_sec_la_LDFLAGS=$(UADK_VERSION) +libhisi_sec_la_DEPENDENCIES= libwd.la libwd_crypto.la + +libhisi_hpre_la_LIBADD= -lwd -lwd_crypto +libhisi_hpre_la_LDFLAGS=$(UADK_VERSION) +libhisi_hpre_la_DEPENDENCIES= libwd.la libwd_crypto.la + +libisa_ce_la_LIBADD= -lwd -lwd_crypto +libisa_ce_la_LDFLAGS=$(UADK_VERSION) +libisa_ce_la_DEPENDENCIES= libwd.la libwd_crypto.la + +libisa_sve_la_LIBADD= -lwd -lwd_crypto +libisa_sve_la_LDFLAGS=$(UADK_VERSION) +libisa_sve_la_DEPENDENCIES= libwd.la libwd_crypto.la + +libhisi_udma_la_LIBADD= -lwd -lwd_udma +libhisi_udma_la_LDFLAGS=$(UADK_VERSION) +libhisi_udma_la_DEPENDENCIES= libwd.la libwd_udma.la + +libhisi_dae_la_LIBADD= -lwd -lwd_dae +libhisi_dae_la_LDFLAGS=$(UADK_VERSION) +libhisi_dae_la_DEPENDENCIES= libwd.la libwd_dae.la + +endif # WD_STATIC_DRV + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = lib/libwd_crypto.pc lib/libwd_comp.pc lib/libwd.pc +CLEANFILES += $(pkgconfig_DATA) + +SUBDIRS=. 
test v1/test uadk_tool sample diff --git a/Makefile.am b/Makefile.am index 89f949a..fc14529 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,7 @@ ACLOCAL_AMFLAGS = -I m4 -I./include AUTOMAKE_OPTIONS = foreign subdir-objects + +# Compiler flags for strict code quality and security AM_CFLAGS=-std=gnu11 -Wall -Werror -Wextra -Wno-unused-parameter -Wfloat-equal \ -fno-common -fno-strict-aliasing -I$(top_srcdir)/include AM_CFLAGS+=-fPIC -fPIE -pie -fstack-protector-strong \ @@ -15,100 +17,56 @@ AM_CFLAGS += -Wall -Wuninitialized -Wno-error -Wno-error=format -Wundef \ CLEANFILES = +# Log file configuration if WITH_LOG_FILE AM_CFLAGS+=-DWITH_LOG_FILE=\"$(with_log_file)\" endif # WITH_LOG_FILE -# libtool version is {current}:{revision}:{age} with -version-info -# libNAME.so.{x}.{y}.{z} -# But {current}:{revision}:{age} doesn't equal to {x}.{y}.{z} -# x = current - age -# y = age -# z = revision -# -# libtool version is {major}:{minor}:{revision} with -version-number -# libNAME.so.{x}.{y}.{z} -# x = major -# y = minor -# z = revision +# Library version configuration +# libtool version format: {major}:{minor}:{revision} MAJOR = 2 MINOR = 8 REVISION = 0 UADK_VERSION = -version-number ${MAJOR}:${MINOR}:${REVISION} +# Release information DAY = 1 MONTH = Dec YEAR = 2024 AM_CFLAGS+= -DUADK_VERSION_NUMBER="\"UADK version: ${MAJOR}.${MINOR}.${REVISION}\"" AM_CFLAGS+= -DUADK_RELEASED_TIME="\"Released ${MONTH} ${DAY}, ${YEAR}\"" -pkginclude_HEADERS = include/wd.h include/wd_internal.h include/wd_cipher.h include/wd_aead.h \ - include/wd_comp.h include/wd_dh.h include/wd_digest.h \ - include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ - include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ - include/wd_zlibwrapper.h include/wd_dae.h include/wd_agg.h \ - include/wd_udma.h include/wd_join_gather.h \ - include/wd_bmm.h +# Header files for cipher module only +pkginclude_HEADERS = include/wd.h include/wd_internal.h include/wd_cipher.h \ + include/wd_aead.h include/uacce.h include/wd_alg_common.h \ + include/wd_sched.h include/wd_alg.h -nobase_pkginclude_HEADERS=v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ - v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h v1/wd_ecc.h v1/wd_comp.h +nobase_pkginclude_HEADERS=v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h -lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libwd_dae.la libwd_udma.la +# Library targets - only cipher-related libraries +lib_LTLIBRARIES=libwd.la libwd_crypto.la +# Cipher-specific drivers only uadk_driversdir=$(libdir)/uadk -uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ - libisa_ce.la libisa_sve.la libhisi_dae.la libhisi_udma.la +uadk_drivers_LTLIBRARIES=libhisi_sec.la libisa_ce.la libisa_sve.la +# Core framework library (libwd) - minimal sources for cipher support libwd_la_SOURCES=wd.c wd_mempool.c wd_bmm.c wd_bmm.h wd.h wd_alg.c wd_alg.h \ - v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ - v1/wd_rsa.c v1/wd_rsa.h \ - v1/wd_aead.c v1/wd_aead.h \ - v1/wd_dh.c v1/wd_dh.h \ - v1/wd_comp.c v1/wd_comp.h \ - v1/wd_cipher.c v1/wd_cipher.h \ - v1/wd_digest.c v1/wd_digest.h \ - v1/wd_util.c v1/wd_util.h \ - v1/wd_bmm.c v1/wd_bmm.h \ - v1/wd_ecc.c v1/wd_ecc.h \ - v1/wd_sgl.c v1/wd_sgl.h \ aes.h sm4.h galois.h \ - lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ - v1/drv/hisi_qm_udrv.c v1/drv/hisi_qm_udrv.h \ - v1/drv/hisi_zip_udrv.c v1/drv/hisi_zip_udrv.h \ - v1/drv/hisi_zip_huf.c v1/drv/hisi_zip_huf.h \ - v1/drv/hisi_hpre_udrv.c v1/drv/hisi_hpre_udrv.h \ - v1/drv/hisi_sec_udrv.c v1/drv/hisi_sec_udrv.h - 
-libwd_udma_la_SOURCES=wd_udma.h wd_udma_drv.h wd_udma.c \ - wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h - -libwd_dae_la_SOURCES=wd_dae.h wd_agg.h wd_agg_drv.h wd_agg.c wd_join_gather.h wd_join_gather_drv.h wd_join_gather.c \ - wd_util.c wd_util.h wd_sched.c wd_sched.h wd.c wd.h - -libwd_comp_la_SOURCES=wd_comp.c wd_comp.h wd_comp_drv.h wd_util.c wd_util.h \ - wd_sched.c wd_sched.h wd.c wd.h wd_zlibwrapper.c - -libhisi_zip_la_SOURCES=drv/hisi_comp.c hisi_comp.h drv/hisi_qm_udrv.c \ - drv/hisi_comp_huf.c drv/hisi_comp_huf.h \ - hisi_qm_udrv.h wd_comp_drv.h + lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c +# Crypto library with cipher and aead support only libwd_crypto_la_SOURCES=wd_cipher.c wd_cipher.h wd_cipher_drv.h \ wd_aead.c wd_aead.h wd_aead_drv.h \ - wd_rsa.c wd_rsa.h wd_rsa_drv.h \ - wd_dh.c wd_dh.h wd_dh_drv.h \ - wd_ecc.c wd_ecc.h wd_ecc_drv.h \ - wd_digest.c wd_digest.h wd_digest_drv.h \ wd_util.c wd_util.h \ wd_sched.c wd_sched.h \ wd.c wd.h +# Cipher driver for hisi_sec hardware libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ hisi_qm_udrv.h wd_cipher_drv.h wd_aead_drv.h aes.h sm4.h galois.h -libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h - libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \ drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h wd_util.c wd_util.h @@ -118,88 +76,41 @@ libisa_sve_la_SOURCES=drv/hash_mb/hash_mb.c wd_digest_drv.h drv/hash_mb/hash_mb. drv/hash_mb/md5_sve_common.S drv/hash_mb/md5_mb_asimd_x1.S \ drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S -libhisi_dae_la_SOURCES=drv/hisi_dae.c hisi_dae.h drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h drv/hisi_dae_join_gather.c drv/hisi_dae_common.c - -libhisi_udma_la_SOURCES=drv/hisi_udma.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h - +# Static driver build configuration if WD_STATIC_DRV AM_CFLAGS += -DWD_STATIC_DRV -fPIC AM_CFLAGS += -DWD_NO_LOG libwd_la_LIBADD = $(libwd_la_OBJECTS) -lnuma -lpthread -libwd_comp_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lpthread -lnuma -libwd_comp_la_DEPENDENCIES = libwd.la - -libhisi_zip_la_LIBADD = -ldl -lpthread - libwd_crypto_la_LIBADD = -lwd -ldl -lnuma -lm -lpthread libwd_crypto_la_DEPENDENCIES = libwd.la -libwd_udma_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread -libwd_udma_la_DEPENDENCIES = libwd.la - -libwd_dae_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma -lm -lpthread -libwd_dae_la_DEPENDENCIES = libwd.la - libhisi_sec_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) libhisi_sec_la_DEPENDENCIES = libwd.la libwd_crypto.la -libhisi_hpre_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) -libhisi_hpre_la_DEPENDENCIES = libwd.la libwd_crypto.la - libisa_ce_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la libisa_sve_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) libisa_sve_la_DEPENDENCIES = libwd.la libwd_crypto.la - -libhisi_udma_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_udma_la_OBJECTS) -libhisi_udma_la_DEPENDENCIES = libwd.la libwd_udma.la - -libhisi_dae_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_dae_la_OBJECTS) -libhisi_dae_la_DEPENDENCIES = libwd.la libwd_dae.la - else +# Dynamic driver build configuration UADK_WD_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd.map UADK_CRYPTO_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_crypto.map -UADK_DAE_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_dae.map -UADK_COMP_SYMBOL= 
-Wl,--version-script,$(top_srcdir)/libwd_comp.map UADK_V1_SYMBOL= -Wl,--version-script,$(top_srcdir)/v1/libwd.map libwd_la_LDFLAGS=$(UADK_VERSION) $(UADK_WD_SYMBOL) $(UADK_V1_SYMBOL) libwd_la_LIBADD= -lnuma -lrt -libwd_comp_la_LIBADD= -lwd -ldl -lpthread -lnuma -libwd_comp_la_LDFLAGS=$(UADK_VERSION) $(UADK_COMP_SYMBOL) -libwd_comp_la_DEPENDENCIES= libwd.la - libwd_crypto_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) libwd_crypto_la_DEPENDENCIES= libwd.la -libwd_udma_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread -libwd_udma_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) -libwd_udma_la_DEPENDENCIES= libwd.la - -libwd_dae_la_LIBADD= -lwd -ldl -lnuma -lm -lpthread -libwd_dae_la_LDFLAGS=$(UADK_VERSION) $(UADK_DAE_SYMBOL) -libwd_dae_la_DEPENDENCIES= libwd.la - -libhisi_zip_la_LIBADD= -lwd -ldl -lwd_comp -libhisi_zip_la_LDFLAGS=$(UADK_VERSION) -libhisi_zip_la_DEPENDENCIES= libwd.la libwd_comp.la - libhisi_sec_la_LIBADD= -lwd -lwd_crypto libhisi_sec_la_LDFLAGS=$(UADK_VERSION) libhisi_sec_la_DEPENDENCIES= libwd.la libwd_crypto.la -libhisi_hpre_la_LIBADD= -lwd -lwd_crypto -libhisi_hpre_la_LDFLAGS=$(UADK_VERSION) -libhisi_hpre_la_DEPENDENCIES= libwd.la libwd_crypto.la - libisa_ce_la_LIBADD= -lwd -lwd_crypto libisa_ce_la_LDFLAGS=$(UADK_VERSION) libisa_ce_la_DEPENDENCIES= libwd.la libwd_crypto.la @@ -208,18 +119,12 @@ libisa_sve_la_LIBADD= -lwd -lwd_crypto libisa_sve_la_LDFLAGS=$(UADK_VERSION) libisa_sve_la_DEPENDENCIES= libwd.la libwd_crypto.la -libhisi_udma_la_LIBADD= -lwd -lwd_udma -libhisi_udma_la_LDFLAGS=$(UADK_VERSION) -libhisi_udma_la_DEPENDENCIES= libwd.la libwd_udma.la - -libhisi_dae_la_LIBADD= -lwd -lwd_dae -libhisi_dae_la_LDFLAGS=$(UADK_VERSION) -libhisi_dae_la_DEPENDENCIES= libwd.la libwd_dae.la - endif # WD_STATIC_DRV +# Package configuration files pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = lib/libwd_crypto.pc lib/libwd_comp.pc lib/libwd.pc +pkgconfig_DATA = lib/libwd_crypto.pc lib/libwd.pc CLEANFILES += $(pkgconfig_DATA) -SUBDIRS=. test v1/test uadk_tool sample +# Subdirectories - only include cipher-related tests and samples +SUBDIRS=. 
uadk_tool diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index ad209c4..0e17eb9 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -2022,6 +2022,7 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); +#if 0 temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2029,6 +2030,7 @@ static void parse_digest_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } +#endif recv_msg->in = temp_msg->in; recv_msg->mm_ops = temp_msg->mm_ops; } else { @@ -2569,6 +2571,7 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_DIGEST; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); +#if 0 temp_msg = wd_digest_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -2576,6 +2579,7 @@ static void parse_digest_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } +#endif recv_msg->in = temp_msg->in; recv_msg->mm_ops = temp_msg->mm_ops; } else { @@ -3326,6 +3330,7 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v2(sqe->sds_sa_type); +#if 0 temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -3333,6 +3338,7 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, qp->q_info.idx, recv_msg->tag); return; } +#endif recv_msg->in = temp_msg->in; recv_msg->out = temp_msg->out; recv_msg->mm_ops = temp_msg->mm_ops; @@ -3842,6 +3848,7 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, if (qp->q_info.qp_mode == CTX_MODE_ASYNC) { recv_msg->alg_type = WD_AEAD; recv_msg->data_fmt = get_data_fmt_v3(sqe->bd_param); +#if 0 temp_msg = wd_aead_get_msg(qp->q_info.idx, recv_msg->tag); if (!temp_msg) { recv_msg->result = WD_IN_EPARA; @@ -3849,6 +3856,7 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, qp->q_info.idx, recv_msg->tag); return; } +#endif recv_msg->in = temp_msg->in; recv_msg->out = temp_msg->out; recv_msg->mm_ops = temp_msg->mm_ops; diff --git a/uadk_tool/BK_Makefile b/uadk_tool/BK_Makefile new file mode 100644 index 0000000..b0e50eb --- /dev/null +++ b/uadk_tool/BK_Makefile @@ -0,0 +1,63 @@ +ACLOCAL_AMFLAGS = -I m4 -I./include +AUTOMAKE_OPTIONS = foreign subdir-objects +AM_CFLAGS=-Wall -Werror -fno-strict-aliasing -I$(top_srcdir) -I$(top_srcdir)/benchmark/include \ + -pthread +AM_CFLAGS += -fPIC -fPIE -pie -fstack-protector-strong \ +-g + +#AUTOMAKE_OPTIONS = subdir-objects + +bin_PROGRAMS=uadk_tool + +uadk_tool_SOURCES=uadk_tool.c dfx/uadk_dfx.c dfx/uadk_dfx.h \ + benchmark/uadk_benchmark.c benchmark/uadk_benchmark.h \ + benchmark/sec_uadk_benchmark.c benchmark/sec_uadk_benchmark.h \ + benchmark/sec_wd_benchmark.c benchmark/sec_wd_benchmark.h \ + benchmark/hpre_uadk_benchmark.c benchmark/hpre_uadk_benchmark.h \ + benchmark/hpre_wd_benchmark.c hpre_wd_benchmark.h \ + benchmark/zip_uadk_benchmark.c benchmark/zip_uadk_benchmark.h \ + benchmark/zip_wd_benchmark.c benchmark/zip_wd_benchmark.h \ + test/uadk_test.c test/uadk_test.h \ + test/test_sec.c test/test_sec.h test/sec_template_tv.h + +if WD_STATIC_DRV +AM_CFLAGS+=-Bstatic 
+uadk_tool_LDADD=$(libwd_la_OBJECTS) \ + $(libwd_crypto_la_OBJECTS) \ + ../.libs/libwd_comp.a \ + ../.libs/libhisi_sec.a \ + ../.libs/libhisi_hpre.a \ + ../.libs/libhisi_zip.a \ + ../.libs/libisa_ce.a \ + -ldl -lnuma +else +uadk_tool_LDADD=-L../.libs -l:libwd.so.2 -l:libwd_crypto.so.2 \ + -l:libwd_comp.so.2 -lnuma +endif + +# For statistics +uadk_tool_LDADD+=-lm + +if HAVE_ZLIB +uadk_tool_LDADD+=-lz +uadk_tool_CPPFLAGS=-DUSE_ZLIB +endif + +if WITH_ZLIB_FSE_DIR +AM_CFLAGS += -DZLIB_FSE +uadk_tool_LDADD+= $(with_zlib_fse_dir)/libfse.a +endif + +if WITH_OPENSSL_DIR +AM_CFLAGS+= -DWITH_OPENSSL_DIR -I$(with_openssl_dir)/include + +uadk_tool_SOURCES+=benchmark/sec_soft_benchmark.c benchmark/sec_soft_benchmark.h \ + test/comp_main.c test/comp_main.h test/comp_lib.c test/comp_lib.h + +if WD_STATIC_DRV +uadk_tool_LDADD+= $(with_openssl_dir)/libcrypto.a +else +uadk_tool_LDADD+= $(with_openssl_dir)/libcrypto.so.1.1 +endif + +endif diff --git a/uadk_tool/Makefile.am b/uadk_tool/Makefile.am index b0e50eb..7b1c1ac 100644 --- a/uadk_tool/Makefile.am +++ b/uadk_tool/Makefile.am @@ -9,30 +9,20 @@ AM_CFLAGS += -fPIC -fPIE -pie -fstack-protector-strong \ bin_PROGRAMS=uadk_tool -uadk_tool_SOURCES=uadk_tool.c dfx/uadk_dfx.c dfx/uadk_dfx.h \ +# Only keep cipher-related test sources +uadk_tool_SOURCES=uadk_tool.c \ benchmark/uadk_benchmark.c benchmark/uadk_benchmark.h \ - benchmark/sec_uadk_benchmark.c benchmark/sec_uadk_benchmark.h \ - benchmark/sec_wd_benchmark.c benchmark/sec_wd_benchmark.h \ - benchmark/hpre_uadk_benchmark.c benchmark/hpre_uadk_benchmark.h \ - benchmark/hpre_wd_benchmark.c hpre_wd_benchmark.h \ - benchmark/zip_uadk_benchmark.c benchmark/zip_uadk_benchmark.h \ - benchmark/zip_wd_benchmark.c benchmark/zip_wd_benchmark.h \ - test/uadk_test.c test/uadk_test.h \ - test/test_sec.c test/test_sec.h test/sec_template_tv.h + benchmark/sec_uadk_benchmark.c benchmark/sec_uadk_benchmark.h if WD_STATIC_DRV AM_CFLAGS+=-Bstatic uadk_tool_LDADD=$(libwd_la_OBJECTS) \ $(libwd_crypto_la_OBJECTS) \ - ../.libs/libwd_comp.a \ ../.libs/libhisi_sec.a \ - ../.libs/libhisi_hpre.a \ - ../.libs/libhisi_zip.a \ - ../.libs/libisa_ce.a \ -ldl -lnuma else uadk_tool_LDADD=-L../.libs -l:libwd.so.2 -l:libwd_crypto.so.2 \ - -l:libwd_comp.so.2 -lnuma + -lnuma endif # For statistics @@ -43,16 +33,11 @@ uadk_tool_LDADD+=-lz uadk_tool_CPPFLAGS=-DUSE_ZLIB endif -if WITH_ZLIB_FSE_DIR -AM_CFLAGS += -DZLIB_FSE -uadk_tool_LDADD+= $(with_zlib_fse_dir)/libfse.a -endif - if WITH_OPENSSL_DIR AM_CFLAGS+= -DWITH_OPENSSL_DIR -I$(with_openssl_dir)/include -uadk_tool_SOURCES+=benchmark/sec_soft_benchmark.c benchmark/sec_soft_benchmark.h \ - test/comp_main.c test/comp_main.h test/comp_lib.c test/comp_lib.h +# Only keep cipher-related soft benchmark +uadk_tool_SOURCES+=benchmark/sec_soft_benchmark.c benchmark/sec_soft_benchmark.h if WD_STATIC_DRV uadk_tool_LDADD+= $(with_openssl_dir)/libcrypto.a diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c index e235058..420110e 100644 --- a/uadk_tool/benchmark/sec_uadk_benchmark.c +++ b/uadk_tool/benchmark/sec_uadk_benchmark.c @@ -5,8 +5,6 @@ #include "sec_uadk_benchmark.h" #include "include/wd_cipher.h" -#include "include/wd_digest.h" -#include "include/wd_aead.h" #include "include/wd_sched.h" #include "include/wd_bmm.h" @@ -70,7 +68,6 @@ static unsigned int g_pktlen; static unsigned int g_alg; static unsigned int g_algtype; static unsigned int g_optype; -static unsigned int g_maclen; static unsigned int g_dev_id; static unsigned int g_data_fmt; @@ -120,55 
+117,16 @@ struct aead_alg_info aead_info[] = { }, }; -static u32 get_aead_mac_len(int algtype) -{ - int table_size = sizeof(aead_info) / sizeof(aead_info[0]); - int i; - - for (i = 0; i < table_size; i++) { - if (algtype == aead_info[i].index) - return aead_info[i].mac_len; - } - - SEC_TST_PRT("failed to get the aead mac len\n"); - - return -1; -} - -static char *get_aead_alg_name(int algtype) -{ - int table_size = sizeof(aead_info) / sizeof(aead_info[0]); - int i; - - for (i = 0; i < table_size; i++) { - if (algtype == aead_info[i].index) - return aead_info[i].name; - } - - SEC_TST_PRT("failed to get the aead alg name\n"); - - return NULL; -} static void *cipher_async_cb(struct wd_cipher_req *req, void *data) { return NULL; } -static void *aead_async_cb(struct wd_aead_req *req, void *data) -{ - return NULL; -} - -static void *digest_async_cb(void *data) -{ - return NULL; -} static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) { u32 algtype = options->algtype; - u32 optype = options->optype; bool is_union = false; u32 out_bytes = 32; u8 keysize = 0; @@ -455,51 +413,6 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) mode = WD_CIPHER_GCM; alg = WD_CIPHER_AES; break; - case AES_128_CBC_SHA256_HMAC: - keysize = 16; - ivsize = 16; - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_AES; - is_union = true; - dalg = WD_DIGEST_SHA256; - dmode = WD_DIGEST_HMAC; - break; - case AES_192_CBC_SHA256_HMAC: - keysize = 24; - ivsize = 16; - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_AES; - is_union = true; - dalg = WD_DIGEST_SHA256; - dmode = WD_DIGEST_HMAC; - break; - case AES_256_CBC_SHA256_HMAC: - keysize = 32; - ivsize = 16; - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_AES; - is_union = true; - dalg = WD_DIGEST_SHA256; - dmode = WD_DIGEST_HMAC; - break; - case AES_128_CBC_SHA1_HMAC: - keysize = 16; - ivsize = 16; - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_AES; - is_union = true; - dalg = WD_DIGEST_SHA1; - dmode = WD_DIGEST_HMAC; - break; - case SM4_CBC_SM3_HMAC: - keysize = 16; - ivsize = 16; - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_SM4; - is_union = true; - dalg = WD_DIGEST_SM3; - dmode = WD_DIGEST_HMAC; - break; case SM4_128_CCM: keysize = 16; ivsize = 16; @@ -512,60 +425,6 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) mode = WD_CIPHER_GCM; alg = WD_CIPHER_SM4; break; - case SM3_ALG: // digest mode is optype - keysize = 4; - mode = optype; - out_bytes = 32; - alg = WD_DIGEST_SM3; - break; - case MD5_ALG: - keysize = 4; - out_bytes = 16; - mode = optype; - alg = WD_DIGEST_MD5; - break; - case SHA1_ALG: - keysize = 4; - out_bytes = 20; - mode = optype; - alg = WD_DIGEST_SHA1; - break; - case SHA256_ALG: - keysize = 4; - out_bytes = 32; - mode = optype; - alg = WD_DIGEST_SHA256; - break; - case SHA224_ALG: - keysize = 4; - out_bytes = 28; - mode = optype; - alg = WD_DIGEST_SHA224; - break; - case SHA384_ALG: - keysize = 4; - out_bytes = 48; - mode = optype; - alg = WD_DIGEST_SHA384; - break; - case SHA512_ALG: - keysize = 4; - out_bytes = 64; - mode = optype; - alg = WD_DIGEST_SHA512; - break; - case SHA512_224: - keysize = 4; - out_bytes = 28; - mode = optype; - alg = WD_DIGEST_SHA512_224; - break; - case SHA512_256: - keysize = 4; - out_bytes = 32; - mode = optype; - alg = WD_DIGEST_SHA512_256; - break; default: SEC_TST_PRT("failed to set sec alg\n"); return -EINVAL; @@ -737,22 +596,6 @@ static int init_ctx_config(struct acc_option *options) g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, max_node, 
wd_cipher_poll_ctx); break; - case AEAD_TYPE: - if (options->mem_type == UADK_AUTO) - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, - max_node, wd_aead_poll_ctx); - else - g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, - max_node, wd_aead_poll_ctx); - break; - case DIGEST_TYPE: - if (options->mem_type == UADK_AUTO) - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, SEC_OP_TYPE_MAX, - max_node, wd_digest_poll_ctx); - else - g_sched = wd_sched_rr_alloc(SCHED_POLICY_DEV, SEC_OP_TYPE_MAX, - max_node, wd_digest_poll_ctx); - break; default: SEC_TST_PRT("failed to parse alg subtype!\n"); goto free_ctx; @@ -780,12 +623,6 @@ static int init_ctx_config(struct acc_option *options) case CIPHER_TYPE: ret = wd_cipher_init(&g_ctx_cfg, g_sched); break; - case AEAD_TYPE: - ret = wd_aead_init(&g_ctx_cfg, g_sched); - break; - case DIGEST_TYPE: - ret = wd_digest_init(&g_ctx_cfg, g_sched); - break; } if (ret) { SEC_TST_PRT("failed to init sec ctx!\n"); @@ -816,12 +653,6 @@ static void uninit_ctx_config(int subtype) case CIPHER_TYPE: wd_cipher_uninit(); break; - case AEAD_TYPE: - wd_aead_uninit(); - break; - case DIGEST_TYPE: - wd_digest_uninit(); - break; default: SEC_TST_PRT("failed to parse alg subtype on uninit!\n"); return; @@ -841,13 +672,6 @@ static void uninit_ctx_config2(int subtype) case CIPHER_INSTR_TYPE: wd_cipher_uninit2(); break; - case AEAD_TYPE: - wd_aead_uninit2(); - break; - case DIGEST_TYPE: - case DIGEST_INSTR_TYPE: - wd_digest_uninit2(); - break; default: SEC_TST_PRT("failed to parse alg subtype on uninit2!\n"); return; @@ -912,29 +736,6 @@ static int init_ctx_config2(struct acc_option *options) if (ret) SEC_TST_PRT("failed to do cipher intruction init2!\n"); break; - case AEAD_TYPE: - if (options->mem_type == UADK_AUTO) - ret = wd_aead_init2_(alg_name, SCHED_POLICY_RR, TASK_HW, &ctx_params); - else - ret = wd_aead_init2_(alg_name, SCHED_POLICY_DEV, TASK_HW, &ctx_params); - if (ret) - SEC_TST_PRT("failed to do aead init2!\n"); - break; - case DIGEST_TYPE: - if (options->mem_type == UADK_AUTO) { - ctx_params.op_type_num = 1; - ret = wd_digest_init2_(alg_name, SCHED_POLICY_RR, options->task_type, &ctx_params); - } else - ret = wd_digest_init2_(alg_name, SCHED_POLICY_DEV, options->task_type, &ctx_params); - if (ret) - SEC_TST_PRT("failed to do digest init2!\n"); - break; - case DIGEST_INSTR_TYPE: - ctx_params.op_type_num = 1; - ret = wd_digest_init2_(alg_name, SCHED_POLICY_INSTR, TASK_INSTR, &ctx_params); - if (ret) - SEC_TST_PRT("failed to do digest instruction init2!\n"); - break; } if (ret) { SEC_TST_PRT("failed to do cipher init2!\n"); @@ -944,95 +745,6 @@ static int init_ctx_config2(struct acc_option *options) return ret; } -static void get_aead_data(u8 *addr, u32 size) -{ - memset(addr, 0, size); - memcpy(addr, aead_aad, SEC_AEAD_LEN); -} - -static void save_aead_dst_data(u8 *addr, u32 size) -{ - char file_name[SEC_SAVE_FILE_LEN] = {0}; - char *alg_name; - FILE *fp; - - alg_name = get_aead_alg_name(g_algtype); - if (!alg_name) { - SEC_TST_PRT("failed to get the aead alg name!\n"); - return; - } - - snprintf(file_name, SEC_SAVE_FILE_LEN, "ctext_%s_%u", alg_name, g_pktlen); - - fp = fopen(file_name, "w"); - if (!fp) { - SEC_TST_PRT("failed to open the ctext file!\n"); - return; - } - - memcpy(addr + size, g_uadk_pool.pool[0].bds[0].mac, SEC_PERF_AUTH_SIZE); - - for (int i = 0; i < size + SEC_PERF_AUTH_SIZE; i++) - fputc((char)addr[i], fp); - - fclose(fp); -} - -static void read_aead_dst_data(u8 *addr, u32 len) -{ - char file_name[SEC_SAVE_FILE_LEN] = {0}; - 
struct wd_datalist *current; - size_t bytes_to_read = 0; - size_t bytes_read = 0; - char *alg_name; - FILE *fp; - int size; - - alg_name = get_aead_alg_name(g_algtype); - if (!alg_name) { - SEC_TST_PRT("failed to get the aead alg name!\n"); - return; - } - - snprintf(file_name, SEC_SAVE_FILE_LEN, "ctext_%s_%u", alg_name, g_pktlen); - - fp = fopen(file_name, "r"); - if (!fp) { - SEC_TST_PRT("failed to open the ctext file!\n"); - return; - } - - fseek(fp, 0, SEEK_END); - size = ftell(fp); - - rewind(fp); - - if (!g_data_fmt) { - size = fread(addr, 1, size, fp); - addr[size] = '\0'; - - memcpy(g_save_mac, (char *)addr + len, SEC_MAX_MAC_LEN); - } else { - current = (struct wd_datalist *)addr; - while (current && size > 0) { - bytes_to_read = current->len; - if (bytes_to_read > size) - bytes_to_read = size; - bytes_read = fread(current->data, 1, bytes_to_read, fp); - - if (bytes_read != bytes_to_read) { - SEC_TST_PRT("partial read: expected %zu, got %zu\n", bytes_to_read, bytes_read); - fclose(fp); - return; - } - size -= bytes_read; - current = current->next; - } - } - - fclose(fp); -} - static int init_ivkey_source(void) { int i, j, m, idx; @@ -1129,12 +841,8 @@ static void *create_buf(int sgl, size_t sz, size_t unit_sz) if (sgl == WD_FLAT_BUF) return buf; - if (g_alg != AEAD_TYPE) { + if (g_alg != AEAD_TYPE) get_rand_data(buf, g_pktlen); - } else { - if (!g_optype) - get_aead_data(buf, g_pktlen + SEC_AEAD_LEN); - } tail_sz = sz % unit_sz; sgl_num = sz / unit_sz; /* the number with unit_sz bytes */ @@ -1250,22 +958,9 @@ static int init_uadk_bd_pool(void) if (g_prefetch) get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); - } else { - if (!g_optype) - get_aead_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); - else { - read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); - memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); - } } } else if (g_data_fmt == WD_SGL_BUF) { - if (g_optype && g_alg == AEAD_TYPE) { - read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, - g_pktlen + SEC_AEAD_LEN); - memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); - } else if (g_prefetch && g_alg == DIGEST_TYPE) { + if (g_prefetch && g_alg == DIGEST_TYPE) { get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); } } @@ -1312,11 +1007,6 @@ static void free_uadk_bd_pool(void) { int i, j; - /* save aad + ctext + mac */ - if (g_alg == AEAD_TYPE && !g_optype) - save_aead_dst_data(g_uadk_pool.pool[0].bds[0].dst, - g_pktlen + SEC_AEAD_LEN); - for (i = 0; i < g_thread_num; i++) { if (g_uadk_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH; j++) { @@ -1557,13 +1247,6 @@ static int init_uadk_rsv_pool(struct acc_option *option) get_rand_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen); if (g_prefetch) get_rand_data(g_uadk_pool.pool[i].bds[j].dst, g_pktlen); - } else { - if (!g_optype) - get_aead_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); - else { - read_aead_dst_data(g_uadk_pool.pool[i].bds[j].src, g_pktlen + SEC_AEAD_LEN); - memcpy(g_uadk_pool.pool[i].bds[j].mac, g_save_mac, SEC_MAX_MAC_LEN); - } } } } @@ -1622,11 +1305,6 @@ static void free_uadk_rsv_pool(struct acc_option *option) handle_t h_ctx = (handle_t)g_ctx_cfg.priv; int i, j; - /* save aad + ctext + mac */ - if (g_alg == AEAD_TYPE && !g_optype) - save_aead_dst_data(g_uadk_pool.pool[0].bds[0].dst, - g_pktlen + SEC_AEAD_LEN); - for (i = 0; i < g_thread_num; i++) { if (g_uadk_pool.pool[i].bds) { for (j = 0; j < MAX_POOL_LENTH; j++) { @@ -1674,12 +1352,6 @@ static void 
*sec_uadk_poll(void *data) case CIPHER_TYPE: uadk_poll_ctx = wd_cipher_poll_ctx; break; - case AEAD_TYPE: - uadk_poll_ctx = wd_aead_poll_ctx; - break; - case DIGEST_TYPE: - uadk_poll_ctx = wd_digest_poll_ctx; - break; default: SEC_TST_PRT("<<<<<<async poll interface is NULL!\n"); return NULL; @@ -1719,18 +1391,9 @@ static void *sec_uadk_poll2(void *data) case CIPHER_TYPE: uadk_poll_policy = wd_cipher_poll; break; - case AEAD_TYPE: - uadk_poll_policy = wd_aead_poll; - break; - case DIGEST_TYPE: - uadk_poll_policy = wd_digest_poll; - break; case CIPHER_INSTR_TYPE: uadk_poll_policy = wd_cipher_poll; break; - case DIGEST_INSTR_TYPE: - uadk_poll_policy = wd_digest_poll; - break; default: SEC_TST_PRT("<<<<<<async poll interface is NULL!\n"); return NULL; @@ -1858,247 +1521,6 @@ static void *sec_uadk_cipher_async(void *arg) return NULL; } -static void *sec_uadk_aead_async(void *arg) -{ - thread_data *pdata = (thread_data *)arg; - struct wd_aead_sess_setup aead_setup = {0}; - struct sched_params sc_param = {0}; - u8 *priv_iv, *priv_key, *priv_hash; - u32 auth_size = SEC_PERF_AUTH_SIZE; - struct wd_aead_req areq = {0}; - struct bd_pool *uadk_pool; - int try_cnt = 0; - handle_t h_sess; - u32 count = 0; - int ret, i; - - if (pdata->td_id > g_thread_num) - return NULL; - - uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; - priv_key = g_uadk_pool.key[pdata->td_id]; - priv_hash = g_uadk_pool.hash[pdata->td_id]; - - memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); - memset(priv_key, DEF_IVK_DATA, MAX_IVK_LENTH); - - aead_setup.calg = pdata->alg; - aead_setup.cmode = pdata->mode; - aead_setup.mm_type = pdata->mm_type; - sc_param.numa_id = 0; - sc_param.type = 0; - sc_param.mode = 0; // sync mode - if (g_uadk_pool.rsv_pool) - sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); - aead_setup.sched_param = (void *)&sc_param; - - aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; - aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; - aead_setup.mm_ops.free = (void *)wd_mem_free; - aead_setup.mm_ops.iova_map = (void *)wd_mem_map; - aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; - aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; - - if (pdata->is_union) { - aead_setup.dalg = pdata->dalg; - aead_setup.dmode = pdata->dmode; - } - h_sess = wd_aead_alloc_sess(&aead_setup); - if (!h_sess) - return NULL; - ret = wd_aead_set_ckey(h_sess, (const __u8*)priv_key, pdata->keysize); - if (ret) { - SEC_TST_PRT("test sec cipher set key is failed!\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - if (pdata->is_union) { - ret = wd_aead_set_akey(h_sess, (const __u8*)priv_hash, HASH_ZISE); - if (ret) { - SEC_TST_PRT("test sec aead set akey is failed!\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - } - ret = wd_aead_set_authsize(h_sess, auth_size); - if (ret) { - SEC_TST_PRT("set auth size fail, authsize: 16\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - - areq.op_type = pdata->optype; - areq.iv = priv_iv; // aead IV need update with param - areq.mac = uadk_pool->bds[0].mac; - areq.iv_bytes = pdata->ivsize; - areq.mac_bytes = auth_size; - areq.assoc_bytes = SEC_AEAD_LEN; - areq.in_bytes = g_pktlen; - areq.msg_state = 0; - if (pdata->is_union) - areq.mac_bytes = 32; - if (areq.op_type) // decrypto - areq.out_bytes = g_pktlen + 16; // aadsize = 16; - else - areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - - areq.data_fmt = g_data_fmt; - areq.state = 0; - areq.cb = aead_async_cb; - - while(1) { - if (get_run_state() == 0) - break; - try_cnt = 0; - i = count % 
MAX_POOL_LENTH; - - if (i == 0 && count > 0) { - count++; - continue; - } - - areq.src = uadk_pool->bds[i].src; - areq.dst = uadk_pool->bds[i].dst; - areq.mac = uadk_pool->bds[i].mac; - - ret = wd_do_aead_async(h_sess, &areq); - if (ret < 0) { - usleep(SEND_USLEEP * try_cnt); - try_cnt++; - if (try_cnt > MAX_TRY_CNT) { - SEC_TST_PRT("Test aead send fail %d times!\n", MAX_TRY_CNT); - try_cnt = 0; - } - continue; - } - count++; - } - - /* Release memory after all tasks are complete. */ - if (count) { - i = 0; - while (get_recv_time() != g_ctxnum) { - if (i++ >= MAX_TRY_CNT) { - SEC_TST_PRT("failed to wait poll thread finish!\n"); - break; - } - - usleep(SEND_USLEEP); - } - } - /* Wait for the device to complete the tasks. */ - usleep(SEND_USLEEP * MAX_TRY_CNT); - - wd_aead_free_sess(h_sess); - - add_send_complete(); - - return NULL; -} - -static void *sec_uadk_digest_async(void *arg) -{ - thread_data *pdata = (thread_data *)arg; - struct wd_digest_sess_setup digest_setup = {0}; - struct sched_params sc_param = {0}; - struct wd_digest_req dreq; - struct bd_pool *uadk_pool; - u8 *priv_iv, *priv_key; - int try_cnt = 0; - handle_t h_sess; - u32 count = 0; - int ret, i; - - if (pdata->td_id > g_thread_num) - return NULL; - - uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; - priv_key = g_uadk_pool.key[pdata->td_id]; - - memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); - memset(priv_key, DEF_IVK_DATA, MAX_IVK_LENTH); - - digest_setup.alg = pdata->alg; - digest_setup.mode = pdata->mode; // digest mode is optype - digest_setup.mm_type = pdata->mm_type; - sc_param.numa_id = 0; - sc_param.type = 0; - sc_param.mode = 0; // sync mode - if (g_uadk_pool.rsv_pool) - sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); - digest_setup.sched_param = (void *)&sc_param; - - digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; - digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; - digest_setup.mm_ops.free = (void *)wd_mem_free; - digest_setup.mm_ops.iova_map = (void *)wd_mem_map; - digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; - digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; - - h_sess = wd_digest_alloc_sess(&digest_setup); - if (!h_sess) - return NULL; - if (digest_setup.mode == WD_DIGEST_HMAC) { - ret = wd_digest_set_key(h_sess, (const __u8*)priv_key, 4); - if (ret) { - SEC_TST_PRT("test sec digest set key is failed!\n"); - wd_digest_free_sess(h_sess); - return NULL; - } - } - dreq.in_bytes = g_pktlen; - dreq.out_bytes = pdata->d_outbytes; - dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = g_data_fmt; - dreq.state = 0; - dreq.has_next = 0; - dreq.cb = digest_async_cb; - - while(1) { - if (get_run_state() == 0) - break; - try_cnt = 0; - i = count % MAX_POOL_LENTH; - dreq.in = uadk_pool->bds[i].src; - dreq.out = uadk_pool->bds[i].dst; - - ret = wd_do_digest_async(h_sess, &dreq); - if (ret < 0) { - usleep(SEND_USLEEP * try_cnt); - try_cnt++; - if (try_cnt > MAX_TRY_CNT) { - SEC_TST_PRT("Test digest send fail %d times!\n", MAX_TRY_CNT); - try_cnt = 0; - } - continue; - } - count++; - } - - /* Release memory after all tasks are complete. */ - if (count) { - i = 0; - while (get_recv_time() != g_ctxnum) { - if (i++ >= MAX_TRY_CNT) { - SEC_TST_PRT("failed to wait poll thread finish!\n"); - break; - } - - usleep(SEND_USLEEP); - } - } - /* Wait for the device to complete the tasks. 
*/ - usleep(SEND_USLEEP * MAX_TRY_CNT); - - wd_digest_free_sess(h_sess); - - add_send_complete(); - - return NULL; -} - static void *sec_uadk_cipher_sync(void *arg) { thread_data *pdata = (thread_data *)arg; @@ -2175,186 +1597,6 @@ static void *sec_uadk_cipher_sync(void *arg) return NULL; } -static void *sec_uadk_aead_sync(void *arg) -{ - thread_data *pdata = (thread_data *)arg; - struct wd_aead_sess_setup aead_setup = {0}; - struct sched_params sc_param = {0}; - u8 *priv_iv, *priv_key, *priv_hash; - u32 auth_size = SEC_PERF_AUTH_SIZE; - struct wd_aead_req areq = {0}; - struct bd_pool *uadk_pool; - handle_t h_sess; - u32 count = 0; - int ret, i; - - if (pdata->td_id > g_thread_num) - return NULL; - - uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - - priv_iv = g_uadk_pool.iv[pdata->td_id]; - priv_key = g_uadk_pool.key[pdata->td_id]; - priv_hash = g_uadk_pool.hash[pdata->td_id]; - - memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); - memset(priv_key, DEF_IVK_DATA, MAX_IVK_LENTH); - - aead_setup.calg = pdata->alg; - aead_setup.cmode = pdata->mode; - aead_setup.mm_type = pdata->mm_type; - sc_param.numa_id = 0; - sc_param.type = 0; - sc_param.mode = 0; // sync mode - if (g_uadk_pool.rsv_pool) - sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); - aead_setup.sched_param = (void *)&sc_param; - - aead_setup.mm_ops.usr = g_uadk_pool.rsv_pool; - aead_setup.mm_ops.alloc = (void *)wd_mem_alloc; - aead_setup.mm_ops.free = (void *)wd_mem_free; - aead_setup.mm_ops.iova_map = (void *)wd_mem_map; - aead_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; - aead_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; - if (pdata->is_union) { - aead_setup.dalg = pdata->dalg; - aead_setup.dmode = pdata->dmode; - } - h_sess = wd_aead_alloc_sess(&aead_setup); - if (!h_sess) - return NULL; - ret = wd_aead_set_ckey(h_sess, (const __u8*)priv_key, pdata->keysize); - if (ret) { - SEC_TST_PRT("test sec cipher set key is failed!\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - if (pdata->is_union) { - ret = wd_aead_set_akey(h_sess, (const __u8*)priv_hash, HASH_ZISE); - if (ret) { - SEC_TST_PRT("test sec aead set akey is failed!\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - } - ret = wd_aead_set_authsize(h_sess, auth_size); - if (ret) { - SEC_TST_PRT("set auth size fail, authsize: 16\n"); - wd_aead_free_sess(h_sess); - return NULL; - } - - areq.op_type = pdata->optype; - areq.iv = priv_iv; // aead IV need update with param - areq.mac = uadk_pool->bds[0].mac; - areq.iv_bytes = pdata->ivsize; - areq.assoc_bytes = SEC_AEAD_LEN; - areq.in_bytes = g_pktlen; - areq.mac_bytes = g_maclen; - areq.msg_state = 0; - if (areq.op_type) // decrypto - areq.out_bytes = g_pktlen + 16; // aadsize = 16; - else - areq.out_bytes = g_pktlen + 32; // aadsize + authsize = 32; - - areq.data_fmt = g_data_fmt; - areq.state = 0; - - while(1) { - i = count % MAX_POOL_LENTH; - areq.src = uadk_pool->bds[i].src; - areq.dst = uadk_pool->bds[i].dst; - count++; - - ret = wd_do_aead_sync(h_sess, &areq); - if (ret || areq.state) - break; - if (get_run_state() == 0) - break; - } - wd_aead_free_sess(h_sess); - - cal_avg_latency(count); - add_recv_data(count, g_pktlen); - - return NULL; -} - -static void *sec_uadk_digest_sync(void *arg) -{ - thread_data *pdata = (thread_data *)arg; - struct wd_digest_sess_setup digest_setup = {0}; - struct sched_params sc_param = {0}; - struct wd_digest_req dreq; - struct bd_pool *uadk_pool; - u8 *priv_iv, *priv_key; - handle_t h_sess; - u32 count = 0; - int ret, i; - - if (pdata->td_id > g_thread_num) - return NULL; - - 
uadk_pool = &g_uadk_pool.pool[pdata->td_id]; - priv_iv = g_uadk_pool.iv[pdata->td_id]; - priv_key = g_uadk_pool.key[pdata->td_id]; - - memset(priv_iv, DEF_IVK_DATA, MAX_IVK_LENTH); - memset(priv_key, DEF_IVK_DATA, MAX_IVK_LENTH); - - digest_setup.alg = pdata->alg; - digest_setup.mode = pdata->mode; // digest mode is optype - digest_setup.mm_type = pdata->mm_type; - sc_param.numa_id = 0; - sc_param.type = 0; - sc_param.mode = 0; // sync mode - if (g_uadk_pool.rsv_pool) - sc_param.dev_id = wd_get_dev_id(g_uadk_pool.rsv_pool); - digest_setup.sched_param = (void *)&sc_param; - - digest_setup.mm_ops.usr = g_uadk_pool.rsv_pool; - digest_setup.mm_ops.alloc = (void *)wd_mem_alloc; - digest_setup.mm_ops.free = (void *)wd_mem_free; - digest_setup.mm_ops.iova_map = (void *)wd_mem_map; - digest_setup.mm_ops.iova_unmap = (void *)wd_mem_unmap; - digest_setup.mm_ops.get_bufsize = (void *)wd_get_bufsize; - h_sess = wd_digest_alloc_sess(&digest_setup); - if (!h_sess) - return NULL; - if (digest_setup.mode == WD_DIGEST_HMAC) { - ret = wd_digest_set_key(h_sess, (const __u8*)priv_key, 4); - if (ret) { - SEC_TST_PRT("test sec digest set key is failed!\n"); - wd_digest_free_sess(h_sess); - return NULL; - } - } - dreq.in_bytes = g_pktlen; - dreq.out_bytes = pdata->d_outbytes; - dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = g_data_fmt; - dreq.state = 0; - dreq.has_next = 0; - - while(1) { - i = count % MAX_POOL_LENTH; - dreq.in = uadk_pool->bds[i].src; - dreq.out = uadk_pool->bds[i].dst; - ret = wd_do_digest_sync(h_sess, &dreq); - if (ret || dreq.state) - break; - count++; - if (get_run_state() == 0) - break; - } - wd_digest_free_sess(h_sess); - - cal_avg_latency(count); - add_recv_data(count, g_pktlen); - - return NULL; -} - int sec_uadk_sync_threads(struct acc_option *options) { typedef void *(*sec_sync_run)(void *arg); @@ -2374,13 +1616,6 @@ int sec_uadk_sync_threads(struct acc_option *options) case CIPHER_INSTR_TYPE: uadk_sec_sync_run = sec_uadk_cipher_sync; break; - case AEAD_TYPE: - uadk_sec_sync_run = sec_uadk_aead_sync; - break; - case DIGEST_TYPE: - case DIGEST_INSTR_TYPE: - uadk_sec_sync_run = sec_uadk_digest_sync; - break; default: SEC_TST_PRT("Invalid subtype!\n"); return -EINVAL; @@ -2438,13 +1673,6 @@ int sec_uadk_async_threads(struct acc_option *options) case CIPHER_INSTR_TYPE: uadk_sec_async_run = sec_uadk_cipher_async; break; - case AEAD_TYPE: - uadk_sec_async_run = sec_uadk_aead_async; - break; - case DIGEST_TYPE: - case DIGEST_INSTR_TYPE: - uadk_sec_async_run = sec_uadk_digest_async; - break; } for (i = 0; i < g_ctxnum; i++) { @@ -2516,14 +1744,6 @@ int sec_uadk_benchmark(struct acc_option *options) g_algtype = options->algtype; g_data_fmt = options->data_fmt; - if (g_alg == AEAD_TYPE) { - g_maclen = get_aead_mac_len(g_algtype); - if (g_maclen < 0) { - SEC_TST_PRT("SEC algtype error: %u\n", g_algtype); - return -EINVAL; - } - } - if (options->optype > WD_CIPHER_DECRYPTION) { SEC_TST_PRT("SEC optype error: %u\n", options->optype); return -EINVAL; diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 5deec63..8f06f2d 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -7,14 +7,6 @@ #include "uadk_benchmark.h" #include "sec_uadk_benchmark.h" -#include "sec_wd_benchmark.h" -#include "sec_soft_benchmark.h" - -#include "hpre_uadk_benchmark.h" -#include "hpre_wd_benchmark.h" - -#include "zip_uadk_benchmark.h" -#include "zip_wd_benchmark.h" #define TABLE_SPACE_SIZE 8 @@ -565,28 +557,6 @@ static int 
benchmark_run(struct acc_option *option) (option->modetype == INSTR_MODE) || (option->modetype == MULTIBUF_MODE)) { ret = sec_uadk_benchmark(option); - } else if (option->modetype == NOSVA_MODE) { - ret = sec_wd_benchmark(option); - } - usleep(20000); -#ifdef HAVE_CRYPTO - if (option->modetype == SOFT_MODE) { - ret = sec_soft_benchmark(option); - } -#endif - break; - case HPRE_TYPE: - if (option->modetype == SVA_MODE) { - ret = hpre_uadk_benchmark(option); - } else if (option->modetype == NOSVA_MODE) { - ret = hpre_wd_benchmark(option); - } - break; - case ZIP_TYPE: - if (option->modetype == SVA_MODE) { - ret = zip_uadk_benchmark(option); - } else if (option->modetype == NOSVA_MODE) { - ret = zip_wd_benchmark(option); } break; } diff --git a/uadk_tool/uadk_tool.c b/uadk_tool/uadk_tool.c index 64d4163..d545de4 100644 --- a/uadk_tool/uadk_tool.c +++ b/uadk_tool/uadk_tool.c @@ -21,12 +21,7 @@ int main(int argc, char **argv) int ret; if (argc > index) { - if (!strcmp("dfx", argv[index])) { - if (!argv[++index]) - print_dfx_help(); - - dfx_cmd_parse(argc, argv); - } else if (!strcmp("benchmark", argv[index])) { + if (!strcmp("benchmark", argv[index])) { printf("start UADK benchmark test.\n"); if (!argv[++index]) { print_benchmark_help(); @@ -41,12 +36,6 @@ int main(int argc, char **argv) if (ret) return ret; (void)acc_benchmark_run(&option); - } else if (!strcmp("test", argv[index])) { - if (!argv[++index]) - print_test_help(); - - printf("start UADK acc algorithm test.\n"); - acc_test_run(argc, argv); } else { print_tool_help(); } -- 2.43.0
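The next patch in the series adds a small spinlock-protected software SQE FIFO in drv/wd_drv.c (wd_soft_alloc_ctx(), wd_get_sqe_from_queue(), wd_put_sqe_to_queue(), wd_queue_is_busy()). The sketch below is illustration only and is not part of the series; it assumes that API lands as posted in the following patch, the algorithm name and tag value are placeholders, and error handling is abbreviated.

#include "wd_drv.h"

static int soft_fifo_round_trip(void)
{
	struct wd_drv_ctx_params params = { .ctx_mode = 0, .op_type = 0 };
	char alg[] = "cbc(aes)";		/* placeholder algorithm name */
	__u32 tag = 0x1234, done_tag = 0;	/* placeholder request tag */
	struct wd_soft_ctx *sctx;
	__u8 result = 0;
	handle_t h = 0;
	int ret;

	/* Allocate a zeroed software context; its spinlocks are initialized inside. */
	ret = wd_soft_alloc_ctx(alg, &params, &h);
	if (ret)
		return ret;
	sctx = (struct wd_soft_ctx *)h;

	/* Submit side: back off while the ring is close to full. */
	if (wd_queue_is_busy(sctx)) {
		wd_soft_free_ctx(h);
		return -WD_EBUSY;
	}

	/* Claim the SQE at head and record the request tag. */
	ret = wd_get_sqe_from_queue(sctx, tag);

	/* Poll side: pop the completed SQE at tail and read back its tag and result. */
	if (!ret)
		ret = wd_put_sqe_to_queue(sctx, &done_tag, &result);

	wd_soft_free_ctx(h);
	return ret;
}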
From: Longfang Liu <liulongfang@huawei.com> In the new UADK framework, unify the queue abstraction layer and logically merge the init and init2 paths. This decouples the framework and allows the solution to be updated, laying the foundation for future maintenance and expansion. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- Makefile.am | 13 +- drv/wd_drv.c | 284 +++++++++++ drv/wd_drv.h | 61 +++ include/wd_alg.h | 89 +++- include/wd_alg_common.h | 39 +- include/wd_internal.h | 34 +- include/wd_util.h | 32 +- libwd.map | 6 +- wd.c | 52 +- wd_alg.c | 666 +++++++++++++++++------- wd_cipher.c | 98 +++- wd_sched.c | 34 +- wd_util.c | 1070 ++++++++++++++++----------------- 13 files changed, 1500 insertions(+), 978 deletions(-) create mode 100644 drv/wd_drv.c create mode 100644 drv/wd_drv.h diff --git a/Makefile.am b/Makefile.am index fc14529..9292f9f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -58,23 +58,26 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd_bmm.c wd_bmm.h wd.h wd_alg.c wd_alg.h \ # Crypto library with cipher and aead support only libwd_crypto_la_SOURCES=wd_cipher.c wd_cipher.h wd_cipher_drv.h \ wd_aead.c wd_aead.h wd_aead_drv.h \ + wd.c wd.h wd_alg.h \ wd_util.c wd_util.h \ - wd_sched.c wd_sched.h \ - wd.c wd.h + wd_sched.c wd_sched.h # Cipher driver for hisi_sec hardware libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ - hisi_qm_udrv.h wd_cipher_drv.h wd_aead_drv.h aes.h sm4.h galois.h + hisi_qm_udrv.h wd_cipher_drv.h wd_aead_drv.h aes.h sm4.h galois.h \ + drv/wd_drv.h drv/wd_drv.c libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \ - drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h wd_util.c wd_util.h + drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h wd_util.c wd_util.h \ + drv/wd_drv.h drv/wd_drv.c libisa_sve_la_SOURCES=drv/hash_mb/hash_mb.c wd_digest_drv.h drv/hash_mb/hash_mb.h \ drv/hash_mb/sm3_sve_common.S drv/hash_mb/sm3_mb_asimd_x1.S \ drv/hash_mb/sm3_mb_asimd_x4.S drv/hash_mb/sm3_mb_sve.S \ drv/hash_mb/md5_sve_common.S drv/hash_mb/md5_mb_asimd_x1.S \ - drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S + drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S \ + drv/wd_drv.h drv/wd_drv.c # Static driver build configuration if WD_STATIC_DRV diff --git a/drv/wd_drv.c b/drv/wd_drv.c new file mode 100644 index 0000000..fcb03b1 --- /dev/null +++ b/drv/wd_drv.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright 2020-2026 Huawei Technologies Co.,Ltd. All rights reserved.
+ */ + +#include <stdlib.h> + +#include "wd_internal.h" +#include "wd_alg.h" +#include "wd_util.h" +#include "wd_drv.h" + +int wd_soft_alloc_ctx(char *alg_name, void *params, handle_t *ctx) +{ + struct wd_drv_ctx_params *ctx_params = (struct wd_drv_ctx_params *)params; + struct wd_soft_ctx *sfctx; + + if (!params || !ctx) { + WD_ERR("invalid: params, or ctx is NULL!\n"); + return -WD_EINVAL; + } + + /* Allocate ONE software context structure */ + sfctx = calloc(1, sizeof(struct wd_soft_ctx)); + if (!sfctx) { + WD_ERR("failed to alloc ctx!\n"); + return -WD_ENOMEM; + } + + /* Initialize as software context */ + sfctx->fd = -1; + pthread_spin_init(&sfctx->slock, PTHREAD_PROCESS_SHARED); + pthread_spin_init(&sfctx->rlock, PTHREAD_PROCESS_SHARED); + + /* Return context handle */ + *ctx = (handle_t)sfctx; + + WD_INFO("SW context allocated: alg=%s, type=%d, mode=%d\n", + alg_name, ctx_params->op_type, ctx_params->ctx_mode); + + return 0; +} + +void wd_soft_free_ctx(handle_t ctx) +{ + struct wd_soft_ctx *sfctx = (struct wd_soft_ctx *)ctx; + + if (!sfctx) { + WD_ERR("invalid: ctx is NULL!\n"); + return; + } + + /* Simply free the allocated wd_ctx_h structure */ + pthread_spin_destroy(&sfctx->slock); + pthread_spin_destroy(&sfctx->rlock); + free(sfctx); + + WD_INFO("SW context released\n"); +} + +struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +{ + struct uacce_dev_list *p, *node, *result = NULL; + struct uacce_dev *dev; + int numa_id, ret; + + if (!bmp) { + WD_ERR("invalid: bmp is NULL!\n"); + return WD_ERR_PTR(-WD_EINVAL); + } + + p = list; + while (p) { + dev = p->dev; + numa_id = dev->numa_id; + ret = numa_bitmask_isbitset(bmp, numa_id); + if (!ret) { + p = p->next; + continue; + } + + node = calloc(1, sizeof(*node)); + if (!node) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_list; + } + + node->dev = wd_clone_dev(dev); + if (!node->dev) { + result = WD_ERR_PTR(-WD_ENOMEM); + goto out_free_node; + } + + if (!result) + result = node; + else + wd_add_dev_to_list(result, node); + + p = p->next; + } + + return result ? result : WD_ERR_PTR(-WD_ENODEV); + +out_free_node: + free(node); +out_free_list: + wd_free_list_accels(result); + return result; +} + +/** + * wd_hw_alloc_ctx() - HW driver's alloc_ctx callback. + * + * Allocates ONE hardware context from UACCE device. + * This is a driver callback, called by framework for each context. 
+ * + * @alg: The alg name + * @params: Minimal allocation parameters (ctx_mode, op_type, bmp) + * @result: (output) Allocated context information + * + * Return: 0 on success, negative on failure + */ +int wd_hw_alloc_ctx(char *alg_name, void *params, handle_t *ctx) +{ + struct wd_drv_ctx_params *ctx_params = (struct wd_drv_ctx_params *)params; + struct uacce_dev_list *dev_list, *used_list = NULL; + struct bitmask *used_bmp = ctx_params->bmp; + char alg_type[CRYPTO_MAX_ALG_NAME]; + struct uacce_dev *dev = NULL; + struct uacce_dev_list *curr; + struct wd_ctx_h *ctx_h; + int ret = -WD_EINVAL; + + if (!params || !ctx) { + WD_ERR("invalid parameters!\n"); + return -WD_EINVAL; + } + + /* Get algorithm type and device list */ + wd_get_alg_type(alg_name, alg_type); + dev_list = wd_get_accel_list(alg_type); + if (!dev_list) { + WD_ERR("failed to get device list for alg %s\n", alg_name); + return -WD_ENODEV; + } + + /* Get usable device list based on NUMA mask */ + used_list = wd_get_usable_list(dev_list, used_bmp); + if (!used_list) { + WD_ERR("failed to get usable device list\n"); + ret = -WD_ENODEV; + goto out; + } + + /* + * After allocating all queues on the current device, proceed to + * request queues from the next device to ensure NUMA affinity handling. + * + * Try each device in the usable list until success + */ + curr = used_list; + while (curr) { + dev = curr->dev; + if (WD_IS_ERR(dev) || !dev) { + WD_ERR("invalid device in list, skip\n"); + curr = curr->next; + continue; + } + + /* Request hardware context from current device */ + ctx_h = wd_request_ctx(dev); + if (ctx_h) { + /* Success: return context handle */ + ctx_h->priv = NULL; + *ctx = (handle_t)ctx_h; + ret = 0; + WD_INFO("successful to alloc ctx from device %s, ctx: %p\n", + dev->dev_root, ctx_h); + goto out; + } + + WD_DEBUG("failed to request ctx from device %s, try next\n", + dev->dev_root); + curr = curr->next; + } + + /* All devices failed */ + WD_ERR("failed to request ctx from all available devices for driver %s\n", + alg_name); + ret = -WD_EBUSY; + +out: + if (dev_list) + wd_free_list_accels(dev_list); + + return ret; +} + +/** + * wd_hw_free_ctx() - HW driver's free_ctx callback. + * + * Releases ONE hardware context back to UACCE device. 
+ * + * @ctx: The context handle to release + */ +void wd_hw_free_ctx(handle_t ctx) +{ + struct wd_ctx_h *ctx_h = (struct wd_ctx_h *)ctx; + + if (!ctx_h) { + WD_ERR("invalid: ctx is NULL!\n"); + return; + } + + /* Release hardware context back to device */ + wd_release_ctx(ctx); + + WD_INFO("HW context released\n"); +} + +int wd_get_sqe_from_queue(struct wd_soft_ctx *sctx, __u32 tag_id) +{ + struct wd_soft_sqe *sqe = NULL; + + pthread_spin_lock(&sctx->slock); + sqe = &sctx->qfifo[sctx->head]; + if (!sqe->used && !sqe->complete) { // find the next not used sqe + sctx->head++; + if (unlikely(sctx->head == MAX_SOFT_QUEUE_LENGTH)) + sctx->head = 0; + + sqe->used = 1; + sqe->complete = 1; + sqe->id = tag_id; + sqe->result = 0; + __atomic_fetch_add(&sctx->run_num, 0x1, __ATOMIC_ACQUIRE); + pthread_spin_unlock(&sctx->slock); + } else { + pthread_spin_unlock(&sctx->slock); + return -WD_EBUSY; + } + + return 0; +} + +int wd_put_sqe_to_queue(struct wd_soft_ctx *sctx, __u32 *tag_id, __u8 *result) +{ + struct wd_soft_sqe *sqe = NULL; + + /* The queue is not used */ + if (sctx->run_num < 1) + return -WD_EAGAIN; + + if (pthread_spin_trylock(&sctx->rlock)) + return -WD_EAGAIN; + sqe = &sctx->qfifo[sctx->tail]; + if (sqe->used && sqe->complete) { // find a used sqe + sctx->tail++; + if (unlikely(sctx->tail == MAX_SOFT_QUEUE_LENGTH)) + sctx->tail = 0; + + *tag_id = sqe->id; + *result = sqe->result; + sqe->used = 0x0; + sqe->complete = 0x0; + __atomic_fetch_sub(&sctx->run_num, 0x1, __ATOMIC_ACQUIRE); + pthread_spin_unlock(&sctx->rlock); + } else { + pthread_spin_unlock(&sctx->rlock); + return -WD_EAGAIN; + } + + return 0; +} + +int wd_queue_is_busy(struct wd_soft_ctx *sctx) +{ + /* The queue is not used */ + if (sctx->run_num >= MAX_SOFT_QUEUE_LENGTH - 1) + return -WD_EBUSY; + + return 0; +} + diff --git a/drv/wd_drv.h b/drv/wd_drv.h new file mode 100644 index 0000000..ddf3728 --- /dev/null +++ b/drv/wd_drv.h @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright 2020-2026 Huawei Technologies Co.,Ltd. All rights reserved. 
+ */ + +#ifndef __WD_DRV_H +#define __WD_DRV_H + +#include <numa.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <pthread.h> + +#include "wd.h" +#include "wd_alg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_SOFT_QUEUE_LENGTH 1024U + +/** + * default queue length set to 1024 + */ +struct wd_soft_sqe { + __u8 used; + __u8 result; + __u8 complete; + __u32 id; +}; + +struct wd_soft_ctx { + int fd; + pthread_spinlock_t slock; + __u32 head; + struct wd_soft_sqe qfifo[MAX_SOFT_QUEUE_LENGTH]; + pthread_spinlock_t rlock; + __u32 tail; + __u32 run_num; + void *priv; +}; + +/* Public function declarations */ +int wd_hw_alloc_ctx(char *alg_name, void *params, handle_t *ctx); +void wd_hw_free_ctx(handle_t ctx); + +int wd_soft_alloc_ctx(char *alg_name, void *params, handle_t *ctx); +void wd_soft_free_ctx(handle_t ctx); + +int wd_queue_is_busy(struct wd_soft_ctx *sctx); +int wd_get_sqe_from_queue(struct wd_soft_ctx *sctx, __u32 tag_id); +int wd_put_sqe_to_queue(struct wd_soft_ctx *sctx, __u32 *tag_id, __u8 *result); + +#ifdef __cplusplus +} +#endif + +#endif /* __WD_DRV_H */ diff --git a/include/wd_alg.h b/include/wd_alg.h index 05e022c..237d15a 100644 --- a/include/wd_alg.h +++ b/include/wd_alg.h @@ -64,11 +64,14 @@ typedef unsigned char __u8; # define HWCAP2_RNG (1 << 16) #endif -enum alg_priority { - UADK_ALG_HW = 0x0, - UADK_ALG_CE_INSTR = 0x1, +enum alg_dev_type { + UADK_ALG_HW = 0x0, + UADK_ALG_CE_INSTR = 0x1, UADK_ALG_SVE_INSTR = 0x2, - UADK_ALG_SOFT = 0x3 + UADK_ALG_SOFT = 0x3, + UADK_ALG_NPU = 0x4, + UADK_ALG_GPU = 0x5, + UADK_ALG_TYPE_MAX, }; enum alg_drv_type { @@ -80,6 +83,25 @@ enum alg_drv_type { ALG_DRV_FB, }; +/** + * struct wd_ctx_alloc_params - Minimal parameters for single context allocation. + * + * Used to pass only necessary information to driver's alloc_ctx callback. + * Keeps driver layer simple and focused. + * + * @ctx_mode: CTX_MODE_SYNC or CTX_MODE_ASYNC + * @op_type: Operation type + * @bmp: NUMA node bitmask (optional, NULL if not needed) + */ +struct wd_drv_ctx_params { + __u8 ctx_mode; + __u8 op_type; + int numa_id; + __u32 idx; + bool epoll_en; + struct bitmask *bmp; +}; + /** * @drv_name: name of the current device driver * @alg_name: name of the algorithm supported by the driver @@ -108,7 +130,13 @@ enum alg_drv_type { * @get_usage: callback interface used to obtain the * utilization rate of devices. * @get_extend_ops: callback interface to get private operation of drivers. + * @alloc_ctx: Allocate contexts for this driver. + * HW drivers use wd_hw_alloc_ctx. + * Non-HW drivers use wd_drv_alloc_ctx_array. + * @free_ctx: Release all resources allocated by alloc_ctx. + * @sched_policy: Preferred scheduling policy. 
*/ + struct wd_alg_driver { const char *drv_name; const char *alg_name; @@ -127,6 +155,9 @@ struct wd_alg_driver { int (*recv)(handle_t ctx, void *drv_msg); int (*get_usage)(void *param); int (*get_extend_ops)(void *ops); + + int (*alloc_ctx)(char *alg_name, void *params, handle_t *ctx); + void (*free_ctx)(handle_t ctx); }; struct hisi_dev_usage { @@ -144,30 +175,49 @@ struct hisi_dev_usage { int wd_alg_driver_register(struct wd_alg_driver *drv); void wd_alg_driver_unregister(struct wd_alg_driver *drv); +#define MAX_DRV_ALG_NUM 64 /** - * @alg_name: name of the algorithm supported by the driver - * @drv_name: name of the current device driver + * Secondary structure: Algorithm entry (only algorithm-specific attributes) + * @alg_name: Specific algorithm name, e.g., "cbc(aes)" + * @available: Availability depends on specific CE/SVE instructions + */ +struct wd_alg_entry { + char alg_name[ALG_NAME_SIZE]; + bool available; +}; +/** + * Primary structure: Driver node (List backbone, contains driver-level shared attributes) + * @drv_name: name of the current device driver e.g., "hisi_sec" + * @alg_type: Algorithm class, e.g., "cipher" (Promoted to driver level) * @available: Indicates whether the current driver still has resources available * @priority: priority of the type of algorithm supported by the driver - * @calc_type: the calculation method of algorithm supported by the driver - * @refcnt: the number of times the algorithm driver is being cited by the task + * @calc_type: Driver calc type (HW, CE, SVE, SOFT) + * @refcnt: Driver-level global reference count * - * @drv: device Drivers Supporting Algorithms + * @drv: Pointer to driver implementation + * @algs: Static array for supported algorithms + * @alg_count: Current number of registered algorithms * @next: pointer to the next node of the algorithm linked list */ -struct wd_alg_list { - char alg_name[ALG_NAME_SIZE]; +struct wd_drv_node { char drv_name[DEV_NAME_LEN]; - bool available; - int priority; - int calc_type; - int refcnt; - - struct wd_alg_driver *drv; - struct wd_alg_list *next; char alg_type[ALG_NAME_SIZE]; + int priority; + int calc_type; + int refcnt; + struct wd_alg_driver *drv; + struct wd_alg_entry algs[MAX_DRV_ALG_NUM]; + int alg_count; + struct wd_drv_node *next; }; +int wd_get_drv_array(const char *alg_type, int task_type, char *drv_name, + struct wd_alg_driver ***drv_array, __u32 *drv_count); +void wd_put_drv_array(struct wd_alg_driver **drv_array, __u32 drv_count); + +void wd_alg_drv_ref_inc(struct wd_alg_driver **drv_array, __u32 drv_count); +void wd_alg_drv_ref_dec(struct wd_alg_driver **drv_array, __u32 drv_count); + /** * wd_request_drv() - Apply for an algorithm driver. * @alg_name: task algorithm name. @@ -176,7 +226,6 @@ struct wd_alg_list { * Returns the applied algorithm driver, non means error. */ struct wd_alg_driver *wd_request_drv(const char *alg_name, int drv_type); -void wd_release_drv(struct wd_alg_driver *drv); /** * wd_drv_alg_support() - Check the algorithms supported by the driver.
@@ -197,7 +246,7 @@ void wd_disable_drv(struct wd_alg_driver *drv); int wd_alg_get_dev_usage(const char *dev_name, const char *alg_type, __u8 op_type); int wd_get_alg_type(const char *alg_name, char *alg_type); -struct wd_alg_list *wd_get_alg_head(void); +struct wd_drv_node *wd_get_alg_head(void); #ifdef WD_STATIC_DRV /** diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index ac50cb2..00d4430 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -92,7 +92,6 @@ struct wd_ctx { handle_t ctx; __u8 op_type; __u8 ctx_mode; - __u8 ctx_type; }; /** @@ -124,31 +123,16 @@ struct wd_ctx_config { struct wd_cap_config *cap; }; -/* 0x0 mean calloc init value */ -enum wd_ctx_property { - UADK_CTX_HW = 0x0, - UADK_CTX_CE_INS = 0x1, - UADK_CTX_SVE_INS = 0x2, - UADK_CTX_SOFT = 0x3, - UADK_CTX_MAX -}; - /** * struct wd_ctx_nums - Define the ctx sets numbers. * @sync_ctx_num: The ctx numbers which are used for sync mode for each * ctx sets. * @async_ctx_num: The ctx numbers which are used for async mode for each * ctx sets. - * @ctx_prop: Indicates the properties of the current queue - * @ctx_begin: The encoding starting position of the current device ctx - * @other_ctx: Other types of queues configured */ struct wd_ctx_nums { __u32 sync_ctx_num; __u32 async_ctx_num; - __u8 ctx_prop; - __u16 ctx_begin; - struct wd_ctx_nums *other_ctx; }; /** @@ -194,9 +178,30 @@ struct wd_sched { handle_t h_sched_ctx; }; -typedef int (*wd_alg_init)(struct wd_ctx_config *config, struct wd_sched *sched); +typedef int (*wd_alg_init)(struct wd_ctx_config *config, struct wd_sched *sched, void *attrs); typedef int (*wd_alg_poll_ctx)(__u32 idx, __u32 expt, __u32 *count); +/** + * struct wd_init_attrs - Algorithm initialization attributes. + * + * Updated: No longer contains driver field. + * Initialization path determined solely by task_type. 
+ */ +struct wd_init_attrs { + __u32 sched_type; + __u32 task_type; + char alg[CRYPTO_MAX_ALG_NAME]; + struct wd_sched *sched; + struct wd_ctx_params *ctx_params; + struct wd_ctx_config *ctx_config; + wd_alg_init alg_init; + wd_alg_poll_ctx alg_poll_ctx; + + struct wd_ctx_config_internal *ctx_config_internal; + struct wd_alg_driver **drv_array; + __u32 drv_count; +}; + #ifdef __cplusplus } #endif diff --git a/include/wd_internal.h b/include/wd_internal.h index c12500b..455e283 100644 --- a/include/wd_internal.h +++ b/include/wd_internal.h @@ -9,6 +9,7 @@ #include <pthread.h> #include <stdbool.h> #include "wd.h" +#include "wd_alg.h" #ifdef __cplusplus extern "C" { @@ -17,7 +18,6 @@ extern "C" { #define DEVICE_REGION_MAX 16 #define DECIMAL_NUMBER 10 #define MAX_FD_NUM 65535 -#define MAX_SOFT_QUEUE_LENGTH 1024U struct wd_ctx_h { int fd; @@ -30,33 +30,6 @@ struct wd_ctx_h { void *priv; }; -struct wd_soft_sqe { - __u8 used; - __u8 result; - __u8 complete; - __u32 id; -}; - -/** - * default queue length set to 1024 - */ -struct wd_soft_ctx { - int fd; - pthread_spinlock_t slock; - __u32 head; - struct wd_soft_sqe qfifo[MAX_SOFT_QUEUE_LENGTH]; - pthread_spinlock_t rlock; - __u32 tail; - __u32 run_num; - void *priv; -}; - -struct wd_ce_ctx { - int fd; - char *drv_name; - void *priv; -}; - struct wd_ctx_internal { __u8 op_type; __u8 ctx_mode; @@ -66,8 +39,8 @@ struct wd_ctx_internal { __u16 sqn; pthread_spinlock_t lock; struct wd_alg_driver *drv; - void *extend_ops; void *drv_priv; + void *extend_ops; }; struct wd_ctx_config_internal { @@ -78,6 +51,9 @@ struct wd_ctx_config_internal { bool epoll_en; unsigned long *msg_cnt; const char *alg_name; + + struct wd_alg_driver **drv_array; + __u32 drv_count; }; struct wd_datalist { diff --git a/include/wd_util.h b/include/wd_util.h index c24c554..1fc7d60 100644 --- a/include/wd_util.h +++ b/include/wd_util.h @@ -118,17 +118,6 @@ struct wd_msg_handle { int (*recv)(handle_t sess, void *msg); }; -struct wd_init_attrs { - __u32 sched_type; - __u32 task_type; - char alg[CRYPTO_MAX_ALG_NAME]; - struct wd_sched *sched; - struct wd_ctx_params *ctx_params; - struct wd_ctx_config *ctx_config; - wd_alg_init alg_init; - wd_alg_poll_ctx alg_poll_ctx; -}; - /* * wd_init_ctx_config() - Init internal ctx configuration. * @in: ctx configuration in global setting. @@ -435,17 +424,6 @@ void wd_ctx_param_uninit(struct wd_ctx_params *ctx_params); int wd_alg_attrs_init(struct wd_init_attrs *attrs); void wd_alg_attrs_uninit(struct wd_init_attrs *attrs); -/** - * wd_alg_drv_bind() - Request the ctxs and initialize the sched_domain - * with the given devices list, ctxs number and numa mask. - * @ctx_type: the type of ctx specified by the current algorithm. - * @alg_name: the name of the algorithm specified by the task. - * - * Return device driver if succeed and other NULL if fail. - */ -struct wd_alg_driver *wd_alg_drv_bind(__u8 ctx_prop, char *alg_name); -void wd_alg_drv_unbind(struct wd_alg_driver *drv); - /** * wd_alg_init_driver() - Initialize the current device driver according * to the obtained queue resource and the applied driver. 
@@ -519,9 +497,13 @@ static inline void wd_ctx_spin_unlock(struct wd_ctx_internal *ctx, int type) int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type); -int wd_queue_is_busy(struct wd_soft_ctx *sctx); -int wd_get_sqe_from_queue(struct wd_soft_ctx *sctx, __u32 tag_id); -int wd_put_sqe_to_queue(struct wd_soft_ctx *sctx, __u32 *tag_id, __u8 *result); +int wd_alg_drv_discover(struct wd_init_attrs *attrs); +void wd_alg_drv_undiscover(struct wd_init_attrs *attrs); +int wd_alg_ctx_init(struct wd_init_attrs *attrs); +void wd_alg_ctx_uninit(struct wd_init_attrs *attrs); +int wd_ctx_bind_drivers(struct wd_ctx_config_internal *config, + struct wd_alg_driver **drv_array, __u32 drv_count); +void wd_ctx_unbind_drivers(struct wd_ctx_config_internal *config); #ifdef __cplusplus } diff --git a/libwd.map b/libwd.map index 0635198..ce8c3ce 100644 --- a/libwd.map +++ b/libwd.map @@ -45,11 +45,15 @@ global: wd_alg_driver_register; wd_alg_driver_unregister; wd_request_drv; - wd_release_drv; wd_drv_alg_support; wd_enable_drv; wd_disable_drv; wd_get_alg_head; + wd_get_drv_array; + wd_put_drv_array; + wd_alg_drv_ref_inc; + wd_alg_drv_ref_dec; + wd_alg_driver_init; wd_alg_driver_exit; wd_alg_driver_send; diff --git a/wd.c b/wd.c index 01334e1..dd75ca2 100644 --- a/wd.c +++ b/wd.c @@ -996,33 +996,39 @@ void wd_release_alg_cap(struct wd_capability *head) struct wd_capability *wd_get_alg_cap(void) { - struct wd_alg_list *head = wd_get_alg_head(); - struct wd_alg_list *pnext = head->next; + struct wd_drv_node *head = wd_get_alg_head(); + struct wd_drv_node *drv_node = head->next; struct wd_capability *cap_head = NULL; struct wd_capability *cap_pnext = NULL; struct wd_capability *cap_node; - - while (pnext) { - cap_node = calloc(1, sizeof(struct wd_capability)); - if (!cap_node) { - WD_ERR("fail to alloc wd capability head\n"); - goto alloc_err; - } - - (void)strcpy(cap_node->alg_name, pnext->alg_name); - (void)strcpy(cap_node->drv_name, pnext->drv_name); - cap_node->available = pnext->available; - cap_node->priority = pnext->priority; - cap_node->calc_type = pnext->calc_type; - cap_node->next = NULL; - - pnext = pnext->next; - if (!cap_pnext) { - cap_head = cap_node; - cap_pnext = cap_node; + int i; + + while (drv_node) { + /* Traverse the static algorithm array inside each driver node */ + for (i = 0; i < drv_node->alg_count; i++) { + cap_node = calloc(1, sizeof(struct wd_capability)); + if (!cap_node) { + WD_ERR("fail to alloc wd capability head\n"); + goto alloc_err; + } + /* Flatten the secondary structure into the original binary-tuple format */ + (void)strcpy(cap_node->alg_name, drv_node->algs[i].alg_name); + (void)strcpy(cap_node->drv_name, drv_node->drv_name); + cap_node->available = drv_node->algs[i].available; + cap_node->priority = drv_node->priority; + cap_node->calc_type = drv_node->calc_type; + cap_node->next = NULL; + + /* Append to the capability linked list */ + if (!cap_head) { + cap_head = cap_node; + cap_pnext = cap_node; + } else { + cap_pnext->next = cap_node; + cap_pnext = cap_node; + } } - cap_pnext->next = cap_node; - cap_pnext = cap_node; + drv_node = drv_node->next; } return cap_head; diff --git a/wd_alg.c b/wd_alg.c index 787dcad..3de8539 100644 --- a/wd_alg.c +++ b/wd_alg.c @@ -6,6 +6,7 @@ #define _GNU_SOURCE #include <dirent.h> #include <errno.h> +#include <stdio.h> #include <stdbool.h> #include <stdlib.h> #include <pthread.h> @@ -19,10 +20,21 @@ #define DEV_SVA_SIZE 32 #define STR_DECIMAL 0xA -static struct wd_alg_list alg_list_head; -static struct wd_alg_list 
*alg_list_tail = &alg_list_head; +/* Registry structure (List manager) */ +struct wd_alg_registry { + struct wd_drv_node *head; + struct wd_drv_node *tail; + pthread_mutex_t mutex; + int drv_type_num; /* Number of unique driver nodes in the list */ +}; -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static struct wd_drv_node drv_list_head; +static struct wd_alg_registry alg_registry = { + .head = &drv_list_head, + .tail = &drv_list_head, + .mutex = PTHREAD_MUTEX_INITIALIZER, + .drv_type_num = 0, +}; struct acc_alg_item { const char *name; @@ -208,47 +220,33 @@ static bool wd_alg_check_available(int calc_type, return ret; } -static bool wd_alg_driver_match(struct wd_alg_driver *drv, - struct wd_alg_list *node) +/** + * Mapping from task_type to calc_type filter: + * + * TASK_HW → calc_type == UADK_ALG_HW + * TASK_INSTR → calc_type != UADK_ALG_HW (CE_INSTR | SVE_INSTR | SOFT) + * TASK_MIX → all calc_type values + */ +static inline bool wd_alg_drv_type_match(int task_type, int drv_calc_type) { - if (strcmp(drv->alg_name, node->alg_name)) - return false; - - if (strcmp(drv->drv_name, node->drv_name)) - return false; - - if (drv->priority != node->priority) - return false; - - if (drv->calc_type != node->calc_type) + switch (task_type) { + case TASK_HW: + return drv_calc_type == UADK_ALG_HW; + case TASK_INSTR: + return drv_calc_type != UADK_ALG_HW; + case TASK_MIX: + return true; + default: return false; - - return true; -} - -static bool wd_alg_repeat_check(struct wd_alg_driver *drv) -{ - struct wd_alg_list *npre = &alg_list_head; - struct wd_alg_list *pnext = NULL; - - pthread_mutex_lock(&mutex); - pnext = npre->next; - while (pnext) { - if (wd_alg_driver_match(drv, pnext)) { - pthread_mutex_unlock(&mutex); - return true; - } - npre = pnext; - pnext = pnext->next; } - pthread_mutex_unlock(&mutex); - - return false; } int wd_alg_driver_register(struct wd_alg_driver *drv) { - struct wd_alg_list *new_alg; + struct wd_drv_node *node = alg_registry.head->next; + struct wd_drv_node *target_node = NULL; + char alg_type[ALG_NAME_SIZE]; + int i, ret; if (!drv) { WD_ERR("invalid: register drv is NULL!\n"); @@ -260,155 +258,285 @@ int wd_alg_driver_register(struct wd_alg_driver *drv) return -WD_EINVAL; } - if (wd_alg_repeat_check(drv)) - return 0; + ret = wd_get_alg_type(drv->alg_name, alg_type); + if (ret) { + WD_ERR("failed to get alg_type for %s!\n", drv->alg_name); + return -WD_EINVAL; + } - new_alg = calloc(1, sizeof(struct wd_alg_list)); - if (!new_alg) { - WD_ERR("failed to alloc alg driver memory!\n"); - return -WD_ENOMEM; + /* Search for an existing node with the same drv_name */ + pthread_mutex_lock(&alg_registry.mutex); + while (node) { + if (strcmp(node->drv_name, drv->drv_name) == 0) { + target_node = node; + break; + } + node = node->next; } - (void)wd_get_alg_type(drv->alg_name, new_alg->alg_type); - strncpy(new_alg->alg_name, drv->alg_name, ALG_NAME_SIZE - 1); - strncpy(new_alg->drv_name, drv->drv_name, DEV_NAME_LEN - 1); - new_alg->priority = drv->priority; - new_alg->calc_type = drv->calc_type; - new_alg->drv = drv; - new_alg->refcnt = 0; - new_alg->next = NULL; + if (target_node) { + /* Consistency check: a driver must strictly have uniform properties */ + if (strcmp(target_node->alg_type, alg_type) != 0 || + target_node->priority != drv->priority || + target_node->calc_type != drv->calc_type) { + WD_ERR("invalid: driver %s attributes mismatch on re-register!\n", drv->drv_name); + pthread_mutex_unlock(&alg_registry.mutex); + return -WD_EINVAL; + } - new_alg->available = 
wd_alg_check_available(drv->calc_type, - drv->alg_name, drv->drv_name); - if (!new_alg->available) { - free(new_alg); - return -WD_ENODEV; - } + /* Check if alg_name already exists in this driver's array */ + for (i = 0; i < target_node->alg_count; i++) { + if (strcmp(target_node->algs[i].alg_name, drv->alg_name) == 0) { + /* Algorithm already registered, skip duplicate */ + pthread_mutex_unlock(&alg_registry.mutex); + return 0; + } + } + + /* Check array capacity */ + if (target_node->alg_count >= MAX_DRV_ALG_NUM) { + WD_ERR("driver %s alg array overflow (max %d)!\n", drv->drv_name, MAX_DRV_ALG_NUM); + pthread_mutex_unlock(&alg_registry.mutex); + return -WD_ENOMEM; + } + + /* Add new algorithm to existing driver node */ + strncpy(target_node->algs[target_node->alg_count].alg_name, drv->alg_name, ALG_NAME_SIZE - 1); + target_node->algs[target_node->alg_count].available = + wd_alg_check_available(drv->calc_type, drv->alg_name, drv->drv_name); + if (!target_node->algs[target_node->alg_count].available) { + WD_ERR("driver %s alg %s not available on current system!\n", drv->drv_name, drv->alg_name); + pthread_mutex_unlock(&alg_registry.mutex); + return -WD_ENODEV; + } + target_node->alg_count++; + } else { + /* Create a new driver node */ + target_node = calloc(1, sizeof(struct wd_drv_node)); + if (!target_node) { + WD_ERR("failed to alloc drv node memory!\n"); + pthread_mutex_unlock(&alg_registry.mutex); + return -WD_ENOMEM; + } + + strncpy(target_node->drv_name, drv->drv_name, DEV_NAME_LEN - 1); + strncpy(target_node->alg_type, alg_type, ALG_NAME_SIZE - 1); + target_node->priority = drv->priority; + target_node->calc_type = drv->calc_type; + target_node->drv = drv; + target_node->refcnt = 0; + target_node->alg_count = 0; + + /* Add the first algorithm to the new node's array */ + strncpy(target_node->algs[0].alg_name, drv->alg_name, ALG_NAME_SIZE - 1); + target_node->algs[0].available = + wd_alg_check_available(drv->calc_type, drv->alg_name, drv->drv_name); + if (!target_node->algs[0].available) { + free(target_node); + WD_ERR("driver %s alg %s not available on current system!\n", drv->drv_name, drv->alg_name); + pthread_mutex_unlock(&alg_registry.mutex); + return -WD_ENODEV; + } + target_node->alg_count = 1; + target_node->next = NULL; - pthread_mutex_lock(&mutex); - alg_list_tail->next = new_alg; - alg_list_tail = new_alg; - pthread_mutex_unlock(&mutex); + /* Append to list tail */ + alg_registry.tail->next = target_node; + alg_registry.tail = target_node; + alg_registry.drv_type_num++; + } + pthread_mutex_unlock(&alg_registry.mutex); return 0; } void wd_alg_driver_unregister(struct wd_alg_driver *drv) { - struct wd_alg_list *npre = &alg_list_head; - struct wd_alg_list *pnext = npre->next; + struct wd_drv_node *npre = alg_registry.head; + struct wd_drv_node *pnext = npre->next; + int i; - /* Alg driver list has no drivers */ if (!pnext || !drv) return; - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&alg_registry.mutex); + /* Find the driver node matching drv_name */ while (pnext) { - if (wd_alg_driver_match(drv, pnext)) + if (strcmp(drv->drv_name, pnext->drv_name) == 0) break; npre = pnext; pnext = pnext->next; } - /* The current algorithm is not registered */ if (!pnext) { - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&alg_registry.mutex); return; } - /* Used to locate the problem and ensure symmetrical use driver */ - if (pnext->refcnt > 0) - WD_ERR("driver<%s> still in used: %d\n", pnext->drv_name, pnext->refcnt); + /* Find and remove the specific alg_name from the node's 
array */ + for (i = 0; i < pnext->alg_count; i++) { + if (strcmp(pnext->algs[i].alg_name, drv->alg_name) == 0) { + /* Compact the array: move the last element to the removed slot */ + if (i != pnext->alg_count - 1) + pnext->algs[i] = pnext->algs[pnext->alg_count - 1]; + pnext->alg_count--; + break; + } + } + + /* If the driver no longer supports any algorithms, remove the entire node */ + if (pnext->alg_count == 0) { + if (pnext->refcnt > 0) + WD_ERR("driver<%s> still in used: %d\n", pnext->drv_name, pnext->refcnt); + + if (pnext == alg_registry.tail) + alg_registry.tail = npre; - if (pnext == alg_list_tail) - alg_list_tail = npre; + npre->next = pnext->next; + free(pnext); + if (alg_registry.drv_type_num > 0) + alg_registry.drv_type_num--; + } - npre->next = pnext->next; - free(pnext); - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&alg_registry.mutex); } -struct wd_alg_list *wd_get_alg_head(void) +struct wd_drv_node *wd_get_alg_head(void) { - return &alg_list_head; + return alg_registry.head; +} + +/** + * wd_alg_match_drv() - Check if a given algorithm match a specific driver. + * @drv: Pointer to the driver instance + * @alg_name: Specific algorithm name to check (e.g., "cbc(aes)") + * + * Uses the new hierarchical structure: finds the driver node, then searches + * its internal static algorithm array. + * + * Return: true if supported and available, false otherwise. + */ +bool wd_alg_match_drv(struct wd_alg_driver *drv, const char *alg_name) +{ + struct wd_drv_node *node; + int i; + + if (!drv || !alg_name) + return false; + + pthread_mutex_lock(&alg_registry.mutex); + node = alg_registry.head->next; + while (node) { + if (node->drv == drv) { + /* Found the driver node, now search its algs array */ + for (i = 0; i < node->alg_count; i++) { + if (!strcmp(node->algs[i].alg_name, alg_name) && + node->algs[i].available) { + pthread_mutex_unlock(&alg_registry.mutex); + return true; + } + } + /* Driver found, but algorithm not in its array or not available */ + pthread_mutex_unlock(&alg_registry.mutex); + return false; + } + node = node->next; + } + pthread_mutex_unlock(&alg_registry.mutex); + + return false; } bool wd_drv_alg_support(const char *alg_name, void *param) { struct wd_ctx_config_internal *config = param; - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; - struct wd_alg_driver *drv; - __u32 i; + struct wd_drv_node *head = alg_registry.head; + struct wd_drv_node *node; + __u32 i, j; if (!alg_name || !config) return false; + /* Check whether the currently allocated ctxs supports the specified algorithm. */ for (i = 0; i < config->ctx_num; i++) { - drv = config->ctxs[i].drv; - while (pnext) { - if (!strcmp(alg_name, pnext->alg_name) && - !strcmp(drv->drv_name, pnext->drv_name)) { - return true; + if (!config->ctxs[i].drv) + continue; + node = head->next; + while (node) { + /* Query the position of the driver matching the context in the list. 
*/ + if (strcmp(config->ctxs[i].drv->drv_name, node->drv_name) == 0) { + for (j = 0; j < node->alg_count; j++) { + if (!strcmp(alg_name, node->algs[j].alg_name) && + node->algs[j].available) + return true; + } + break; } - pnext = pnext->next; + node = node->next; } } - return false; } void wd_enable_drv(struct wd_alg_driver *drv) { - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; + struct wd_drv_node *node = alg_registry.head->next; + int i; - if (!pnext || !drv) + if (!node || !drv) return; - pthread_mutex_lock(&mutex); - while (pnext) { - if (wd_alg_driver_match(drv, pnext)) + pthread_mutex_lock(&alg_registry.mutex); + while (node) { + if (strcmp(drv->drv_name, node->drv_name) == 0) break; - pnext = pnext->next; + node = node->next; } - if (pnext) - pnext->available = wd_alg_check_available(drv->calc_type, - drv->alg_name, drv->drv_name); - pthread_mutex_unlock(&mutex); + if (node) { + /* Re-evaluate availability for each algorithm upon enabling */ + for (i = 0; i < node->alg_count; i++) { + node->algs[i].available = + wd_alg_check_available(node->calc_type, + node->algs[i].alg_name, + node->drv_name); + } + } + pthread_mutex_unlock(&alg_registry.mutex); } void wd_disable_drv(struct wd_alg_driver *drv) { - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; + struct wd_drv_node *node = alg_registry.head->next; + int i; - if (!pnext || !drv) + if (!node || !drv) return; - pthread_mutex_lock(&mutex); - while (pnext) { - if (wd_alg_driver_match(drv, pnext) && pnext->available) + pthread_mutex_lock(&alg_registry.mutex); + while (node) { + if (strcmp(drv->drv_name, node->drv_name) == 0) break; - pnext = pnext->next; + node = node->next; } - if (pnext) - pnext->available = false; - pthread_mutex_unlock(&mutex); + if (node) { + /* Disable all algorithms for this driver */ + for (i = 0; i < node->alg_count; i++) + node->algs[i].available = false; + } + pthread_mutex_unlock(&alg_registry.mutex); } struct wd_alg_driver *wd_request_drv(const char *alg_name, int drv_type) { - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; - struct wd_alg_list *select_node = NULL; + struct wd_drv_node *node = alg_registry.head->next; struct wd_alg_driver *drv = NULL; int tmp_priority = -1; + int i; - if (!pnext) { - WD_ERR("invalid: requset drv pnext is NULL!\n"); + if (!node) { + WD_ERR("invalid: request drv node is NULL!\n"); return NULL; } @@ -417,72 +545,59 @@ struct wd_alg_driver *wd_request_drv(const char *alg_name, int drv_type) return NULL; } - /* Check the list to get an best driver */ - pthread_mutex_lock(&mutex); - while (pnext) { - if (!strcmp(alg_name, pnext->alg_name) && pnext->available) { - /* HW driver mean to used hardware dev */ - if (drv_type == ALG_DRV_HW && pnext->drv->calc_type == UADK_ALG_HW) - select_node = pnext; - /* CE driver mean to used CE dev */ - else if (drv_type == ALG_DRV_CE_INS && pnext->drv->calc_type == UADK_ALG_CE_INSTR) - select_node = pnext; - /* SVE driver mean to used SVE dev */ - else if (drv_type == ALG_DRV_SVE_INS && pnext->drv->calc_type == UADK_ALG_SVE_INSTR) - select_node = pnext; - /* INS driver mean to used CE and SVE dev */ - else if (drv_type == ALG_DRV_INS && (pnext->drv->calc_type == UADK_ALG_CE_INSTR || - pnext->drv->calc_type == UADK_ALG_SVE_INSTR)) - select_node = pnext; - /* Soft driver mean to used Soft, CE and SVE dev */ - else if (drv_type == ALG_DRV_SOFT && pnext->drv->calc_type != UADK_ALG_HW) - select_node = pnext; - /* Fallback driver mean to 
used Soft or CE dev */ - else if (drv_type == ALG_DRV_FB && (pnext->drv->calc_type == UADK_ALG_SOFT || - pnext->drv->calc_type == UADK_ALG_CE_INSTR)) - select_node = pnext; - - if (select_node && select_node->drv->priority > tmp_priority) { - drv = select_node->drv; - tmp_priority = select_node->drv->priority; + pthread_mutex_lock(&alg_registry.mutex); + while (node) { + bool type_match = false; + + /* Check calc_type against requested drv_type */ + if (drv_type == ALG_DRV_HW && node->calc_type == UADK_ALG_HW) + type_match = true; + else if (drv_type == ALG_DRV_CE_INS && node->calc_type == UADK_ALG_CE_INSTR) + type_match = true; + else if (drv_type == ALG_DRV_SVE_INS && node->calc_type == UADK_ALG_SVE_INSTR) + type_match = true; + else if (drv_type == ALG_DRV_INS && (node->calc_type == UADK_ALG_CE_INSTR || + node->calc_type == UADK_ALG_SVE_INSTR)) + type_match = true; + else if (drv_type == ALG_DRV_SOFT && node->calc_type != UADK_ALG_HW) + type_match = true; + else if (drv_type == ALG_DRV_FB && (node->calc_type == UADK_ALG_SOFT || + node->calc_type == UADK_ALG_CE_INSTR)) + type_match = true; + + if (type_match && node->drv->priority > tmp_priority) { + /* Check if this driver supports the requested alg_name and it's available */ + for (i = 0; i < node->alg_count; i++) { + if (!strcmp(alg_name, node->algs[i].alg_name) && + node->algs[i].available) { + drv = node->drv; + tmp_priority = node->drv->priority; + break; + } } } - pnext = pnext->next; + node = node->next; } - if (select_node) - select_node->refcnt++; - pthread_mutex_unlock(&mutex); - - return drv; -} - -void wd_release_drv(struct wd_alg_driver *drv) -{ - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; - struct wd_alg_list *select_node = NULL; - - if (!pnext || !drv) - return; - - pthread_mutex_lock(&mutex); - while (pnext) { - if (wd_alg_driver_match(drv, pnext) && pnext->refcnt > 0) { - select_node = pnext; - break; + /* Increment refcnt on the selected driver node */ + if (drv) { + node = alg_registry.head->next; + while (node) { + if (node->drv == drv) { + node->refcnt++; + break; + } + node = node->next; } - pnext = pnext->next; } - if (select_node) - select_node->refcnt--; - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&alg_registry.mutex); + return drv; } int wd_alg_get_dev_usage(const char *dev_name, const char *alg_type, __u8 alg_op_type) { - struct wd_alg_list *pnext = alg_list_head.next; + struct wd_drv_node *node = alg_registry.head->next; struct hisi_dev_usage dev_usage; struct wd_alg_driver *drv; @@ -491,18 +606,19 @@ int wd_alg_get_dev_usage(const char *dev_name, const char *alg_type, __u8 alg_op return -WD_EINVAL; } - while (pnext) { - if (strstr(dev_name, pnext->drv_name) && - !strcmp(alg_type, pnext->alg_type)) + while (node) { + /* Match dev_name and alg_type at the driver node level */ + if (strstr(dev_name, node->drv_name) && + !strcmp(alg_type, node->alg_type)) break; - pnext = pnext->next; + node = node->next; } - if (!pnext) + if (!node) return -WD_EACCES; - drv = pnext->drv; + drv = node->drv; if (!drv->get_usage) return -WD_EINVAL; @@ -512,3 +628,201 @@ int wd_alg_get_dev_usage(const char *dev_name, const char *alg_type, __u8 alg_op return drv->get_usage(&dev_usage); } + +/** + * wd_put_drv_array() - Release driver array allocated by wd_get_drv_array(). + * + * Frees the driver pointer array. Does NOT touch the drivers themselves + * (refcount managed separately by wd_alg_drv_ref_inc/dec). 
+ * + * @drv_array: Driver array from wd_get_drv_array() + * @drv_count: Number of entries (unused, for API symmetry) + */ +void wd_put_drv_array(struct wd_alg_driver **drv_array, __u32 drv_count) +{ + __u32 i; + + for (i = 0; i < drv_count; i++) + drv_array[i] = NULL; + free(drv_array); +} + +/** + * wd_get_drv_array() - Discover all unique drivers matching alg_type and task_type. + * + * @alg_type: Algorithm class string ("cipher", "digest", "aead", "comp", etc.) + * @task_type: TASK_HW (hardware only), TASK_INSTR (instruction only), TASK_MIX (all) + * @drv_array: Output - newly allocated array of unique wd_alg_driver* pointers, + * caller must free with plain free() + * @drv_count: Output - number of unique drivers found + * + * Traverses wd_drv_node list once: + * 1. Matches by alg_type at node level (no need to traverse algs array for this). + * 2. Filters by task_type using wd_alg_drv_type_match(). + * 3. Deduplication is inherently solved (each node is a unique driver). + * + * This is a PURE QUERY — no reference counting or resource allocation side effects. + * Reference counting is done separately by wd_alg_drv_ref_inc/dec(). + * + * Return: 0 on success, negative on failure. + */ +int wd_get_drv_array(const char *alg_type, int task_type, char *drv_name, + struct wd_alg_driver ***drv_array, __u32 *drv_count) +{ + struct wd_drv_node *head, *node; + struct wd_alg_driver **drivers; + __u32 max_driver_count, current_count = 0; + int i; + + if (!alg_type || !drv_array || !drv_count) { + WD_ERR("invalid: NULL parameter!\n"); + return -WD_EINVAL; + } + + *drv_array = NULL; + *drv_count = 0; + head = wd_get_alg_head(); + if (!head) { + WD_ERR("failed to get alg list head!\n"); + return -WD_EINVAL; + } + + max_driver_count = alg_registry.drv_type_num; + WD_INFO("drivers list drv_type_num: %d\n", alg_registry.drv_type_num); + + if (max_driver_count == 0) { + WD_ERR("no drivers registered for alg_type: %s\n", alg_type); + return -WD_EINVAL; + } + + drivers = calloc(max_driver_count, sizeof(struct wd_alg_driver *)); + if (!drivers) { + WD_ERR("failed to allocate drivers array!\n"); + return -WD_ENOMEM; + } + + /* + * Single traversal of wd_drv_node list: + * - Match by alg_type at node level + * - Filter by task_type + * - Deduplication inherently solved + */ + node = head->next; + while (node) { + if (strcmp(node->alg_type, alg_type) == 0 && + wd_alg_drv_type_match(task_type, node->calc_type)) { + if (drv_name && strcmp(node->drv_name, drv_name) != 0) { + node = node->next; + continue; + } + /* Check if at least one algorithm in this driver is available */ + bool has_available_alg = false; + for (i = 0; i < node->alg_count; i++) { + if (node->algs[i].available) { + has_available_alg = true; + break; + } + } + + if (!has_available_alg) { + node = node->next; + continue; + } + + if (current_count >= max_driver_count) { + WD_ERR("driver array overflow!\n"); + goto query_failed; + } + drivers[current_count] = node->drv; + current_count++; + } + node = node->next; + } + + if (current_count == 0) { + WD_ERR("no available drivers for alg_type: %s, task_type: %d\n", + alg_type, task_type); + goto query_failed; + } + + WD_INFO("Driver discovery: %u unique drivers for alg_type=%s\n", + current_count, alg_type); + *drv_array = drivers; + *drv_count = current_count; + return 0; + +query_failed: + free(drivers); + return -WD_EINVAL; +} + +/** + * wd_alg_drv_ref_inc() - Increment reference count for each unique driver.
+ * + * @drv_array: Array of unique driver pointers + * @drv_count: Number of drivers in the array + * + * For each unique driver, finds its node in wd_drv_node list and + * increments refcnt by exactly 1. This ensures refcnt reflects the + * number of configs using the driver, not the number of ctxs. + * + * Must be called after wd_get_drv_array() and after ctx binding. + */ +void wd_alg_drv_ref_inc(struct wd_alg_driver **drv_array, __u32 drv_count) +{ + struct wd_drv_node *node; + __u32 i; + + if (!drv_array || drv_count == 0) + return; + + pthread_mutex_lock(&alg_registry.mutex); + for (i = 0; i < drv_count; i++) { + if (!drv_array[i]) + continue; + /* Directly find the unique driver node and increment refcnt */ + node = alg_registry.head->next; + while (node) { + if (node->drv == drv_array[i]) { + node->refcnt++; + break; + } + node = node->next; + } + } + pthread_mutex_unlock(&alg_registry.mutex); +} + +/** + * wd_alg_drv_ref_dec() - Decrement reference count for each unique driver. + * + * @drv_array: Array of unique driver pointers + * @drv_count: Number of drivers in the array + * + * Inverse of wd_alg_drv_ref_inc(). Decrements refcnt by 1 for each + * unique driver. Must be called during cleanup. + */ +void wd_alg_drv_ref_dec(struct wd_alg_driver **drv_array, __u32 drv_count) +{ + struct wd_drv_node *node; + __u32 i; + + if (!drv_array || drv_count == 0) + return; + + pthread_mutex_lock(&alg_registry.mutex); + for (i = 0; i < drv_count; i++) { + if (!drv_array[i]) + continue; + /* Directly find the unique driver node and decrement refcnt */ + node = alg_registry.head->next; + while (node) { + if (node->drv == drv_array[i] && node->refcnt > 0) { + node->refcnt--; + break; + } + node = node->next; + } + } + pthread_mutex_unlock(&alg_registry.mutex); +} diff --git a/wd_cipher.c b/wd_cipher.c index a4d6c63..f312545 100644 --- a/wd_cipher.c +++ b/wd_cipher.c @@ -340,8 +340,9 @@ static void wd_cipher_clear_status(void) } static int wd_cipher_common_init(struct wd_ctx_config *config, - struct wd_sched *sched) + struct wd_sched *sched, void *attrs) { + struct wd_init_attrs *cipher_attrs = (struct wd_init_attrs *)attrs; int ret; ret = wd_set_epoll_en("WD_CIPHER_EPOLL_EN", @@ -358,6 +359,7 @@ static int wd_cipher_common_init(struct wd_ctx_config *config, if (ret < 0) goto out_clear_ctx_config; + /* allocate async pool for every ctx */ ret = wd_init_async_request_pool(&wd_cipher_setting.pool, config, WD_POOL_MAX_ENTRIES, @@ -367,6 +369,10 @@ static int wd_cipher_common_init(struct wd_ctx_config *config, wd_cipher_setting.priv = STATUS_ENABLE; + /* V2 path: let framework know where the internal config is */ + if (cipher_attrs) + cipher_attrs->ctx_config_internal = &wd_cipher_setting.config; + return 0; out_clear_sched: @@ -395,6 +401,7 @@ static int wd_cipher_common_uninit(void) int wd_cipher_init(struct wd_ctx_config *config, struct wd_sched *sched) { + __u32 drv_count = 0; int ret; pthread_atfork(NULL, NULL, wd_cipher_clear_status); @@ -411,26 +418,46 @@ int wd_cipher_init(struct wd_ctx_config *config, struct wd_sched *sched) if (ret) goto out_clear_init; - ret = wd_cipher_common_init(config, sched); + /* ═══ Phase 1: Internal copy (existing common_init) ═══ */ + ret = wd_cipher_common_init(config, sched, NULL); if (ret) goto out_close_driver; - ret = wd_ctx_drv_config("ecb(aes)", &wd_cipher_setting.config); - if (ret) - goto out_uninit_nolock; + /* ═══ Phase 2: Driver discovery ═══ */ + ret = wd_get_drv_array("cipher", TASK_HW, "hisi_sec2", + &wd_cipher_setting.config.drv_array, 
&drv_count); + if (ret) { + WD_ERR("driver discovery failed!\n"); + goto out_free_drv_array; + } + WD_INFO("discovered %u unique drivers\n", drv_count); + + /* ═══ Phase 2.5: RR bind drivers to internal ctxs ═══ */ + ret = wd_ctx_bind_drivers(&wd_cipher_setting.config, + wd_cipher_setting.config.drv_array, drv_count); + if (ret) { + WD_ERR("driver binding failed!\n"); + goto out_common_uninit; + } + /* ═══ Phase 3: Driver initialization ═══ */ ret = wd_alg_init_driver(&wd_cipher_setting.config); - if (ret) - goto out_drv_deconfig; + if (ret) { + WD_ERR("cipher driver init failed!\n"); + goto out_unbind_drivers; + } wd_alg_set_init(&wd_cipher_setting.status); return 0; -out_drv_deconfig: - wd_ctx_drv_deconfig(&wd_cipher_setting.config); -out_uninit_nolock: +out_unbind_drivers: + wd_ctx_unbind_drivers(&wd_cipher_setting.config); +out_common_uninit: wd_cipher_common_uninit(); +out_free_drv_array: + wd_put_drv_array(wd_cipher_setting.config.drv_array, drv_count); + wd_cipher_setting.config.drv_array = NULL; out_close_driver: wd_cipher_close_driver(WD_TYPE_V1); out_clear_init: @@ -443,17 +470,21 @@ void wd_cipher_uninit(void) int ret; wd_alg_uninit_driver(&wd_cipher_setting.config); - wd_ctx_drv_deconfig(&wd_cipher_setting.config); - + wd_ctx_unbind_drivers(&wd_cipher_setting.config); ret = wd_cipher_common_uninit(); if (ret) return; + wd_put_drv_array(wd_cipher_setting.config.drv_array, + wd_cipher_setting.config.drv_count); + wd_cipher_setting.config.drv_array = NULL; + wd_cipher_close_driver(WD_TYPE_V1); wd_alg_clear_init(&wd_cipher_setting.status); } -int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_params *ctx_params) +int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, + struct wd_ctx_params *ctx_params) { struct wd_ctx_nums cipher_ctx_num[WD_CIPHER_DECRYPTION + 1] = {0}; struct wd_ctx_params cipher_ctx_params = {0}; @@ -483,16 +514,17 @@ int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_p goto out_uninit; while (ret != 0) { - memset(&wd_cipher_setting.config, 0, sizeof(struct wd_ctx_config_internal)); + memset(&wd_cipher_setting.config, 0, + sizeof(struct wd_ctx_config_internal)); /* Init ctx param and prepare for ctx request */ cipher_ctx_params.ctx_set_num = cipher_ctx_num; ret = wd_ctx_param_init(&cipher_ctx_params, ctx_params, - alg, task_type, WD_CIPHER_TYPE, WD_CIPHER_DECRYPTION + 1); + alg, task_type, WD_CIPHER_TYPE, + WD_CIPHER_DECRYPTION + 1); if (ret) { - if (ret == -WD_EAGAIN) { + if (ret == -WD_EAGAIN) continue; - } goto out_dlclose; } @@ -502,6 +534,8 @@ int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_p wd_cipher_init_attrs.ctx_params = &cipher_ctx_params; wd_cipher_init_attrs.alg_init = wd_cipher_common_init; wd_cipher_init_attrs.alg_poll_ctx = wd_cipher_poll_ctx; + + /* ═══ Phase 1 + Phase 2 ═══ */ ret = wd_alg_attrs_init(&wd_cipher_init_attrs); if (ret) { if (ret == -WD_ENODEV) { @@ -513,23 +547,31 @@ int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_p } } - WD_ERR("ctxs numbers: %u.\n", wd_cipher_setting.config.ctx_num); - ret = wd_ctx_drv_config(alg, &wd_cipher_setting.config); - if (ret) - goto out_uninit_nolock; + WD_INFO("ctxs numbers: %u.\n", wd_cipher_setting.config.ctx_num); + /* ═══ Phase 2.5: RR bind drivers ═══ */ + ret = wd_ctx_bind_drivers(&wd_cipher_setting.config, + wd_cipher_init_attrs.drv_array, + wd_cipher_init_attrs.drv_count); + if (ret) { + WD_ERR("driver binding failed!\n"); + goto out_common_uninit; + } + /* ═══ Phase 
3: Driver initialization ═══ */ ret = wd_alg_init_driver(&wd_cipher_setting.config); - if (ret) - goto out_drv_deconfig; + if (ret) { + WD_ERR("driver init failed!\n"); + goto out_unbind_drivers; + } wd_alg_set_init(&wd_cipher_setting.status); wd_ctx_param_uninit(&cipher_ctx_params); return 0; -out_drv_deconfig: - wd_ctx_drv_deconfig(&wd_cipher_setting.config); -out_uninit_nolock: +out_unbind_drivers: + wd_ctx_unbind_drivers(&wd_cipher_setting.config); +out_common_uninit: wd_cipher_common_uninit(); wd_alg_attrs_uninit(&wd_cipher_init_attrs); out_params_uninit: @@ -545,12 +587,14 @@ void wd_cipher_uninit2(void) { int ret; - wd_ctx_drv_deconfig(&wd_cipher_setting.config); + wd_alg_uninit_driver(&wd_cipher_setting.config); + wd_ctx_unbind_drivers(&wd_cipher_setting.config); ret = wd_cipher_common_uninit(); if (ret) return; wd_alg_attrs_uninit(&wd_cipher_init_attrs); + wd_cipher_close_driver(WD_TYPE_V2); wd_alg_clear_init(&wd_cipher_setting.status); } diff --git a/wd_sched.c b/wd_sched.c index edc893c..c4451a7 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -1085,7 +1085,7 @@ static __u32 session_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct wd_sched_ctx_domain *domain = NULL; if (region_id >= sched_ctx->region_num || sched_mode >= SCHED_MODE_BUTT || - op_type >= sched_ctx->type_num || prop >= UADK_CTX_MAX) { + op_type >= sched_ctx->type_num || prop >= UADK_ALG_TYPE_MAX) { WD_ERR("invalid: region: %d, mode: %d, type: %u!, prop: %u\n", region_id, sched_mode, op_type, prop); return INVALID_POS; @@ -1147,7 +1147,7 @@ static int session_sched_domain_init(struct wd_sched_ctx *sched_ctx, skey->ctx_prop, SCHED_MODE_ASYNC); if (sync_ctx == INVALID_POS && async_ctx == INVALID_POS) { - WD_ERR("failed to get valid sync_ctx or async_ctx!\n"); + WD_ERR("there is no valid sync_ctx or async_ctx domain!\n"); return -WD_EINVAL; } @@ -1209,7 +1209,7 @@ static handle_t round_robin_sched_init(handle_t h_sched_ctx, void *sched_param) } sched_skey_param_init(sched_ctx, skey); - WD_INFO("initialized RR scheduler with sync and async domains\n"); + WD_DEBUG("initialized RR scheduler with sync and async domains\n"); return hskey; } @@ -1399,14 +1399,14 @@ static handle_t skey_sched_init(handle_t h_sched_ctx, void *sched_param) skey = (struct wd_sched_key *)hskey; def_prop = skey->ctx_prop; /* Init and get ctx for every ctx mode */ - for (i = 0; i < UADK_CTX_MAX; i++) { + for (i = 0; i < UADK_ALG_TYPE_MAX; i++) { skey->ctx_prop = i; ret = session_sched_domain_init(sched_ctx, skey); - if (ret != 0) { - WD_ERR("Can't to request prop=%d type ctx!\n", i); + if (ret != 0) continue; - } + /* Request two Pre_fetch queues each time. */ + WD_INFO("Successful to request prop=%d type ctx!\n", i); req_ctx_num += 2; } if (!req_ctx_num) { @@ -1417,7 +1417,7 @@ static handle_t skey_sched_init(handle_t h_sched_ctx, void *sched_param) /* Restore the initialization prop settings. 
*/ skey->ctx_prop = def_prop; sched_skey_param_init(sched_ctx, skey); - WD_INFO("initialized Hungry scheduler with sync and async domains\n"); + WD_DEBUG("initialized Hungry scheduler with sync and async domains\n"); return hskey; } @@ -1531,14 +1531,14 @@ static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) skey = (struct wd_sched_key *)hskey; def_prop = skey->ctx_prop; /* Init and get ctx for every ctx mode */ - for (i = 0; i < UADK_CTX_MAX; i++) { + for (i = 0; i < UADK_ALG_TYPE_MAX; i++) { skey->ctx_prop = i; ret = session_sched_domain_init(sched_ctx, skey); - if (ret != 0) { - WD_ERR("Can't to request prop=%d type ctx!\n", i); + if (ret != 0) continue; - } + /* Request two Pre_fetch queues each time. */ + WD_INFO("Successful to request prop=%d type ctx!\n", i); req_ctx_num += 2; } if (!req_ctx_num) { @@ -1549,7 +1549,7 @@ static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) /* Restore the initialization prop settings. */ skey->ctx_prop = def_prop; sched_skey_param_init(sched_ctx, skey); - WD_INFO("initialized Loop scheduler with sync and async domains\n"); + WD_DEBUG("initialized Loop scheduler with sync and async domains\n"); return (handle_t)skey; } @@ -1836,8 +1836,8 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - if (param->ctx_prop < 0 || param->ctx_prop > UADK_CTX_SOFT) - param->ctx_prop = UADK_CTX_HW; + if (param->ctx_prop < 0 || param->ctx_prop > UADK_ALG_SOFT) + param->ctx_prop = UADK_ALG_HW; /* Insert or get domain from hash table using four dimensions */ domain = wd_sched_hash_table_insert(sched_ctx->domain_hash_table, @@ -1950,12 +1950,12 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, if (sched_type == SCHED_POLICY_DEV) { /* Device mode: region_num is actually device count */ - estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_ALG_TYPE_MAX; } else { /* NUMA mode: validate region_num */ if (numa_num_check(region_num)) goto err_out; - estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_ALG_TYPE_MAX; } /* Create single global hash table */ diff --git a/wd_util.c b/wd_util.c index c174d7c..a80d4af 100644 --- a/wd_util.c +++ b/wd_util.c @@ -32,6 +32,7 @@ #define WD_SOFT_CTX_NUM 2 #define WD_SOFT_SYNC_CTX 0 #define WD_SOFT_ASYNC_CTX 1 +#define WD_DRV_MAX_NUM 128 #define WD_DRV_LIB_DIR "uadk" #define WD_DRV_CONF_FILE "uadk.cnf" @@ -163,38 +164,12 @@ int wd_mem_ops_init(handle_t h_ctx, struct wd_mm_ops *mm_ops, int mem_type) return 0; } -static int wd_parse_dev_id(handle_t h_ctx) -{ - struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; - char *dev_path = ctx->dev_path; - char *last_str = NULL; - char *endptr; - int dev_id; - - if (!dev_path) - return -WD_EINVAL; - - /* Find the last '-' in the string. 
*/ - last_str = strrchr(dev_path, '-'); - if (!last_str || *(last_str + 1) == '\0') - return -WD_EINVAL; - - /* Parse the following number */ - dev_id = strtol(last_str + 1, &endptr, DECIMAL_NUMBER); - /* Check whether it is truly all digits */ - if (*endptr != '\0' || dev_id < 0) - return -WD_EINVAL; - - return dev_id; -} - static void clone_ctx_to_internal(struct wd_ctx *ctx, struct wd_ctx_internal *ctx_in) { ctx_in->ctx = ctx->ctx; ctx_in->op_type = ctx->op_type; ctx_in->ctx_mode = ctx->ctx_mode; - ctx_in->ctx_type = ctx->ctx_type; } static int wd_shm_create(struct wd_ctx_config_internal *in) @@ -1255,7 +1230,7 @@ static int wd_sched_fill_table(struct wd_env_config_per_numa *config_numa, param.type = i; param.begin = ctx_table[mode][i].begin; param.end = ctx_table[mode][i].end; - param.ctx_prop = UADK_CTX_HW; + param.ctx_prop = UADK_ALG_HW; ret = wd_sched_rr_instance(sched, ¶m); if (ret) return ret; @@ -1436,7 +1411,7 @@ int wd_check_ctx(struct wd_ctx_config_internal *config, __u8 mode, __u32 idx) } ctx = config->ctxs + idx; - if (ctx->ctx_type == UADK_CTX_HW && ctx->ctx_mode != mode) { + if (ctx->ctx_type == UADK_ALG_HW && ctx->ctx_mode != mode) { WD_ERR("invalid: ctx(%u) mode is %hhu!\n", idx, ctx->ctx_mode); return -WD_EINVAL; } @@ -1596,6 +1571,8 @@ static int wd_ctx_init_driver(struct wd_ctx_config_internal *config, if (!driver) return 0; + WD_INFO("driver init: drv name: %s, alg_name: %s \n", + driver->drv_name, driver->alg_name); /* Prevent repeated initialization */ if (driver->init_state) return 0; @@ -2203,739 +2180,556 @@ free_path: return (void *)head; } -struct wd_alg_driver *wd_alg_drv_bind(__u8 ctx_prop, char *alg_name) +int wd_ctx_drv_config(char *alg_name, struct wd_ctx_config_internal *ctx_config) +{ + return 0; +} +void wd_ctx_drv_deconfig(struct wd_ctx_config_internal *ctx_config) { - struct wd_alg_driver *fb_drv; - struct wd_alg_driver *drv; - - /* Get alg driver from ctx type and alg name */ - switch (ctx_prop) { - case UADK_CTX_HW: - drv = wd_request_drv(alg_name, ALG_DRV_HW); - if (!drv) { - WD_ERR("no HW %s driver support\n", alg_name); - return NULL; - } - - fb_drv = wd_request_drv(alg_name, ALG_DRV_SOFT); - if (!fb_drv) - drv->fallback = 0; - else - drv->fallback = (handle_t)fb_drv; - - break; - case UADK_CTX_CE_INS: - drv = wd_request_drv(alg_name, ALG_DRV_CE_INS); - if (!drv) { - WD_ERR("no CE instr soft %s driver support\n", alg_name); - return NULL; - } - drv->fallback = 0; - break; - case UADK_CTX_SVE_INS: - drv = wd_request_drv(alg_name, ALG_DRV_SVE_INS); - if (!drv) { - WD_ERR("no SVE instr soft %s driver support\n", alg_name); - return NULL; - } - drv->fallback = 0; - break; - case UADK_CTX_SOFT: - drv = wd_request_drv(alg_name, ALG_DRV_SOFT); - if (!drv) { - WD_ERR("no instr soft %s driver support\n", alg_name); - return NULL; - } - drv->fallback = 0; - break; - default: - WD_ERR("ctx type error: %d.\n", ctx_prop); - return WD_ERR_PTR(-WD_ENODEV); - } - - return drv; } -void wd_alg_drv_unbind(struct wd_alg_driver *drv) +/** + * wd_ctx_unbind_drivers() - Phase 2.5 reverse: Unbind drivers from internal ctxs. + * + * Decrements driver refcounts and clears all drv pointers. 
+ * + * @config: Internal ctx config + */ +void wd_ctx_unbind_drivers(struct wd_ctx_config_internal *config) { - struct wd_alg_driver *fb_drv = NULL; + __u32 i; - if (!drv) + if (!config || !config->drv_array) return; - fb_drv = (struct wd_alg_driver *)drv->fallback; - if (fb_drv) - wd_release_drv(fb_drv); - wd_release_drv(drv); + wd_alg_drv_ref_dec(config->drv_array, config->drv_count); + + for (i = 0; i < config->ctx_num; i++) + config->ctxs[i].drv = NULL; } -static __u32 wd_ctxs_idx_init(struct wd_init_attrs *attrs, int numa_cnt, int task_type) +/** + * wd_ctx_bind_drivers() - Bind drivers to internal ctxs via RR. + * + * This is the SINGLE WRITE POINT for ctxs[i].drv in the entire lifecycle. + * Uses RR rule: ctxs[i].drv = drv_array[i % drv_count] + * + * Also: + * - Sets up soft fallback for HW drivers (once per unique HW driver) + * - Caches drv_array in config for session queries + * - Increments driver refcounts (deduplicated: each unique driver +1) + * + * and overwrote the RR mapping. + * + * @config: Internal ctx config (ctxs[] already copied by wd_init_ctx_config) + * @drv_array: Discovered unique drivers (from Phase 1) + * @drv_count: Number of unique drivers + * Return: 0 on success, negative on failure + */ +int wd_ctx_bind_drivers(struct wd_ctx_config_internal *config, + struct wd_alg_driver **drv_array, __u32 drv_count) { - struct wd_ctx_params *ctx_params = attrs->ctx_params; - int end = ctx_params->op_type_num; - struct wd_ctx_nums *ptr_ctx; - __u32 count = 0; - int i, uidx; + struct wd_alg_driver *drv; + __u32 i; - if (ctx_params->op_type_num > MAX_CTX_OP_TYPE) { - WD_ERR("invalid: max ctx op type<%u> is wrong!\n", ctx_params->op_type_num); - return 0; + if (!config || !drv_array || drv_count == 0) { + WD_ERR("invalid parameters!\n"); + return -WD_EINVAL; } - for (uidx = 0; uidx < UADK_CTX_MAX; uidx++) { - /* If it is a soft computing task, do not use HW queue */ - if (task_type == TASK_INSTR && uidx == UADK_CTX_HW) - continue; - for (i = 0; i < end; i++) { - ptr_ctx = &ctx_params->ctx_set_num[i]; - while (ptr_ctx) { - if (ptr_ctx->ctx_prop == uidx) { - ptr_ctx->ctx_begin = count; - if (uidx == UADK_CTX_HW) { - count += ptr_ctx->sync_ctx_num * numa_cnt; - count += ptr_ctx->async_ctx_num * numa_cnt; - } else { - count += ptr_ctx->sync_ctx_num; - count += ptr_ctx->async_ctx_num; - } - WD_ERR("optype<%d>, prop<%u>, begin<%u>, synx<%u>, async<%u>\n", - i, ptr_ctx->ctx_prop, ptr_ctx->ctx_begin, ptr_ctx->sync_ctx_num, ptr_ctx->async_ctx_num); - } - ptr_ctx = ptr_ctx->other_ctx; + WD_INFO("Phase 2: drivers array have <%u> drvers.\n", drv_count); + for (i = 0; i < config->ctx_num; i++) { + /* In the init process, only one hisi driver will be specified. */ + if (drv_count == 1) { + config->ctxs[i].drv = drv_array[0]; + config->ctxs[i].ctx_type = config->ctxs[0].drv->calc_type; + } else { + /* + * RR binding — the ONLY write to ctxs[i].drv in the + * entire lifecycle. After this, drv is read-only. 
+ */ + config->ctxs[i].drv = drv_array[i % drv_count]; + config->ctxs[i].ctx_type = config->ctxs[i].drv->calc_type; + } + WD_INFO("driver bind: drv name: %s, alg_name: %s for ctx<%u>\n", + config->ctxs[i].drv->drv_name, config->ctxs[i].drv->alg_name, i); + + /* HW driver needs soft fallback — set once per unique driver */ + if (config->ctxs[i].ctx_type == UADK_ALG_HW) { + drv = config->ctxs[i].drv; + if (!drv->fallback) { + drv->fallback = (handle_t)wd_request_drv( + config->alg_name, ALG_DRV_SOFT); } } } - return count; -} + /* Cache driver array for session queries */ + config->drv_array = drv_array; + config->drv_count = drv_count; -static struct wd_ctx_nums *wd_get_ctx_ptr(struct wd_ctx_params *ctx_params, - __u32 op_type, int ctx_prop) -{ - struct wd_ctx_nums *ptr_ctx; + /* Deduplicated refcount increment */ + wd_alg_drv_ref_inc(drv_array, drv_count); - if (op_type > ctx_params->op_type_num) - return NULL; + WD_INFO("Phase 2.5: bound %u ctxs to %u drivers via RR\n", + config->ctx_num, drv_count); - ptr_ctx = &ctx_params->ctx_set_num[op_type]; - while (ptr_ctx) { - if (ptr_ctx->ctx_prop == ctx_prop) - return ptr_ctx; - ptr_ctx = ptr_ctx->other_ctx; - } - - return NULL; + return 0; } -void wd_ctx_drv_deconfig(struct wd_ctx_config_internal *ctx_config) +/** + * wd_alg_drv_undiscover() - Free driver discovery result. + * + * Releases the drv_array allocated by wd_alg_drv_discover(). + * Does NOT touch the drivers themselves (refcount managed separately). + * + * @attrs: Initialization attributes + */ +void wd_alg_drv_undiscover(struct wd_init_attrs *attrs) { - __u32 i; - - // wd_dlclose_drv after this - for (i = 0; i < ctx_config->ctx_num; i++) - wd_alg_drv_unbind(ctx_config->ctxs[i].drv); + if (!attrs || !attrs->drv_array) + return; + /* Release wd_get_drv_array alloc memory */ + wd_put_drv_array(attrs->drv_array, attrs->drv_count); + attrs->drv_array = NULL; + attrs->drv_count = 0; } -int wd_ctx_drv_config(char *alg_name, struct wd_ctx_config_internal *ctx_config) +/** + * wd_alg_drv_discover() - Discover matching drivers. + * + * Normalizes attrs->alg to alg_type ("cipher", "digest", etc.), + * then calls wd_get_drv_array() to find all unique drivers. + * Results stored in attrs->drv_array and attrs->drv_count. + * + * Pure query — no resource allocation, no refcount changes. + * + * @attrs: Initialization attributes (input: alg, task_type; output: drv_array, drv_count) + * Return: 0 on success, negative on failure + */ +int wd_alg_drv_discover(struct wd_init_attrs *attrs) { - __u32 i, j; + char alg_type[CRYPTO_MAX_ALG_NAME] = {0}; + int ret; - // wd_dlopen_drv before this - WD_ERR("debug: call function: %s!\n", __func__); - for (i = 0; i < ctx_config->ctx_num; i++) { - ctx_config->ctxs[i].drv = wd_alg_drv_bind(ctx_config->ctxs[i].ctx_type, alg_name); - if (WD_IS_ERR(ctx_config->ctxs[i].drv)) { - continue; - } else if (!ctx_config->ctxs[i].drv) { - WD_ERR("failed to bind %s driver.\n", alg_name); - goto bind_err; - } - } + if (!attrs || !attrs->alg[0]) + return -WD_EINVAL; - return 0; + /* Normalize alg to alg_type (e.g. 
"cipher", "digest") */ + wd_get_alg_type(attrs->alg, alg_type); + if (!alg_type[0]) { + WD_ERR("unknown alg type for %s\n", attrs->alg); + return -WD_EINVAL; + } -bind_err: - for (j = 0; j < i; j++) { - wd_alg_drv_unbind(ctx_config->ctxs[j].drv); + ret = wd_get_drv_array(alg_type, attrs->task_type, NULL, + &attrs->drv_array, &attrs->drv_count); + if (ret) { + WD_ERR("failed to get %s's driver array\n", attrs->alg); + return -WD_EINVAL; } - return -WD_EINVAL; + + return 0; } -struct uacce_dev_list *wd_get_usable_list(struct uacce_dev_list *list, struct bitmask *bmp) +static int wd_alg_sched_instance(struct wd_sched *sched, + struct wd_ctx_config *ctx_config, + struct wd_ctx_params *ctx_params) { - struct uacce_dev_list *p, *node, *result = NULL; - struct uacce_dev *dev; - int numa_id, ret; + struct sched_params sparams; + __u32 sync_count, async_count, total_count; + __u32 i; + int ret; - if (!bmp) { - WD_ERR("invalid: bmp is NULL!\n"); - return WD_ERR_PTR(-WD_EINVAL); + if (!sched || !ctx_config || !ctx_params) { + WD_ERR("invalid: sched, ctx_config, or ctx_params is NULL!\n"); + return -WD_EINVAL; } - p = list; - while (p) { - dev = p->dev; - numa_id = dev->numa_id; - ret = numa_bitmask_isbitset(bmp, numa_id); - if (!ret) { - p = p->next; - continue; - } - - node = calloc(1, sizeof(*node)); - if (!node) { - result = WD_ERR_PTR(-WD_ENOMEM); - goto out_free_list; - } - - node->dev = wd_clone_dev(dev); - if (!node->dev) { - result = WD_ERR_PTR(-WD_ENOMEM); - goto out_free_node; - } - - if (!result) - result = node; - else - wd_add_dev_to_list(result, node); + /* Calculate total sync/async context counts */ + sync_count = 0; + async_count = 0; - p = p->next; + for (i = 0; i < ctx_params->op_type_num; i++) { + sync_count += ctx_params->ctx_set_num[i].sync_ctx_num; + async_count += ctx_params->ctx_set_num[i].async_ctx_num; } - return result ? result : WD_ERR_PTR(-WD_ENODEV); - -out_free_node: - free(node); -out_free_list: - wd_free_list_accels(result); - return result; -} - -static int wd_init_hw_ctx_set(struct wd_init_attrs *attrs, struct uacce_dev_list *list, - __u32 idx, int numa_id, __u32 op_type) -{ - struct wd_ctx_nums ctx_nums = attrs->ctx_params->ctx_set_num[op_type]; - __u32 ctx_set_num = ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num; - struct wd_ctx_config *ctx_config = attrs->ctx_config; - __u32 count = idx + ctx_set_num; - struct uacce_dev *dev; - __u32 i, cnt = 0; - - /* If the ctx set number is 0, the initialization is skipped. */ - if (!ctx_set_num) - return -WD_ENOPROC; - - dev = wd_find_dev_by_numa(list, numa_id); - if (WD_IS_ERR(dev)) - return WD_PTR_ERR(dev); - - for (i = idx; i < count; i++) { - ctx_config->ctxs[i].ctx = wd_request_ctx(dev); - if (errno == WD_EBUSY) { - dev = wd_find_dev_by_numa(list, numa_id); - if (WD_IS_ERR(dev)) - return WD_PTR_ERR(dev); - - if (cnt++ > WD_INIT_RETRY_TIMES) { - WD_ERR("failed to request enough ctx due to timeout!\n"); - return -WD_ETIMEDOUT; - } + total_count = sync_count + async_count; - /* self-decrease i to eliminate self-increase on next loop */ - i--; - continue; - } else if (!ctx_config->ctxs[i].ctx) { - /* - * wd_release_ctx_set will release ctx in - * caller wd_init_ctx_and_sched. - */ - return -WD_ENOMEM; - } - ctx_config->ctxs[i].op_type = op_type; - ctx_config->ctxs[i].ctx_mode = - ((i - idx) < ctx_nums.sync_ctx_num) ? 
- CTX_MODE_SYNC : CTX_MODE_ASYNC; - ctx_config->ctxs[i].ctx_type = UADK_CTX_HW; + if (total_count != ctx_config->ctx_num) { + WD_ERR("mismatch: expected %u contexts, got %u!\n", + total_count, ctx_config->ctx_num); + return -WD_EINVAL; } - return 0; -} + WD_INFO("Registering contexts: sync=%u, async=%u, total=%u\n", + sync_count, async_count, total_count); -static void wd_release_ctx_set(struct wd_ctx_config *ctx_config) -{ - __u32 i; + /* Register sync contexts range to scheduler */ + if (sync_count > 0) { + memset(&sparams, 0, sizeof(sparams)); + sparams.numa_id = 0; + sparams.type = 0; + sparams.mode = CTX_MODE_SYNC; + sparams.begin = 0; + sparams.end = sync_count - 1; + sparams.ctx_prop = 0; - for (i = 0; i < ctx_config->ctx_num; i++) - if (ctx_config->ctxs[i].ctx) { - wd_release_ctx(ctx_config->ctxs[i].ctx); - ctx_config->ctxs[i].ctx = 0; + ret = wd_sched_rr_instance(sched, &sparams); + if (ret) { + WD_ERR("failed to register sync contexts to scheduler!\n"); + return ret; } -} - -static int wd_instance_sched_set(struct wd_init_attrs *attrs, struct wd_ctx_nums ctx_nums, - int idx, int numa_id, int op_type) -{ - struct wd_sched *sched = attrs->sched; - struct sched_params sparams; - int i, end, dev_id, ret = 0; - dev_id = wd_parse_dev_id(attrs->ctx_config->ctxs[idx].ctx); - if (dev_id < 0) - return -WD_EINVAL; - - for (i = 0; i < CTX_MODE_MAX; i++) { - sparams.numa_id = numa_id; - sparams.type = op_type; - sparams.dev_id = dev_id; - sparams.mode = i; - sparams.begin = idx + ctx_nums.sync_ctx_num * i; - sparams.ctx_prop = UADK_CTX_HW; - end = idx - 1 + ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num * i; - if (end < 0 || sparams.begin > (__u32)end) - continue; - - sparams.end = end; - ret = wd_sched_rr_instance(sched, &sparams); - if (ret) - goto out; + WD_INFO("Registered %u sync contexts to scheduler\n", sync_count); } -out: - return ret; -} - -static int wd_hw_ctx_and_sched(struct wd_init_attrs *attrs, struct bitmask *bmp, - struct uacce_dev_list *list) -{ - struct wd_ctx_params *ctx_params = attrs->ctx_params; - __u32 op_type_num = ctx_params->op_type_num; - int i, ret, max_node = numa_max_node() + 1; - struct wd_ctx_nums ctx_nums; - __u32 j, idx = 0; + /* Register async contexts range to scheduler */ + if (async_count > 0) { + memset(&sparams, 0, sizeof(sparams)); + sparams.numa_id = 0; + sparams.type = 0; + sparams.mode = CTX_MODE_ASYNC; + sparams.begin = sync_count; + sparams.end = total_count - 1; + sparams.ctx_prop = 0; - for (i = 0; i < max_node; i++) { - if (!numa_bitmask_isbitset(bmp, i)) - continue; - for (j = 0; j < op_type_num; j++) { - ctx_nums = ctx_params->ctx_set_num[j]; - ret = wd_init_hw_ctx_set(attrs, list, idx, i, j); - if (ret == -WD_ENOPROC) - continue; - else if (ret) - goto free_ctxs; - ret = wd_instance_sched_set(attrs, ctx_nums, idx, i, j); - if (ret) - goto free_ctxs; - idx += (ctx_nums.sync_ctx_num + ctx_nums.async_ctx_num); + ret = wd_sched_rr_instance(sched, &sparams); + if (ret) { + WD_ERR("failed to register async contexts to scheduler!\n"); + return ret; } + + WD_INFO("Registered %u async contexts to scheduler\n", async_count); } return 0; - -free_ctxs: - wd_release_ctx_set(attrs->ctx_config); - - return ret; } -static void wd_init_device_nodemask(struct uacce_dev_list *list, struct bitmask *bmp) +/** + * wd_alg_ctx_uninit() - Release ctxs, scheduler, ctx_config. + * + * Releases resources in reverse allocation order: + * 1. Release scheduler + * 2. Release ctxs via RR rule (drv->free_ctx) + * 3. 
Free ctx_config and ctxs array + * + * @attrs: Initialization attributes + */ +void wd_alg_ctx_uninit(struct wd_init_attrs *attrs) { - struct uacce_dev_list *p = list; + struct wd_ctx_config *ctx_config; + struct wd_alg_driver *drv; + __u32 i, drv_idx; - numa_bitmask_clearall(bmp); - while (p) { - numa_bitmask_setbit(bmp, p->dev->numa_id); - p = p->next; - } -} + if (!attrs) + return; -static int wd_alg_other_ctx_init(struct wd_init_attrs *attrs, int ctx_prop) -{ - struct wd_ctx_config *ctx_config = attrs->ctx_config; - struct wd_ctx_params *ctx_params = attrs->ctx_params; - struct wd_ctx_nums *ptr_ctxs; - struct wd_soft_ctx *sfctx; - struct sched_params sparams; - __u32 begin, end, ctx_num; - int sync_type, ret; - __u32 i, j, k; + ctx_config = attrs->ctx_config; - WD_ERR("debug: call function: %s!\n", __func__); - for (i = 0; i < ctx_params->op_type_num; i++) { - ptr_ctxs = wd_get_ctx_ptr(ctx_params, i, ctx_prop); - if (!ptr_ctxs) - continue; + WD_INFO("Phase 1: wd_alg_ctx_uninit\n"); + /* Release scheduler */ + if (attrs->sched) { + wd_sched_rr_release(attrs->sched); + attrs->sched = NULL; + } - for (sync_type = CTX_MODE_SYNC; sync_type < CTX_MODE_MAX; sync_type++) { - if (sync_type == CTX_MODE_SYNC) { - ctx_num = ptr_ctxs->sync_ctx_num; - begin = ptr_ctxs->ctx_begin; - } else { - ctx_num = ptr_ctxs->async_ctx_num; - begin = ptr_ctxs->ctx_begin + ptr_ctxs->sync_ctx_num; - } - if (ctx_num == 0) + /* Release ctxs via RR rule */ + if (ctx_config && ctx_config->ctxs && attrs->drv_array && + attrs->drv_count > 0) { + for (i = 0; i < ctx_config->ctx_num; i++) { + if (!ctx_config->ctxs[i].ctx) continue; - end = begin + ctx_num; - for (j = begin; j < end; j++) { - ctx_config->ctxs[j].op_type = i; - ctx_config->ctxs[j].ctx_mode = sync_type; - ctx_config->ctxs[j].ctx_type = ctx_prop; - sfctx = calloc(1, sizeof(struct wd_soft_ctx)); - if (!sfctx) { - WD_ERR("failed to alloc ctx!\n"); - goto ctx_err; - } - ctx_config->ctxs[j].ctx = (handle_t)sfctx; - pthread_spin_init(&sfctx->slock, PTHREAD_PROCESS_SHARED); - pthread_spin_init(&sfctx->rlock, PTHREAD_PROCESS_SHARED); - } + drv_idx = i % attrs->drv_count; + drv = attrs->drv_array[drv_idx]; - memset(&sparams, 0x0, sizeof(struct sched_params)); - sparams.begin = begin; - sparams.end = end - 1; - sparams.mode = sync_type; - sparams.numa_id = 0; - sparams.ctx_prop = ctx_prop; - ret = wd_sched_rr_instance(attrs->sched, &sparams); - if (ret) { - WD_ERR("fail to instance scheduler.\n"); - goto ctx_err; + if (drv && drv->free_ctx) { + drv->free_ctx(ctx_config->ctxs[i].ctx); + ctx_config->ctxs[i].ctx = 0; + WD_INFO("Phase 2: free drv--<%s>, ctx<%u>\n", drv->drv_name, i); } } } - return WD_SUCCESS; - -ctx_err: - for (k = j; k >= begin; k--) { - free((struct wd_soft_ctx *)ctx_config->ctxs[k].ctx); - ctx_config->ctxs[k].ctx = 0; + /* Release ctx_config */ + if (ctx_config) { + if (ctx_config->ctxs) { + free(ctx_config->ctxs); + ctx_config->ctxs = NULL; + } + free(ctx_config); + attrs->ctx_config = NULL; } - return -WD_ENOMEM; + attrs->ctx_config_internal = NULL; + + WD_INFO("Phase 3 reverse: ctx uninit complete\n"); } -static int wd_alg_other_init(struct wd_init_attrs *attrs) +/** + * wd_alg_ctx_init() - Allocate ctxs, scheduler, and do internal copy. + * + * Uses drivers discovered in Phase 1 (attrs->drv_array). + * Allocates ctxs via RR: ctx[i] → drv_array[i % drv_count]->alloc_ctx() + * Then allocates scheduler, registers ctx ranges, and calls alg_init + * which performs wd_init_ctx_config() (wd_ctx[] → wd_ctx_internal[] copy). 
+ * + * On return: + * - attrs->ctx_config: user-visible ctx array (populated) + * - attrs->sched: scheduler (allocated and populated) + * - attrs->ctx_config_internal: MUST be set by alg_init callback + * + * NOTE: ctxs[i].drv is still NULL after this function — that's set in Phase 2.5. + * + * @attrs: Initialization attributes (input: drv_array, ctx_params, alg_init, etc.) + * Return: 0 on success, negative on failure + */ +int wd_alg_ctx_init(struct wd_init_attrs *attrs) { - struct wd_ctx_config *ctx_config = attrs->ctx_config; - struct wd_ctx_params *ctx_params = attrs->ctx_params; - __u32 ctx_set_num, op_type_num; + struct wd_ctx_config *ctx_config; + struct wd_ctx_params *ctx_params; + struct wd_drv_ctx_params dparams; + struct wd_alg_driver *drv; + handle_t ctx; + __u32 sync_num = 0, async_num = 0; + __u32 total_ctx_num = 0; + __u32 ctx_idx, drv_idx; + __u32 i, op_type; + __u16 region_num; + __u32 accum = 0; + __u32 cnt; + int ret; - WD_ERR("debug: call function: %s!\n", __func__); - op_type_num = ctx_params->op_type_num; - ctx_set_num = wd_ctxs_idx_init(attrs, 1, attrs->task_type); - if (!ctx_set_num || !op_type_num) { - WD_ERR("invalid: ctx_set_num is %u, op_type_num is %u!\n", - ctx_set_num, op_type_num); + if (!attrs || !attrs->ctx_params || !attrs->drv_array || + attrs->drv_count == 0) { + WD_ERR("invalid: attrs, ctx_params, or drv_array is NULL/empty!\n"); return -WD_EINVAL; } - ctx_config->ctx_num = ctx_set_num; - ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); - if (!ctx_config->ctxs) { - WD_ERR("failed to alloc ctxs!\n"); - return -WD_ENOMEM; - } - - return 0; -} + ctx_params = attrs->ctx_params; -static int wd_alg_hw_ctx_init(struct wd_init_attrs *attrs) -{ - struct wd_ctx_config *ctx_config = attrs->ctx_config; - struct wd_ctx_params *ctx_params = attrs->ctx_params; - struct bitmask *used_bmp = ctx_params->bmp; - struct uacce_dev_list *list, *used_list = NULL; - char alg_type[CRYPTO_MAX_ALG_NAME]; - __u32 ctx_set_num, op_type_num; - int numa_cnt, ret; - - WD_ERR("debug: call function: %s!\n", __func__); - wd_get_alg_type(attrs->alg, alg_type); + /* Calculate total sync/async context counts */ + for (i = 0; i < ctx_params->op_type_num; i++) { + sync_num += ctx_params->ctx_set_num[i].sync_ctx_num; + async_num += ctx_params->ctx_set_num[i].async_ctx_num; + } + total_ctx_num = sync_num + async_num; - list = wd_get_accel_list(alg_type); - if (!list) { - WD_ERR("failed to get devices for alg: %s\n", attrs->alg); - return -WD_ENODEV; + if (total_ctx_num == 0) { + WD_ERR("invalid: total_ctx_num is zero!\n"); + return -WD_EINVAL; } - /* - * Not every numa has a device. Therefore, the first thing is to - * filter the devices in the selected numa node, and the second - * thing is to obtain the distribution of devices. 
- */ - used_list = wd_get_usable_list(list, used_bmp); - if (WD_IS_ERR(used_list)) { - ret = WD_PTR_ERR(used_list); - WD_ERR("failed to get usable devices(%d)!\n", ret); - goto out_freelist; - } - - wd_init_device_nodemask(used_list, used_bmp); - numa_cnt = numa_bitmask_weight(used_bmp); - if (!numa_cnt) { - ret = numa_cnt; - WD_ERR("invalid: bmp is clear!\n"); - goto out_freeusedlist; - } - - op_type_num = ctx_params->op_type_num; - ctx_set_num = wd_ctxs_idx_init(attrs, numa_cnt, attrs->task_type); - WD_ERR("ctx sum num is: %u, op_type num is: %u!\n", ctx_set_num, op_type_num); - if (!ctx_set_num || !op_type_num) { - WD_ERR("invalid: ctx_set_num is %u, op_type_num is %u!\n", - ctx_set_num, op_type_num); - ret = -WD_EINVAL; - goto out_freelist; + /* Allocate ctx_config structure */ + ctx_config = calloc(1, sizeof(*ctx_config)); + if (!ctx_config) { + WD_ERR("failed to allocate ctx_config!\n"); + return -WD_ENOMEM; } - ctx_config->ctx_num = ctx_set_num; - ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); + /* Allocate user-visible wd_ctx array (no drv pointer — ABI safe) */ + ctx_config->ctxs = calloc(total_ctx_num, sizeof(struct wd_ctx)); if (!ctx_config->ctxs) { - ret = -WD_ENOMEM; - WD_ERR("failed to alloc ctxs!\n"); - goto out_freeusedlist; + WD_ERR("failed to allocate ctxs array!\n"); + free(ctx_config); + return -WD_ENOMEM; } + ctx_config->ctx_num = total_ctx_num; + attrs->ctx_config = ctx_config; - ret = wd_hw_ctx_and_sched(attrs, used_bmp, used_list); - if (ret) - free(ctx_config->ctxs); - -out_freeusedlist: - wd_free_list_accels(used_list); -out_freelist: - wd_free_list_accels(list); - - return ret; -} + WD_INFO("Phase 2: %u drivers, %u ctxs (sync=%u, async=%u)\n", + attrs->drv_count, total_ctx_num, sync_num, async_num); -static void wd_alg_ctxs_uninit(struct wd_ctx_config *ctx_config) -{ - __u32 i; + /* ── RR allocation loop ── */ + for (ctx_idx = 0; ctx_idx < total_ctx_num; ctx_idx++) { + drv_idx = ctx_idx % attrs->drv_count; + drv = attrs->drv_array[drv_idx]; - for (i = 0; i < ctx_config->ctx_num; i++) { - if (ctx_config->ctxs[i].ctx) { - if (ctx_config->ctxs[i].ctx_type == UADK_CTX_HW) - wd_release_ctx(ctx_config->ctxs[i].ctx); - else - free((struct wd_soft_ctx *)ctx_config->ctxs[i].ctx); - ctx_config->ctxs[i].ctx = 0; + if (!drv || !drv->alloc_ctx) { + WD_ERR("Warning: driver-%s alloc_ctx is NULL!\n", drv->drv_name); + continue; } - } - if (ctx_config->ctxs) { - free(ctx_config->ctxs); - ctx_config->ctxs = 0; - } -} - -int wd_alg_attrs_init(struct wd_init_attrs *attrs) -{ - wd_alg_poll_ctx alg_poll_func = attrs->alg_poll_ctx; - wd_alg_init alg_init_func = attrs->alg_init; - struct wd_ctx_config *ctx_config = NULL; - struct wd_sched *alg_sched = NULL; - struct wd_alg_driver *drv = NULL; - char *alg_name = attrs->alg; - __u32 op_type_num; - int ret = 0; + WD_ERR("------driver<%s> alloc ctx<%u>\n", drv->drv_name, ctx_idx); + /* Determine op_type for this ctx index */ + op_type = 0; + accum = 0; + for (i = 0; i < ctx_params->op_type_num; i++) { + cnt = ctx_params->ctx_set_num[i].sync_ctx_num + + ctx_params->ctx_set_num[i].async_ctx_num; + if (ctx_idx < accum + cnt) { + op_type = i; + break; + } + accum += cnt; + } - if (!attrs->ctx_params) - return -WD_EINVAL; + /* Fill driver ctx params */ + memset(&dparams, 0, sizeof(dparams)); + dparams.ctx_mode = (ctx_idx < sync_num) ? 
+ CTX_MODE_SYNC : CTX_MODE_ASYNC; + dparams.op_type = (__u16)op_type; + dparams.numa_id = 0; /* Scheduler handles NUMA at runtime */ + dparams.idx = ctx_idx; + dparams.bmp = ctx_params->bmp; + dparams.epoll_en = false; + + /* Call driver's alloc_ctx — driver owns the implementation */ + ret = drv->alloc_ctx(attrs->alg, &dparams, &ctx); + if (!ctx || ret) { + WD_ERR("driver %u (%s) alloc_ctx failed for ctx %u\n", + drv_idx, drv->drv_name, ctx_idx); + goto cleanup_ctxs; + } - WD_ERR("debug: call function: %s!\n", __func__); - ctx_config = calloc(1, sizeof(*ctx_config)); - if (!ctx_config) { - WD_ERR("fail to alloc ctx config\n"); - return -WD_ENOMEM; + /* Store in user-visible ctx_config */ + ctx_config->ctxs[ctx_idx].ctx = ctx; + ctx_config->ctxs[ctx_idx].op_type = dparams.op_type; + ctx_config->ctxs[ctx_idx].ctx_mode = dparams.ctx_mode; } - attrs->ctx_config = ctx_config; - - /* Get op_type_num */ - op_type_num = attrs->ctx_params->op_type_num; - if (!op_type_num) - goto out_ctx_config; - /* Use default sched_type to alloc scheduler */ + /* ── Allocate scheduler ── */ if (attrs->sched_type == SCHED_POLICY_DEV) - alg_sched = wd_sched_rr_alloc(attrs->sched_type, attrs->ctx_params->op_type_num, - DEVICE_REGION_MAX, alg_poll_func); + region_num = DEVICE_REGION_MAX; else - alg_sched = wd_sched_rr_alloc(attrs->sched_type, attrs->ctx_params->op_type_num, - numa_max_node() + 1, alg_poll_func); - if (!alg_sched) { - WD_ERR("fail to alloc scheduler\n"); - goto out_ctx_config; - } - attrs->sched = alg_sched; - - /* Initialize queues according to task type */ - switch (attrs->task_type) { - case TASK_HW: - ret = wd_alg_hw_ctx_init(attrs); - if (ret) { - WD_ERR("fail to init HW ctx\n"); - goto out_freesched; - } - - break; - case TASK_MIX: - ret = wd_alg_hw_ctx_init(attrs); - if (ret) { - WD_ERR("fail to init mix HW ctx\n"); - goto out_freesched; - } - - WD_ERR("TASK_MIX: call function: %s!\n", __func__); - drv = wd_request_drv(alg_name, ALG_DRV_SOFT); - if (drv == NULL) { - WD_ERR("fail to find soft driver.\n"); - break; - } else if (drv->calc_type == UADK_ALG_CE_INSTR) { - ret = wd_alg_other_ctx_init(attrs, UADK_CTX_CE_INS); - if (ret) { - WD_ERR("fail to init ce ctx\n"); - goto out_ctx_init; - } - } else if (drv->calc_type == UADK_ALG_SVE_INSTR) { - ret = wd_alg_other_ctx_init(attrs, UADK_CTX_SVE_INS); - if (ret) { - WD_ERR("fail to init sve ctx\n"); - goto out_ctx_init; - } - } + region_num = numa_max_node() + 1; + + attrs->sched = wd_sched_rr_alloc(attrs->sched_type, + ctx_params->op_type_num, + region_num, + attrs->alg_poll_ctx); + if (!attrs->sched) { + WD_ERR("failed to allocate scheduler!\n"); + ret = -WD_ENOMEM; + goto cleanup_ctxs; + } - break; - /* Only pure soft queues */ - case TASK_INSTR: - ret = wd_alg_other_init(attrs); - if (ret) { - WD_ERR("fail to init other ctx.\n"); - goto out_freesched; - } + /* ── Register contexts to scheduler ── */ + ret = wd_alg_sched_instance(attrs->sched, ctx_config, ctx_params); + if (ret) { + WD_ERR("failed to register contexts to scheduler!\n"); + goto cleanup_sched; + } - WD_ERR("TASK_INSTR: call function: %s!\n", __func__); - drv = wd_request_drv(alg_name, ALG_DRV_SOFT); - if (drv == NULL) { - WD_ERR("fail to find soft driver.\n"); - goto out_ctx_init; - } else if (drv->calc_type == UADK_ALG_CE_INSTR) { - ret = wd_alg_other_ctx_init(attrs, UADK_CTX_CE_INS); - if (ret) { - WD_ERR("fail to init ce ctx\n"); - goto out_ctx_init; - } - } else if (drv->calc_type == UADK_ALG_SVE_INSTR) { - ret = wd_alg_other_ctx_init(attrs, UADK_CTX_SVE_INS); - if (ret) { - 
WD_ERR("fail to init sve ctx\n"); - goto out_ctx_init; - } - } - break; - default: - WD_ERR("driver type error: %d\n", drv->calc_type); - return -WD_EINVAL; + /* ── Call algorithm-specific init (does wd_init_ctx_config copy) ── */ + ctx_config->cap = ctx_params->cap; + ret = attrs->alg_init(ctx_config, attrs->sched, attrs); + if (ret) { + WD_ERR("failed to initialize algorithm!\n"); + goto cleanup_sched; } - ctx_config->cap = attrs->ctx_params->cap; - ret = alg_init_func(ctx_config, alg_sched); - if (ret) - goto out_ctx_init; + /* + * IMPORTANT: attrs->ctx_config_internal must be set by alg_init. + * The alg_init callback (e.g. wd_cipher_common_init) internally calls + * wd_init_ctx_config() which creates the wd_ctx_config_internal. + * The callback must store the pointer in attrs->ctx_config_internal. + * + * If the current alg_init signature doesn't pass attrs, it needs to + * be updated. See "Required changes in other files" below. + */ - WD_ERR("---->ctx nums: %u\n", ctx_config->ctx_num); + WD_INFO("Phase 2 complete: %u ctxs from %u drivers\n", + total_ctx_num, attrs->drv_count); return 0; -out_ctx_init: - wd_alg_ctxs_uninit(ctx_config); -out_freesched: - wd_sched_rr_release(alg_sched); -out_ctx_config: - if (ctx_config) - free(ctx_config); + /* ── Error cleanup (LIFO) ── */ +cleanup_sched: + wd_sched_rr_release(attrs->sched); + attrs->sched = NULL; +cleanup_ctxs: + /* Free ctxs allocated so far using RR rule */ + for (i = 0; i < ctx_idx; i++) { + drv_idx = i % attrs->drv_count; + drv = attrs->drv_array[drv_idx]; + if (drv && drv->free_ctx && ctx_config->ctxs[i].ctx) { + drv->free_ctx(ctx_config->ctxs[i].ctx); + ctx_config->ctxs[i].ctx = 0; + } + } + free(ctx_config->ctxs); + free(ctx_config); + attrs->ctx_config = NULL; return ret; } +/** + * wd_alg_attrs_uninit() - Algorithm attribute cleanup. + * + * Releases all resources in strict reverse order of init: + * Phase 2.5 reverse: wd_ctx_unbind_drivers() + * Phase 2 reverse: wd_alg_ctx_uninit() + * Phase 1 reverse: wd_alg_drv_undiscover() + * + * @attrs: Initialization attributes + */ void wd_alg_attrs_uninit(struct wd_init_attrs *attrs) { - struct wd_ctx_config *ctx_config = attrs->ctx_config; - struct wd_sched *alg_sched = attrs->sched; - - if (!ctx_config) { - wd_sched_rr_release(alg_sched); + if (!attrs) return; - } - wd_alg_ctxs_uninit(ctx_config); + WD_INFO("Algorithm cleanup started: alg=%s\n", attrs->alg); - free(ctx_config); - wd_sched_rr_release(alg_sched); -} + /* Phase reverse: release ctxs, scheduler, ctx_config */ + wd_alg_ctx_uninit(attrs); -int wd_queue_is_busy(struct wd_soft_ctx *sctx) -{ - /* The queue is not used */ - if (sctx->run_num >= MAX_SOFT_QUEUE_LENGTH - 1) - return -WD_EBUSY; + /* Phase reverse: free driver array */ + wd_alg_drv_undiscover(attrs); - return 0; + WD_INFO("Algorithm cleanup complete\n"); } -int wd_get_sqe_from_queue(struct wd_soft_ctx *sctx, __u32 tag_id) +/** + * wd_alg_attrs_init() - Algorithm attribute initialization (V2 path). + * + * Orchestrates the 3-phase initialization pipeline: + * Phase 1: wd_alg_drv_discover() — discover unique drivers + * Phase 2: wd_alg_ctx_init() — allocate ctxs + scheduler + internal copy + * Phase 2.5: wd_ctx_bind_drivers() — RR bind drivers to internal ctxs + * + * After this, Phase 3 (driver init) is done by the caller via wd_alg_init_driver(). 
+ * + * @attrs: Initialization attributes (input/output) + * Return: 0 on success, negative on failure + */ +int wd_alg_attrs_init(struct wd_init_attrs *attrs) { - struct wd_soft_sqe *sqe = NULL; - - pthread_spin_lock(&sctx->slock); - sqe = &sctx->qfifo[sctx->head]; - if (!sqe->used && !sqe->complete) { // find the next not used sqe - sctx->head++; - if (unlikely(sctx->head == MAX_SOFT_QUEUE_LENGTH)) - sctx->head = 0; + int ret; - sqe->used = 1; - sqe->complete = 1; - sqe->id = tag_id; - sqe->result = 0; - __atomic_fetch_add(&sctx->run_num, 0x1, __ATOMIC_ACQUIRE); - pthread_spin_unlock(&sctx->slock); - } else { - pthread_spin_unlock(&sctx->slock); - return -WD_EBUSY; + if (!attrs || !attrs->ctx_params) { + WD_ERR("invalid: attrs or ctx_params is NULL!\n"); + return -WD_EINVAL; } - return 0; -} + WD_INFO("Algorithm initialization started: alg=%s, task_type=%u\n", + attrs->alg, attrs->task_type); -int wd_put_sqe_to_queue(struct wd_soft_ctx *sctx, __u32 *tag_id, __u8 *result) -{ - struct wd_soft_sqe *sqe = NULL; - - /* The queue is not used */ - if (sctx->run_num < 1) - return -WD_EAGAIN; - - if (pthread_spin_trylock(&sctx->rlock)) - return -WD_EAGAIN; - sqe = &sctx->qfifo[sctx->tail]; - if (sqe->used && sqe->complete) { // find a used sqe - sctx->tail++; - if (unlikely(sctx->tail == MAX_SOFT_QUEUE_LENGTH)) - sctx->tail = 0; + /* Phase 1: Driver discovery (pure query, no side effects) */ + ret = wd_alg_drv_discover(attrs); + if (ret) { + WD_ERR("Phase 1: driver discovery failed!\n"); + return ret; + } + WD_INFO("Phase 1: discovered %u unique drivers\n", attrs->drv_count); - *tag_id = sqe->id; - *result = sqe->result; - sqe->used = 0x0; - sqe->complete = 0x0; - __atomic_fetch_sub(&sctx->run_num, 0x1, __ATOMIC_ACQUIRE); - pthread_spin_unlock(&sctx->rlock); - } else { - pthread_spin_unlock(&sctx->rlock); - return -WD_EAGAIN; + /* Phase 2: ctx allocation + internal copy + scheduler */ + ret = wd_alg_ctx_init(attrs); + if (ret) { + WD_ERR("Phase 2: ctx init failed!\n"); + goto out_undiscover; } + WD_INFO("Algorithm initialization complete: %u contexts from %u drivers\n", + attrs->ctx_config->ctx_num, attrs->drv_count); + return 0; + +out_undiscover: + wd_alg_drv_undiscover(attrs); + return ret; } + -- 2.43.0
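The Phase 2.5 binding rule introduced above is easiest to see in isolation. The sketch below is a minimal, self-contained illustration of the round-robin mapping, using toy stand-in types (toy_driver, toy_ctx) instead of the real struct wd_alg_driver and struct wd_ctx_internal; it only demonstrates the index arithmetic ctxs[i].drv = drv_array[i % drv_count], and why the same i % drv_count rule lets the uninit path recover the owning driver without extra bookkeeping. The driver names in main() are illustrative only.

/*
 * Minimal sketch of the Phase 2.5 round-robin binding rule with toy
 * stand-in types. The real code uses struct wd_alg_driver and
 * struct wd_ctx_internal; the names here are illustrative only.
 */
#include <stdio.h>
#include <stddef.h>

struct toy_driver {
	const char *name;
};

struct toy_ctx {
	struct toy_driver *drv;	/* single write point: the bind phase */
};

/* Phase 2.5: ctx i is always owned by driver i % drv_count */
static void toy_bind(struct toy_ctx *ctxs, unsigned int ctx_num,
		     struct toy_driver **drv_array, unsigned int drv_count)
{
	unsigned int i;

	for (i = 0; i < ctx_num; i++)
		ctxs[i].drv = drv_array[i % drv_count];
}

/* Reverse of bind: clear the pointers; ownership stays derivable from i % drv_count */
static void toy_unbind(struct toy_ctx *ctxs, unsigned int ctx_num)
{
	unsigned int i;

	for (i = 0; i < ctx_num; i++)
		ctxs[i].drv = NULL;
}

int main(void)
{
	struct toy_driver hw = { "hisi_sec" };
	struct toy_driver sw = { "isa_ce_sm4" };
	struct toy_driver *drvs[] = { &hw, &sw };
	struct toy_ctx ctxs[5] = { { NULL } };
	unsigned int i;

	toy_bind(ctxs, 5, drvs, 2);
	for (i = 0; i < 5; i++)
		printf("ctx %u -> %s\n", i, ctxs[i].drv->name);

	toy_unbind(ctxs, 5);
	return 0;
}

Because the mapping is a pure function of the ctx index and the driver count, the error-cleanup and uninit paths in the framework patch can free each ctx through drv_array[i % drv_count] without ever reading ctxs[i].drv, which is why unbind can simply clear the pointers.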
From: Longfang Liu <liulongfang@huawei.com> Adapt the user-space driver according to UADK's framework update method to ensure compatibility between the driver and the framework. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- drv/hash_mb/hash_mb.c | 1 + drv/hisi_comp.c | 2 +- drv/hisi_dae_common.c | 2 +- drv/hisi_hpre.c | 2 +- drv/hisi_sec.c | 8 +++++++- drv/hisi_udma.c | 2 +- drv/isa_ce_sm3.c | 3 +++ drv/isa_ce_sm4.c | 4 +++- 8 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drv/hash_mb/hash_mb.c b/drv/hash_mb/hash_mb.c index 5c0daf2..f205224 100644 --- a/drv/hash_mb/hash_mb.c +++ b/drv/hash_mb/hash_mb.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include "hash_mb.h" +#include "../wd_drv.h" #define MIN(a, b) (((a) > (b)) ? (b) : (a)) #define IPAD_VALUE 0x36 diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index 9246de8..7c7104b 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -1491,7 +1491,7 @@ static int hisi_zip_init(void *conf, void *priv) memcpy(&zip_ctx->config, config, sizeof(struct wd_ctx_config_internal)); /* allocate qp for each context */ for (i = 0; i < config->ctx_num; i++) { - if (config->ctxs[i].ctx_type != UADK_CTX_HW || + if (config->ctxs[i].ctx_type != UADK_ALG_HW || !config->ctxs[i].ctx) continue; h_ctx = config->ctxs[i].ctx; diff --git a/drv/hisi_dae_common.c b/drv/hisi_dae_common.c index 74dc84f..82ae6f9 100644 --- a/drv/hisi_dae_common.c +++ b/drv/hisi_dae_common.c @@ -327,7 +327,7 @@ int dae_init(void *conf, void *priv) qm_priv.sqe_size = sizeof(struct dae_sqe); /* Allocate qp for each context */ for (i = 0; i < config->ctx_num; i++) { - if (config->ctxs[i].ctx_type != UADK_CTX_HW || + if (config->ctxs[i].ctx_type != UADK_ALG_HW || !config->ctxs[i].ctx) continue; h_ctx = config->ctxs[i].ctx; diff --git a/drv/hisi_hpre.c b/drv/hisi_hpre.c index 0a0acc5..4b155a0 100644 --- a/drv/hisi_hpre.c +++ b/drv/hisi_hpre.c @@ -653,7 +653,7 @@ static int hpre_init_qm_priv(struct wd_ctx_config_internal *config, qm_priv->sqe_size = sizeof(struct hisi_hpre_sqe); for (i = 0; i < config->ctx_num; i++) { - if (config->ctxs[i].ctx_type != UADK_CTX_HW || + if (config->ctxs[i].ctx_type != UADK_ALG_HW || !config->ctxs[i].ctx) continue; h_ctx = config->ctxs[i].ctx; diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 0e17eb9..17c986d 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -10,6 +10,7 @@ #include "crypto/aes.h" #include "crypto/galois.h" #include "hisi_qm_udrv.h" +#include "wd_drv.h" #define BIT(nr) (1UL << (nr)) #define SEC_DIGEST_ALG_OFFSET 11 @@ -797,12 +798,15 @@ static int sec_aead_get_extend_ops(void *ops) .queue_num = SEC_CTX_Q_NUM_DEF,\ .op_type_num = 1,\ .fallback = 0,\ + .init_state = 0,\ .init = hisi_sec_init,\ .exit = hisi_sec_exit,\ .send = alg_type##_send,\ .recv = alg_type##_recv,\ .get_usage = hisi_sec_get_usage,\ .get_extend_ops = sec_aead_get_extend_ops,\ + .alloc_ctx = wd_hw_alloc_ctx, \ + .free_ctx = wd_hw_free_ctx, \ } static struct wd_alg_driver cipher_alg_driver[] = { @@ -3911,10 +3915,12 @@ static int hisi_sec_init(void *conf, void *priv) return -WD_EINVAL; } + WD_INFO("hisi_sec_init: ctx type: %u for %u ctx.\n", + config->ctxs[0].ctx_type, config->ctx_num); qm_priv.sqe_size = sizeof(struct hisi_sec_sqe); /* allocate qp for each context */ for (i = 0; i < config->ctx_num; i++) { - if (config->ctxs[i].ctx_type != UADK_CTX_HW || + if (config->ctxs[i].ctx_type != UADK_ALG_HW || !config->ctxs[i].ctx) continue; h_ctx = config->ctxs[i].ctx; diff --git a/drv/hisi_udma.c b/drv/hisi_udma.c index 62b83e7..424e9b1 100644 --- 
a/drv/hisi_udma.c +++ b/drv/hisi_udma.c @@ -461,7 +461,7 @@ static int udma_init(void *conf, void *priv) qm_priv.sqe_size = sizeof(struct udma_sqe); /* Allocate qp for each context */ for (i = 0; i < config->ctx_num; i++) { - if (config->ctxs[i].ctx_type != UADK_CTX_HW || + if (config->ctxs[i].ctx_type != UADK_ALG_HW || !config->ctxs[i].ctx) continue; h_ctx = config->ctxs[i].ctx; diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c index 9dbd0a8..196001a 100644 --- a/drv/isa_ce_sm3.c +++ b/drv/isa_ce_sm3.c @@ -17,6 +17,7 @@ #include "drv/isa_ce_sm3.h" #include "drv/wd_digest_drv.h" #include "wd_digest.h" +#include "wd_drv.h" #define SM3_ALIGN_MASK 63U @@ -43,6 +44,8 @@ static struct wd_alg_driver sm3_ce_alg_driver = { .send = sm3_ce_drv_send, .recv = sm3_ce_drv_recv, .get_usage = sm3_ce_get_usage, + .alloc_ctx = wd_soft_alloc_ctx, + .free_ctx = wd_soft_free_ctx, }; static void __attribute__((constructor)) sm3_ce_probe(void) diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c index 4c42693..8d3f606 100644 --- a/drv/isa_ce_sm4.c +++ b/drv/isa_ce_sm4.c @@ -11,10 +11,10 @@ * Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. */ -#include "wd_alg.h" #include "drv/wd_cipher_drv.h" #include "isa_ce_sm4.h" #include "wd_cipher.h" +#include "wd_drv.h" #define SM4_ENCRYPT 1 #define SM4_DECRYPT 0 @@ -433,6 +433,8 @@ static int cipher_recv(handle_t ctx, void *msg) .exit = isa_ce_exit,\ .send = alg_type##_send,\ .recv = alg_type##_recv,\ + .alloc_ctx = wd_soft_alloc_ctx, \ + .free_ctx = wd_soft_free_ctx, \ } static struct wd_alg_driver cipher_alg_driver[] = { -- 2.43.0
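The hook wiring added in this driver patch is the driver-side half of the framework's Phase 2 allocation loop. Below is a minimal sketch, with simplified stand-in types, of the alloc_ctx/free_ctx pattern as it can be inferred from how the framework invokes the hooks (algorithm name and a parameter block in, an opaque handle out). The real hooks, wd_hw_alloc_ctx and wd_soft_alloc_ctx, are presumably declared in wd_drv.h (the header these drivers now include) and may differ in signature and behavior; everything named toy_* here is hypothetical.

/*
 * Toy sketch of the per-driver alloc_ctx/free_ctx hook pattern adopted
 * in this patch. All types and signatures are simplified stand-ins
 * inferred from the framework-side calls; they are not the real uadk
 * definitions.
 */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned long toy_handle_t;

struct toy_ctx_params {
	unsigned char ctx_mode;		/* 0: sync, 1: async */
	unsigned short op_type;
};

struct toy_alg_driver {
	const char *drv_name;
	int (*alloc_ctx)(const char *alg, struct toy_ctx_params *p,
			 toy_handle_t *ctx);
	void (*free_ctx)(toy_handle_t ctx);
};

/* "Soft" driver: a ctx is plain heap state, no hardware queue involved */
static int toy_soft_alloc_ctx(const char *alg, struct toy_ctx_params *p,
			      toy_handle_t *ctx)
{
	void *state = calloc(1, 64);

	(void)alg;
	(void)p;
	if (!state)
		return -1;
	*ctx = (toy_handle_t)state;
	return 0;
}

static void toy_soft_free_ctx(toy_handle_t ctx)
{
	free((void *)ctx);
}

static struct toy_alg_driver sm4_toy_driver = {
	.drv_name = "isa_ce_sm4",
	.alloc_ctx = toy_soft_alloc_ctx,
	.free_ctx = toy_soft_free_ctx,
};

int main(void)
{
	struct toy_ctx_params p = { .ctx_mode = 0, .op_type = 0 };
	toy_handle_t ctx;

	/* Framework side: the RR loop calls the owning driver's hooks */
	if (sm4_toy_driver.alloc_ctx("cbc(sm4)", &p, &ctx))
		return 1;
	printf("%s allocated ctx %#lx\n", sm4_toy_driver.drv_name, ctx);
	sm4_toy_driver.free_ctx(ctx);
	return 0;
}

The design point illustrated here is that ctx allocation moves behind a driver-owned function pointer, so hardware drivers can keep requesting device queues while instruction-based drivers hand back heap-only state, and the framework's allocation loop stays identical for both.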