From: Longfang Liu <liulongfang@huawei.com> In the original scheduling domain partitioning scheme of the scheduler, a static three-dimensional array-based partitioning approach was used. This method lacks scalability, is tightly coupled with service devices, has fixed memory allocation, and results in memory wastage. To address these issues, an improved solution has been developed. The new approach employs a hash bucket scheme combined with linked lists, enabling optimized processing within the scheduling domain. Signed-off-by: Longfang Liu <liulongfang@huawei.com> --- include/wd_alg_common.h | 4 +- wd_sched.c | 2699 ++++++++++++++++++++------------------- 2 files changed, 1417 insertions(+), 1286 deletions(-) diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index 54b02e2..ac50cb2 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -190,9 +190,7 @@ struct wd_sched { void *sched_key, const int sched_mode); int (*poll_policy)(handle_t h_sched_ctx, __u32 expect, __u32 *count); - void (*set_param)(handle_t h_sched_ctx, - void *sched_key, - void *sched_param); + void (*set_param)(handle_t h_sched_ctx, void *sched_key, void *sched_param); handle_t h_sched_ctx; }; diff --git a/wd_sched.c b/wd_sched.c index 073108c..edc893c 100644 --- a/wd_sched.c +++ b/wd_sched.c @@ -1,778 +1,1005 @@ // SPDX-License-Identifier: Apache-2.0 /* - * Copyright 2020-2021 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2020-2026 Huawei Technologies Co.,Ltd. All rights reserved. * Copyright 2020-2021 Linaro ltd. + * + * Scheduler: Simplified Pure Hash Table with Dynamic Context Expansion + * + * Key improvements: + * - Single global hash table with (region_id, mode, op_type, prop) dimensions + * - Segment list for non-contiguous ctx ranges + * - Dual-domain queues for session key. + * - Dynamic ctx expansion in HUNGRY mode based on load threshold + * - Packet reception is handled through the active queues in the session key. 
+ * - Simplified sched_init: only allocate one sync + one async ctx + * - Removed redundant wd_sched_info layer */ #define _GNU_SOURCE #include <stdlib.h> #include <stdbool.h> +#include <string.h> #include <sched.h> #include <numa.h> +#include <limits.h> +#include <pthread.h> +#include <stdatomic.h> #include "wd_sched.h" -#define MAX_POLL_TIMES 1000 +#define MAX_POLL_TIMES 1000 +#define HUNGRY_LOAD_THRESHOLD 256 +#define SKEY_CTX_MAX_NUM 16 +#define SKEY_MAX_THREAD_NUM 64 +#define SKEY_LOAD_UPDATE_INTERVAL 128 +#define MAX_NUMA_NODES (NUMA_NUM_NODES >> 5) + +/* ============================================================================ + * Hash Table Configuration + * ============================================================================ + */ +#define WD_SCHED_MAX_BUCKETS 512 +#define WD_SCHED_MIN_BUCKETS 32 +#define WD_SCHED_LOAD_FACTOR 0.75f +#define HASH_PRIME1 73 +#define HASH_PRIME2 13 +#define HASH_PRIME3 7 +#define HASH_PRIME4 11 + +/* ============================================================================ + * Scheduling Region Mode + * ============================================================================ + */ enum sched_region_mode { SCHED_MODE_SYNC = 0, SCHED_MODE_ASYNC = 1, SCHED_MODE_BUTT }; -struct wd_sched_balancer { - int switch_slice; - __u32 hw_task_num; - __u32 sw_task_num; - __u32 hw_dfx_num; - __u32 sw_dfx_num; +/* ============================================================================ + * Segment List for Domain Index Organization + * ============================================================================ + */ + +/** + * wd_sched_ctx_segment - Contiguous segment of ctx indices in domain + * @begin: Start index of this segment + * @end: End index of this segment (inclusive) + * @next: Pointer to next segment in the linked list + * + * Supports non-contiguous ctx ranges via segment list. 
+ */ +struct wd_sched_ctx_segment { + __u32 begin; + __u32 end; + struct wd_sched_ctx_segment *next; }; -/* - * sched_key - The key if schedule region. - * @numa_id: The schedule numa region id. - * @mode: Sync mode:0, async_mode:1 - * @type: Service type , the value must smaller than type_num. - * @sync_ctxid: alloc ctx id for sync mode - * @async_ctxid: alloc ctx id for async mode +/* ============================================================================ + * Session key domain cache processing. + * ============================================================================ */ -struct sched_key { - int numa_id; - __u8 type; - __u8 mode; - __u32 dev_id; - __u8 ctx_prop; - __u16 is_stream; - __u16 prio_mode; - __u32 pkt_size; - __u16 sync_ctxid[UADK_CTX_MAX]; - __u16 async_ctxid[UADK_CTX_MAX]; - __u16 def_sync_ctxid; - __u16 def_async_ctxid; - struct wd_sched_balancer balancer; +/** + * wd_sched_domain_idx_cache - Simplified fixed array cache for skey domains + * + * Design principles: + * - Fixed array for cache-friendly memory layout + * - Atomic operations for lock-free load tracking + * - Simple RR and load balancing strategies + * - Maximum 16 queues per thread (typical usage) + */ +struct wd_sched_domain_idx_cache { + /* === Queue index array === */ + __u32 idx_list[SKEY_CTX_MAX_NUM]; /* Array of ctx indices */ + atomic_uint load_values[SKEY_CTX_MAX_NUM]; /* Atomic load counters */ + __u32 valid_count; /* Number of valid queues */ + + /* === Scheduling state === */ + atomic_uint rr_ptr; /* Round-robin pointer */ + atomic_uint min_load_idx; /* Cached min load index */ + atomic_uint op_counter; /* Operation counter for updates */ + + /* === Configuration === */ + __u32 update_interval; /* Min load update interval */ + __u8 policy; /* Scheduling policy */ + + /* === Synchronization === */ + pthread_mutex_t cache_lock; /* Lock for structure modifications */ }; -#define LOOP_SWITH_STEP 1 -#define LOOP_SWITH_SLICE 10 -#define UADK_SWITH_PKT_SZ 2048 -/* - * 
struct sched_ctx_range - define one ctx pos. - * @begin: the start pos in ctxs of config. - * @end: the end pos in ctxx of config. - * @last: the last one which be distributed. - * @valid: the region used flag. - * @lock: lock the currentscheduling region. +/** + * wd_sched_ctx_domain - Scheduling domain with four dimensions + * @region_id: Region identifier (numa_id or device_id) + * @mode: Context mode (SYNC/ASYNC) + * @op_type: Operation type + * @prop: Property (e.g., device type: HW, CE, SOFT) + * @segments: Linked list of context ranges + * @segment_count: Number of segments + * @total_ctx_count: Total contexts across all segments + * @current_segment: Current segment pointer for round-robin + * @current_pos: Current position within segment + * @valid: Domain validity flag + * @lock: Mutex guarding the segment list and round-robin cursor */ -struct sched_ctx_region { - __u32 begin; - __u32 end; - __u32 last; +struct wd_sched_ctx_domain { + int region_id; + __u8 mode; + __u32 op_type; + __u8 prop; + + struct wd_sched_ctx_segment *segments; + __u32 segment_count; + __u32 total_ctx_count; + + struct wd_sched_ctx_segment *current_segment; + __u32 current_pos; bool valid; + pthread_mutex_t lock; }; -/* - * wd_sched_info - define the context of the scheduler. - * @ctx_region: define the map for the comp ctxs, using for quickly search. - * the x range: two(sync and async), the y range: - * two(e.g. comp and uncomp) the map[x][y]'s value is the ctx - * begin and end pos. - * @valid: the region used flag. 
- * @region_type: the region's property - * @next_info: next scheduling domain +/** + * wd_sched_domain_hash_node - Hash table collision chain node */ -struct wd_sched_info { - struct sched_ctx_region *ctx_region[SCHED_MODE_BUTT]; // default as HW ctxs - bool valid; - int region_type; - struct wd_sched_info *next_info; +struct wd_sched_domain_hash_node { + struct wd_sched_ctx_domain domain; + struct wd_sched_domain_hash_node *next; }; -#define MAX_SKEY_REGION_NUM 64 -struct dev_region_map { - __u32 dev_id; - __u32 region_id; +/** + * wd_sched_domain_hash_table - Pure dynamic hash table for scheduling domains + * @buckets: Hash table bucket array + * @bucket_size: Number of buckets + * @entry_count: Total entries in table + * @max_chain_length: Maximum chain length for statistics + * @lock: Mutex serializing bucket-chain lookups and inserts + */ +struct wd_sched_domain_hash_table { + struct wd_sched_domain_hash_node **buckets; + __u32 bucket_size; + __u32 entry_count; + __u32 max_chain_length; + + pthread_mutex_t lock; }; -/* - * The default value for NUMA_NUM_NODES is 2048, - * but in reality, most systems will not have such a large NUMA; - * they will typically have fewer than 64 nodes. +/* ============================================================================ + * Dual-Domain Structure for Session Key + * ============================================================================ */ -#define MAX_NUMA_NODES (NUMA_NUM_NODES >> 5) -/* - * wd_sched_ctx - define the context of the scheduler. - * @policy: define the policy of the scheduler. - * @numa_num: the max numa numbers of the scheduler. - * @type_num: the max operation types of the scheduler. - * @poll_func: the task's poll operation function. - * @numa_map: a map of cpus to devices. - * @sched_info: the context of the scheduler. 
+/** + * wd_sched_key_domain - Session domain backed by a fixed-array index cache + * @idx_cache: Fixed-array index cache with per-ctx load counters + * @lock: Synchronization mutex + * @expanded_count: Track how many times ctx has been expanded + */ +struct wd_sched_key_domain { + struct wd_sched_domain_idx_cache idx_cache; + pthread_mutex_t lock; + __u32 expanded_count; +}; + +/** + * wd_sched_key - Session-level scheduling key + * @region_id: Region identifier + * @type: Operation type + * @mode: Current mode (SYNC/ASYNC) + * @dev_id: Device identifier (for SCHED_POLICY_DEV) + * @ctx_prop: Context property + * @is_stream: Stream mode flag + * @prio_mode: Priority mode + * @pkt_size: Current packet size + * @sync_domain: Cached ctx domain for sync contexts + * @async_domain: Cached ctx domain for async contexts + * @lock: Synchronization mutex + */ +struct wd_sched_key { + int region_id; + __u8 type; + __u8 mode; + __u32 dev_id; + __u8 ctx_prop; + __u16 is_stream; + __u16 prio_mode; + __u32 pkt_size; + + struct wd_sched_key_domain sync_domain; + struct wd_sched_key_domain async_domain; + + pthread_mutex_t lock; +}; + +/** + * wd_sched_ctx - Main scheduler context + * @policy: Scheduling policy type + * @type_num: Number of operation types + * @mode_num: Number of modes (SYNC/ASYNC) + * @region_num: Number of regions (numa or devices) + * @poll_func: Poll function for receiving responses + * @domain_hash_table: Global hash table for all domains + * @skey_num: Number of active session keys + * @skey_lock: Lock for skey array + * @skey: Array of session keys + * @poll_tid: Thread IDs for polling */ struct wd_sched_ctx { __u32 policy; __u32 type_num; - __u16 numa_num; - __u16 dev_num; + __u32 mode_num; + __u16 region_num; + user_poll_func poll_func; - int numa_map[MAX_NUMA_NODES]; + struct wd_sched_domain_hash_table *domain_hash_table; __u32 skey_num; pthread_mutex_t skey_lock; - struct sched_key *skey[MAX_SKEY_REGION_NUM]; // supports up to 64 threads region - __u32 
poll_tid[MAX_SKEY_REGION_NUM]; - struct wd_sched_balancer balancer; - - struct dev_region_map dev_id_map[DEVICE_REGION_MAX]; - struct wd_sched_info sched_info[MAX_NUMA_NODES]; + struct wd_sched_key *skey[SKEY_MAX_THREAD_NUM]; + __u32 poll_tid[SKEY_MAX_THREAD_NUM]; }; -#define nop() asm volatile("nop") -static void delay_us(int ustime) +/* ============================================================================ + * Hash Table Core Operations + * ============================================================================ + */ + +static bool wd_sched_is_prime(__u32 n) { - int cycle = 2600; // for 2.6GHz CPU - int i, j; + __u32 i; - for (i = 0; i < ustime; i++) { - for (j = 0; j < cycle; j++) - nop(); + if (n <= 1) + return false; + if (n <= 3) + return true; + if (n % 2 == 0 || n % 3 == 0) + return false; + + for (i = 5; i * i <= n; i += 6) { + if (n % i == 0 || n % (i + 2) == 0) + return false; } - usleep(1); + + return true; } -static void sched_skey_param_init(struct wd_sched_ctx *sched_ctx, struct sched_key *skey) +static __u32 wd_sched_find_prime(__u32 n) { - __u32 i; - - pthread_mutex_lock(&sched_ctx->skey_lock); - for (i = 0; i < MAX_SKEY_REGION_NUM; i++) { - if (sched_ctx->skey[i] == NULL) { - sched_ctx->skey[i] = skey; - sched_ctx->skey_num++; - pthread_mutex_unlock(&sched_ctx->skey_lock); - WD_ERR("success: get valid skey node[%u]!\n", i); - return; - } - } - pthread_mutex_unlock(&sched_ctx->skey_lock); - WD_ERR("invalid: skey node number is too much!\n"); + while (!wd_sched_is_prime(n)) + n++; + return n; } -static struct sched_key *sched_get_poll_skey(struct wd_sched_ctx *sched_ctx) +static __u32 wd_sched_compute_bucket_size(__u32 estimated_entries) { - __u32 tid = pthread_self(); - __u16 i, tidx = 0; + __u32 target_size; - /* Delay processing within 17us is performed */ - delay_us(tid % 17); - /* Set mapping relationship between the recv tid and the send skey id */ - for (i = 0; i < sched_ctx->skey_num; i++) { - if (sched_ctx->poll_tid[i] == tid) { - 
//WD_ERR("poll tid ---> skey id:<%u, %u>!\n", i, tid); - tidx = i; - break; - } else if (sched_ctx->poll_tid[i] == 0) { - pthread_mutex_lock(&sched_ctx->skey_lock); - if (sched_ctx->poll_tid[i] == 0) { - //WD_ERR("poll tid<%u> <---> skey id:<%u>!\n", i, tid); - sched_ctx->poll_tid[i] = tid; - tidx = i; - } else { - pthread_mutex_unlock(&sched_ctx->skey_lock); - return NULL; - } - pthread_mutex_unlock(&sched_ctx->skey_lock); - break; - } - } + target_size = (estimated_entries * 4) / 3; - return sched_ctx->skey[tidx]; + if (target_size < WD_SCHED_MIN_BUCKETS) + target_size = WD_SCHED_MIN_BUCKETS; + if (target_size > WD_SCHED_MAX_BUCKETS) + target_size = WD_SCHED_MAX_BUCKETS; + + return wd_sched_find_prime(target_size); } -static bool sched_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +/** + * wd_sched_hash_compute - Compute hash value for four-dimensional domain key + * @region_id: Region identifier + * @mode: Context mode + * @op_type: Operation type + * @prop: Property + * @bucket_size: Hash table bucket count + * + * Combines four dimensions using prime number multipliers. 
+ */ +static inline __u32 wd_sched_hash_compute(int region_id, __u8 mode, + __u32 op_type, __u8 prop, __u32 bucket_size) { - if (key->numa_id >= sched_ctx->numa_num || key->mode >= SCHED_MODE_BUTT || - key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's numa: %d, mode: %u, type: %u!\n", - key->numa_id, key->mode, key->type); - return false; - } + __u32 hash; - return true; + hash = (region_id * HASH_PRIME1) + (mode * HASH_PRIME2) + + (op_type * HASH_PRIME3) + (prop * HASH_PRIME4); + return hash % bucket_size; } -/* - * sched_get_ctx_range - Get ctx range from ctx_map by the wd comp arg +static inline bool wd_sched_domain_key_match( + int region_id1, __u8 mode1, __u32 op_type1, __u8 prop1, + int region_id2, __u8 mode2, __u32 op_type2, __u8 prop2) +{ + return (region_id1 == region_id2 && mode1 == mode2 && + op_type1 == op_type2 && prop1 == prop2); +} + +/** + * wd_sched_hash_table_create - Create hash table + * @estimated_entries: Estimated number of entries + * + * Returns: Initialized hash table or NULL on error */ -static struct sched_ctx_region *sched_get_ctx_range(struct wd_sched_ctx *sched_ctx, - const struct sched_key *key) +static struct wd_sched_domain_hash_table * +wd_sched_hash_table_create(__u32 estimated_entries) { - struct wd_sched_info *sched_info; - int numa_id; + struct wd_sched_domain_hash_table *table; + __u32 bucket_size; + int ret; + + table = calloc(1, sizeof(*table)); + if (!table) + return NULL; + + bucket_size = wd_sched_compute_bucket_size(estimated_entries); + WD_DEBUG("Hash table: estimated_entries=%u, bucket_size=%u\n", + estimated_entries, bucket_size); - sched_info = sched_ctx->sched_info; - if (key->numa_id >= 0 && - sched_info[key->numa_id].ctx_region[key->mode][key->type].valid) - return &sched_info[key->numa_id].ctx_region[key->mode][key->type]; + table->buckets = calloc(bucket_size, sizeof(*table->buckets)); + if (!table->buckets) { + free(table); + return NULL; + } - /* If the key->numa_id is not exist, we should 
scan for a region */ - for (numa_id = 0; numa_id < sched_ctx->numa_num; numa_id++) { - if (sched_info[numa_id].ctx_region[key->mode][key->type].valid) - return &sched_info[numa_id].ctx_region[key->mode][key->type]; + table->bucket_size = bucket_size; + table->entry_count = 0; + table->max_chain_length = 0; + + ret = pthread_mutex_init(&table->lock, NULL); + if (ret) { + free(table->buckets); + free(table); + return NULL; } - return NULL; + return table; } -/* - * sched_get_next_pos_rr - Get next resource pos by RR schedule. - * The second para is reserved for future. - */ -static __u32 sched_get_next_pos_rr(struct sched_ctx_region *region, void *para) +static void wd_sched_hash_table_destroy(struct wd_sched_domain_hash_table *table) { - __u32 pos; - - pthread_mutex_lock(&region->lock); + struct wd_sched_domain_hash_node *node, *next; + __u32 i; - pos = region->last; + if (!table) + return; - if (pos < region->end) - region->last++; - else - region->last = region->begin; + for (i = 0; i < table->bucket_size; i++) { + node = table->buckets[i]; + while (node) { + next = node->next; + + /* Release segment linked list */ + struct wd_sched_ctx_segment *seg = node->domain.segments; + while (seg) { + struct wd_sched_ctx_segment *next_seg = seg->next; + free(seg); + seg = next_seg; + } - pthread_mutex_unlock(&region->lock); + pthread_mutex_destroy(&node->domain.lock); + free(node); + node = next; + } + } - return pos; + pthread_mutex_destroy(&table->lock); + free(table->buckets); + free(table); } -/* - * session_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
- * - * The user must init the schedule info through wd_sched_rr_instance - */ -static __u32 session_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, - const int sched_mode) +static struct wd_sched_ctx_domain * +wd_sched_hash_table_lookup(struct wd_sched_domain_hash_table *table, + int region_id, __u8 mode, __u32 op_type, __u8 prop) { - struct sched_ctx_region *region = NULL; - bool ret; + struct wd_sched_domain_hash_node *node; + struct wd_sched_ctx_domain *domain = NULL; + __u32 hash_idx; + __u8 node_idx = 0; - key->mode = sched_mode; - ret = sched_key_valid(sched_ctx, key); - if (!ret) - return INVALID_POS; + if (!table) + return NULL; - region = sched_get_ctx_range(sched_ctx, key); - if (!region) - return INVALID_POS; + hash_idx = wd_sched_hash_compute(region_id, mode, op_type, prop, table->bucket_size); + + pthread_mutex_lock(&table->lock); + node = table->buckets[hash_idx]; + while (node) { + if (wd_sched_domain_key_match( + node->domain.region_id, node->domain.mode, node->domain.op_type, + node->domain.prop, region_id, mode, op_type, prop)) { + domain = &node->domain; + break; + } + node = node->next; + node_idx++; + } + pthread_mutex_unlock(&table->lock); + WD_DEBUG("Get domain hash_idx: %u ------node idx: %u.\n", hash_idx, node_idx); - return sched_get_next_pos_rr(region, NULL); + return domain; } -static handle_t session_sched_init(handle_t h_sched_ctx, void *sched_param) +static struct wd_sched_ctx_domain * +wd_sched_hash_table_insert(struct wd_sched_domain_hash_table *table, + int region_id, __u8 mode, __u32 op_type, __u8 prop) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - struct sched_key *skey; - unsigned int node; + struct wd_sched_domain_hash_node *node, *new_node; + struct wd_sched_ctx_domain *existing; + __u32 chain_length; + __u32 hash_idx; + int ret; - if (getcpu(NULL, &node)) { - WD_ERR("failed to get node, errno %d!\n", errno); 
- return (handle_t)(-errno); - } - if (node == (unsigned int)NUMA_NO_NODE) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); - } + if (!table) + return NULL; - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); - return (handle_t)(-WD_EINVAL); - } + existing = wd_sched_hash_table_lookup(table, region_id, mode, op_type, prop); + if (existing) + return existing; - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); + hash_idx = wd_sched_hash_compute(region_id, mode, op_type, prop, table->bucket_size); + WD_DEBUG("Instance domain hash_idx: %u\n", hash_idx); + + pthread_mutex_lock(&table->lock); + /* Alloc and initialize new domain */ + new_node = calloc(1, sizeof(*new_node)); + if (!new_node) { + pthread_mutex_unlock(&table->lock); + return NULL; } - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - skey->numa_id = sched_ctx->numa_map[node]; - if (wd_need_debug()) - WD_DEBUG("session don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - skey->numa_id = sched_ctx->numa_map[node]; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; + /* Initialize new domain */ + new_node->domain.region_id = region_id; + new_node->domain.mode = mode; + new_node->domain.op_type = op_type; + new_node->domain.prop = prop; + new_node->domain.segments = NULL; + new_node->domain.segment_count = 0; + new_node->domain.total_ctx_count = 0; + new_node->domain.current_segment = NULL; + new_node->domain.current_pos = 0; + new_node->domain.valid = false; + + ret = pthread_mutex_init(&new_node->domain.lock, NULL); + if (ret) { + pthread_mutex_unlock(&table->lock); + free(new_node); + return NULL; } - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; + new_node->next = table->buckets[hash_idx]; + 
table->buckets[hash_idx] = new_node; - skey->sync_ctxid[0] = session_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = session_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; + table->entry_count++; + /* Walk the bucket chain so max_chain_length tracks the real depth */ + for (chain_length = 0, node = new_node; node; node = node->next) + chain_length++; + if (chain_length > table->max_chain_length) { + table->max_chain_length = chain_length; } - return (handle_t)skey; + pthread_mutex_unlock(&table->lock); -out: - free(skey); - return (handle_t)(-WD_EINVAL); + WD_DEBUG("Created new domain: region=%d, mode=%u, op_type=%u, prop=%u\n", + region_id, mode, op_type, prop); + + return &new_node->domain; } -/* - * session_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. +/* ============================================================================ + * Segment List Operations + * ============================================================================ + */ + +/** + * wd_sched_domain_add_segment - Add context range segment to domain + * @domain: Target domain + * @begin: Start context index + * @end: End context index (inclusive) * - * The user must init the schedule info through session_sched_init + * Supports non-contiguous context ranges via segment list. 
*/ -static __u32 session_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) +static int wd_sched_domain_add_segment(struct wd_sched_ctx_domain *domain, + __u32 begin, __u32 end) { - struct sched_key *key = (struct sched_key *)sched_key; + struct wd_sched_ctx_segment *seg, *new_seg; - if (unlikely(!h_sched_ctx || !key)) { - WD_ERR("invalid: sched ctx or key is NULL!\n"); - return INVALID_POS; - } + if (!domain || begin > end) + return -WD_EINVAL; - /* return in do task */ - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[0]; - return key->async_ctxid[0]; -} + new_seg = calloc(1, sizeof(*new_seg)); + if (!new_seg) + return -WD_ENOMEM; -static int session_poll_region(struct wd_sched_ctx *sched_ctx, __u32 begin, - __u32 end, __u32 expect, __u32 *count) -{ - __u32 poll_num = 0; - __u32 i; - int ret; + new_seg->begin = begin; + new_seg->end = end; + new_seg->next = NULL; - /* i is the pos of sched_ctxs, the max is end */ - for (i = begin; i <= end; i++) { - /* - * RR schedule, one time poll one package, - * poll_num is always not more than one here. 
- */ - ret = sched_ctx->poll_func(i, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; - *count += poll_num; - if (*count == expect) - break; + pthread_mutex_lock(&domain->lock); + + /* Append to segment list tail */ + if (!domain->segments) { + domain->segments = new_seg; + } else { + seg = domain->segments; + while (seg->next) + seg = seg->next; + seg->next = new_seg; } + domain->segment_count++; + domain->total_ctx_count += (end - begin + 1); + + /* Initialize polling state: start the cursor at the first segment's begin */ + if (!domain->current_segment) { + domain->current_segment = domain->segments; + domain->current_pos = domain->segments->begin; + } + + pthread_mutex_unlock(&domain->lock); + + WD_DEBUG("Added segment to domain: begin=%u, end=%u, total_count=%u\n", + begin, end, domain->total_ctx_count); + return 0; } -static int session_poll_policy_rr(struct wd_sched_ctx *sched_ctx, int numa_id, - __u32 expect, __u32 *count) +/** + * wd_sched_domain_get_next_rr - Get next context via round-robin from domain + * @domain: Source domain + * + * Returns: Next context index in round-robin order + * Time complexity: O(1) + */ +static __u32 wd_sched_domain_get_next_rr(struct wd_sched_ctx_domain *domain) { - struct sched_ctx_region **region = sched_ctx->sched_info[numa_id].ctx_region; - __u32 begin, end; - __u32 i; - int ret; + __u32 pos, next_pos; - for (i = 0; i < sched_ctx->type_num; i++) { - if (!region[SCHED_MODE_ASYNC][i].valid) - continue; + if (!domain || !domain->segments || domain->total_ctx_count == 0) + return INVALID_POS; - begin = region[SCHED_MODE_ASYNC][i].begin; - end = region[SCHED_MODE_ASYNC][i].end; - ret = session_poll_region(sched_ctx, begin, end, expect, count); - if (unlikely(ret)) - return ret; + pthread_mutex_lock(&domain->lock); + if (!domain->current_segment) { + domain->current_segment = domain->segments; + domain->current_pos = domain->segments->begin; + } + + pos = domain->current_pos; + next_pos = pos + 1; + /* Move to next position */ + if (next_pos <= domain->current_segment->end) { + domain->current_pos = next_pos; + } else { + /* 
Move to next segment */ + if (domain->current_segment->next) { + domain->current_segment = domain->current_segment->next; + domain->current_pos = domain->current_segment->begin; + next_pos = domain->current_segment->begin; + } else { + /* Loop back to beginning */ + domain->current_segment = domain->segments; + next_pos = domain->segments->begin; + } + domain->current_pos = next_pos; } - return 0; + pthread_mutex_unlock(&domain->lock); + WD_DEBUG("Get next ctx: pos=%u, current_pos=%u\n", pos, domain->current_pos); + + return pos; } -/* - * session_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. - * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. +/* ============================================================================ + * SKey Domain Cache Management Functions + * ============================================================================ + */ +/** + * wd_sched_skey_cache_init - Initialize skey domain cache + * @cache: Pointer to cache structure + * @policy: Scheduling policy * @sched_type: Scheduling policy type (cannot modify per API contract) + * + * Initialize fixed array cache with invalid positions and zero loads. 
*/ -static int session_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +static int wd_sched_skey_cache_init(struct wd_sched_domain_idx_cache *cache, __u8 policy) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct wd_sched_info *sched_info; - __u32 loop_time = 0; - __u32 last_count = 0; - __u16 i, region_mum; - int ret; + int i; - if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { - WD_ERR("invalid: sched ctx or poll_func is NULL or count is zero!\n"); + if (!cache) { + WD_ERR("Invalid cache pointer\n"); return -WD_EINVAL; } - if (unlikely(sched_ctx->numa_num > MAX_NUMA_NODES)) { - WD_ERR("invalid: ctx's numa number is %u!\n", sched_ctx->numa_num); - return -WD_EINVAL; + /* Initialize array with invalid positions */ + for (i = 0; i < SKEY_CTX_MAX_NUM; i++) { + cache->idx_list[i] = INVALID_POS; + atomic_store(&cache->load_values[i], 0); } - sched_info = sched_ctx->sched_info; - if (sched_ctx->policy == SCHED_POLICY_DEV) - region_mum = sched_ctx->dev_num; - else - region_mum = sched_ctx->numa_num; - - /* - * Try different region's ctx if we can't receive any - * package last time, it is more efficient. In most - * bad situation, poll ends after MAX_POLL_TIMES loop. - */ - while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < region_mum;) { - /* If current numa is not valid, find next. */ - if (!sched_info[i].valid) { - i++; - continue; - } - - last_count = *count; - ret = session_poll_policy_rr(sched_ctx, i, expect, count); - if (unlikely(ret)) - return ret; + /* Initialize atomic counters */ + atomic_store(&cache->rr_ptr, 0); + atomic_store(&cache->min_load_idx, 0); + atomic_store(&cache->op_counter, 0); - if (expect == *count) - return 0; + /* Set configuration */ + cache->valid_count = 0; + cache->update_interval = SKEY_LOAD_UPDATE_INTERVAL; + cache->policy = policy; - /* - * If no package is received, find next numa, - * otherwise, keep receiving packets at this node. 
- */ - if (last_count == *count) - i++; - } + /* Initialize structure lock */ + if (pthread_mutex_init(&cache->cache_lock, NULL)) { + WD_ERR("Failed to init cache lock\n"); + return -WD_EINVAL; } - return 0; + return WD_SUCCESS; } - -static handle_t sched_none_init(handle_t h_sched_ctx, void *sched_param) +/** + * wd_sched_skey_cache_uninit - Cleanup skey domain cache + * @cache: Pointer to cache structure + * + * Release resources and reset cache state. + */ +static void wd_sched_skey_cache_uninit(struct wd_sched_domain_idx_cache *cache) { - return (handle_t)0; -} + if (!cache) + return; -static __u32 sched_none_pick_next_ctx(handle_t sched_ctx, - void *sched_key, const int sched_mode) -{ - return 0; -} + pthread_mutex_destroy(&cache->cache_lock); -static int sched_none_poll_policy(handle_t h_sched_ctx, - __u32 expect, __u32 *count) + /* Reset cache state */ + for (int i = 0; i < SKEY_CTX_MAX_NUM; i++) { + cache->idx_list[i] = INVALID_POS; + atomic_store(&cache->load_values[i], 0); + } + + cache->valid_count = 0; +} +/** + * wd_sched_skey_add_ctx - Add ctx to skey domain cache + * @cache: Pointer to cache structure + * @ctx_id: Context ID to add + * + * Add ctx to next available position in fixed array. + * Returns 0 on success, negative error code on failure. 
+ */ +static int wd_sched_skey_add_ctx(struct wd_sched_domain_idx_cache *cache, __u32 ctx_id) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - __u32 loop_times = MAX_POLL_TIMES + expect; - __u32 poll_num = 0; - int ret; + __u32 i; - if (!sched_ctx || !sched_ctx->poll_func) { - WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + if (!cache || ctx_id == INVALID_POS) { + WD_ERR("Invalid parameters\n"); return -WD_EINVAL; } - while (loop_times > 0) { - /* Default use ctx 0 */ - loop_times--; - ret = sched_ctx->poll_func(0, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; + pthread_mutex_lock(&cache->cache_lock); + /* Check if cache is full */ + if (cache->valid_count >= SKEY_CTX_MAX_NUM) { + pthread_mutex_unlock(&cache->cache_lock); + WD_ERR("SKey cache full, cannot add more queues\n"); + return -WD_EINVAL; + } - *count += poll_num; - if (*count == expect) - break; + /* Check for duplicate ctx_id */ + for (i = 0; i < cache->valid_count; i++) { + if (cache->idx_list[i] == ctx_id) { + WD_ERR("Context %u already exists in skey cache at position %d\n", ctx_id, i); + pthread_mutex_unlock(&cache->cache_lock); + return -WD_EEXIST; + } } - return 0; -} + /* Update min load index if as the new ctx */ + atomic_store(&cache->min_load_idx, ctx_id); -static handle_t sched_single_init(handle_t h_sched_ctx, void *sched_param) -{ - return (handle_t)0; -} + /* Add to next available position */ + cache->idx_list[cache->valid_count] = ctx_id; + atomic_store(&cache->load_values[cache->valid_count], 0); + cache->valid_count++; + pthread_mutex_unlock(&cache->cache_lock); -static __u32 sched_single_pick_next_ctx(handle_t sched_ctx, - void *sched_key, const int sched_mode) -{ -#define CTX_ASYNC 1 -#define CTX_SYNC 0 + WD_DEBUG("Added ctx %u to skey cache at position %u, list valid_count: %u\n", + ctx_id, cache->valid_count - 1, cache->valid_count); - if (sched_mode) - return CTX_ASYNC; - else - return 
CTX_SYNC; + return WD_SUCCESS; } -static int sched_single_poll_policy(handle_t h_sched_ctx, - __u32 expect, __u32 *count) +/** + * wd_sched_skey_remove_ctx - Remove ctx from skey domain cache + * @cache: Pointer to cache structure + * @ctx_id: Context ID to remove + * + * Remove ctx by shifting array elements to maintain continuity. + * Returns 0 on success, negative error code if not found. + */ +static int wd_sched_skey_remove_ctx(struct wd_sched_domain_idx_cache *cache, + __u32 ctx_id) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - __u32 loop_times = MAX_POLL_TIMES + expect; - __u32 poll_num = 0; - int ret; + int i, found = 0; + __u32 current_min; - if (!sched_ctx || !sched_ctx->poll_func) { - WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + if (!cache) { + WD_ERR("Invalid cache pointer\n"); return -WD_EINVAL; } - while (loop_times > 0) { - /* Default async mode use ctx 1 */ - loop_times--; - ret = sched_ctx->poll_func(1, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) - continue; - - *count += poll_num; - if (*count == expect) + pthread_mutex_lock(&cache->cache_lock); + /* Find and remove the ctx */ + for (i = 0; i < cache->valid_count; i++) { + if (cache->idx_list[i] == ctx_id) { + found = 1; break; + } } - return 0; -} - -static bool sched_dev_key_valid(struct wd_sched_ctx *sched_ctx, const struct sched_key *key) -{ - bool found = false; - int i; - - if (key->mode >= SCHED_MODE_BUTT || key->type >= sched_ctx->type_num) { - WD_ERR("invalid: sched key's device id: %u, mode: %u, type: %u!\n", - key->dev_id, key->mode, key->type); - return false; + if (!found) { + WD_ERR("Context %u not found in skey cache\n", ctx_id); + pthread_mutex_unlock(&cache->cache_lock); + return -WD_ENODEV; } - for (i = 0; i < sched_ctx->dev_num; i++) { - if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { - found = true; - break; - } + /* Shift remaining elements to fill the gap */ + for (; i < 
cache->valid_count - 1; i++) { + cache->idx_list[i] = cache->idx_list[i + 1]; + atomic_store(&cache->load_values[i], + atomic_load(&cache->load_values[i + 1])); } - if (!found) { - WD_ERR("invalid: dev_id %u is not registered!\n", key->dev_id); - return false; + /* Clear last position */ + cache->idx_list[cache->valid_count - 1] = INVALID_POS; + atomic_store(&cache->load_values[cache->valid_count - 1], 0); + cache->valid_count--; + + /* Reset pointers if cache becomes empty */ + if (cache->valid_count == 0) { + atomic_store(&cache->rr_ptr, 0); + atomic_store(&cache->min_load_idx, 0); + } else { + /* Adjust min load index if necessary */ + current_min = atomic_load(&cache->min_load_idx); + if (current_min >= cache->valid_count) + atomic_store(&cache->min_load_idx, 0); } + pthread_mutex_unlock(&cache->cache_lock); + WD_DEBUG("Removed ctx %u from skey cache, valid_count: %u\n", + ctx_id, cache->valid_count); - return true; + return WD_SUCCESS; } -/* - * sched_dev_get_region - Get ctx region from ctx_map by the wd comp arg +/** + * wd_sched_update_min_load - Update cached min load index + * @cache: Pointer to cache structure + * + * Scan valid queues to find the one with minimum load. + * Called periodically to avoid frequent scanning. 
*/ -static struct sched_ctx_region *sched_dev_get_region(struct wd_sched_ctx *sched_ctx, - const struct sched_key *key) +static void wd_sched_update_min_load(struct wd_sched_domain_idx_cache *cache) { - struct wd_sched_info *sched_info; - int i, region_id; - - for (i = 0; i < sched_ctx->dev_num; i++) { - if (key->dev_id == sched_ctx->dev_id_map[i].dev_id) { - region_id = sched_ctx->dev_id_map[i].region_id; - sched_info = &sched_ctx->sched_info[region_id]; - if (sched_info->ctx_region[key->mode][key->type].valid) - return &sched_info->ctx_region[key->mode][key->type]; + __u32 min_load = UINT_MAX; + __u32 min_idx = 0; + __u32 i, load; + + if (cache->valid_count == 0) + return; + + /* Simple linear scan - efficient for small arrays */ + for (i = 0; i < cache->valid_count; i++) { + load = atomic_load(&cache->load_values[i]); + if (load < min_load) { + min_load = load; + min_idx = i; } } - /* - * If the scheduling domain of dev_id does not exist, - * taskes operations cannot be executed using queues from other devices; - * otherwise, an SMMU error will occur. - */ - return NULL; + atomic_store(&cache->min_load_idx, min_idx); } -/* - * session_dev_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. - * - * The user must init the schedule info through wd_sched_rr_instance +/** + * wd_sched_skey_pick_next - Pick next ctx from skey domain cache + * @cache: Pointer to cache structure + * + * Select next ctx based on scheduling policy: + * - RR: Simple round-robin selection + * - LOAD_BALANCE: Choose ctx with minimum load + * + * Returns selected ctx index, or INVALID_POS if no valid ctx. 
*/ -static __u32 session_dev_sched_init_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *key, - const int sched_mode) +static __u32 wd_sched_skey_pick_next(struct wd_sched_domain_idx_cache *cache, __u32 *ctx_idx) { - struct sched_ctx_region *region = NULL; - bool ret; + __u32 selected_idx; + __u32 op_count; - key->mode = sched_mode; - ret = sched_dev_key_valid(sched_ctx, key); - if (!ret) + if (!cache || cache->valid_count == 0) { return INVALID_POS; + } - region = sched_dev_get_region(sched_ctx, key); - if (!region) + switch (cache->policy) { + case SCHED_POLICY_RR: + case SCHED_POLICY_NONE: + case SCHED_POLICY_SINGLE: + case SCHED_POLICY_DEV: + case SCHED_POLICY_LOOP: + case SCHED_POLICY_INSTR: + /* Round-robin: atomic increment and modulo */ + selected_idx = atomic_fetch_add(&cache->rr_ptr, 1) % cache->valid_count; + break; + case SCHED_POLICY_HUNGRY: + /* Update min load periodically */ + op_count = atomic_fetch_add(&cache->op_counter, 1); + if (op_count % cache->update_interval == 0) + wd_sched_update_min_load(cache); + + /* Load balancing: use cached min load index */ + selected_idx = atomic_load(&cache->min_load_idx); + break; + default: + WD_ERR("Unknown scheduling policy: %d\n", cache->policy); + selected_idx = INVALID_POS; + break; + } + + /* Ensure index is within valid range */ + if (selected_idx >= cache->valid_count) return INVALID_POS; - return sched_get_next_pos_rr(region, NULL); + *ctx_idx = selected_idx; + return cache->idx_list[selected_idx]; } -static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +/** + * wd_sched_skey_update_load - Update load for a specific ctx + * @cache: Pointer to cache structure + * @ctx_idx: Context index in list + * @delta: Load delta (positive for send, negative for receive) + * + * Atomically update load counter for the specified ctx. + * Returns 0 on success, negative error code if ctx not found. 
+ */ +static int wd_sched_skey_update_load(struct wd_sched_domain_idx_cache *cache, + __u32 ctx_idx, int delta) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - struct sched_key *skey; - unsigned int node; + /* Atomic update without locking */ + if (delta > 0) + atomic_fetch_add(&cache->load_values[ctx_idx], delta); + else + atomic_fetch_sub(&cache->load_values[ctx_idx], -delta); + return WD_SUCCESS; +} - if (getcpu(NULL, &node)) { - WD_ERR("failed to get numa node, errno %d!\n", errno); - return (handle_t)(-errno); - } - if (node == (unsigned int)NUMA_NO_NODE) { - WD_ERR("invalid: failed to get numa node for dev sched init!\n"); - return (handle_t)(-WD_EINVAL); - } +/* ============================================================================ + * Session Key Domain Initialization + * ============================================================================ + */ - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); - return (handle_t)(-WD_EINVAL); - } +/** + * wd_sched_skey_domain_init - Initialize session domain with min-heap + * @key_domain: Target key domain + * @ctx_idx: context indices idx + * @policy: current session's policy + * + * Initializes dual-domain structure for session. 
+ */ +static int wd_sched_skey_domain_init(struct wd_sched_key_domain *key_domain, + __u32 ctx_idx, __u8 policy) +{ + int ret; - if (!param) { - WD_DEBUG("no-sva session don't set scheduler parameters!\n"); - return (handle_t)(-WD_EINVAL); - } + if (!key_domain) + return -WD_EINVAL; - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } + ret = wd_sched_skey_cache_init(&key_domain->idx_cache, policy); + if (ret) + return ret; - skey->type = param->type; - skey->dev_id = param->dev_id; + ret = wd_sched_skey_add_ctx(&key_domain->idx_cache, ctx_idx); + if (ret) + goto init_err; - skey->sync_ctxid[0] = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = session_dev_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; + ret = pthread_mutex_init(&key_domain->lock, NULL); + if (ret) { + goto add_ctx_err; } - return (handle_t)skey; + key_domain->expanded_count = 0; -out: - free(skey); - return (handle_t)(-WD_EINVAL); + return 0; + +add_ctx_err: + wd_sched_skey_remove_ctx(&key_domain->idx_cache, ctx_idx); +init_err: + wd_sched_skey_cache_uninit(&key_domain->idx_cache); + return ret; } -static struct sched_ctx_region *loop_get_near_region( - struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +/** + * wd_sched_skey_domain_destroy - Release session domain resources + */ +static void wd_sched_skey_domain_destroy(struct wd_sched_key_domain *key_domain) { - struct wd_sched_info *sched_info, *demon_info; - int numa_id; - - /* If the key->numa_id is not exist, we should scan for a valid region */ - for (numa_id = 0; numa_id < sched_ctx->numa_num; numa_id++) { - sched_info = sched_ctx->sched_info + numa_id; - if (sched_info->valid) { - demon_info = sched_info; - while (demon_info) { - if 
(demon_info->valid) - return &demon_info->ctx_region[key->mode][key->type]; - demon_info = demon_info->next_info; - } - } - } + if (!key_domain) + return; - return NULL; + pthread_mutex_destroy(&key_domain->lock); + wd_sched_skey_cache_uninit(&key_domain->idx_cache); } -/* - * loop_get_ctx_range - Get ctx range from ctx_map by the wd comp arg +/** + * wd_sched_poll_skey - Poll contexts for scheduler session + * @sched_ctx: Scheduler context + * @skey: Session key + * @expect: Expected number of responses + * @count: Actual response count (output) + * + * Polls all contexts in session domains and updates load values. */ -static struct sched_ctx_region *loop_get_ctx_range( - struct wd_sched_ctx *sched_ctx, const struct sched_key *key) +static int wd_sched_poll_skey(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey, + __u32 expect, __u32 *count) { - struct wd_sched_info *sched_region, *sched_info; - int ctx_prop = key->ctx_prop; + __u32 sum_poll_num = 0; + __u32 current_load; + __u32 poll_num; + __u32 idx, i; + int ret; + + /* Poll async domain contexts */ + for (i = 0; i < skey->async_domain.idx_cache.valid_count; i++) { + idx = skey->async_domain.idx_cache.idx_list[i]; + + poll_num = 0; + ret = sched_ctx->poll_func(idx, expect, &poll_num); + if ((ret < 0) && (ret != -EAGAIN)) + return ret; - if (key->numa_id < 0) - return loop_get_near_region(sched_ctx, key); + if (poll_num > 0) + sum_poll_num += poll_num; - sched_region = sched_ctx->sched_info; - sched_info = sched_region + key->numa_id; - while (sched_info) { - if (sched_info->valid && ctx_prop == sched_info->region_type && - sched_info->ctx_region[key->mode][key->type].valid) - return &sched_info->ctx_region[key->mode][key->type]; - sched_info = sched_info->next_info; + /* Update load value for this context */ + if (skey->async_domain.idx_cache.policy == SCHED_POLICY_HUNGRY) + wd_sched_skey_update_load(&skey->async_domain.idx_cache, i, poll_num); } + *count = sum_poll_num; - WD_ERR("failed to get valid 
sched region!\n"); - return NULL; + return 0; } -/* - * loop_sched_init_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. - * - * The user must init the schedule info through wd_sched_rr_instance +/* ============================================================================ + * Utility Functions + * ============================================================================ */ -static __u32 loop_sched_init_ctx(struct wd_sched_ctx *sched_ctx, - struct sched_key *key, const int sched_mode) + +#define nop() asm volatile("nop") +static void delay_us(int ustime) { - struct sched_ctx_region *region = NULL; - bool ret; + int cycle = 2600; + int i, j; - key->mode = sched_mode; - ret = sched_key_valid(sched_ctx, key); - if (!ret) - return INVALID_POS; + for (i = 0; i < ustime; i++) { + for (j = 0; j < cycle; j++) + nop(); + } + usleep(1); +} - region = loop_get_ctx_range(sched_ctx, key); - if (!region) - return INVALID_POS; +static void sched_skey_param_init(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey) +{ + __u32 i; - return sched_get_next_pos_rr(region, NULL); + pthread_mutex_lock(&sched_ctx->skey_lock); + for (i = 0; i < SKEY_MAX_THREAD_NUM; i++) { + if (sched_ctx->skey[i] == NULL) { + sched_ctx->skey[i] = skey; + sched_ctx->skey_num++; + pthread_mutex_unlock(&sched_ctx->skey_lock); + WD_ERR("success: get valid skey node[%u]!\n", i); + return; + } + } + pthread_mutex_unlock(&sched_ctx->skey_lock); + WD_ERR("invalid: skey node number is too much!\n"); } -static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) +static handle_t sched_session_common_init(struct wd_sched_ctx *sched_ctx, + struct sched_params *param) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - 
int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - int ctx_prop; + struct wd_sched_key *skey; + unsigned int node; + int ret = 0; - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + if (getcpu(NULL, &node)) { + WD_ERR("failed to get node, errno %d!\n", errno); + return (handle_t)(-errno); } if (!sched_ctx) { @@ -780,217 +1007,265 @@ static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); + skey = malloc(sizeof(struct wd_sched_key)); if (!skey) { WD_ERR("failed to alloc memory for session sched key!\n"); return (handle_t)(-WD_ENOMEM); } + memset(skey, 0, sizeof(struct wd_sched_key)); if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_HW; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; + skey->region_id = node; + if (wd_need_debug()) + WD_DEBUG("session don't set scheduler parameters!\n"); } else { + if (param->numa_id >= 0) { + skey->region_id = param->numa_id; + } else { + skey->region_id = node; + } skey->type = param->type; - skey->numa_id = param->numa_id; skey->ctx_prop = param->ctx_prop; } - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; - - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or 
async_ctxid!\n"); - goto out; - } - WD_ERR("sync_ctxid is: %u; async_ctxid is: %u!\n", skey->sync_ctxid[0], skey->async_ctxid[0]); - ctx_prop = skey->ctx_prop; - skey->ctx_prop = UADK_CTX_CE_INS; - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - skey->ctx_prop = ctx_prop; - if (skey->sync_ctxid[UADK_CTX_CE_INS] == INVALID_POS && - skey->async_ctxid[UADK_CTX_CE_INS] == INVALID_POS) { - WD_ERR("failed to get valid CE sync_ctxid or async_ctxid!\n"); - skey->sync_ctxid[UADK_CTX_CE_INS] = skey->sync_ctxid[0]; - skey->async_ctxid[UADK_CTX_CE_INS] = skey->async_ctxid[0]; - } - - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); - return (handle_t)skey; out: free(skey); - return (handle_t)(-WD_EINVAL); + return (handle_t)(-WD_EINVAL); } -static __u32 wd_sched_special_pick(struct sched_key *key, int sched_mode) +static struct wd_sched_key *sched_get_poll_skey(struct wd_sched_ctx *sched_ctx) { - if (key->is_stream) { - if (key->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_CE_INS]; - else - return key->async_ctxid[UADK_CTX_CE_INS]; - } else { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_HW]; - else - return key->async_ctxid[UADK_CTX_HW]; + __u32 ctx_num = sched_ctx->skey_num; + __u32 tid = pthread_self(); + __u16 tpos, start_pos; + __u16 i, tidx = 0; + + /* Randomize the initial query position. */ + start_pos = tid % ctx_num; + + for (i = 0; i < ctx_num; i++) { + /* Each thread re-determines the starting traversal position based on its tid. 
*/ + tpos = (start_pos + i) % ctx_num; + if (sched_ctx->poll_tid[tpos] == tid) { + tidx = tpos; + break; + } else if (sched_ctx->poll_tid[tpos] == 0) { + pthread_mutex_lock(&sched_ctx->skey_lock); + if (sched_ctx->poll_tid[tpos] == 0) { + sched_ctx->poll_tid[tpos] = tid; + tidx = tpos; + } else { + pthread_mutex_unlock(&sched_ctx->skey_lock); + return NULL; + } + pthread_mutex_unlock(&sched_ctx->skey_lock); + break; } } - return INVALID_POS; + return sched_ctx->skey[tidx]; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. +/** + * session_sched_init_ctx - Pre-fetch single context from domain for session + * @sched_ctx: Scheduler context + * @region_id: Region identifier + * @op_type: Operation type + * @prop: Property + * @sched_mode: Mode (SYNC/ASYNC) * - * The user must init the schedule info through session_sched_init + * Returns: Context index from domain */ -static __u32 loop_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) +static __u32 session_sched_init_ctx(struct wd_sched_ctx *sched_ctx, + int region_id, __u32 op_type, __u8 prop, + const int sched_mode) { - struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *key = (struct sched_key *)sched_key; - struct wd_sched_balancer *balancer = &sched_ctx->balancer; + struct wd_sched_ctx_domain *domain = NULL; - if (unlikely(!h_sched_ctx || !key)) { - WD_ERR("invalid: sched ctx or key is NULL!\n"); + if (region_id >= sched_ctx->region_num || sched_mode >= SCHED_MODE_BUTT || + op_type >= sched_ctx->type_num || prop >= UADK_CTX_MAX) { + WD_ERR("invalid: region: %d, mode: %d, type: %u!, prop: %u\n", + region_id, sched_mode, op_type, prop); return INVALID_POS; } - if (key->sync_ctxid[UADK_CTX_HW] == INVALID_POS || - key->async_ctxid[UADK_CTX_HW] == INVALID_POS) - 
return session_sched_pick_next_ctx(h_sched_ctx, sched_key, sched_mode); + if (!sched_ctx->domain_hash_table) + return INVALID_POS; - if (key->is_stream) - return wd_sched_special_pick(key, sched_mode); + domain = wd_sched_hash_table_lookup(sched_ctx->domain_hash_table, + region_id, sched_mode, op_type, prop); + if (!domain || !domain->valid) + return INVALID_POS; + + return wd_sched_domain_get_next_rr(domain); +} + +/** + * session_sched_domain_destroy - Destroy session domains + * @skey: Session key to destroy domains for + * + * Releases all resources associated with session domains. + */ +static void session_sched_domain_destroy(struct wd_sched_key *skey) +{ + if (!skey) + return; + + /* Destroy both sync and async domains */ + wd_sched_skey_domain_destroy(&skey->sync_domain); + wd_sched_skey_domain_destroy(&skey->async_domain); + + WD_DEBUG("Destroyed session domains for skey\n"); +} + +/** + * session_sched_domain_init - Initialize session domains with sync/async contexts + * @sched_ctx: Scheduler context + * @skey: Session key to initialize + * + * Pre-fetches sync and async contexts and initializes corresponding domains. + * Returns: 0 on success, negative error code on failure. 
+ */ +static int session_sched_domain_init(struct wd_sched_ctx *sched_ctx, + struct wd_sched_key *skey) +{ + __u32 sync_ctx, async_ctx; - // Small packets go directly through instruction acceleration - if (key->pkt_size != 0 && key->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return key->sync_ctxid[UADK_CTX_CE_INS]; - else - return key->async_ctxid[UADK_CTX_CE_INS]; + if (!sched_ctx || !skey) { + WD_ERR("invalid: sched_ctx or skey is NULL!\n"); + return -WD_EINVAL; } - if (sched_mode == CTX_MODE_SYNC) { - if (balancer->switch_slice == LOOP_SWITH_SLICE) { - balancer->switch_slice = 0; - balancer->hw_dfx_num += LOOP_SWITH_STEP; - /* run in HW */ - key->def_sync_ctxid = key->sync_ctxid[UADK_CTX_HW]; - } else { - balancer->switch_slice++; - /* run in soft CE */ - balancer->sw_dfx_num += LOOP_SWITH_STEP; - key->def_sync_ctxid = key->sync_ctxid[UADK_CTX_CE_INS]; + /* Pre-fetch one sync context */ + sync_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, + skey->ctx_prop, SCHED_MODE_SYNC); + + /* Pre-fetch one async context */ + async_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, + skey->ctx_prop, SCHED_MODE_ASYNC); + + if (sync_ctx == INVALID_POS && async_ctx == INVALID_POS) { + WD_ERR("failed to get valid sync_ctx or async_ctx!\n"); + return -WD_EINVAL; + } + + WD_DEBUG("Got ctx index sync_ctx=%u, async_ctx=%u\n", sync_ctx, async_ctx); + + /* Initialize sync domain if context is valid */ + if (sync_ctx != INVALID_POS) { + if (wd_sched_skey_domain_init(&skey->sync_domain, sync_ctx, sched_ctx->policy) != 0) { + WD_ERR("failed to init sync domain!\n"); + return -WD_EINVAL; } - return key->def_sync_ctxid; } - // Async mode - if (balancer->hw_task_num > balancer->sw_task_num) { - /* run in soft CE */ - balancer->sw_task_num += LOOP_SWITH_STEP >> 1; - key->def_async_ctxid = key->async_ctxid[UADK_CTX_CE_INS]; - } else { - /* run in HW */ - balancer->hw_task_num += LOOP_SWITH_STEP >> 2; - key->def_async_ctxid = 
key->async_ctxid[UADK_CTX_HW]; + + /* Initialize async domain if context is valid */ + if (async_ctx != INVALID_POS) { + if (wd_sched_skey_domain_init(&skey->async_domain, async_ctx, sched_ctx->policy) != 0) { + WD_ERR("failed to init async domain!\n"); + /* Cleanup sync domain if async domain init failed */ + if (sync_ctx != INVALID_POS) + wd_sched_skey_domain_destroy(&skey->sync_domain); + return -WD_EINVAL; + } } - return key->def_async_ctxid; + return 0; } -static int loop_poll_policy_rr(struct wd_sched_ctx *sched_ctx, int numa_id, - __u32 expect, __u32 *count) +/* ============================================================================ + * Scheduler Policy Functions + * ============================================================================ + */ +/** + * round_robin_sched_init - Initialize session with single sync and async ctx + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Allocates session key and pre-fetches one sync and one async context. 
+ */ +static handle_t round_robin_sched_init(handle_t h_sched_ctx, void *sched_param) { - struct wd_sched_balancer *balancer = &sched_ctx->balancer; - struct wd_sched_info *sched_info, *cur_info, *pnext_info; - struct sched_ctx_region **region; - __u32 begin, end; - __u32 i; - int ret; + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; - sched_info = sched_ctx->sched_info; - cur_info = sched_info + numa_id; - pnext_info = cur_info; - while (pnext_info) { - if (!pnext_info->valid) { - pnext_info = pnext_info->next_info; - continue; - } + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } - region = pnext_info->ctx_region; - for (i = 0; i < sched_ctx->type_num; i++) { - if (!region[SCHED_MODE_ASYNC][i].valid) - continue; - - begin = region[SCHED_MODE_ASYNC][i].begin; - end = region[SCHED_MODE_ASYNC][i].end; - // WD_ERR("session_poll_policy_rr numa: %d, from %u ---> %u!\n", numa_id, begin, end); - ret = session_poll_region(sched_ctx, begin, end, expect, count); - if (unlikely(ret)) - return ret; - } + skey = (struct wd_sched_key *)hskey; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } - /* run in HW */ - if (pnext_info->region_type == UADK_CTX_HW) { - if (balancer->hw_task_num > *count) - balancer->hw_task_num -= *count; - else - balancer->hw_task_num = 0; - balancer->hw_dfx_num += *count; - } else { - if (balancer->sw_task_num > *count) - balancer->sw_task_num -= *count; - else - balancer->sw_task_num = 0; - balancer->sw_dfx_num += *count; - } + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized RR scheduler with sync and async domains\n"); + + return hskey; +} + +/** + * 
round_robin_pick_next_ctx - Pick context with load-based selection + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) + * + * Returns: Context index with minimum load + * Time complexity: O(1) + */ +static __u32 round_robin_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, + const int sched_mode) +{ + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, ctx_idx; + __u32 new_ctx; - pnext_info = pnext_info->next_info; + if (unlikely(!h_sched_ctx || !skey)) { + WD_ERR("invalid: sched ctx or key is NULL!\n"); + return INVALID_POS; } - return 0; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; + } else { + domain = &skey->async_domain; + } + + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; + + return min_ctx; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. +/** + * round_robin_poll_policy - Poll policy for session scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. 
+ * Returns: Status code */ -static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +static int round_robin_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct wd_sched_info *sched_info; - __u32 loop_time = 0; - __u32 last_count = 0; - __u16 i; + __u32 skey_num = sched_ctx->skey_num; + struct wd_sched_key *skey; + __u32 tid = pthread_self(); + __u16 i, tpos, start_pos; + __u32 poll_num, sum_count = 0; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -998,251 +1273,220 @@ static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *cou return -WD_EINVAL; } - if (unlikely(sched_ctx->numa_num > MAX_NUMA_NODES)) { - WD_ERR("invalid: ctx's numa number is %u!\n", sched_ctx->numa_num); + /* Randomize the initial query position. */ + start_pos = tid % skey_num; + /* Query the queues on each skey separately. */ + for (i = 0; i < skey_num; i++) { + tpos = (start_pos + i) % skey_num; + skey = sched_ctx->skey[tpos]; + ret = wd_sched_poll_skey(sched_ctx, skey, expect, &poll_num); + if (unlikely(ret)) + return ret; + + sum_count += poll_num; + } + *count = sum_count; + + return 0; +} + +static handle_t sched_none_init(handle_t h_sched_ctx, void *sched_param) +{ + return (handle_t)0; +} + +static __u32 sched_none_pick_next_ctx(handle_t sched_ctx, + void *sched_key, const int sched_mode) +{ + return 0; +} + +static int sched_none_poll_policy(handle_t h_sched_ctx, + __u32 expect, __u32 *count) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + __u32 loop_times = MAX_POLL_TIMES + expect; + __u32 poll_num = 0; + int ret; + + if (!sched_ctx || !sched_ctx->poll_func) { + WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); return -WD_EINVAL; } - sched_info = sched_ctx->sched_info; + while (loop_times > 0) { + loop_times--; + ret = sched_ctx->poll_func(0, 1, &poll_num); + if ((ret < 0) && (ret != 
-EAGAIN)) + return ret; + else if (ret == -EAGAIN) + continue; + + *count += poll_num; + if (*count == expect) + break; + } + + return 0; +} - /* - * Try different numa's ctx if we can't receive any - * package last time, it is more efficient. In most - * bad situation, poll ends after MAX_POLL_TIMES loop. - */ - while (++loop_time < MAX_POLL_TIMES) { - for (i = 0; i < sched_ctx->numa_num;) { - /* If current numa is not valid, find next. */ - if (!sched_info[i].valid) { - i++; - continue; - } +static handle_t sched_single_init(handle_t h_sched_ctx, void *sched_param) +{ + return (handle_t)0; +} - last_count = *count; - ret = loop_poll_policy_rr(sched_ctx, i, expect, count); - if (unlikely(ret)) - return ret; +static __u32 sched_single_pick_next_ctx(handle_t sched_ctx, + void *sched_key, const int sched_mode) +{ + if (sched_mode) + return 1; + else + return 0; +} + +static int sched_single_poll_policy(handle_t h_sched_ctx, + __u32 expect, __u32 *count) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + __u32 loop_times = MAX_POLL_TIMES + expect; + __u32 poll_num = 0; + int ret; + + if (!sched_ctx || !sched_ctx->poll_func) { + WD_ERR("invalid: sched ctx or poll_func is NULL!\n"); + return -WD_EINVAL; + } - if (expect == *count) - return 0; + while (loop_times > 0) { + loop_times--; + ret = sched_ctx->poll_func(1, 1, &poll_num); + if ((ret < 0) && (ret != -EAGAIN)) + return ret; + else if (ret == -EAGAIN) + continue; - /* - * If no package is received, find next numa, - * otherwise, keep receiving packets at this node. - */ - if (last_count == *count) - i++; - } + *count += poll_num; + if (*count == expect) + break; } return 0; } +/** + * skey_sched_init - Initialize hungry scheduler session with dynamic expansion + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Pre-fetches one sync and one async context, supports dynamic expansion. 
+ */ static handle_t skey_sched_init(handle_t h_sched_ctx, void *sched_param) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - int ctx_prop; - - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + struct wd_sched_key *skey; + __u32 req_ctx_num = 0; + handle_t hskey; + int i, ret = 0; + __u8 def_prop; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + def_prop = skey->ctx_prop; + /* Init and get ctx for every ctx mode */ + for (i = 0; i < UADK_CTX_MAX; i++) { + skey->ctx_prop = i; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("Can't to request prop=%d type ctx!\n", i); + continue; + } + /* Request two Pre_fetch queues each time. 
*/ + req_ctx_num += 2; } - - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); + if (!req_ctx_num) { + free(skey); return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } - - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_HW; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; - skey->ctx_prop = param->ctx_prop; - } - - //if (skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - memset(&skey->balancer, 0x0, sizeof(struct wd_sched_balancer)); - skey->numa_id = 0; - - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[0] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - if (skey->sync_ctxid[0] == INVALID_POS && skey->async_ctxid[0] == INVALID_POS) { - WD_ERR("failed to get valid sync_ctxid or async_ctxid!\n"); - goto out; - } - WD_ERR("sync_ctxid is: %u; async_ctxid is: %u!\n", skey->sync_ctxid[0], skey->async_ctxid[0]); - ctx_prop = skey->ctx_prop; - skey->ctx_prop = UADK_CTX_CE_INS; - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); - skey->ctx_prop = ctx_prop; - if (skey->sync_ctxid[1] == INVALID_POS && skey->async_ctxid[1] == INVALID_POS) { - WD_ERR("failed to get valid CE sync_ctxid or async_ctxid!\n"); - 
skey->sync_ctxid[1] = skey->sync_ctxid[0]; - skey->async_ctxid[1] = skey->async_ctxid[0]; - } - + /* Restore the initialization prop settings. */ + skey->ctx_prop = def_prop; sched_skey_param_init(sched_ctx, skey); - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_CE_INS]; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_CE_INS]; - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); - - return (handle_t)skey; + WD_INFO("initialized Hungry scheduler with sync and async domains\n"); -out: - free(skey); - return (handle_t)(-WD_EINVAL); + return hskey; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. +/** + * skey_sched_pick_next_ctx - Pick context from hungry scheduler with load awareness + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) * - * The user must init the schedule info through session_sched_init + * Returns: Context with minimum load, or expands if threshold exceeded + * Time complexity: O(1) for selection, O(n) if expansion needed */ static __u32 skey_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) + const int sched_mode) { - struct sched_key *skey = (struct sched_key *)sched_key; + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, min_load, ctx_idx; + __u32 new_ctx; if (unlikely(!h_sched_ctx || !skey)) { WD_ERR("invalid: sched ctx or key is NULL!\n"); return INVALID_POS; } - if (skey->sync_ctxid[UADK_CTX_HW] == INVALID_POS || - skey->async_ctxid[UADK_CTX_HW] == INVALID_POS) - return session_sched_pick_next_ctx(h_sched_ctx, sched_key, 
sched_mode); - - if (skey->is_stream) - return wd_sched_special_pick(skey, sched_mode); - - // Small packets go directly through instruction acceleration - if (skey->pkt_size != 0 && skey->pkt_size < UADK_SWITH_PKT_SZ) { - if (sched_mode == CTX_MODE_SYNC) - return skey->sync_ctxid[UADK_CTX_CE_INS]; - else - return skey->async_ctxid[UADK_CTX_CE_INS]; - } - - // Async mode - if (sched_mode == CTX_MODE_ASYNC) { - if (skey->balancer.hw_task_num > (1024 + skey->balancer.sw_task_num >> 1)) { - /* run in soft CE */ - skey->balancer.sw_task_num += LOOP_SWITH_STEP; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_CE_INS]; - } else { - /* run in HW */ - skey->balancer.hw_task_num += LOOP_SWITH_STEP; - skey->def_async_ctxid = skey->async_ctxid[UADK_CTX_HW]; - } - return skey->def_async_ctxid; - } - - if (skey->balancer.switch_slice >= LOOP_SWITH_SLICE) { - skey->balancer.switch_slice = 0; - skey->balancer.hw_dfx_num += LOOP_SWITH_STEP >> 1; - /* run in HW */ - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_HW]; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; } else { - skey->balancer.switch_slice++; - skey->balancer.sw_dfx_num += LOOP_SWITH_STEP >> 2; - /* run in soft CE */ - skey->def_sync_ctxid = skey->sync_ctxid[UADK_CTX_CE_INS]; + domain = &skey->async_domain; } - return skey->def_sync_ctxid; -} - -static int skey_poll_ctx(struct wd_sched_ctx *sched_ctx, struct sched_key *skey, - __u32 expect, __u32 *count) -{ - __u32 hw_num = 0; - __u32 sw_num = 0; - __u32 poll_num; - int i, ret; - - /* - * Collect hardware messages first, multi-threading performance is better; - * Collect software packets first, single-thread performance is better - */ - for (i = UADK_CTX_MAX - 1; i >= 0; i--) { - if (skey->async_ctxid[i] == INVALID_POS) - continue; - - poll_num = 0; - ret = sched_ctx->poll_func(skey->async_ctxid[i], expect, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (poll_num == 0) - continue; - - if (i == 0) - hw_num += 
poll_num; - else - sw_num += poll_num; - } + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; - *count = *count + hw_num + sw_num; - if (hw_num > 0) { - if (skey->balancer.hw_task_num > hw_num) - skey->balancer.hw_task_num -= hw_num; - else - skey->balancer.hw_task_num = 0; - skey->balancer.hw_dfx_num += hw_num; - } - if (sw_num > 0) { - if (skey->balancer.sw_task_num > sw_num) - skey->balancer.sw_task_num -= sw_num; - else - skey->balancer.sw_task_num = 0; - skey->balancer.sw_dfx_num += sw_num; + /* Charge the sent task to the same cache ctx_idx was picked from */ + wd_sched_skey_update_load(&domain->idx_cache, ctx_idx, 1); + min_load = domain->idx_cache.load_values[ctx_idx]; + + /* Check if we need to expand context pool */ + if (min_load > HUNGRY_LOAD_THRESHOLD) { + /* Try to allocate new context from domain */ + new_ctx = session_sched_init_ctx(sched_ctx, skey->region_id, skey->type, skey->ctx_prop, sched_mode); + if (new_ctx != INVALID_POS) { + if (wd_sched_skey_add_ctx(&domain->idx_cache, new_ctx) == 0) { + domain->expanded_count++; + min_ctx = new_ctx; + } + } } - return 0; + return min_ctx; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. - * @expect: User expect poll msg num. - * @count: The actually poll num. +/** + * skey_sched_poll_policy - Poll policy for hungry scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance.
+ * Returns: Status code */ static int skey_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *skey; + struct wd_sched_key *skey; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -1254,131 +1498,163 @@ static int skey_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *cou if (!skey) return -WD_EAGAIN; - ret = skey_poll_ctx(sched_ctx, skey, expect, count); + ret = wd_sched_poll_skey(sched_ctx, skey, expect, count); if (unlikely(ret)) return ret; return 0; } -static handle_t instr_sched_init(handle_t h_sched_ctx, void *sched_param) +/** + * loop_sched_init - Initialize loop scheduler session with single ctx per mode + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + * + * Pre-fetches one sync and one async context. + */ +static handle_t loop_sched_init(handle_t h_sched_ctx, void *sched_param) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; struct sched_params *param = (struct sched_params *)sched_param; - int cpu = sched_getcpu(); - int node = numa_node_of_cpu(cpu); - struct sched_key *skey; - - if (node < 0) { - WD_ERR("invalid: failed to get numa node!\n"); - return (handle_t)(-WD_EINVAL); + struct wd_sched_key *skey; + __u32 req_ctx_num = 0; + handle_t hskey; + int i, ret = 0; + __u8 def_prop; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + def_prop = skey->ctx_prop; + /* Init and get ctx for every ctx mode */ + for (i = 0; i < UADK_CTX_MAX; i++) { + skey->ctx_prop = i; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("Can't to request prop=%d type ctx!\n", i); + continue; + } + /* Request two Pre_fetch queues each time. 
*/ + req_ctx_num += 2; } - - if (!sched_ctx) { - WD_ERR("invalid: sched ctx is NULL!\n"); + if (!req_ctx_num) { + free(skey); return (handle_t)(-WD_EINVAL); } - skey = malloc(sizeof(struct sched_key)); - if (!skey) { - WD_ERR("failed to alloc memory for session sched key!\n"); - return (handle_t)(-WD_ENOMEM); - } + /* Restore the initialization prop settings. */ + skey->ctx_prop = def_prop; + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized Loop scheduler with sync and async domains\n"); + return (handle_t)skey; +} - if (!param) { - memset(skey, 0, sizeof(struct sched_key)); - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = UADK_CTX_CE_INS; - WD_INFO("loop don't set scheduler parameters!\n"); - } else if (param->numa_id < 0) { - skey->type = param->type; - //skey->numa_id = sched_ctx->numa_map[node]; - skey->numa_id = 0; - skey->ctx_prop = param->ctx_prop; - } else { - skey->type = param->type; - skey->numa_id = param->numa_id; - skey->ctx_prop = param->ctx_prop; - } +/** + * loop_sched_pick_next_ctx - Pick context for loop scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_mode: Mode (cannot modify per API contract) + * + * Returns: Context index with minimum load + * Time complexity: O(1) + */ +static __u32 loop_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, + const int sched_mode) +{ + return round_robin_pick_next_ctx(h_sched_ctx, sched_key, sched_mode); +} + +static int loop_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) +{ + return round_robin_poll_policy(h_sched_ctx, expect, count); +} + +static handle_t instr_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; - //if 
(skey->numa_id < 0) { - // WD_ERR("failed to get valid sched numa region!\n"); - // goto out; - //} - skey->numa_id = 0; + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } - memset(&skey->sync_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - memset(&skey->async_ctxid, INVALID_POS, sizeof(__u32) * UADK_CTX_MAX); - skey->sync_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_SYNC); - skey->async_ctxid[UADK_CTX_CE_INS] = loop_sched_init_ctx(sched_ctx, skey, CTX_MODE_ASYNC); + skey = (struct wd_sched_key *)hskey; + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } sched_skey_param_init(sched_ctx, skey); - WD_ERR("sw ctxid is: %u, %u!\n", skey->sync_ctxid[1], skey->async_ctxid[1]); return (handle_t)skey; } -/* - * loop_sched_pick_next_ctx - Get one ctx from ctxs by the sched_ctx and arg. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @sched_key: The key of schedule region. - * @sched_mode: The sched async/sync mode. 
- * - * The user must init the schedule info through session_sched_init - */ static __u32 instr_sched_pick_next_ctx(handle_t h_sched_ctx, void *sched_key, - const int sched_mode) + const int sched_mode) { - struct sched_key *key = (struct sched_key *)sched_key; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; + struct wd_sched_key_domain *domain; + __u32 min_ctx, ctx_idx; + __u32 new_ctx; - //if (unlikely(!h_sched_ctx || !key)) { - // WD_ERR("invalid: sched ctx or key is NULL!\n"); - // return INVALID_POS; - //} + if (unlikely(!h_sched_ctx || !skey)) { + WD_ERR("invalid: sched ctx or key is NULL!\n"); + return INVALID_POS; + } - key->balancer.sw_dfx_num++; - if (sched_mode == CTX_MODE_SYNC) { - /* run in soft CE */ - return key->sync_ctxid[UADK_CTX_CE_INS]; + if (sched_mode == SCHED_MODE_SYNC) { + domain = &skey->sync_domain; + } else { + domain = &skey->async_domain; } - // Async mode - /* run in soft CE */ - return key->async_ctxid[UADK_CTX_CE_INS]; + + /* Get current minimum load context */ + min_ctx = wd_sched_skey_pick_next(&domain->idx_cache, &ctx_idx); + if (min_ctx == INVALID_POS) + return INVALID_POS; + + return min_ctx; } -static int instr_poll_policy_rr(struct wd_sched_ctx *sched_ctx, struct sched_key *skey, - __u32 expect, __u32 *count) +static int instr_poll_policy_rr(struct wd_sched_ctx *sched_ctx, struct wd_sched_key *skey, + __u32 expect, __u32 *count) { __u32 recv_cnt, ctx_id; int ret; - //WD_ERR("success: sched skey num: %u!\n", i); recv_cnt = 0; - ctx_id = skey->async_ctxid[UADK_CTX_CE_INS]; + ctx_id = skey->async_domain.idx_cache.idx_list[0]; ret = sched_ctx->poll_func(ctx_id, expect, &recv_cnt); if ((ret < 0) && (ret != -EAGAIN)) return ret; *count += recv_cnt; - //WD_ERR("success: sched recv task num: %u!\n", *count); return 0; } -/* - * loop_poll_policy - The polling policy matches the pick next ctx. - * @sched_ctx: Schedule ctx, reference the struct sample_sched_ctx. - * @cfg: The global resoure info. 
- * @expect: User expect poll msg num. - * @count: The actually poll num. +/** + * instr_sched_poll_policy - Poll policy for instruction scheduler + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @expect: Expected number of responses (cannot modify per API contract) + * @count: Actual response count (cannot modify per API contract) * - * The user must init the schedule info through wd_sched_rr_instance, the - * func interval will not check the valid, becouse it will affect performance. + * Returns: Status code */ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *count) { struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; - struct sched_key *skey; + struct wd_sched_key *skey; int ret; if (unlikely(!count || !sched_ctx || !sched_ctx->poll_func)) { @@ -1386,7 +1662,6 @@ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *co return -WD_EINVAL; } - /* First poll the skey is NULL */ skey = sched_get_poll_skey(sched_ctx); if (!skey) return -WD_EAGAIN; @@ -1398,30 +1673,60 @@ static int instr_sched_poll_policy(handle_t h_sched_ctx, __u32 expect, __u32 *co return ret; } +static handle_t session_dev_sched_init(handle_t h_sched_ctx, void *sched_param) +{ + struct wd_sched_ctx *sched_ctx = (struct wd_sched_ctx *)h_sched_ctx; + struct sched_params *param = (struct sched_params *)sched_param; + struct wd_sched_key *skey; + handle_t hskey; + int ret = 0; + + hskey = sched_session_common_init(sched_ctx, param); + if (WD_IS_ERR(hskey)) { + WD_ERR("failed to init session schedule key!\n"); + return hskey; + } + + skey = (struct wd_sched_key *)hskey; + if (param) + skey->type = param->type; + + ret = session_sched_domain_init(sched_ctx, skey); + if (ret != 0) { + WD_ERR("failed to initialize session domains!\n"); + free(skey); + return (handle_t)(-WD_EINVAL); + } + + sched_skey_param_init(sched_ctx, skey); + WD_INFO("initialized Dev RR scheduler with sync and async domains\n"); + return (handle_t)skey;
+} + +/** + * wd_sched_set_param - Set scheduler parameters + * @h_sched_ctx: Scheduler handle (cannot modify per API contract) + * @sched_key: Session key (cannot modify per API contract) + * @sched_param: Scheduling parameters (cannot modify per API contract) + */ static void wd_sched_set_param(handle_t h_sched_ctx, void *sched_key, void *sched_param) { struct wd_sched_params *params = (struct wd_sched_params *)sched_param; - struct sched_key *skey = (struct sched_key *)sched_key; + struct wd_sched_key *skey = (struct wd_sched_key *)sched_key; skey->pkt_size = params->pkt_size; skey->is_stream = params->data_mode; skey->prio_mode = params->prio_mode; } -static void none_set_param(handle_t h_sched_ctx, - void *sched_key, void *sched_param) -{ - return; -} - static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { { .name = "RR scheduler", .sched_policy = SCHED_POLICY_RR, - .sched_init = session_sched_init, - .pick_next_ctx = session_sched_pick_next_ctx, - .poll_policy = session_sched_poll_policy, + .sched_init = round_robin_sched_init, + .pick_next_ctx = round_robin_pick_next_ctx, + .poll_policy = round_robin_poll_policy, .set_param = wd_sched_set_param, }, { .name = "None scheduler", @@ -1441,8 +1745,8 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .name = "Device RR scheduler", .sched_policy = SCHED_POLICY_DEV, .sched_init = session_dev_sched_init, - .pick_next_ctx = session_sched_pick_next_ctx, - .poll_policy = session_sched_poll_policy, + .pick_next_ctx = round_robin_pick_next_ctx, + .poll_policy = round_robin_poll_policy, .set_param = wd_sched_set_param, }, { .name = "Loop scheduler", @@ -1451,6 +1755,7 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { .pick_next_ctx = loop_sched_pick_next_ctx, .poll_policy = loop_sched_poll_policy, .set_param = wd_sched_set_param, + }, { .name = "Hungry scheduler", .sched_policy = SCHED_POLICY_HUNGRY, @@ -1468,128 +1773,37 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { }, }; -static 
int wd_sched_get_nearby_numa_id(struct wd_sched_info *sched_info, int node, int numa_num) -{ - int dis = INT32_MAX; - int valid_id = -1; - int i, tmp; - - for (i = 0; i < numa_num; i++) { - if (sched_info[i].valid) { - tmp = numa_distance(node, i); - if (dis > tmp) { - valid_id = i; - dis = tmp; - } - } - } - - return valid_id; -} - -static void wd_sched_map_cpus_to_dev(struct wd_sched_ctx *sched_ctx) -{ - struct wd_sched_info *sched_info = sched_ctx->sched_info; - int i, numa_num = sched_ctx->numa_num; - int *numa_map = sched_ctx->numa_map; - - for (i = 0; i < numa_num; i++) { - if (sched_info[i].valid) - numa_map[i] = i; - else - numa_map[i] = wd_sched_get_nearby_numa_id(sched_info, i, numa_num); - } -} - -static int wd_instance_dev_region(struct wd_sched_ctx *sched_ctx, - struct sched_params *param) +static int numa_num_check(__u16 region_num) { - struct wd_sched_info *sched_info; - __u32 region_idx = INVALID_POS; - __u8 type, mode; - __u32 dev_id; - int i; - - dev_id = param->dev_id; - type = param->type; - mode = param->mode; - - /* Check whether dev_id has already been registered. */ - for (i = 0; i < sched_ctx->dev_num; i++) { - if (sched_ctx->dev_id_map[i].dev_id == dev_id) { - region_idx = sched_ctx->dev_id_map[i].region_id; - break; - } - } - - /* If not registered, allocate a new region. */ - if (region_idx == INVALID_POS) { - if (sched_ctx->dev_num >= DEVICE_REGION_MAX) { - WD_ERR("too many devices registered!\n"); - return -WD_EINVAL; - } - - region_idx = sched_ctx->dev_num; - sched_ctx->dev_id_map[region_idx].dev_id = dev_id; - sched_ctx->dev_id_map[region_idx].region_id = region_idx; - sched_ctx->dev_num++; - - sched_info = &sched_ctx->sched_info[region_idx]; - } else { - sched_info = &sched_ctx->sched_info[region_idx]; - } + int max_node; - /* Check whether the mode and type have already been registered. 
*/ - if (sched_info->ctx_region[mode][type].valid) { - WD_INFO("device %u mode %u type %u already registered\n", - dev_id, mode, type); - return WD_SUCCESS; + max_node = numa_max_node() + 1; + if (max_node <= 0) { + WD_ERR("invalid: numa max node is %d!\n", max_node); + return -WD_EINVAL; } - /* Initialize the scheduling region for this mode and type */ - sched_info->ctx_region[mode][type].begin = param->begin; - sched_info->ctx_region[mode][type].end = param->end; - sched_info->ctx_region[mode][type].last = param->begin; - sched_info->ctx_region[mode][type].valid = true; - sched_info->valid = true; - - pthread_mutex_init(&sched_info->ctx_region[mode][type].lock, NULL); - - return WD_SUCCESS; -} - -static int wd_sched_region_instance(struct wd_sched_info *sched_info, - struct sched_params *param) -{ - struct wd_sched_info *next_info; - __u8 type, mode; - - type = param->type; - mode = param->mode; - next_info = sched_info; - while (next_info) { - if (next_info->region_type == param->ctx_prop) { - next_info->ctx_region[mode][type].begin = param->begin; - next_info->ctx_region[mode][type].end = param->end; - next_info->ctx_region[mode][type].last = param->begin; - next_info->ctx_region[mode][type].valid = true; - next_info->valid = true; - pthread_mutex_init(&next_info->ctx_region[mode][type].lock, NULL); - WD_ERR("instance numa<%d>, property<%d>, mode<%u>, type<%u> ctx: begin: %u ----> end: %u!\n", - param->numa_id, param->ctx_prop, mode, type, param->begin, param->end); - return 0; - } - next_info = next_info->next_info; + if (!region_num || region_num > max_node) { + WD_ERR("invalid: region number is %u!\n", region_num); + return -WD_EINVAL; } - return -WD_EINVAL; + return 0; } +/** + * wd_sched_rr_instance - External API for scheduling region instance + * @sched: Scheduler (cannot modify per API contract) + * @param: Scheduling parameters (cannot modify per API contract) + * + * Creates scheduling region for given parameters. 
+ */ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *param) { struct wd_sched_ctx *sched_ctx = NULL; - __u8 type, mode; - int numa_id, ret; + struct wd_sched_ctx_domain *domain; + __u8 mode; + int ret; if (!sched || !sched->h_sched_ctx || !param) { WD_ERR("invalid: sched or sched_params is NULL!\n"); @@ -1601,92 +1815,61 @@ int wd_sched_rr_instance(const struct wd_sched *sched, struct sched_params *para return -WD_EINVAL; } - numa_id = param->numa_id; - type = param->type; mode = param->mode; sched_ctx = (struct wd_sched_ctx *)sched->h_sched_ctx; - if (sched_ctx->numa_num > 0 && (numa_id >= sched_ctx->numa_num || - numa_id < 0)) { - WD_ERR("invalid: sched_ctx's numa_id is %d, numa_num is %u!\n", - numa_id, sched_ctx->numa_num); + if (param->numa_id >= sched_ctx->region_num || param->numa_id < 0) { + WD_ERR("invalid: region_id is %d, region_num is %u!\n", + param->numa_id, sched_ctx->region_num); return -WD_EINVAL; } - if (type >= sched_ctx->type_num) { - WD_ERR("invalid: sched_ctx's type is %u, type_num is %u!\n", - type, sched_ctx->type_num); + if (param->type >= sched_ctx->type_num) { + WD_ERR("invalid: type is %u, type_num is %u!\n", + param->type, sched_ctx->type_num); return -WD_EINVAL; } if (mode >= SCHED_MODE_BUTT) { - WD_ERR("invalid: sched_ctx's mode is %u, mode_num is %d!\n", - mode, SCHED_MODE_BUTT); + WD_ERR("invalid: mode is %u, mode_num is %u!\n", + mode, sched_ctx->mode_num); return -WD_EINVAL; } - if (sched_ctx->policy == SCHED_POLICY_DEV) - return wd_instance_dev_region(sched_ctx, param); - - /* For older tools, the default setting is of the HW type. 
*/ if (param->ctx_prop < 0 || param->ctx_prop > UADK_CTX_SOFT) param->ctx_prop = UADK_CTX_HW; - if (!sched_ctx->sched_info[numa_id].ctx_region[mode]) { - WD_ERR("invalid: ctx_region is NULL, numa: %d, mode: %u!\n", - numa_id, mode); - return -WD_EINVAL; - } + /* Insert or get domain from hash table using four dimensions */ + domain = wd_sched_hash_table_insert(sched_ctx->domain_hash_table, + param->numa_id, mode, param->type, + param->ctx_prop); + if (!domain) + return -WD_ENOMEM; - ret = wd_sched_region_instance(&sched_ctx->sched_info[numa_id], param); + /* Add context range as new segment */ + ret = wd_sched_domain_add_segment(domain, param->begin, param->end); if (ret) { - WD_ERR("failed to instance ctx_region!\n"); + WD_ERR("failed to add segment to domain!\n"); return ret; } + domain->valid = true; - wd_sched_map_cpus_to_dev(sched_ctx); + WD_INFO("instance: region=%d, mode=%u, type=%u, prop=%d, begin=%u, end=%u\n", + param->numa_id, mode, param->type, param->ctx_prop, + param->begin, param->end); return WD_SUCCESS; } -static void wd_sched_region_release(struct wd_sched_ctx *sched_ctx) -{ - struct wd_sched_info *sched_info, *next_info, *cur_info; - int i, j, region_num; - - sched_info = sched_ctx->sched_info; - if (!sched_info) - return; - - /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ - if (sched_ctx->policy == SCHED_POLICY_DEV) - region_num = DEVICE_REGION_MAX; - else - region_num = sched_ctx->numa_num; - - for (i = 0; i < region_num; i++) { - cur_info = &sched_info[i]; - while (cur_info) { - next_info = cur_info->next_info; - for (j = 0; j < SCHED_MODE_BUTT; j++) { - if (cur_info->ctx_region[j]) { - free(cur_info->ctx_region[j]); - cur_info->ctx_region[j] = NULL; - } - } - /* First info region is alloced by sched ctx */ - if (cur_info->region_type != UADK_CTX_HW) - free(cur_info); - cur_info = next_info; - } - } -} - +/** + * wd_sched_rr_release - External API for scheduler release + * @sched: Scheduler to release (cannot modify per API
contract) + * + * Releases all scheduler resources. + */ void wd_sched_rr_release(struct wd_sched *sched) { struct wd_sched_ctx *sched_ctx; - __u32 hw_dfx_num = 0; - __u32 sw_dfx_num = 0; __u32 i; if (!sched) @@ -1696,21 +1879,22 @@ void wd_sched_rr_release(struct wd_sched *sched) if (!sched_ctx) goto ctx_out; + /* Release all session keys */ for (i = 0; i < sched_ctx->skey_num; i++) { if (sched_ctx->skey[i] != NULL) { - hw_dfx_num += sched_ctx->skey[i]->balancer.hw_dfx_num; - sw_dfx_num += sched_ctx->skey[i]->balancer.sw_dfx_num; + wd_sched_skey_domain_destroy(&sched_ctx->skey[i]->sync_domain); + wd_sched_skey_domain_destroy(&sched_ctx->skey[i]->async_domain); } sched_ctx->skey[i] = NULL; } - hw_dfx_num += sched_ctx->balancer.hw_dfx_num; - sw_dfx_num += sched_ctx->balancer.sw_dfx_num; sched_ctx->skey_num = 0; - /* Release sched dfx info */ - WD_ERR("scheduler balance hw task num: %u, sw task num: %u\n", - hw_dfx_num, sw_dfx_num); - wd_sched_region_release(sched_ctx); + /* Release hash table */ + if (sched_ctx->domain_hash_table) { + wd_sched_hash_table_destroy(sched_ctx->domain_hash_table); + sched_ctx->domain_hash_table = NULL; + } + free(sched_ctx); ctx_out: @@ -1718,79 +1902,22 @@ ctx_out: return; } -static int numa_num_check(__u16 numa_num) -{ - int max_node; - - max_node = numa_max_node() + 1; - if (max_node <= 0) { - WD_ERR("invalid: numa max node is %d!\n", max_node); - return -WD_EINVAL; - } - - if (!numa_num || numa_num > max_node) { - WD_ERR("invalid: numa number is %u!\n", numa_num); - return -WD_EINVAL; - } - - return 0; -} - -static int wd_sched_region_init(struct wd_sched_ctx *sched_ctx, - __u8 type_num, __u16 numa_num) -{ - struct wd_sched_info *sched_info = sched_ctx->sched_info; - struct wd_sched_info *cur_info; - int i, j, k; - - for (i = 0; i < MAX_SKEY_REGION_NUM; i++) { - sched_ctx->skey[i] = NULL; - sched_ctx->poll_tid[i] = 0; - } - pthread_mutex_init(&sched_ctx->skey_lock, NULL); - sched_ctx->skey_num = 0; - memset(&sched_ctx->balancer, 0x0, 
sizeof(struct wd_sched_balancer)); - - for (i = 0; i < numa_num; i++) { - /* Init sched_info next list */ - cur_info = &sched_info[i]; - for (j = 0; j < UADK_CTX_MAX; j++) { - for (k = 0; k < SCHED_MODE_BUTT; k++) { - cur_info->ctx_region[k] = - calloc(1, sizeof(struct sched_ctx_region) * type_num); - if (!cur_info->ctx_region[k]) - goto sched_err; - } - cur_info->valid = false; - cur_info->region_type = j; - - /* The last node point to NULL */ - if (j == UADK_CTX_MAX - 1) { - cur_info->next_info = NULL; - break; - } - cur_info->next_info = calloc(1, sizeof(*cur_info)); - if (!cur_info) - goto sched_err; - cur_info = cur_info->next_info; - } - } - - return 0; - -sched_err: - wd_sched_region_release(sched_ctx); - - return -WD_EINVAL; -} - +/** + * wd_sched_rr_alloc - External API for scheduler allocation + * @sched_type: Scheduling policy type (cannot modify per API contract) + * @type_num: Number of operation types (cannot modify per API contract) + * @region_num: Number of regions (cannot modify per API contract) + * @func: Poll function (cannot modify per API contract) + * + * Allocates and initializes scheduler with single global hash table. 
+ */ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, - __u16 numa_num, user_poll_func func) + __u16 region_num, user_poll_func func) { struct wd_sched_ctx *sched_ctx; struct wd_sched *sched; - int region_num; - int i, ret; + __u32 estimated_entries; + __u32 i; if (sched_type >= SCHED_POLICY_BUTT || !type_num) { WD_ERR("invalid: sched_type is %u or type_num is %u!\n", @@ -1804,48 +1931,51 @@ struct wd_sched *wd_sched_rr_alloc(__u8 sched_type, __u8 type_num, return NULL; } - if (sched_type == SCHED_POLICY_DEV) - region_num = DEVICE_REGION_MAX; - else - region_num = numa_num; - - sched_ctx = calloc(1, sizeof(struct wd_sched_ctx) + - sizeof(struct wd_sched_info) * region_num); + sched_ctx = calloc(1, sizeof(struct wd_sched_ctx)); if (!sched_ctx) { WD_ERR("failed to alloc memory for sched_ctx!\n"); goto err_out; } - /* In SCHED_POLICY_DEV mode, numa_num mean device numbers */ + /* Cache dimension parameters */ + sched_ctx->type_num = type_num; + sched_ctx->mode_num = SCHED_MODE_BUTT; + sched_ctx->region_num = region_num; + sched_ctx->policy = sched_type; + + if (sched_type == SCHED_POLICY_NONE || sched_type == SCHED_POLICY_SINGLE) { + /* Simple schedulers don't need hash table */ + goto simple_ok; + } + if (sched_type == SCHED_POLICY_DEV) { - sched_ctx->numa_num = 0; - sched_ctx->dev_num = 0; - for (i = 0; i < DEVICE_REGION_MAX; i++) { - sched_ctx->dev_id_map[i].dev_id = INVALID_POS; - sched_ctx->dev_id_map[i].region_id = INVALID_POS; - } + /* Device mode: region_num is actually device count */ + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; } else { - sched_ctx->numa_num = numa_num; - sched_ctx->dev_num = 0; - if (numa_num_check(sched_ctx->numa_num)) + /* NUMA mode: validate region_num */ + if (numa_num_check(region_num)) goto err_out; + estimated_entries = region_num * type_num * SCHED_MODE_BUTT * UADK_CTX_MAX; } - sched->h_sched_ctx = (handle_t)sched_ctx; - if (sched_type == SCHED_POLICY_NONE || - sched_type == 
SCHED_POLICY_SINGLE) - goto simple_ok; - - ret = wd_sched_region_init(sched_ctx, type_num, numa_num); - if (ret) + /* Create single global hash table */ + sched_ctx->domain_hash_table = wd_sched_hash_table_create(estimated_entries); + if (!sched_ctx->domain_hash_table) { + WD_ERR("failed to create hash table!\n"); goto ctx_out; + } simple_ok: sched_ctx->poll_func = func; - sched_ctx->policy = sched_type; - sched_ctx->type_num = type_num; - memset(sched_ctx->numa_map, -1, sizeof(int) * MAX_NUMA_NODES); + for (i = 0; i < SKEY_MAX_THREAD_NUM; i++) { + sched_ctx->skey[i] = NULL; + sched_ctx->poll_tid[i] = 0; + } + pthread_mutex_init(&sched_ctx->skey_lock, NULL); + sched_ctx->skey_num = 0; + + sched->h_sched_ctx = (handle_t)sched_ctx; sched->sched_init = sched_table[sched_type].sched_init; sched->pick_next_ctx = sched_table[sched_type].pick_next_ctx; sched->poll_policy = sched_table[sched_type].poll_policy; @@ -1853,6 +1983,9 @@ simple_ok: sched->name = sched_table[sched_type].name; sched->set_param = sched_table[sched_type].set_param; + WD_INFO("Scheduler %s allocated: type_num=%u, region_num=%u, mode_num=%d\n", + sched->name, type_num, region_num, SCHED_MODE_BUTT); + return sched; ctx_out: -- 2.43.0