
From: Alekseev Dmitry <alekseev.dmitry@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- The xpu device should be used exclusively within a sched slice, so add an xcu_wait() implementation which returns after the xpu kernels have finished executing. It is called after submit_kick(). Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> Signed-off-by: Xia Fukun <xiafukun@huawei.com> --- drivers/xcu/xcu_group.c | 23 +++- include/linux/vstream.h | 7 +- include/linux/xcu_group.h | 8 ++ include/linux/xsched.h | 19 +-- include/uapi/linux/xcu_vstream.h | 4 +- kernel/xsched/core.c | 200 ++++++++++++++++--------------- kernel/xsched/rt.c | 17 +++ 7 files changed, 161 insertions(+), 117 deletions(-) diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index fd2f5e1d8024..f09ea3adb459 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -135,7 +135,12 @@ int xcu_run(struct xcu_op_handler_params *params) */ int xcu_wait(struct xcu_op_handler_params *params) { - return 0; + if (!params->group->opt || !params->group->opt->wait) { + XSCHED_ERR("No function [wait] called.\n"); + return -EINVAL; + } + + return params->group->opt->wait(params); } /* This function runs "complete" callback for a given xcu_group @@ -215,6 +220,22 @@ int xcu_logic_free(struct xcu_op_handler_params *params) return params->group->opt->logic_free(params); } +/* This function runs a "sqe_op" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to set or get sqe info. 
+ */ +int xcu_sqe_op(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->sqe_op) { + XSCHED_ERR("No function [sqe_op] called.\n"); + return -EINVAL; + } + + return params->group->opt->sqe_op(params); +} + static struct xcu_group __xcu_group_root = { .id = 0, .type = XCU_TYPE_ROOT, diff --git a/include/linux/vstream.h b/include/linux/vstream.h index 58ee4c235a07..f0c290dc184c 100644 --- a/include/linux/vstream.h +++ b/include/linux/vstream.h @@ -13,15 +13,18 @@ * to be processed by a driver. */ typedef struct vstream_metadata { - uint32_t exec_time; /* A value of SQ tail that has been passed with the * kick that is described by this exact metadata object. */ uint32_t sq_tail; uint32_t sqe_num; uint32_t sq_id; + uint8_t sqe[XCU_SQE_SIZE_MAX]; + + /* Report buffer for fake read. */ + int8_t cqe[XCU_CQE_BUF_SIZE]; + uint32_t cqe_num; int32_t timeout; - int8_t sqe[XCU_SQE_SIZE_MAX]; /* A node for metadata list */ struct list_head node; diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h index 93f732f84694..e73c64f6c520 100644 --- a/include/linux/xcu_group.h +++ b/include/linux/xcu_group.h @@ -17,6 +17,11 @@ enum xcu_type { XCU_TYPE_XPU, }; +enum xcu_sqe_op_type { + SQE_SET_NOTIFY, + SQE_IS_NOTIFY, +}; + /** * @group: value for this entry. * @hash_node: hash node list. 
@@ -41,6 +46,7 @@ struct xcu_op_handler_params { void *param_5; void *param_6; void *param_7; + void *param_8; }; }; }; @@ -55,6 +61,7 @@ struct xcu_operation { xcu_op_handler_fn_t alloc; xcu_op_handler_fn_t logic_alloc; xcu_op_handler_fn_t logic_free; + xcu_op_handler_fn_t sqe_op; }; struct xcu_group { @@ -99,6 +106,7 @@ extern int xcu_finish(struct xcu_op_handler_params *params); extern int xcu_alloc(struct xcu_op_handler_params *params); extern int xcu_logic_alloc(struct xcu_op_handler_params *params); extern int xcu_logic_free(struct xcu_op_handler_params *params); +extern int xcu_sqe_op(struct xcu_op_handler_params *params); #endif /* !CONFIG_XCU_SCHEDULER */ #endif /* __XSCHED_XCU_GROUP_H__ */ diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 5ffaffc5afdb..825ff2dc0c8e 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -38,7 +38,6 @@ #define RUNTIME_INF ((u64)~0ULL) #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 -#define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) #define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 @@ -438,13 +437,8 @@ static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) atomic_inc(&xse->kicks_pending_ctx_cnt); /* Incrementing prio based pending kicks counter for RT class */ - if (xse_is_rt(xse)) { + if (xse_is_rt(xse)) atomic_inc(&xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]); - XSCHED_DEBUG("xcu increased pending kicks @ %s\n", __func__); - } else { - XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, - __func__); - } return 0; } @@ -476,18 +470,12 @@ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) /* Decrementing prio based pending kicks counter for RT class. 
*/ if (xse_is_rt(xse)) { kicks_prio_rt = &xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]; - if (!atomic_read(kicks_prio_rt)) { XSCHED_ERR( - "Tried to decrement prio pending kicks beyond 0!\n"); + "Try to decrement prio pending kicks beyond 0!\n"); return -EINVAL; } - atomic_dec(kicks_prio_rt); - XSCHED_DEBUG("xcu decreased pending kicks @ %s\n", __func__); - } else { - XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, - __func__); } return 0; @@ -591,7 +579,6 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; - vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -610,8 +597,6 @@ struct xsched_cu *xcu_find(uint32_t *type, /* Vstream metadata proccesing functions.*/ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); -void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params, - struct vstream_metadata *vsm); /* Xsched group manage functions */ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 46d5a32db68e..f72d4720e993 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -6,6 +6,9 @@ #define PAYLOAD_SIZE_MAX 512 #define XCU_SQE_SIZE_MAX 64 +#define XCU_CQE_SIZE_MAX 32 +#define XCU_CQE_REPORT_NUM 4 +#define XCU_CQE_BUF_SIZE (XCU_CQE_REPORT_NUM * XCU_CQE_SIZE_MAX) /* * VSTREAM_ALLOC: alloc a vstream, buffer for tasks @@ -28,7 +31,6 @@ typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; - __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git 
a/kernel/xsched/core.c b/kernel/xsched/core.c index 724f7edb916f..bce720c89e1b 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -52,15 +52,13 @@ static void put_prev_ctx(struct xsched_entity *xse) static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) { - int kick_count; + int kick_count, scheduled = 0, not_empty; struct vstream_info *vs; - unsigned int sum_exec_time = 0; - size_t kicks_submitted = 0; + struct xcu_op_handler_params params; struct vstream_metadata *vsm; - int not_empty; kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); - XSCHED_DEBUG("Before decrement XSE kick_count=%u @ %s\n", + XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", kick_count, __func__); if (kick_count == 0) { @@ -72,31 +70,44 @@ static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) do { not_empty = 0; for_each_vstream_in_ctx(vs, xse->ctx) { + if (scheduled >= XSCHED_CFS_KICK_SLICE) + break; + spin_lock(&vs->stream_lock); vsm = xsched_vsm_fetch_first(vs); spin_unlock(&vs->stream_lock); - if (vsm) { - list_add_tail(&vsm->node, &xcu->vsm_list); - - sum_exec_time += vsm->exec_time; - kicks_submitted++; - xsched_dec_pending_kicks_xse(xse); - XSCHED_DEBUG( - "vs id = %d Kick submit exec_time %u sq_tail %u sqe_num %u sq_id %u @ %s\n", - vs->id, vsm->exec_time, vsm->sq_tail, - vsm->sqe_num, vsm->sq_id, __func__); - not_empty++; + + if (!vsm) + continue; + list_add_tail(&vsm->node, &xcu->vsm_list); + scheduled++; + xsched_dec_pending_kicks_xse(xse); + not_empty++; + } + } while ((scheduled < XSCHED_CFS_KICK_SLICE) && (not_empty)); + + /* + * Iterate over all vstreams in context: + * Set wr_cqe bit in last computing task in vsm_list + */ + for_each_vstream_in_ctx(vs, xse->ctx) { + list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) { + if (vsm->parent == vs) { + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_SET_NOTIFY}; + params.param_2 = &vsm->sqe; + xcu_sqe_op(&params); + break; } } - } while 
((sum_exec_time < XSCHED_CFS_MIN_TIMESLICE) && (not_empty)); + } kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", kick_count, __func__); - xse->total_scheduled += kicks_submitted; - - return kicks_submitted; + xse->total_scheduled += scheduled; + return scheduled; } static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) @@ -194,7 +205,6 @@ static int delete_ctx(struct xsched_context *ctx) atomic_read(&xse->kicks_pending_ctx_cnt), __func__); xsched_group_xse_detach(xse); - return 0; } @@ -366,7 +376,7 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) err = xsched_xse_set_class(xse); if (err) { - XSCHED_ERR("Failed to set xse class @ %s\n", __func__); + XSCHED_ERR("Fail to set xse class @ %s\n", __func__); return err; } @@ -399,70 +409,97 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } -/* - * A function for submitting stream's commands (sending commands to a XCU). - */ -static int xsched_proc(struct xsched_cu *xcu, struct vstream_info *vs, - struct vstream_metadata *vsm) +static void submit_kick(struct vstream_metadata *vsm) { + struct vstream_info *vs = vsm->parent; struct xcu_op_handler_params params; - struct xsched_entity *xse; + params.group = vs->xcu->group; + params.fd = vs->fd; + params.param_1 = &vs->id; + params.param_2 = &vs->channel_id; + params.param_3 = vsm->sqe; + params.param_4 = &vsm->sqe_num; + params.param_5 = &vsm->timeout; + params.param_6 = &vs->sqcq_type; + params.param_7 = vs->drv_ctx; + params.param_8 = &vs->logic_vcq_id; - XSCHED_CALL_STUB(); - - xse = &vs->ctx->xse; - - /* Init input parameters for xcu_run and xcu_wait callbacks. */ - params.group = xcu->group; - - /* Increase process time by abstract kick handling time. 
*/ - xse->last_exec_runtime += vsm->exec_time; - - XSCHED_DEBUG("Process vsm sq_tail %d exec_time %u sqe_num %d sq_id %d@ %s\n", - vsm->sq_tail, vsm->exec_time, vsm->sqe_num, vsm->sq_id, __func__); - submit_kick(vs, &params, vsm); + /* Send vstream on a device for processing. */ + if (xcu_run(&params)) { + XSCHED_ERR( + "Fail to send Vstream id %u tasks to a device for processing.\n", + vs->id); + } - xse->total_submitted++; + XSCHED_DEBUG("Vstream id %u submit vsm: sq_tail %u\n", vs->id, vsm->sq_tail); +} - XSCHED_DEBUG("xse %d total_submitted = %lu @ %s\n", - xse->tgid, xse->total_submitted, __func__); +static void submit_wait(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + /* Wait timeout in ms. */ + int32_t timeout = 500; + + params.group = vs->xcu->group; + params.param_1 = &vs->channel_id; + params.param_2 = &vs->logic_vcq_id; + params.param_3 = &vs->user_stream_id; + params.param_4 = &vsm->sqe; + params.param_5 = vsm->cqe; + params.param_6 = vs->drv_ctx; + params.param_7 = &timeout; + + /* Wait for a device to complete processing. 
*/ + if (xcu_wait(&params)) { + XSCHED_ERR("Fail to wait Vstream id %u tasks, logic_cq_id %u.\n", + vs->id, vs->logic_vcq_id); + } - XSCHED_EXIT_STUB(); - return 0; + XSCHED_DEBUG("Vstream id %u wait finish, logic_cq_id %u\n", + vs->id, vs->logic_vcq_id); } static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) { struct vstream_metadata *vsm, *tmp; - unsigned int submit_exec_time = 0; - size_t kicks_submitted = 0; - unsigned long wait_us; + int submitted = 0; + long submit_exec_time = 0; + ktime_t t_start = 0; + struct xcu_op_handler_params params; XSCHED_DEBUG("%s called for xse %d on xcu %u\n", __func__, xse->tgid, xcu->id); list_for_each_entry_safe(vsm, tmp, &xcu->vsm_list, node) { - xsched_proc(xcu, vsm->parent, vsm); - submit_exec_time += vsm->exec_time; - kicks_submitted++; + submit_kick(vsm); + XSCHED_DEBUG("Xse %d vsm %u sched_delay: %lld ns\n", + xse->tgid, vsm->sq_id, ktime_to_ns(ktime_sub(ktime_get(), vsm->add_time))); + + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_IS_NOTIFY}; + params.param_2 = &vsm->sqe; + if (xcu_sqe_op(&params)) { + mutex_unlock(&xcu->xcu_lock); + t_start = ktime_get(); + submit_wait(vsm); + submit_exec_time += ktime_to_ns(ktime_sub(ktime_get(), t_start)); + mutex_lock(&xcu->xcu_lock); + } + submitted++; + list_del(&vsm->node); + kfree(vsm); } + xse->last_exec_runtime += submit_exec_time; + xse->total_submitted += submitted; + atomic_add(submitted, &xse->submitted_one_kick); INIT_LIST_HEAD(&xcu->vsm_list); + XSCHED_DEBUG("Xse %d submitted=%d total=%zu, exec_time=%ld @ %s\n", + xse->tgid, submitted, xse->total_submitted, + submit_exec_time, __func__); - mutex_unlock(&xcu->xcu_lock); - - wait_us = div_u64(submit_exec_time, NSEC_PER_USEC); - XSCHED_DEBUG("XCU kicks_submitted=%lu wait_us=%lu @ %s\n", - kicks_submitted, wait_us, __func__); - - if (wait_us > 0) { - /* Sleep shift not larger than 12.5% */ - usleep_range(wait_us, wait_us + (wait_us >> 3)); - } - - mutex_lock(&xcu->xcu_lock); - - 
return kicks_submitted; + return submitted; } static inline bool should_preempt(struct xsched_entity *xse) @@ -519,35 +556,6 @@ static int xsched_schedule(void *input_xcu) return err; } -void submit_kick(struct vstream_info *vs, - struct xcu_op_handler_params *params, - struct vstream_metadata *vsm) -{ - int ret; - - params->fd = vs->fd; - params->param_1 = &vs->id; - params->param_2 = &vs->channel_id; - params->param_3 = vsm->sqe; - params->param_4 = &vsm->sqe_num; - params->param_5 = &vsm->timeout; - params->param_6 = &vs->sqcq_type; - params->param_7 = vs->drv_ctx; - /* Send vstream on a device for processing. */ - ret = xcu_run(params); - if (ret) { - XSCHED_ERR( - "Failed to send vstream tasks vstreamId=%d to a device for processing.\n", - vs->id); - } - - XSCHED_DEBUG("Vstream_id %d submit vsm: sq_tail %d\n", vs->id, vsm->sq_tail); - - kfree(vsm); - - return; -} - /* Initialize xsched rt runqueue during kernel init. * Should only be called from xsched_rq_init function. */ @@ -641,7 +649,7 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg) new_vsm = kmalloc(sizeof(struct vstream_metadata), GFP_KERNEL); if (!new_vsm) { - XSCHED_ERR("Failed to alloc kick metadata for vs %u @ %s\n", + XSCHED_ERR("Fail to alloc kick metadata for vs %u @ %s\n", vs->id, __func__); return -ENOMEM; } diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index 4789637dd1cd..c7cbc21e8cb2 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -184,6 +184,7 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse) int kick_count, scheduled = 0; struct vstream_info *vs; struct vstream_metadata *vsm; + struct xcu_op_handler_params params; kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", @@ -205,6 +206,22 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse) spin_unlock(&vs->stream_lock); } + /* + * Iterate over all vstreams in context: + * Set wr_cqe 
bit in last computing task in vsm_list + */ + for_each_vstream_in_ctx(vs, xse->ctx) { + list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) { + if (vsm->parent == vs) { + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_SET_NOTIFY}; + params.param_2 = &vsm->sqe; + xcu_sqe_op(&params); + break; + } + } + } + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", kick_count, __func__); -- 2.34.1