
From: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add the xsched CFS class callback implementations:
- dequeue_ctx_fair
- enqueue_ctx_fair
- pick_next_ctx_fair
- check_preempt_fair
- put_prev_ctx_fair
- submit_prepare_ctx_fair

Add cfs.c to the kernel/xsched Makefile.
Add CFS class related data structures.

Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Liu Kai <liukai284@huawei.com>
Signed-off-by: Xia Fukun <xiafukun@huawei.com>
---
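Note for reviewers (not part of the commit): the fair class keeps a
weighted virtual runtime, xruntime, per context and always runs the
context with the smallest value. Below is a minimal userspace sketch of
that accounting rule; the entity names, the weights, and the 1 ms slice
are illustrative only and do not appear in the patch.

  #include <stdint.h>
  #include <stdio.h>

  struct ent {
          const char *name;
          uint64_t xruntime;
          uint32_t weight;
  };

  /* Mirrors xs_update(): charge weighted execution time to xruntime. */
  static void charge(struct ent *e, uint64_t delta_ns)
  {
          e->xruntime += delta_ns * e->weight;
  }

  /* Mirrors xs_pick_first(): the entity with the smallest xruntime runs. */
  static struct ent *pick(struct ent *v, int n)
  {
          struct ent *best = &v[0];

          for (int i = 1; i < n; i++)
                  if (v[i].xruntime < best->xruntime)
                          best = &v[i];
          return best;
  }

  int main(void)
  {
          struct ent v[2] = { { "a", 0, 1 }, { "b", 0, 2 } };

          for (int i = 0; i < 6; i++) {
                  struct ent *e = pick(v, 2);

                  printf("run %s\n", e->name);
                  charge(e, 1000000); /* charge a 1 ms slice */
          }
          return 0;
  }

With these weights, "a" runs twice as often as "b": a larger weight
inflates the charged time here, the inverse of task CFS load weights,
where a larger weight slows vruntime growth.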
 include/linux/xsched.h |  50 ++++++++++-
 kernel/xsched/Makefile |   2 +-
 kernel/xsched/cfs.c    | 183 +++++++++++++++++++++++++++++++++++++++++
 kernel/xsched/core.c   |  32 +++++--
 kernel/xsched/rt.c     |   2 +-
 5 files changed, 255 insertions(+), 14 deletions(-)
 create mode 100644 kernel/xsched/cfs.c

diff --git a/include/linux/xsched.h b/include/linux/xsched.h
index 18e62f265199..b4b6274b4c06 100644
--- a/include/linux/xsched.h
+++ b/include/linux/xsched.h
@@ -35,6 +35,11 @@
 
 #define XCU_HASH_ORDER 6
 
+#define RUNTIME_INF ((u64)~0ULL)
+#define XSCHED_TIME_INF RUNTIME_INF
+#define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1
+#define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC)
+
 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF)
 
 #define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF)
@@ -55,6 +60,7 @@
 
 enum xcu_sched_type {
 	XSCHED_TYPE_RT,
 	XSCHED_TYPE_DFLT = XSCHED_TYPE_RT,
+	XSCHED_TYPE_CFS,
 	XSCHED_TYPE_NUM,
 };
 
@@ -88,6 +94,7 @@ enum xse_flag {
 
 extern const struct xsched_class rt_xsched_class;
+extern const struct xsched_class fair_xsched_class;
 
 #define xsched_first_class (&rt_xsched_class)
 
@@ -100,6 +107,13 @@ extern const struct xsched_class rt_xsched_class;
 #define for_each_vstream_in_ctx(vs, ctx) \
 	list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node)
 
+/* Manages xsched CFS-like class rbtree based runqueue. */
+struct xsched_rq_cfs {
+	unsigned int nr_running;
+	unsigned int load;
+	u64 min_xruntime;
+	struct rb_root_cached ctx_timeline;
+};
 
 /* Manages xsched RT-like class linked list based runqueue.
  *
@@ -123,10 +137,11 @@ struct xsched_rq {
 	const struct xsched_class *class;
 	int state;
 
-	int nr_running;
 	/* RT class run queue.*/
 	struct xsched_rq_rt rt;
+	/* CFS class run queue.*/
+	struct xsched_rq_cfs cfs;
 };
 
 enum xcu_state {
@@ -159,6 +174,8 @@ struct xsched_cu {
 
 	/* RT class kick counter. */
 	atomic_t pending_kicks_rt;
+	/* CFS class kick counter. */
+	atomic_t pending_kicks_cfs;
 
 	struct task_struct *worker;
 
@@ -185,6 +202,21 @@ struct xsched_entity_rt {
 	s64 kick_slice;
 };
 
+struct xsched_entity_cfs {
+	struct rb_node run_node;
+
+	/* Rq on which this entity is (to be) queued. */
+	struct xsched_rq_cfs *cfs_rq;
+
+	/* Value of "virtual" runtime to sort entities in rbtree */
+	u64 xruntime;
+	u32 weight;
+
+	/* Execution time of scheduling entity */
+	u64 exec_start;
+	u64 sum_exec_runtime;
+};
+
 struct xsched_entity {
 	uint32_t task_type;
 
@@ -213,6 +245,8 @@ struct xsched_entity {
 
 	/* RT class entity. */
 	struct xsched_entity_rt rt;
+	/* CFS class entity. */
+	struct xsched_entity_cfs cfs;
 
 	/* Pointer to context object. */
 	struct xsched_context *ctx;
 
@@ -235,6 +269,11 @@ static inline bool xse_is_rt(const struct xsched_entity *xse)
 	return xse && xse->class == &rt_xsched_class;
 }
 
+static inline bool xse_is_cfs(const struct xsched_entity *xse)
+{
+	return xse && xse->class == &fair_xsched_class;
+}
+
 /* Returns a pointer to an atomic_t variable representing a counter
  * of currently pending vstream kicks on a given XCU and for a
  * given xsched class.
@@ -256,6 +295,8 @@ xsched_get_pending_kicks_class(const struct xsched_class *class,
 	if (class == &rt_xsched_class)
 		return &xcu->pending_kicks_rt;
+	if (class == &fair_xsched_class)
+		return &xcu->pending_kicks_cfs;
 
 	XSCHED_ERR("Xsched entity has an invalid class @ %s\n", __func__);
 	return NULL;
@@ -362,13 +403,14 @@ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse)
 static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu)
 {
 	atomic_t *kicks_rt;
+	atomic_t *kicks_cfs;
 
 	kicks_rt = xsched_get_pending_kicks_class(&rt_xsched_class, xcu);
-
-	if (!kicks_rt)
+	kicks_cfs = xsched_get_pending_kicks_class(&fair_xsched_class, xcu);
+	if (!kicks_rt || !kicks_cfs)
 		return false;
 
-	return !!atomic_read(kicks_rt);
+	return (!!atomic_read(kicks_rt) || !!atomic_read(kicks_cfs));
 }
 
 static inline int xse_integrity_check(const struct xsched_entity *xse)
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile
index f882518d54ab..fe212f228cf6 100644
--- a/kernel/xsched/Makefile
+++ b/kernel/xsched/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += vstream.o
-obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o
+obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o
diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c
new file mode 100644
index 000000000000..585462351d8a
--- /dev/null
+++ b/kernel/xsched/cfs.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Completely Fair Scheduling (CFS) Class for XPU device
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/xsched.h>
+
+#define CFS_INNER_RQ_EMPTY(cfs_xse) \
+	((cfs_xse)->xruntime == XSCHED_TIME_INF)
+
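+/*
+ * Keep the context timeline ordered by xruntime: the leftmost (cached)
+ * node is always the entity with the smallest xruntime, so xs_pick_first()
+ * is O(1) while insertion and removal are O(log n).
+ */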
+void xs_rq_add(struct xsched_entity_cfs *xse)
+{
+	struct xsched_rq_cfs *cfs_rq = xse->cfs_rq;
+	struct rb_node **link = &cfs_rq->ctx_timeline.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct xsched_entity_cfs *entry;
+	bool leftmost = true;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct xsched_entity_cfs, run_node);
+		if (xse->xruntime <= entry->xruntime) {
+			link = &parent->rb_left;
+		} else {
+			link = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&xse->run_node, parent, link);
+	rb_insert_color_cached(&xse->run_node, &cfs_rq->ctx_timeline, leftmost);
+}
+
+void xs_rq_remove(struct xsched_entity_cfs *xse)
+{
+	struct xsched_rq_cfs *cfs_rq = xse->cfs_rq;
+
+	rb_erase_cached(&xse->run_node, &cfs_rq->ctx_timeline);
+}
+
+/**
+ * xs_cfs_rq_update() - Update an entity's runqueue position with a new xruntime.
+ */
+static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt)
+{
+	xs_rq_remove(xse_cfs);
+	xse_cfs->xruntime = new_xrt;
+	xs_rq_add(xse_cfs);
+}
+
+static inline struct xsched_entity_cfs *
+xs_pick_first(struct xsched_rq_cfs *cfs_rq)
+{
+	struct xsched_entity_cfs *xse_cfs;
+	struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline);
+
+	if (!left)
+		return NULL;
+
+	xse_cfs = rb_entry(left, struct xsched_entity_cfs, run_node);
+	return xse_cfs;
+}
+
+/**
+ * xs_update() - Account xruntime and runtime metrics.
+ * @xse_cfs: Pointer to the CFS scheduling entity.
+ * @delta: Execution time in the last period.
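+ *
+ * Charges delta * weight to the entity's xruntime: an entity with a
+ * larger weight accumulates xruntime faster and, since the leftmost
+ * (smallest xruntime) entity runs next, is subsequently picked less often.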
+ */
+static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta)
+{
+	u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight;
+
+	xs_cfs_rq_update(xse_cfs, new_xrt);
+	xse_cfs->sum_exec_runtime += delta;
+}
+
+/*
+ * Xsched Fair class methods.
+ * For rq manipulation we rely on the root runqueue lock, which is already
+ * acquired in core. Accessing xsched_group_xcu_priv needs no locking
+ * because there is one worker thread per XCU.
+ */
+static void dequeue_ctx_fair(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+	struct xsched_entity_cfs *first;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	xs_rq_remove(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
+
+	if (xcu->xrq.cfs.min_xruntime == XSCHED_TIME_INF) {
+		atomic_set(&xcu->has_active, 0);
+		XSCHED_DEBUG("%s: set has_active to 0\n", __func__);
+	}
+}
+
+/**
+ * enqueue_ctx_fair() - Add a context to the runqueue.
+ * @xse: xsched entity of the context
+ * @xcu: executor
+ *
+ * In contrast to enqueue_task, this is called once, on context init.
+ * Although groups reside in the tree, their nodes are not counted in
+ * nr_running. The xruntime of a group xsched entity is represented by
+ * the min xruntime of the entities inside it.
+ */
+static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	struct xsched_entity_cfs *first;
+	struct xsched_rq_cfs *rq;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT;
+	rq = xse_cfs->cfs_rq = &xcu->xrq.cfs;
+
+	/* If there are no XSEs or only empty groups. */
+	if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF)
+		rq->min_xruntime = xse_cfs->xruntime;
+	else
+		xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime);
+
+	xs_rq_add(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
+
+	if (xcu->xrq.cfs.min_xruntime != XSCHED_TIME_INF) {
+		atomic_set(&xcu->has_active, 1);
+		XSCHED_DEBUG("%s: set has_active to 1\n", __func__);
+	}
+}
+
+static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu)
+{
+	struct xsched_entity_cfs *xse;
+	struct xsched_rq_cfs *rq = &xcu->xrq.cfs;
+
+	xse = xs_pick_first(rq);
+	if (!xse)
+		return NULL;
+
+	return container_of(xse, struct xsched_entity, cfs);
+}
+
+static inline bool
+xs_should_preempt_fair(struct xsched_entity *xse)
+{
+	return (atomic_read(&xse->submitted_one_kick) >= XSCHED_CFS_KICK_SLICE);
+}
+
+static void put_prev_ctx_fair(struct xsched_entity *xse)
+{
+	struct xsched_entity_cfs *prev = &xse->cfs;
+
+	xs_update(prev, xse->last_exec_runtime);
+}
+
+int submit_prepare_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	return 0;
+}
+
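+/*
+ * Fair class callback table. rt_xsched_class.next now points to this
+ * class (see the rt.c hunk below), so class iteration starting from
+ * xsched_first_class visits the RT class first and the fair class second.
+ */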
+const struct xsched_class fair_xsched_class = {
+	.next = NULL,
+	.dequeue_ctx = dequeue_ctx_fair,
+	.enqueue_ctx = enqueue_ctx_fair,
+	.pick_next_ctx = pick_next_ctx_fair,
+	.put_prev_ctx = put_prev_ctx_fair,
+	.submit_prepare_ctx = submit_prepare_ctx_fair,
+	.check_preempt = xs_should_preempt_fair,
+};
diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c
index 1ab04aa35954..0af75328b92b 100644
--- a/kernel/xsched/core.c
+++ b/kernel/xsched/core.c
@@ -87,7 +87,7 @@ static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse)
 				not_empty++;
 			}
 		}
-	} while (not_empty);
+	} while ((sum_exec_time < XSCHED_CFS_MIN_TIMESLICE) && (not_empty));
 
 	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
 	XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n",
@@ -320,6 +320,10 @@ int xsched_xse_set_class(struct xsched_entity *xse)
 		xse->class = &rt_xsched_class;
 		XSCHED_DEBUG("Context is in RT class %s\n", __func__);
 		break;
+	case XSCHED_TYPE_CFS:
+		xse->class = &fair_xsched_class;
+		XSCHED_DEBUG("Context is in CFS class %s\n", __func__);
+		break;
 	default:
 		XSCHED_ERR("Xse has incorrect class @ %s\n", __func__);
 		return -EINVAL;
@@ -360,6 +364,10 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs)
 		return err;
 	}
 
+	if (xse_is_cfs(xse)) {
+		xse->cfs.sum_exec_runtime = 0;
+	}
+
 	if (xse_is_rt(xse)) {
 		xse->rt.state = XSE_PREPARE;
 		xse->rt.flag = XSE_TIF_NONE;
@@ -466,10 +474,9 @@ static int xsched_schedule(void *input_xcu)
 	while (!kthread_should_stop()) {
 		mutex_unlock(&xcu->xcu_lock);
 		wait_event_interruptible(xcu->wq_xcu_idle,
-			atomic_read(&xcu->has_active) || xcu->xrq.nr_running);
-
-		XSCHED_DEBUG("%s: rt_nr_running = %d, has_active = %d\n",
-			__func__, xcu->xrq.nr_running, atomic_read(&xcu->has_active));
+			xcu->xrq.cfs.nr_running || xcu->xrq.rt.nr_running);
+		XSCHED_DEBUG("%s: rt nr_running = %u, cfs nr_running = %u\n",
+			__func__, xcu->xrq.rt.nr_running, xcu->xrq.cfs.nr_running);
 
 		mutex_lock(&xcu->xcu_lock);
 		if (!xsched_check_pending_kicks_xcu(xcu)) {
@@ -534,7 +541,7 @@ void submit_kick(struct vstream_info *vs,
 }
 
 /* Initialize xsched rt runqueue during kernel init.
- * Should only be called from xsched_init function.
+ * Should only be called from xsched_rq_init function.
  */
 static inline void xsched_rt_rq_init(struct xsched_cu *xcu)
 {
@@ -549,14 +556,23 @@ static inline void xsched_rt_rq_init(struct xsched_cu *xcu)
 	}
 }
 
+/* Initialize xsched cfs runqueue during kernel init.
+ * Should only be called from xsched_rq_init function.
+ */
+static inline void xsched_cfs_rq_init(struct xsched_cu *xcu)
+{
+	xcu->xrq.cfs.nr_running = 0;
+	xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED;
+}
+
 /* Initialize xsched classes' runqueues. */
 static inline void xsched_rq_init(struct xsched_cu *xcu)
 {
-	xcu->xrq.nr_running = 0;
 	xcu->xrq.curr_xse = NULL;
 	xcu->xrq.class = &rt_xsched_class;
 	xcu->xrq.state = XRQ_STATE_IDLE;
 	xsched_rt_rq_init(xcu);
+	xsched_cfs_rq_init(xcu);
 }
 
 /* Initializes all xsched XCU objects.
@@ -572,7 +588,7 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group,
 	xcu->group = group;
 
 	atomic_set(&xcu->pending_kicks_rt, 0);
-	atomic_set(&xcu->has_active, 0);
+	atomic_set(&xcu->pending_kicks_cfs, 0);
 
 	INIT_LIST_HEAD(&xcu->vsm_list);
 	init_waitqueue_head(&xcu->wq_xcu_idle);
diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c
index c6129b86f105..4789637dd1cd 100644
--- a/kernel/xsched/rt.c
+++ b/kernel/xsched/rt.c
@@ -214,7 +214,7 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse)
 }
 
 const struct xsched_class rt_xsched_class = {
-	.next = NULL,
+	.next = &fair_xsched_class,
 	.dequeue_ctx = dequeue_ctx_rt,
 	.enqueue_ctx = enqueue_ctx_rt,
 	.pick_next_ctx = pick_next_ctx_rt,
-- 
2.34.1