From: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add the xsched CFS class callbacks implementation:
- dequeue_ctx_fair.
- enqueue_ctx_fair.
- pick_next_ctx_fair.
- check_preempt_fair.
- put_prev_ctx_fair.

Add cfs.c to the kernel/xsched Makefile.
Add CFS class related data structures.

Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Liu Kai <liukai284@huawei.com>
Signed-off-by: Xia Fukun <xiafukun@huawei.com>
Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
---
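A quick illustration of the accounting this class implements (a minimal
userspace model, not kernel code; the entity names and the
pick_first()/main() scaffolding are made up for the example). Each entity's
xruntime advances by delta * weight in xs_update(), and xs_pick_first()
always returns the entity with the smallest xruntime. Note that with the
formula as written, a larger weight makes xruntime grow faster and so
yields a smaller share, the inverse of task-CFS vruntime weighting:

	#include <stdio.h>
	#include <stdint.h>

	struct ent {
		const char *name;
		uint64_t xruntime;
		uint32_t weight;
	};

	/* Model of xs_pick_first(): the entity with minimal xruntime runs next. */
	static struct ent *pick_first(struct ent *e, int n)
	{
		struct ent *min = &e[0];

		for (int i = 1; i < n; i++)
			if (e[i].xruntime < min->xruntime)
				min = &e[i];
		return min;
	}

	int main(void)
	{
		struct ent e[] = { { "A", 0, 1 }, { "B", 0, 2 } };

		for (int i = 0; i < 6; i++) {
			struct ent *cur = pick_first(e, 2);

			/* Model of xs_update(): xruntime += delta * weight. */
			cur->xruntime += 10 * cur->weight;
			printf("ran %s, xruntime=%llu\n", cur->name,
			       (unsigned long long)cur->xruntime);
		}
		return 0;
	}

Over the six iterations A runs four times and B twice, so A (weight 1)
receives twice the share of B (weight 2).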
 include/linux/xsched.h |  37 +++++++++
 kernel/xsched/Kconfig  |  19 +++++
 kernel/xsched/Makefile |   1 +
 kernel/xsched/cfs.c    | 185 +++++++++++++++++++++++++++++++++++++++++
 kernel/xsched/core.c   |  10 ++-
 5 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 kernel/xsched/cfs.c

diff --git a/include/linux/xsched.h b/include/linux/xsched.h
index f62bbc55c354..0bb11d7360bd 100644
--- a/include/linux/xsched.h
+++ b/include/linux/xsched.h
@@ -38,15 +38,24 @@
 
 #define MAX_VSTREAM_NUM 512
 
+#define RUNTIME_INF ((u64)~0ULL)
+#define XSCHED_TIME_INF RUNTIME_INF
+#define XSCHED_CFS_WEIGHT_DFLT 1
+
 /*
  * A default kick slice for RT class XSEs.
  */
 #define XSCHED_RT_KICK_SLICE 2
+/*
+ * A default kick slice for CFS class XSEs.
+ */
+#define XSCHED_CFS_KICK_SLICE 10
 
 extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS];
 
 enum xcu_sched_type {
 	XSCHED_TYPE_RT = 0,
+	XSCHED_TYPE_CFS = 1,
 	XSCHED_TYPE_NUM,
 	XSCHED_TYPE_DFLT = XSCHED_TYPE_RT
 };
@@ -59,6 +68,7 @@ enum xse_prio {
 };
 
 extern struct xsched_class rt_xsched_class;
+extern struct xsched_class fair_xsched_class;
 
 #define xsched_first_class \
 	list_first_entry(&(xsched_class_list), struct xsched_class, node)
@@ -83,6 +93,14 @@ struct xsched_rq_rt {
 	unsigned int nr_running;
 };
 
+/* Manages the xsched CFS-like class rbtree-based runqueue. */
+struct xsched_rq_cfs {
+	unsigned int nr_running;
+	unsigned int load;
+	u64 min_xruntime;
+	struct rb_root_cached ctx_timeline;
+};
+
 /* Base XSched runqueue object structure that contains both mutual and
  * individual parameters for different scheduling classes.
  */
@@ -94,6 +112,8 @@ struct xsched_rq {
 	int nr_running;
 	/* RT class run queue.*/
 	struct xsched_rq_rt rt;
+	/* CFS class run queue.*/
+	struct xsched_rq_cfs cfs;
 };
 
 enum xsched_cu_status {
@@ -148,6 +168,21 @@ struct xsched_entity_rt {
 	ktime_t timeslice;
 };
 
+struct xsched_entity_cfs {
+	struct rb_node run_node;
+
+	/* Runqueue on which this entity is (to be) queued. */
+	struct xsched_rq_cfs *cfs_rq;
+
+	/* Value of "virtual" runtime used to sort entities in the rbtree. */
+	u64 xruntime;
+	u32 weight;
+
+	/* Execution time of the scheduling entity. */
+	u64 exec_start;
+	u64 sum_exec_runtime;
+};
+
 struct xsched_entity {
 	uint32_t task_type;
 
@@ -176,6 +211,8 @@ struct xsched_entity {
 
 	/* RT class entity. */
 	struct xsched_entity_rt rt;
+	/* CFS class entity. */
+	struct xsched_entity_cfs cfs;
 
 	/* Pointer to context object. */
 	struct xsched_context *ctx;
diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig
index 77883b6a3cc3..cc03f668a5dc 100644
--- a/kernel/xsched/Kconfig
+++ b/kernel/xsched/Kconfig
@@ -5,6 +5,7 @@ config XCU_SCHEDULER
 	default n
 	select XCU_VSTREAM
 	select XCU_SCHED_RT
+	select XCU_SCHED_CFS
 	help
 	  This option enables the XSched scheduler, a custom scheduling
 	  mechanism designed for heterogeneous compute units (e.g., XPUs).
 	  It provides:
@@ -49,3 +50,21 @@ config XCU_SCHED_RT
 	  Unless you are using RT workloads that rely on strict
 	  priority-based scheduling within XCU, it is recommended to
 	  keep the default setting.
+
+config XCU_SCHED_CFS
+	bool "XCU CFS scheduling class"
+	default n
+	depends on XCU_SCHEDULER
+	help
+	  Enable support for the CFS scheduling class in the XCU scheduler.
+
+	  This option allows the XCU scheduler to manage tasks using a fair-share
+	  scheduling model similar to the Completely Fair Scheduler (CFS).
+	  XCU-CFS provides proportional CPU sharing based on weights and supports
+	  hierarchical control through cgroups.
+
+	  Enable this option if you want to run workloads that rely on fair,
+	  weight-based CPU distribution within the XCU scheduling framework.
+	  If your workload does not require proportional sharing or uses only the
+	  RT scheduling class, you may leave this disabled.
+
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile
index e98dcea3b2bc..819abd159799 100644
--- a/kernel/xsched/Makefile
+++ b/kernel/xsched/Makefile
@@ -2,3 +2,4 @@
 obj-y += vstream.o
 obj-$(CONFIG_XCU_SCHEDULER) += core.o
 obj-$(CONFIG_XCU_SCHED_RT) += rt.o
+obj-$(CONFIG_XCU_SCHED_CFS) += cfs.o
diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c
new file mode 100644
index 000000000000..ea39ef8770f8
--- /dev/null
+++ b/kernel/xsched/cfs.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Completely Fair Scheduling (CFS) class for XPU devices
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/xsched.h>
+
+#define CFS_INNER_RQ_EMPTY(cfs_xse) \
+	((cfs_xse)->xruntime == XSCHED_TIME_INF)
+
+void xs_rq_add(struct xsched_entity_cfs *xse)
+{
+	struct xsched_rq_cfs *cfs_rq = xse->cfs_rq;
+	struct rb_node **link = &cfs_rq->ctx_timeline.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct xsched_entity_cfs *entry;
+	bool leftmost = true;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct xsched_entity_cfs, run_node);
+		if (xse->xruntime <= entry->xruntime) {
+			link = &parent->rb_left;
+		} else {
+			link = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&xse->run_node, parent, link);
+	rb_insert_color_cached(&xse->run_node, &cfs_rq->ctx_timeline, leftmost);
+}
+
+void xs_rq_remove(struct xsched_entity_cfs *xse)
+{
+	struct xsched_rq_cfs *cfs_rq = xse->cfs_rq;
+
+	rb_erase_cached(&xse->run_node, &cfs_rq->ctx_timeline);
+}
+
+/**
+ * xs_cfs_rq_update() - Update an entity's runqueue position with a new
+ * xruntime.
+ * @xse_cfs: Pointer to the CFS scheduling entity.
+ * @new_xrt: New xruntime value for the entity.
+ */
+static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt)
+{
+	xs_rq_remove(xse_cfs);
+	xse_cfs->xruntime = new_xrt;
+	xs_rq_add(xse_cfs);
+}
+
+static inline struct xsched_entity_cfs *
+xs_pick_first(struct xsched_rq_cfs *cfs_rq)
+{
+	struct xsched_entity_cfs *xse_cfs;
+	struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline);
+
+	if (!left)
+		return NULL;
+
+	xse_cfs = rb_entry(left, struct xsched_entity_cfs, run_node);
+	return xse_cfs;
+}
+
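+/*
+ * The ctx_timeline rbtree is kept ordered by xruntime, so xs_pick_first()
+ * is a cheap lookup of the entity with the smallest xruntime. Any xruntime
+ * change must go through xs_cfs_rq_update() (remove, update, re-insert)
+ * to preserve that ordering.
+ */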
+/**
+ * xs_update() - Account xruntime and runtime metrics.
+ * @xse_cfs: Pointer to the CFS scheduling entity.
+ * @delta: Execution time during the last period.
+ */
+static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta)
+{
+	u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight;
+
+	xs_cfs_rq_update(xse_cfs, new_xrt);
+	xse_cfs->sum_exec_runtime += delta;
+}
+
+/*
+ * Xsched fair class methods.
+ * For runqueue manipulation we rely on the root runqueue lock, which has
+ * already been acquired in the core. Accessing xsched_group_xcu_priv needs
+ * no locking because there is a single worker thread per XCU.
+ */
+static void dequeue_ctx_fair(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+	struct xsched_entity_cfs *first;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	xs_rq_remove(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
+}
+
+/**
+ * enqueue_ctx_fair() - Add a context to the runqueue.
+ * @xse: xsched entity of the context
+ * @xcu: executor
+ *
+ * In contrast to enqueue_task(), this is called once, at context init.
+ * Although groups reside in the tree, their nodes are not counted in
+ * nr_running. The xruntime of a group xsched entity is represented by the
+ * minimal xruntime inside it.
+ */
+static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	struct xsched_entity_cfs *first;
+	struct xsched_rq_cfs *rq;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	rq = xse_cfs->cfs_rq = &xcu->xrq.cfs;
+
+	/* If there is no XSE, or there are only empty groups */
+	if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF)
+		rq->min_xruntime = xse_cfs->xruntime;
+	else
+		xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime);
+
+	xs_rq_add(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
+}
+
+static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu)
+{
+	struct xsched_entity_cfs *xse;
+	struct xsched_rq_cfs *rq = &xcu->xrq.cfs;
+
+	xse = xs_pick_first(rq);
+	if (!xse)
+		return NULL;
+
+	return container_of(xse, struct xsched_entity, cfs);
+}
+
+static inline bool
+xs_should_preempt_fair(struct xsched_entity *xse)
+{
+	return (atomic_read(&xse->submitted_one_kick) >= XSCHED_CFS_KICK_SLICE);
+}
+
+static void put_prev_ctx_fair(struct xsched_entity *xse)
+{
+	struct xsched_entity_cfs *prev = &xse->cfs;
+
+	xs_update(prev, xse->last_exec_runtime);
+}
+
+void rq_init_fair(struct xsched_cu *xcu)
+{
+	xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED;
+}
+
+void xse_init_fair(struct xsched_entity *xse)
+{
+	xse->cfs.weight = XSCHED_CFS_WEIGHT_DFLT;
+}
+
+void xse_deinit_fair(struct xsched_entity *xse)
+{
+	/* TODO Cgroup exit */
+}
+
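+/*
+ * CFS class callback table registered with the XSched core. The core
+ * invokes these callbacks with the XCU runqueue lock held (see the
+ * locking note above dequeue_ctx_fair()).
+ */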
+struct xsched_class fair_xsched_class = {
+	.class_id = XSCHED_TYPE_CFS,
+	.kick_slice = XSCHED_CFS_KICK_SLICE,
+	.rq_init = rq_init_fair,
+	.xse_init = xse_init_fair,
+	.xse_deinit = xse_deinit_fair,
+	.dequeue_ctx = dequeue_ctx_fair,
+	.enqueue_ctx = enqueue_ctx_fair,
+	.pick_next_ctx = pick_next_ctx_fair,
+	.put_prev_ctx = put_prev_ctx_fair,
+	.check_preempt = xs_should_preempt_fair,
+};
diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c
index bdad82041ada..2905cca41205 100644
--- a/kernel/xsched/core.c
+++ b/kernel/xsched/core.c
@@ -193,6 +193,10 @@ int xsched_xse_set_class(struct xsched_entity *xse)
 		xse->class = &rt_xsched_class;
 		XSCHED_DEBUG("Context is in RT class %s\n", __func__);
 		break;
+	case XSCHED_TYPE_CFS:
+		xse->class = &fair_xsched_class;
+		XSCHED_DEBUG("Context is in CFS class %s\n", __func__);
+		break;
 	default:
 		XSCHED_ERR("Xse has incorrect class @ %s\n", __func__);
 		return -EINVAL;
@@ -362,7 +366,7 @@ int xsched_schedule(void *input_xcu)
 	while (!kthread_should_stop()) {
 		mutex_unlock(&xcu->xcu_lock);
 		wait_event_interruptible(xcu->wq_xcu_idle,
-					 xcu->xrq.rt.nr_running);
+					 xcu->xrq.rt.nr_running || xcu->xrq.cfs.nr_running || kthread_should_stop());
 		mutex_lock(&xcu->xcu_lock);
 
 		if (kthread_should_stop()) {
@@ -501,6 +505,10 @@ __init int xsched_sched_init(void)
 	xsched_register_sched_class(&rt_xsched_class);
 #endif
 
+#ifdef CONFIG_XCU_SCHED_CFS
+	xsched_register_sched_class(&fair_xsched_class);
+#endif
+
 	return 0;
 }
 late_initcall(xsched_sched_init);
-- 
2.34.1
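P.S. A note on the wait_event_interruptible() change in xsched_schedule():
the condition is re-evaluated on every wakeup, so folding in
kthread_should_stop() lets the worker be stopped even while both runqueues
are empty; without it, kthread_stop() could block on a worker sleeping with
nothing queued. A minimal sketch of the resulting sleep/wake predicate
(xcu_worker_should_wake() is a hypothetical helper, not part of the patch):

	/* The worker may sleep only while both the RT and CFS runqueues
	 * are empty and no stop has been requested. */
	static inline bool xcu_worker_should_wake(const struct xsched_rq *xrq,
						  bool stop_requested)
	{
		return xrq->rt.nr_running || xrq->cfs.nr_running ||
		       stop_requested;
	}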