[PATCH v4 OLK-6.6 00/20] Support multi-level cgroup
Support multi-level cgroup Liu Kai (20): xsched: add has_running hook to xsched classes for wake-up eligibility check xsched/cfs: move xruntime updates to pick/put paths xsched/cfs: propagate group activation/deactivation up the hierarchy xsched: delete unused code xsched: update group shares using xcu_grp_shares_add/sub helpers xsched: remove redundant xcu parameter from dequeue_ctx xsched/quota: support multi-level cgroup xsched/quota: add throttled flag to cfs_rq for robust hierarchical enqueue under throttling xsched: move throttling-related fields from xsched_group_xcu_priv to xsched_rq_cfs xsched: harden and clarify xcu_move_task() logic xsched: change rq_init() parameter from struct xsched_cu to struct xsched_rq xsched: refactor xsched_entity initialization xsched: modify xcu_cg_initialize_components helper xsched: add init_fair_xsched_group() for XCU-specific fair group initialization xsched/core: immediate cleanup of remaining operators on task exit xsched: replace magic number with SCHED_CLASS_MAX_LENGTH for scheduler class string length xsched: simplify root group check by direct comparison with root_xcg xsched: restructure xcu_cgrp_subsys teardown to align with cpu_cgrp_subsys lifecycle xsched: remove the standalone xcu_cfs_cg_deinit() helper xsched: fix potential NULL dereference in xsched_quota_timeout_update drivers/xcu/xcu_group.c | 2 +- include/linux/xsched.h | 92 +++++++++------- kernel/xsched/cfs.c | 208 ++++++++++++++++++----------------- kernel/xsched/cfs_quota.c | 79 +++++++++----- kernel/xsched/cgroup.c | 223 ++++++++++++++++++-------------------- kernel/xsched/core.c | 121 ++++++++++----------- kernel/xsched/rt.c | 44 ++++---- kernel/xsched/vstream.c | 25 +++-- 8 files changed, 414 insertions(+), 380 deletions(-) -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Introduce a new scheduler class hook has_running() to the xsched_class interface. This hook allows each scheduling class (e.g., CFS, RT) to determine whether its associated runqueue contains runnable or pending entities that warrant a wake-up or rescheduling event. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 3 +++ kernel/xsched/cfs.c | 10 ++++++++-- kernel/xsched/core.c | 15 ++++++++++++++- kernel/xsched/rt.c | 27 +++++++++------------------ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 86b248a18f97..53c8fa563140 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -429,6 +429,9 @@ struct xsched_class { /* Check context preemption. */ bool (*check_preempt)(struct xsched_entity *xse); + /* Check if runqueue is not empty */ + bool (*has_running)(struct xsched_cu *xcu); + /* Select jobs from XSE to submit on XCU */ size_t (*select_work)(struct xsched_cu *xcu, struct xsched_entity *xse); }; diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 7c2258b32eaf..5db58bbb917f 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -19,7 +19,7 @@ #define CFS_INNER_RQ_EMPTY(cfs_xse) \ ((cfs_xse)->xruntime == XSCHED_TIME_INF) -void xs_rq_add(struct xsched_entity_cfs *xse) +static void xs_rq_add(struct xsched_entity_cfs *xse) { struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; struct rb_node **link = &cfs_rq->ctx_timeline.rb_root.rb_node; @@ -42,7 +42,7 @@ void xs_rq_add(struct xsched_entity_cfs *xse) rb_insert_color_cached(&xse->run_node, &cfs_rq->ctx_timeline, leftmost); } -void xs_rq_remove(struct xsched_entity_cfs *xse) +static void xs_rq_remove(struct xsched_entity_cfs *xse) { struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; @@ -195,6 +195,11 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) 
xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; } +static inline bool has_running_fair(struct xsched_cu *xcu) +{ + return !!xcu->xrq.cfs.nr_running; +} + static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) { struct xsched_entity_cfs *xse; @@ -256,4 +261,5 @@ struct xsched_class fair_xsched_class = { .pick_next_ctx = pick_next_ctx_fair, .put_prev_ctx = put_prev_ctx_fair, .check_preempt = xs_should_preempt_fair, + .has_running = has_running_fair, }; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 1bf7a93985bb..13de4bec1ba6 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -366,6 +366,19 @@ struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs) return vsm; } +static bool xcu_has_running(struct xsched_cu *xcu) +{ + bool ret = false; + struct xsched_class *sched; + + mutex_lock(&xcu->xcu_lock); + for_each_xsched_class(sched) + ret |= sched->has_running(xcu); + mutex_unlock(&xcu->xcu_lock); + + return ret; +} + int xsched_schedule(void *input_xcu) { struct xsched_cu *xcu = input_xcu; @@ -375,7 +388,7 @@ int xsched_schedule(void *input_xcu) while (!kthread_should_stop()) { mutex_unlock(&xcu->xcu_lock); wait_event_interruptible(xcu->wq_xcu_idle, - xcu->xrq.rt.nr_running || xcu->xrq.cfs.nr_running || kthread_should_stop()); + xcu_has_running(xcu) || kthread_should_stop()); mutex_lock(&xcu->xcu_lock); if (kthread_should_stop()) { diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index db7d97fed787..d0ab14f07d9a 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -43,33 +43,23 @@ static inline void xse_rt_move_tail(struct xsched_entity *xse) list_move_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]); } -/* Increase RT runqueue total and per prio nr_running stat. 
*/ -static inline void xrq_inc_nr_running(struct xsched_entity *xse, - struct xsched_cu *xcu) -{ - xcu->xrq.rt.nr_running++; -} - -/* Decrease RT runqueue total and per prio nr_running stat - * and raise a bug if nr_running decrease beyond zero. - */ -static inline void xrq_dec_nr_running(struct xsched_entity *xse) +static void dequeue_ctx_rt(struct xsched_entity *xse) { struct xsched_cu *xcu = xse->xcu; + xse_rt_del(xse); xcu->xrq.rt.nr_running--; } -static void dequeue_ctx_rt(struct xsched_entity *xse) +static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) { - xse_rt_del(xse); - xrq_dec_nr_running(xse); + xse_rt_add(xse, xcu); + xcu->xrq.rt.nr_running++; } -static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) +static inline bool has_running_rt(struct xsched_cu *xcu) { - xse_rt_add(xse, xcu); - xrq_inc_nr_running(xse, xcu); + return !!xcu->xrq.rt.nr_running; } static inline struct xsched_entity *xrq_next_xse(struct xsched_cu *xcu, @@ -153,7 +143,8 @@ struct xsched_class rt_xsched_class = { .enqueue_ctx = enqueue_ctx_rt, .pick_next_ctx = pick_next_ctx_rt, .put_prev_ctx = put_prev_ctx_rt, - .check_preempt = check_preempt_ctx_rt + .check_preempt = check_preempt_ctx_rt, + .has_running = has_running_rt, }; void xsched_rt_prio_set(pid_t tgid, unsigned int prio) -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Virtual runtime (xruntime) should reflect the actual execution progress of a scheduling entity, and thus must be updated only when the entity is running— specifically during put_prev (when it stops executing) and pick_next (when its runtime context is established). Previously, xruntime was incorrectly updated during enqueue/dequeue, which could distort fairness accounting. This commit: - Removes xruntime updates from enqueue/dequeue paths - Centralizes xruntime advancement in put_prev_ctx_fair() - Ensures hierarchical propagation only when CONFIG_CGROUP_XCU is enabled, using a clean, unified control flow that avoids code duplication - Maintains correct behavior for both flat and group scheduling configurations The change aligns the scheduler with CFS semantics, where virtual time advances only while an entity is on the XPU. Fixes: 024b85113850 ("xsched: Add xsched CFS class") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 16 +++++--- kernel/xsched/cfs.c | 90 ++++++++++++++++++++++-------------------- kernel/xsched/cgroup.c | 1 - 3 files changed, 57 insertions(+), 50 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 53c8fa563140..b6c8e01748b3 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -297,16 +297,16 @@ struct xsched_group { }; #endif /* CONFIG_CGROUP_XCU */ -#define XSCHED_SE_OF(cfs_xse) \ - (container_of((cfs_xse), struct xsched_entity, cfs)) - #ifdef CONFIG_CGROUP_XCU #define xcg_parent_grp_xcu(xcg) \ ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) -#define xse_parent_grp_xcu(xse_cfs) \ - (&((XSCHED_SE_OF(xse_cfs) \ - ->parent_grp->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id]))) +#define xse_parent_grp_xcu(xse) \ + (&(((xse)->parent_grp->perxcu_priv[(xse)->xcu->id]))) + +#define for_each_xse(__xse) \ + for (; (__xse) && (__xse)->parent_grp; \ + (__xse) = 
&(xse_parent_grp_xcu((__xse))->xse)) static inline struct xsched_group_xcu_priv * xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) @@ -322,6 +322,10 @@ xse_this_grp(struct xsched_entity_cfs *xse_cfs) { return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL; } +#else + +#define for_each_xse(__xse) for (; (__xse); (__xse) = NULL) + #endif /* CONFIG_CGROUP_XCU */ static inline int xse_integrity_check(struct xsched_entity *xse) diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 5db58bbb917f..a0e9cb83350a 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -84,17 +84,13 @@ xs_pick_first(struct xsched_rq_cfs *cfs_rq) */ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) { - struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); - - for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { - u64 new_xrt = xse_cfs->xruntime + xs_calc_delta_fair(delta, xse_cfs->weight); + u64 new_xrt; - xs_cfs_rq_update(xse_cfs, new_xrt); - xse_cfs->sum_exec_runtime += delta; + new_xrt = xse_cfs->xruntime + + xs_calc_delta_fair(delta, xse_cfs->weight); - if (xg->self->parent == NULL) - break; - } + xs_cfs_rq_update(xse_cfs, new_xrt); + xse_cfs->sum_exec_runtime += delta; } /** @@ -106,24 +102,19 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) */ static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) { - u64 new_xrt; - struct xsched_entity_cfs *entry; + struct xsched_entity_cfs *leftmost; for (; xg; xg = &xcg_parent_grp_xcu(xg)) { xg->cfs_rq->nr_running += task_delta; - entry = xs_pick_first(xg->cfs_rq); - new_xrt = entry ? xs_calc_delta_fair(entry->xruntime, xg->xse.cfs.weight) - : XSCHED_TIME_INF; - xg->cfs_rq->min_xruntime = new_xrt; - xg->xse.cfs.xruntime = new_xrt; + leftmost = xs_pick_first(xg->cfs_rq); + xg->cfs_rq->min_xruntime = leftmost ? 
+ leftmost->xruntime : XSCHED_TIME_INF; if (!xg->xse.on_rq) break; if (!xg->self->parent) break; - - xs_cfs_rq_update(&xg->xse.cfs, new_xrt); } } @@ -143,7 +134,7 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; xs_rq_remove(xse_cfs); - xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + xg_update(xse_parent_grp_xcu(xse), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; @@ -160,27 +151,28 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { - int task_delta; + int task_delta = 1; struct xsched_entity_cfs *first; - struct xsched_rq_cfs *rq, *sub_rq; + struct xsched_rq_cfs *rq; struct xsched_entity_cfs *xse_cfs = &xse->cfs; - rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse)->cfs_rq; if (!rq) { XSCHED_WARN("the parent rq this xse [%d] attached cannot be NULL @ %s\n", xse->tgid, __func__); return; } - sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; - if (xse->is_group && !sub_rq) { - XSCHED_WARN("the sub_rq this cgroup-type xse [%d] owned cannot be NULL @ %s\n", - xse->tgid, __func__); - return; - } + if (xse->is_group) { + struct xsched_rq_cfs *sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; - task_delta = - (xse->is_group) ? 
sub_rq->nr_running : 1; + if (!sub_rq) { + XSCHED_WARN("the sub_rq of this cgroup-type xse [%d] can't be NULL @ %s\n", + xse->tgid, __func__); + return; + } + task_delta = sub_rq->nr_running; + } /* If no XSE or only empty groups */ if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) @@ -189,7 +181,7 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); xs_rq_add(xse_cfs); - xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + xg_update(xse_parent_grp_xcu(xse), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; @@ -200,22 +192,30 @@ static inline bool has_running_fair(struct xsched_cu *xcu) return !!xcu->xrq.cfs.nr_running; } +static inline struct xsched_rq_cfs * +next_cfs_rq_of(struct xsched_entity_cfs *xse) +{ +#ifdef CONFIG_CGROUP_XCU + struct xsched_entity *se = container_of(xse, struct xsched_entity, cfs); + + if (se->is_group) + return xse_this_grp_xcu(xse)->cfs_rq; +#endif + return NULL; +} + static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) { struct xsched_entity_cfs *xse; struct xsched_rq_cfs *rq = &xcu->xrq.cfs; + u64 now = ktime_get_ns(); - xse = xs_pick_first(rq); - if (!xse) - return NULL; - - for (; xse && XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) - rq = xse_this_grp_xcu(xse)->cfs_rq; + for (; rq; rq = next_cfs_rq_of(xse)) { + xse = xs_pick_first(rq); + if (!xse) + return NULL; - if (!xse) { - XSCHED_DEBUG("the xse this xcu [%u] is trying to pick is NULL @ %s\n", - xcu->id, __func__); - return NULL; + xse->exec_start = now; } return container_of(xse, struct xsched_entity, cfs); @@ -229,10 +229,14 @@ xs_should_preempt_fair(struct xsched_entity *xse) static void put_prev_ctx_fair(struct xsched_entity *xse) { - struct xsched_entity_cfs *prev = &xse->cfs; + struct xsched_entity *prev = xse; +#ifdef CONFIG_CGROUP_XCU 
xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); - xs_update(prev, xse->last_exec_runtime); +#endif + + for_each_xse(prev) + xs_update(&prev->cfs, xse->last_exec_runtime); } void rq_init_fair(struct xsched_cu *xcu) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index e50556a82cea..425aed6b3cec 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -154,7 +154,6 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->perxcu_priv[id].xse.class = &fair_xsched_class; /* Put new empty groups to the right in parent's rbtree: */ - xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF; xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; xcg->perxcu_priv[id].xse.parent_grp = parent_xg; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- When a task is enqueued or dequeued, its cgroup may transition between empty and non-empty states. To ensure correct hierarchical scheduling: - On enqueue: if a group's nr_running transitions from 0 to 1 and it is not throttled, enqueue its GSE into the parent runqueue and continue propagating upward until reaching an already-active ancestor or root. - On dequeue: if a group's nr_running drops to 0, dequeue its GSE from the parent runqueue and stop propagation once a non-empty ancestor is encountered. This change ensures that only groups with runnable tasks participate in scheduling, matching the behavior of CFS group scheduling in Linux. Fixes: 024b85113850 ("xsched: Add xsched CFS class") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 10 ++-- kernel/xsched/cfs.c | 113 +++++++++++++++++------------------------ kernel/xsched/cgroup.c | 6 +-- kernel/xsched/core.c | 3 +- kernel/xsched/rt.c | 2 + 5 files changed, 58 insertions(+), 76 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index b6c8e01748b3..3cdef751e89b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -242,6 +242,7 @@ struct xsched_group_xcu_priv { struct xsched_entity xse; /* xse of this group on runqueue */ struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ + /* Statistics */ int nr_throttled; u64 throttled_time; @@ -298,12 +299,13 @@ struct xsched_group { #endif /* CONFIG_CGROUP_XCU */ #ifdef CONFIG_CGROUP_XCU -#define xcg_parent_grp_xcu(xcg) \ - ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) - #define xse_parent_grp_xcu(xse) \ (&(((xse)->parent_grp->perxcu_priv[(xse)->xcu->id]))) +#define parent_xse_of(__xse) (&(xse_parent_grp_xcu((__xse))->xse)) + +#define xsched_cfs_rq_of(xse) (xse_parent_grp_xcu((xse))->cfs_rq) + 
#define for_each_xse(__xse) \ for (; (__xse) && (__xse)->parent_grp; \ (__xse) = &(xse_parent_grp_xcu((__xse))->xse)) @@ -324,6 +326,8 @@ xse_this_grp(struct xsched_entity_cfs *xse_cfs) } #else +#define xsched_cfs_rq_of(xse) (&((xse)->xcu->xrq.cfs)) + #define for_each_xse(__xse) for (; (__xse); (__xse) = NULL) #endif /* CONFIG_CGROUP_XCU */ diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index a0e9cb83350a..3afb08def14f 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -62,14 +62,7 @@ static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt) static inline struct xsched_entity_cfs * xs_pick_first(struct xsched_rq_cfs *cfs_rq) { - struct rb_node *left; - - if (!cfs_rq) { - XSCHED_WARN("the rq cannot be NULL @ %s\n", __func__); - return NULL; - } - - left = rb_first_cached(&cfs_rq->ctx_timeline); + struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline); if (!left) return NULL; @@ -93,29 +86,11 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) xse_cfs->sum_exec_runtime += delta; } -/** - * xg_update() - Update container group's xruntime - * @gxcu: Descendant xsched group's private xcu control structure - * - * No locks required to access xsched_group_xcu_priv members, - * because only one worker thread works for one XCU. - */ -static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) +static void update_min_xruntime(struct xsched_rq_cfs *cfs_rq) { - struct xsched_entity_cfs *leftmost; - - for (; xg; xg = &xcg_parent_grp_xcu(xg)) { - xg->cfs_rq->nr_running += task_delta; + struct xsched_entity_cfs *leftmost = xs_pick_first(cfs_rq); - leftmost = xs_pick_first(xg->cfs_rq); - xg->cfs_rq->min_xruntime = leftmost ? - leftmost->xruntime : XSCHED_TIME_INF; - - if (!xg->xse.on_rq) - break; - if (!xg->self->parent) - break; - } + cfs_rq->min_xruntime = leftmost ? 
leftmost->xruntime : XSCHED_TIME_INF; } /* @@ -125,19 +100,42 @@ static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) */ static void dequeue_ctx_fair(struct xsched_entity *xse) { - int task_delta; - struct xsched_cu *xcu = xse->xcu; - struct xsched_entity_cfs *first; + struct xsched_entity *child = xse; + struct xsched_rq_cfs *rq; + + for_each_xse(child) { + if (!child->on_rq) + break; + + rq = xsched_cfs_rq_of(child); + + xs_rq_remove(&child->cfs); + child->on_rq = false; + rq->nr_running--; + + /** + * Dequeue the group's scheduling entity (GSE) from + * its parent runqueue when the group becomes empty, + * so it no longer participates in scheduling until + * new tasks arrive. + */ + if (rq->nr_running > 0) + break; + } +} + +static void place_xsched_entity(struct xsched_rq_cfs *rq, struct xsched_entity *xse) +{ struct xsched_entity_cfs *xse_cfs = &xse->cfs; - task_delta = - (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; + if (!rq) + return; - xs_rq_remove(xse_cfs); - xg_update(xse_parent_grp_xcu(xse), task_delta); + xse_cfs->cfs_rq = rq; + if (rq->min_xruntime != XSCHED_TIME_INF) + xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); - first = xs_pick_first(&xcu->xrq.cfs); - xcu->xrq.cfs.min_xruntime = (first) ? 
first->xruntime : XSCHED_TIME_INF; + xs_rq_add(xse_cfs); } /** @@ -151,40 +149,20 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { - int task_delta = 1; - struct xsched_entity_cfs *first; struct xsched_rq_cfs *rq; - struct xsched_entity_cfs *xse_cfs = &xse->cfs; + struct xsched_entity *child = xse; - rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse)->cfs_rq; - if (!rq) { - XSCHED_WARN("the parent rq this xse [%d] attached cannot be NULL @ %s\n", - xse->tgid, __func__); - return; - } + for_each_xse(child) { + if (child->on_rq) + break; - if (xse->is_group) { - struct xsched_rq_cfs *sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; + rq = xsched_cfs_rq_of(child); - if (!sub_rq) { - XSCHED_WARN("the sub_rq of this cgroup-type xse [%d] can't be NULL @ %s\n", - xse->tgid, __func__); - return; - } - task_delta = sub_rq->nr_running; + place_xsched_entity(rq, child); + child->on_rq = true; + rq->nr_running++; + update_min_xruntime(rq); } - - /* If no XSE or only empty groups */ - if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) - rq->min_xruntime = xse_cfs->xruntime; - else - xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); - - xs_rq_add(xse_cfs); - xg_update(xse_parent_grp_xcu(xse), task_delta); - - first = xs_pick_first(&xcu->xrq.cfs); - xcu->xrq.cfs.min_xruntime = (first) ? 
first->xruntime : XSCHED_TIME_INF; } static inline bool has_running_fair(struct xsched_cu *xcu) @@ -242,6 +220,7 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) void rq_init_fair(struct xsched_cu *xcu) { xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED; + xcu->xrq.cfs.min_xruntime = XSCHED_TIME_INF; } void xse_init_fair(struct xsched_entity *xse) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 425aed6b3cec..e466033dbc60 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -102,6 +102,7 @@ void xcu_cfs_root_cg_init(struct xsched_cu *xcu) root_xcg->perxcu_priv[id].xcu_id = id; root_xcg->perxcu_priv[id].self = root_xcg; root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; + root_xcg->perxcu_priv[id].xse.is_group = true; root_xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; } @@ -148,6 +149,7 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, } xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; + xcg->perxcu_priv[id].cfs_rq->min_xruntime = XSCHED_TIME_INF; xcg->perxcu_priv[id].xse.is_group = true; xcg->perxcu_priv[id].xse.xcu = xcu; @@ -156,10 +158,6 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, /* Put new empty groups to the right in parent's rbtree: */ xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; xcg->perxcu_priv[id].xse.parent_grp = parent_xg; - - mutex_lock(&xcu->xcu_lock); - enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu); - mutex_unlock(&xcu->xcu_lock); } xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 13de4bec1ba6..779e4b4e6fdd 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -127,7 +127,7 @@ void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) } if (!xse->on_rq) { - xse->on_rq = true; + xse->xcu = xcu; xse->class->enqueue_ctx(xse, xcu); XSCHED_DEBUG("Enqueue xse %d @ %s\n", xse->tgid, __func__); } @@ -144,7 +144,6 @@ void dequeue_ctx(struct 
xsched_entity *xse, struct xsched_cu *xcu) if (xse->on_rq) { xse->class->dequeue_ctx(xse); - xse->on_rq = false; XSCHED_DEBUG("Dequeue xse %d @ %s\n", xse->tgid, __func__); } } diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index d0ab14f07d9a..0de63deef54e 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -48,12 +48,14 @@ static void dequeue_ctx_rt(struct xsched_entity *xse) struct xsched_cu *xcu = xse->xcu; xse_rt_del(xse); + xse->on_rq = false; xcu->xrq.rt.nr_running--; } static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) { xse_rt_add(xse, xcu); + xse->on_rq = true; xcu->xrq.rt.nr_running++; } -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- This commit cleans up the xsched scheduler core by removing dead, unreferenced, or redundant code that is no longer used in any scheduling path. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 16 ---------------- kernel/xsched/cfs.c | 3 --- 2 files changed, 19 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 3cdef751e89b..16d3bde2dc9b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -392,22 +392,6 @@ ctx_find_by_tgid_and_xcu(pid_t tgid, struct xsched_cu *xcu) return ret; } -static inline u64 gcd(u64 a, u64 b) -{ - u64 rem; - - while (a != 0 && b != 0) { - if (a > b) { - div64_u64_rem(a, b, &rem); - a = rem; - } else { - div64_u64_rem(b, a, &rem); - b = rem; - } - } - return (a) ? a : b; -} - struct xsched_class { enum xcu_sched_class class_id; size_t kick_slice; diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 3afb08def14f..aa47f7d9ee94 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -16,9 +16,6 @@ */ #include <linux/xsched.h> -#define CFS_INNER_RQ_EMPTY(cfs_xse) \ - ((cfs_xse)->xruntime == XSCHED_TIME_INF) - static void xs_rq_add(struct xsched_entity_cfs *xse) { struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Replace open-coded shares adjustment logic with the xcu_grp_shares_add() and xcu_grp_shares_sub() helper functions when updating group shares in XCU scheduling groups. Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/cgroup.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index e466033dbc60..c8c2fddbfad1 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -592,24 +592,12 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) void xcu_grp_shares_update(struct xsched_group *parent, struct xsched_group *child, u32 shares_cfg) { - int id; - struct xsched_cu *xcu; - if (child->sched_class != XSCHED_TYPE_CFS) return; - parent->children_shares_sum -= child->shares_cfg; - + xcu_grp_shares_sub(parent, child); child->shares_cfg = shares_cfg; - child->weight = child->shares_cfg; - - for_each_active_xcu(xcu, id) { - mutex_lock(&xcu->xcu_lock); - child->perxcu_priv[id].xse.cfs.weight = child->weight; - mutex_unlock(&xcu->xcu_lock); - } - - parent->children_shares_sum += child->shares_cfg; + xcu_grp_shares_add(parent, child); } void xcu_grp_shares_add(struct xsched_group *parent, struct xsched_group *child) -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- The xcu is already bound to xse during enqueue, so passing it again in dequeue_ctx is unnecessary. This change simplifies the interface and avoids redundancy. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 2 +- kernel/xsched/cgroup.c | 4 ++-- kernel/xsched/core.c | 14 +++++++------- kernel/xsched/vstream.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 16d3bde2dc9b..0a09b94886db 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -448,7 +448,7 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); void xsched_rt_prio_set(pid_t tgid, unsigned int prio); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); -void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +void dequeue_ctx(struct xsched_entity *xse); int delete_ctx(struct xsched_context *ctx); #ifdef CONFIG_CGROUP_XCU diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index c8c2fddbfad1..c9779f4224e0 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -114,7 +114,7 @@ static void xcg_perxcu_cfs_rq_deinit(struct xsched_group *xcg, int max_id) for (i = 0; i < max_id; i++) { xcu = xsched_cu_mgr[i]; mutex_lock(&xcu->xcu_lock); - dequeue_ctx(&xcg->perxcu_priv[i].xse, xcu); + dequeue_ctx(&xcg->perxcu_priv[i].xse); kfree(xcg->perxcu_priv[i].cfs_rq); xcg->perxcu_priv[i].cfs_rq = NULL; mutex_unlock(&xcu->xcu_lock); @@ -423,7 +423,7 @@ void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg, mutex_lock(&xcu->xcu_lock); /* dequeue from the current runqueue */ - dequeue_ctx(xse, xcu); + dequeue_ctx(xse); /* attach to the new_xcg */ xsched_group_xse_attach(new_xcg, xse); /* enqueue to the runqueue in new_xcg */ diff 
--git a/kernel/xsched/core.c b/kernel/xsched/core.c index 779e4b4e6fdd..d1ba01b3155c 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -106,7 +106,7 @@ static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) class->select_work(xcu, next) : select_work_def(xcu, next); if (scheduled == 0) { - dequeue_ctx(next, xcu); + dequeue_ctx(next); return NULL; } @@ -133,15 +133,15 @@ void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) } } -void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +void dequeue_ctx(struct xsched_entity *xse) { - lockdep_assert_held(&xcu->xcu_lock); - if (xse_integrity_check(xse)) { XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); return; } + lockdep_assert_held(&xse->xcu->xcu_lock); + if (xse->on_rq) { xse->class->dequeue_ctx(xse); XSCHED_DEBUG("Dequeue xse %d @ %s\n", xse->tgid, __func__); @@ -172,7 +172,7 @@ int delete_ctx(struct xsched_context *ctx) mutex_lock(&xcu->xcu_lock); if (curr_xse == xse) xcu->xrq.curr_xse = NULL; - dequeue_ctx(xse, xcu); + dequeue_ctx(xse); #ifdef CONFIG_CGROUP_XCU xsched_group_xse_detach(xse); @@ -412,11 +412,11 @@ int xsched_schedule(void *input_xcu) /* if not deleted yet */ put_prev_ctx(curr_xse); if (!atomic_read(&curr_xse->kicks_pending_cnt)) - dequeue_ctx(curr_xse, xcu); + dequeue_ctx(curr_xse); #ifdef CONFIG_CGROUP_XCU if (xsched_quota_exceed(curr_xse->parent_grp)) { - dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse, xcu); + dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse); curr_xse->parent_grp->perxcu_priv[xcu->id].nr_throttled++; curr_xse->parent_grp->perxcu_priv[xcu->id].start_throttled_time = ktime_get(); diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index bf2f8c6b5c6c..3ea6fcb97530 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -91,7 +91,7 @@ static void xsched_task_free(struct kref *kref) mutex_unlock(&xcu->ctx_list_lock); mutex_lock(&xcu->xcu_lock); - dequeue_ctx(&ctx->xse, 
xcu); + dequeue_ctx(&ctx->xse); kfree(ctx); mutex_unlock(&xcu->xcu_lock); } -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Extend the existing quota throttling mechanism to support hierarchical enforcement, allowing parent groups to be throttled based on their own quota limits — not just leaf groups. This ensures that resource consumption at any level of the scheduling hierarchy respects its configured budget. - Propagate bandwidth usage upward during task execution so parent groups accumulate runtime. - Apply throttling logic recursively: when a parent group exceeds its quota, it is marked as throttled, and all its descendants are effectively blocked from running (even if they have remaining local quota). - Update unthrottling and enqueue/dequeue paths to respect hierarchical throttling state, ensuring correct scheduler behavior under nested limits. This enhancement strengthens the fairness and predictability of the XCU scheduler in multi-level cgroup deployments, enabling true hierarchical resource control.
Fixes: aafde051ac61 ("xsched: Add support for CFS quota for cgroups") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 5 ++++- kernel/xsched/cfs.c | 14 ++++++++++---- kernel/xsched/cfs_quota.c | 31 +++++++++++++++++++++++++++---- kernel/xsched/core.c | 10 ---------- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 0a09b94886db..09a5760591f9 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -310,6 +310,9 @@ struct xsched_group { for (; (__xse) && (__xse)->parent_grp; \ (__xse) = &(xse_parent_grp_xcu((__xse))->xse)) +#define for_each_xsched_group(__xg) \ + for (; (__xg) && (__xg)->parent; (__xg) = (__xg)->parent) + static inline struct xsched_group_xcu_priv * xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) { @@ -466,7 +469,7 @@ void xsched_quota_init(void); void xsched_quota_timeout_init(struct xsched_group *xg); void xsched_quota_timeout_update(struct xsched_group *xg); void xsched_quota_account(struct xsched_group *xg, s64 exec_time); -bool xsched_quota_exceed(struct xsched_group *xg); +void xsched_quota_check(struct xsched_group *xg, struct xsched_cu *xcu); void xsched_quota_refill(struct work_struct *work); #define XCU_PERIOD_MIN_MS 1 diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index aa47f7d9ee94..df843c06e748 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -206,12 +206,18 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) { struct xsched_entity *prev = xse; -#ifdef CONFIG_CGROUP_XCU - xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); -#endif - for_each_xse(prev) xs_update(&prev->cfs, xse->last_exec_runtime); + +#ifdef CONFIG_CGROUP_XCU + struct xsched_group *group = xse->parent_grp; + struct xsched_cu *xcu = xse->xcu; + + for_each_xsched_group(group) { + xsched_quota_account(group, (s64)xse->last_exec_runtime); + xsched_quota_check(group, xcu); + } +#endif } void rq_init_fair(struct xsched_cu 
*xcu) diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index 70316dab682a..bdbd2330e1b3 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -19,6 +19,28 @@ static struct workqueue_struct *quota_workqueue; +static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu) +{ + int xcu_id = xcu->id; + ktime_t now = ktime_get(); + + if (!xg || READ_ONCE(xg->is_offline)) + return; + + lockdep_assert_held(&xcu->xcu_lock); + + xg->perxcu_priv[xcu_id].nr_throttled++; + xg->perxcu_priv[xcu_id].start_throttled_time = now; + + /** + * When an xse triggers XCU throttling, only the corresponding gse is + * dequeued from this XCU's group scheduling entity (gse) hierarchy, + * no further propagation or global dequeue occurs, ensuring throttling + * is scoped to the affected XCU. + */ + dequeue_ctx(&xg->perxcu_priv[xcu_id].xse); +} + static void xsched_group_unthrottle(struct xsched_group *xg) { uint32_t id; @@ -84,15 +106,16 @@ void xsched_quota_account(struct xsched_group *xg, s64 exec_time) spin_unlock(&xg->lock); } -bool xsched_quota_exceed(struct xsched_group *xg) +void xsched_quota_check(struct xsched_group *xg, struct xsched_cu *xcu) { - bool ret; + bool throttled; spin_lock(&xg->lock); - ret = (xg->quota > 0) ? (xg->runtime >= xg->quota) : false; + throttled = (xg->quota > 0) ? 
(xg->runtime >= xg->quota) : false; spin_unlock(&xg->lock); - return ret; + if (throttled) + xsched_group_throttle(xg, xcu); } void xsched_quota_init(void) diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index d1ba01b3155c..e9b0c6c4c86c 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -414,22 +414,12 @@ int xsched_schedule(void *input_xcu) if (!atomic_read(&curr_xse->kicks_pending_cnt)) dequeue_ctx(curr_xse); -#ifdef CONFIG_CGROUP_XCU - if (xsched_quota_exceed(curr_xse->parent_grp)) { - dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse); - curr_xse->parent_grp->perxcu_priv[xcu->id].nr_throttled++; - curr_xse->parent_grp->perxcu_priv[xcu->id].start_throttled_time = - ktime_get(); - } -#endif - xcu->xrq.curr_xse = NULL; } return 0; } - /* Initializes all xsched XCU objects. * Should only be called from xsched_xcu_register function. */ -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Introduce a throttled flag in cfs_rq to refine group enqueue logic in the presence of throttling, ensuring correctness in both flat and cgroup-enabled (CONFIG_CGROUP_XCU) scheduling hierarchies: - When a group is unthrottled, only update its throttled state, do not immediately enqueue it. Enqueuing is deferred until an xse is actually added to the group. This prevents enqueuing empty groups, which could otherwise cause the scheduler to pick a null or invalid xse and trigger runtime anomalies. - During upward enqueue traversal, stop as soon as a throttled cfs_rq is encountered. This avoids enqueuing group xsched entity that are still under throttling, preserving scheduler invariants. This change enhances scheduler robustness and simplifies throttling logic across configurations. Fixes: 8952ea1a5fb9 ("xsched: fix task stall when quota is disabled") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 10 ++++++++++ kernel/xsched/cfs.c | 10 +++++++++- kernel/xsched/cfs_quota.c | 39 +++++++++++++++++++++++++++------------ 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 09a5760591f9..76b4ce815870 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -83,6 +83,8 @@ struct xsched_rq_cfs { unsigned int load; u64 min_xruntime; struct rb_root_cached ctx_timeline; + + bool throttled; }; /* Base XSched runqueue object structure that contains both mutual and @@ -327,6 +329,14 @@ xse_this_grp(struct xsched_entity_cfs *xse_cfs) { return xse_cfs ? 
xse_this_grp_xcu(xse_cfs)->self : NULL; } + +static inline bool xsched_entity_throttled(struct xsched_entity *xse) +{ + struct xsched_group_xcu_priv *grp_xcu = + container_of(xse, struct xsched_group_xcu_priv, xse); + + return grp_xcu->cfs_rq->throttled; +} #else #define xsched_cfs_rq_of(xse) (&((xse)->xcu->xrq.cfs)) diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index df843c06e748..635e90b9afa1 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -150,11 +150,19 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) struct xsched_entity *child = xse; for_each_xse(child) { + /* + * Terminate upward traversal of parent groups if + * Xse is already enqueued or Group is throttled. + */ if (child->on_rq) break; - rq = xsched_cfs_rq_of(child); +#ifdef CONFIG_CGROUP_XCU + if (child->is_group && xsched_entity_throttled(child)) + break; +#endif + rq = xsched_cfs_rq_of(child); place_xsched_entity(rq, child); child->on_rq = true; rq->nr_running++; diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index bdbd2330e1b3..dbf6f88e3f07 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -29,6 +29,10 @@ static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu lockdep_assert_held(&xcu->xcu_lock); + if (xg->perxcu_priv[xcu_id].cfs_rq->throttled) + return; + + xg->perxcu_priv[xcu_id].cfs_rq->throttled = true; xg->perxcu_priv[xcu_id].nr_throttled++; xg->perxcu_priv[xcu_id].start_throttled_time = now; @@ -43,28 +47,39 @@ static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu static void xsched_group_unthrottle(struct xsched_group *xg) { - uint32_t id; struct xsched_cu *xcu; + ktime_t now = ktime_get(); + int id; for_each_active_xcu(xcu, id) { mutex_lock(&xcu->xcu_lock); - if (!xg || READ_ONCE(xg->is_offline) || - READ_ONCE(xg->sched_class) != XSCHED_TYPE_CFS) { + + if (!xg || READ_ONCE(xg->is_offline)) { mutex_unlock(&xcu->xcu_lock); return; } - if 
(!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { - enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); - wake_up_interruptible(&xcu->wq_xcu_idle); - if (xg->perxcu_priv[id].start_throttled_time != 0) { - xg->perxcu_priv[id].throttled_time += - ktime_to_ns(ktime_sub(ktime_get(), - xg->perxcu_priv[id].start_throttled_time)); + if (!xg->perxcu_priv[id].cfs_rq || + !xg->perxcu_priv[id].cfs_rq->throttled) { + mutex_unlock(&xcu->xcu_lock); + continue; + } - xg->perxcu_priv[id].start_throttled_time = 0; - } + /* + * Avoid inserting empty groups into the rbtree; + * only mark them as throttled. + */ + xg->perxcu_priv[id].cfs_rq->throttled = false; + xg->perxcu_priv[id].throttled_time += + ktime_to_ns(ktime_sub(now, + xg->perxcu_priv[id].start_throttled_time)); + xg->perxcu_priv[id].start_throttled_time = 0; + + if (xg->perxcu_priv[id].cfs_rq->nr_running > 0) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + wake_up_interruptible(&xcu->wq_xcu_idle); } + mutex_unlock(&xcu->xcu_lock); } } -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- The fields nr_throttled, start_throttled_time, and throttled_time are exclusively used in the CFS scheduling path and have no meaning in non-CFS contexts. To improve code clarity and data structure organization, these members are now moved from the generic xsched_group_xcu_priv structure into the CFS-specific xsched_rq_cfs sub-structure. - Encapsulates throttling state within the CFS context where it is actually used, making the code easier to maintain and extend. - Reduces unnecessary fields for non-CFS schedulers. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 11 ++++++----- kernel/xsched/cfs_quota.c | 27 ++++++++++++++------------- kernel/xsched/cgroup.c | 9 +++++---- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 76b4ce815870..ade26d148809 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -85,6 +85,10 @@ struct xsched_rq_cfs { struct rb_root_cached ctx_timeline; bool throttled; + /* Statistics */ + int nr_throttled; + u64 throttled_time; + ktime_t start_throttled_time; }; /* Base XSched runqueue object structure that contains both mutual and @@ -244,11 +248,6 @@ struct xsched_group_xcu_priv { struct xsched_entity xse; /* xse of this group on runqueue */ struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ - - /* Statistics */ - int nr_throttled; - u64 throttled_time; - ktime_t start_throttled_time; }; enum xcu_file_type { @@ -308,6 +307,8 @@ struct xsched_group { #define xsched_cfs_rq_of(xse) (xse_parent_grp_xcu((xse))->cfs_rq) +#define xsched_group_cfs_rq(__xg, __id) ((__xg)->perxcu_priv[(__id)].cfs_rq) + #define for_each_xse(__xse) \ for (; (__xse) && (__xse)->parent_grp; \ (__xse) = &(xse_parent_grp_xcu((__xse))->xse)) diff --git 
a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index dbf6f88e3f07..713bd6436cb7 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -22,19 +22,20 @@ static struct workqueue_struct *quota_workqueue; static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu) { int xcu_id = xcu->id; - ktime_t now = ktime_get(); + struct xsched_rq_cfs *cfs_rq; if (!xg || READ_ONCE(xg->is_offline)) return; lockdep_assert_held(&xcu->xcu_lock); - if (xg->perxcu_priv[xcu_id].cfs_rq->throttled) + cfs_rq = xsched_group_cfs_rq(xg, xcu_id); + if (cfs_rq->throttled) return; - xg->perxcu_priv[xcu_id].cfs_rq->throttled = true; - xg->perxcu_priv[xcu_id].nr_throttled++; - xg->perxcu_priv[xcu_id].start_throttled_time = now; + cfs_rq->throttled = true; + cfs_rq->nr_throttled++; + cfs_rq->start_throttled_time = ktime_get(); /** * When an xse triggers XCU throttling, only the corresponding gse is @@ -47,6 +48,7 @@ static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu static void xsched_group_unthrottle(struct xsched_group *xg) { + struct xsched_rq_cfs *cfs_rq; struct xsched_cu *xcu; ktime_t now = ktime_get(); int id; @@ -59,8 +61,8 @@ static void xsched_group_unthrottle(struct xsched_group *xg) return; } - if (!xg->perxcu_priv[id].cfs_rq || - !xg->perxcu_priv[id].cfs_rq->throttled) { + cfs_rq = xsched_group_cfs_rq(xg, id); + if (!cfs_rq || !cfs_rq->throttled) { mutex_unlock(&xcu->xcu_lock); continue; } @@ -69,13 +71,12 @@ static void xsched_group_unthrottle(struct xsched_group *xg) * Avoid inserting empty groups into the rbtree; * only mark them as throttled. 
*/ - xg->perxcu_priv[id].cfs_rq->throttled = false; - xg->perxcu_priv[id].throttled_time += - ktime_to_ns(ktime_sub(now, - xg->perxcu_priv[id].start_throttled_time)); - xg->perxcu_priv[id].start_throttled_time = 0; + cfs_rq->throttled = false; + cfs_rq->throttled_time += ktime_to_ns( + ktime_sub(now, cfs_rq->start_throttled_time)); + cfs_rq->start_throttled_time = 0; - if (xg->perxcu_priv[id].cfs_rq->nr_running > 0) { + if (cfs_rq->nr_running > 0) { enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); wake_up_interruptible(&xcu->wq_xcu_idle); } diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index c9779f4224e0..95a09485f667 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -686,20 +686,21 @@ static int xcu_stat(struct seq_file *sf, void *v) { struct cgroup_subsys_state *css = seq_css(sf); struct xsched_group *xcucg = xcu_cg_from_css(css); + struct xsched_rq_cfs *cfs_rq; u64 nr_throttled = 0; u64 throttled_time = 0; u64 exec_runtime = 0; int xcu_id; - struct xsched_cu *xcu; if (xcucg->sched_class == XSCHED_TYPE_RT) { seq_printf(sf, "RT group stat is not supported @ %s.\n", __func__); return 0; } - for_each_active_xcu(xcu, xcu_id) { - nr_throttled += xcucg->perxcu_priv[xcu_id].nr_throttled; - throttled_time += xcucg->perxcu_priv[xcu_id].throttled_time; + for (xcu_id = 0; xcu_id < num_active_xcu; xcu_id++) { + cfs_rq = xsched_group_cfs_rq(xcucg, xcu_id); + nr_throttled += cfs_rq->nr_throttled; + throttled_time += cfs_rq->throttled_time; exec_runtime += xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime; } -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- The original xcu_move_task() had fragile assumptions and unclear critical sections. This patch improves correctness and readability by: - Keeping lock ordering safe: release old_xcg->lock (spinlock) before acquiring xcu->xcu_lock (sleeping mutex). - Separating the group-list manipulation (under spinlock) from the runqueue operations (under mutex), making both critical sections shorter and semantically clearer. - Use plain list_for_each_entry since the loop breaks immediately after deletion, no need for _safe when no further traversal occurs. These changes enhance robustness during task migration and make the code easier to audit and maintain. Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/cgroup.c | 52 ++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 95a09485f667..fa6523cc451f 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -399,41 +399,43 @@ static void xcu_cancel_attach(struct cgroup_taskset *tset) void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg, struct xsched_group *new_xcg) { - struct xsched_entity *xse, *tmp; + struct xsched_entity *xse; struct xsched_cu *xcu; + if (!old_xcg || !new_xcg) + return; + spin_lock(&old_xcg->lock); - list_for_each_entry_safe(xse, tmp, &old_xcg->members, group_node) { - if (xse->owner_pid != task_pid_nr(task)) - continue; + list_for_each_entry(xse, &old_xcg->members, group_node) { + if (xse->owner_pid == task_pid_nr(task)) { + if (WARN_ON_ONCE(xse->parent_grp != old_xcg)) + break; - if (old_xcg != xse->parent_grp) { - WARN_ON(old_xcg != xse->parent_grp); - spin_unlock(&old_xcg->lock); - return; + /* delete from the old_xcg */ + list_del(&xse->group_node); + xse->parent_grp = NULL; + break; } + } - xcu = xse->xcu; - - /* delete from the 
old_xcg */ - list_del(&xse->group_node); - - spin_unlock(&old_xcg->lock); - - mutex_lock(&xcu->xcu_lock); - /* dequeue from the current runqueue */ - dequeue_ctx(xse); - /* attach to the new_xcg */ - xsched_group_xse_attach(new_xcg, xse); - /* enqueue to the runqueue in new_xcg */ - enqueue_ctx(xse, xcu); - mutex_unlock(&xcu->xcu_lock); + spin_unlock(&old_xcg->lock); + /* xse not found */ + if (list_entry_is_head(xse, &old_xcg->members, group_node)) return; - } - spin_unlock(&old_xcg->lock); + xcu = xse->xcu; + + mutex_lock(&xcu->xcu_lock); + /* dequeue from the current runqueue */ + dequeue_ctx(xse); + /* attach to the new_xcg */ + xsched_group_xse_attach(new_xcg, xse); + /* enqueue to the runqueue in new_xcg */ + enqueue_ctx(xse, xcu); + wake_up_interruptible(&xcu->wq_xcu_idle); + mutex_unlock(&xcu->xcu_lock); } static void xcu_attach(struct cgroup_taskset *tset) -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- To align with the unified scheduling entity model and improve layering clarity, this commit updates the signature of rq_init() to accept a struct xsched_rq * instead of struct xsched_cu *. The xsched_rq structure now serves as the canonical runqueue representation across both CFS and RT scheduling classes, while xsched_cu is being phased out as a top-level interface for runqueue initialization. It will simplify future support for mixed-class cgroups by decoupling runqueue setup from CU-specific abstractions. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 3 +-- kernel/xsched/cfs.c | 10 +++++++--- kernel/xsched/core.c | 2 +- kernel/xsched/rt.c | 9 ++++++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index ade26d148809..08f0c0a74555 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -96,7 +96,6 @@ struct xsched_rq_cfs { */ struct xsched_rq { struct xsched_entity *curr_xse; - const struct xsched_class *class; int state; int nr_running; @@ -418,7 +417,7 @@ struct xsched_class { void (*xse_deinit)(struct xsched_entity *xse); /* Initialize a new runqueue per xcu */ - void (*rq_init)(struct xsched_cu *xcu); + void (*rq_init)(struct xsched_rq *xrq); /* Removes a given XSE from it's runqueue. 
*/ void (*dequeue_ctx)(struct xsched_entity *xse); diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 635e90b9afa1..14397d0e48f4 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -228,10 +228,14 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) #endif } -void rq_init_fair(struct xsched_cu *xcu) +void rq_init_fair(struct xsched_rq *xrq) { - xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED; - xcu->xrq.cfs.min_xruntime = XSCHED_TIME_INF; + if (!xrq) + return; + + xrq->cfs.nr_running = 0; + xrq->cfs.ctx_timeline = RB_ROOT_CACHED; + xrq->cfs.min_xruntime = XSCHED_TIME_INF; } void xse_init_fair(struct xsched_entity *xse) diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index e9b0c6c4c86c..631920ae11d3 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -445,7 +445,7 @@ int xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, int xcu_id) /* Initialize current XCU's runqueue. */ for_each_xsched_class(sched) - sched->rq_init(xcu); + sched->rq_init(&xcu->xrq); /* This worker should set XCU to XSCHED_XCU_WAIT_IDLE. * If after initialization XCU still has XSCHED_XCU_NONE diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index 0de63deef54e..f62500634347 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -111,14 +111,17 @@ static bool check_preempt_ctx_rt(struct xsched_entity *xse) return true; } -void rq_init_rt(struct xsched_cu *xcu) +void rq_init_rt(struct xsched_rq *xrq) { int prio = 0; - xcu->xrq.rt.nr_running = 0; + if (!xrq) + return; + + xrq->rt.nr_running = 0; for_each_xse_prio(prio) { - INIT_LIST_HEAD(&xcu->xrq.rt.rq[prio]); + INIT_LIST_HEAD(&xrq->rt.rq[prio]); } } -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- This commit refactors the xsched_entity (xse) initialization logic to be scheduling-class-aware, improving modularity and extensibility for future scheduler classes. - Introduce a new helper function find_xsched_class() that determines the appropriate scheduling class (e.g., CFS, RT) based on entity or task properties. - Replace monolithic initialization code with a dispatch mechanism: after identifying the scheduling class, invoke its xse_init() callback to initialize class-specific fields in the xsched_entity. This refactor enhances code clarity, reduces duplication, and establishes a clean interface for supporting multiple scheduling policies under the unified xsched_entity model. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 6 ++-- kernel/xsched/cfs.c | 1 + kernel/xsched/cgroup.c | 10 +++---- kernel/xsched/core.c | 61 ++++++++++++++++++++--------------------- kernel/xsched/rt.c | 6 ++-- kernel/xsched/vstream.c | 2 +- 6 files changed, 42 insertions(+), 44 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 08f0c0a74555..e48e6d404b1c 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -54,9 +54,6 @@ extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; extern struct xsched_class rt_xsched_class; extern struct xsched_class fair_xsched_class; -#define xsched_first_class \ - list_first_entry(&(xsched_class_list), struct xsched_class, node) - #define for_each_xsched_class(class) \ list_for_each_entry((class), &(xsched_class_list), node) @@ -455,7 +452,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, int xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, int xcu_id); int xsched_schedule(void *input_xcu); -int xsched_init_entity(struct xsched_context *ctx, struct vstream_info *vs); +int init_xsched_entity(struct xsched_context *ctx, struct 
vstream_info *vs); int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); @@ -463,6 +460,7 @@ void xsched_rt_prio_set(pid_t tgid, unsigned int prio); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse); int delete_ctx(struct xsched_context *ctx); +const struct xsched_class *find_xsched_class(int class_id); #ifdef CONFIG_CGROUP_XCU /* Xsched group manage functions */ diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 14397d0e48f4..d8298f574387 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -240,6 +240,7 @@ void rq_init_fair(struct xsched_rq *xrq) void xse_init_fair(struct xsched_entity *xse) { + xse->class = &fair_xsched_class; xse->cfs.weight = XSCHED_CFS_WEIGHT_DFLT; } diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index fa6523cc451f..9e3659dc08f4 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -103,7 +103,7 @@ void xcu_cfs_root_cg_init(struct xsched_cu *xcu) root_xcg->perxcu_priv[id].self = root_xcg; root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; root_xcg->perxcu_priv[id].xse.is_group = true; - root_xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; + fair_xsched_class.xse_init(&root_xcg->perxcu_priv[id].xse); } static void xcg_perxcu_cfs_rq_deinit(struct xsched_group *xcg, int max_id) @@ -151,13 +151,11 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; xcg->perxcu_priv[id].cfs_rq->min_xruntime = XSCHED_TIME_INF; - xcg->perxcu_priv[id].xse.is_group = true; - xcg->perxcu_priv[id].xse.xcu = xcu; - xcg->perxcu_priv[id].xse.class = &fair_xsched_class; - /* Put new empty groups to the right in parent's rbtree: */ - xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; + 
fair_xsched_class.xse_init(&xcg->perxcu_priv[id].xse); + xcg->perxcu_priv[id].xse.is_group = true; xcg->perxcu_priv[id].xse.parent_grp = parent_xg; + xcg->perxcu_priv[id].xse.xcu = xcu; } xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 631920ae11d3..5c2fd2f7dbd6 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -184,25 +184,19 @@ int delete_ctx(struct xsched_context *ctx) return 0; } -int xsched_xse_set_class(struct xsched_entity *xse) +const struct xsched_class *find_xsched_class(int class_id) { - struct xsched_class *sched = xsched_first_class; + struct xsched_class *sched; - if (!sched) { - XSCHED_ERR("No xsched classes registered @ %s\n", __func__); - return -EINVAL; - } + if (class_id >= XSCHED_TYPE_NUM) + return NULL; -#ifdef CONFIG_CGROUP_XCU - xsched_group_inherit(current, xse); for_each_xsched_class(sched) { - if (sched->class_id == xse->parent_grp->sched_class) - break; + if (sched->class_id == class_id) + return sched; } -#endif - xse->class = sched; - return 0; + return NULL; } static void submit_kick(struct vstream_metadata *vsm) @@ -463,10 +457,28 @@ int xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, int xcu_id) return 0; } -int xsched_init_entity(struct xsched_context *ctx, struct vstream_info *vs) +int init_xsched_entity(struct xsched_context *ctx, struct vstream_info *vs) { - int err = 0; - struct xsched_entity *xse = &ctx->xse; + int err = 0, class_id = XSCHED_TYPE_DFLT; + struct xsched_entity *xse; + const struct xsched_class *sched; + + if (!ctx || !vs || WARN_ON(vs->xcu == NULL)) + return -EINVAL; + + xse = &ctx->xse; + +#ifdef CONFIG_CGROUP_XCU + xsched_group_inherit(current, xse); + /* inherit the scheduler class from the parent group */ + class_id = xse->parent_grp->sched_class; +#endif + + sched = find_xsched_class(class_id); + if (!sched) + return -ENOENT; + + sched->xse_init(xse); atomic_set(&xse->kicks_pending_cnt, 0); 
atomic_set(&xse->submitted_one_kick, 0); @@ -483,28 +495,15 @@ int xsched_init_entity(struct xsched_context *ctx, struct vstream_info *vs) XSCHED_ERR( "Couldn't find valid xcu for vstream %u dev_id %u @ %s\n", vs->id, vs->dev_id, __func__); - return -EINVAL; + return err; } xse->ctx = ctx; - - if (vs->xcu == NULL) { - WARN_ON(vs->xcu == NULL); - return -EINVAL; - } - xse->xcu = vs->xcu; - err = xsched_xse_set_class(xse); - if (err) { - XSCHED_ERR("Fail to set xse class @ %s\n", __func__); - return err; - } - xse->class->xse_init(xse); - WRITE_ONCE(xse->on_rq, false); - spin_lock_init(&xse->xse_lock); + return err; } diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index f62500634347..3e39135bd065 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -130,8 +130,10 @@ void xse_init_rt(struct xsched_entity *xse) struct task_struct *p; p = find_task_by_vpid(xse->tgid); - xse->rt.prio = p->_resvd->xse_attr.xsched_priority; - XSCHED_DEBUG("Xse init: set priority=%d.\n", xse->rt.prio); + if (p) + xse->rt.prio = p->_resvd->xse_attr.xsched_priority; + + xse->class = &rt_xsched_class; xse->rt.timeslice = XSCHED_RT_TIMESLICE; INIT_LIST_HEAD(&xse->rt.list_node); } diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 3ea6fcb97530..80986e04d7b0 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -229,7 +229,7 @@ static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, init_xsched_ctx(*ctx, vstream_info); - ret = xsched_init_entity(*ctx, vstream_info); + ret = init_xsched_entity(*ctx, vstream_info); if (ret) { XSCHED_ERR("Fail to initialize XSE for context @ %s\n", __func__); -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Modify the xcu_cg_initialize_components helper function to centralize and standardize the initialization logic for scheduling groups. This helper encapsulates common setup steps and provides a clear, single point of maintenance for future enhancements to group initialization. Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/cgroup.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 9e3659dc08f4..b819544c7dcc 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -70,8 +70,10 @@ static int xcu_cg_set_file_show(struct xsched_group *xg, int sched_class) * proper functioning of the xsched_group. * * @param xcg Pointer to the xsched_group to be initialized. + * @param parent_xcg Pointer to the parent xsched_group to be inherited. */ -static void xcu_cg_initialize_components(struct xsched_group *xcg) +static void init_xsched_group( + struct xsched_group *xcg, struct xsched_group *parent_xcg) { spin_lock_init(&xcg->lock); INIT_LIST_HEAD(&xcg->members); @@ -79,16 +81,16 @@ static void xcu_cg_initialize_components(struct xsched_group *xcg) xsched_quota_timeout_init(xcg); INIT_WORK(&xcg->refill_work, xsched_quota_refill); WRITE_ONCE(xcg->is_offline, false); + + xcg->sched_class = parent_xcg ? 
parent_xcg->sched_class : XSCHED_TYPE_DFLT; + xcg->parent = parent_xcg; + xcg->runtime = 0; } void xcu_cg_subsys_init(void) { - xcu_cg_initialize_components(root_xcg); + init_xsched_group(root_xcg, NULL); - root_xcg->sched_class = XSCHED_TYPE_DFLT; - root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; - root_xcg->quota = XSCHED_TIME_INF; - root_xcg->runtime = 0; xsched_quota_init(); xsched_group_cache = KMEM_CACHE(xsched_group, 0); @@ -158,11 +160,10 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->perxcu_priv[id].xse.xcu = xcu; } - xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; - xcu_grp_shares_add(parent_xg, xcg); xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; xcg->quota = XSCHED_TIME_INF; - xcg->runtime = 0; + xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; + xcu_grp_shares_add(parent_xg, xcg); return 0; } @@ -181,10 +182,8 @@ static void xcu_cfs_cg_deinit(struct xsched_group *xcg) static int xcu_cg_init(struct xsched_group *xcg, struct xsched_group *parent_xg) { - xcu_cg_initialize_components(xcg); - xcg->parent = parent_xg; + init_xsched_group(xcg, parent_xg); list_add_tail(&xcg->group_node, &parent_xg->children_groups); - xcg->sched_class = parent_xg->sched_class; switch (xcg->sched_class) { case XSCHED_TYPE_CFS: -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Introduce init_fair_xsched_group() to handle XCU-specific initialization logic for scheduling groups. This function is called after the generic xsched_group_init() to ensure that common group structures are fully set up before applying XCU-dependent configuration. The change is preparatory and non-functional on its own, laying groundwork for robust per-XCU group management in upcoming features such as hierarchical CFS/RT subgroups. Signed-off-by: Liu Kai <liukai284@huawei.com> --- drivers/xcu/xcu_group.c | 2 +- include/linux/xsched.h | 9 ++++++++- kernel/xsched/cfs.c | 11 ++++++++--- kernel/xsched/cgroup.c | 38 +++++++++++++++++++------------------- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index 1cf159b8f57d..b53501759a09 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -328,7 +328,7 @@ int xsched_xcu_register(struct xcu_group *group, uint32_t phys_id) } #ifdef CONFIG_CGROUP_XCU - xcu_cfs_root_cg_init(xcu); + init_fair_xsched_group(root_xcg, xcu, &xcu->xrq.cfs); #endif /* CONFIG_CGROUP_XCU */ return 0; diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e48e6d404b1c..e393377781e9 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -51,8 +51,10 @@ extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; +extern struct xsched_group *root_xcg; extern struct xsched_class rt_xsched_class; extern struct xsched_class fair_xsched_class; +extern struct list_head xsched_class_list; #define for_each_xsched_class(class) \ list_for_each_entry((class), &(xsched_class_list), node) @@ -462,11 +464,16 @@ void dequeue_ctx(struct xsched_entity *xse); int delete_ctx(struct xsched_context *ctx); const struct xsched_class *find_xsched_class(int class_id); +#ifdef CONFIG_XCU_SCHED_CFS +void init_xsched_cfs_rq(struct xsched_rq_cfs *cfs_rq); 
+#endif + #ifdef CONFIG_CGROUP_XCU /* Xsched group manage functions */ void xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_subsys_init(void); -void xcu_cfs_root_cg_init(struct xsched_cu *xcu); +void init_fair_xsched_group(struct xsched_group *xg, + struct xsched_cu *xcu, struct xsched_rq_cfs *cfs_rq); void xcu_grp_shares_update(struct xsched_group *parent, struct xsched_group *child, u32 shares_cfg); void xcu_grp_shares_add(struct xsched_group *parent, struct xsched_group *child); diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index d8298f574387..6b74f96a6860 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -228,14 +228,19 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) #endif } +void init_xsched_cfs_rq(struct xsched_rq_cfs *cfs_rq) +{ + cfs_rq->nr_running = 0; + cfs_rq->ctx_timeline = RB_ROOT_CACHED; + cfs_rq->min_xruntime = XSCHED_TIME_INF; +} + void rq_init_fair(struct xsched_rq *xrq) { if (!xrq) return; - xrq->cfs.nr_running = 0; - xrq->cfs.ctx_timeline = RB_ROOT_CACHED; - xrq->cfs.min_xruntime = XSCHED_TIME_INF; + init_xsched_cfs_rq(&xrq->cfs); } void xse_init_fair(struct xsched_entity *xse) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index b819544c7dcc..32db2afa82ac 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -97,15 +97,23 @@ void xcu_cg_subsys_init(void) xcg_attach_entry_cache = KMEM_CACHE(xcg_attach_entry, 0); } -void xcu_cfs_root_cg_init(struct xsched_cu *xcu) +void init_fair_xsched_group(struct xsched_group *xg, + struct xsched_cu *xcu, struct xsched_rq_cfs *cfs_rq) { int id = xcu->id; - root_xcg->perxcu_priv[id].xcu_id = id; - root_xcg->perxcu_priv[id].self = root_xcg; - root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; - root_xcg->perxcu_priv[id].xse.is_group = true; - fair_xsched_class.xse_init(&root_xcg->perxcu_priv[id].xse); + if (xg != root_xcg && WARN_ON(!xg->parent)) + return; + + xg->perxcu_priv[id].xcu_id = id; + 
xg->perxcu_priv[id].self = xg; + xg->perxcu_priv[id].cfs_rq = cfs_rq; + xg->perxcu_priv[id].xse.xcu = xcu; + xg->perxcu_priv[id].xse.is_group = true; + xg->perxcu_priv[id].xse.parent_grp = xg->parent; + + /* Put new empty groups to the right in parent's rbtree */ + fair_xsched_class.xse_init(&xg->perxcu_priv[id].xse); } static void xcg_perxcu_cfs_rq_deinit(struct xsched_group *xcg, int max_id) @@ -140,24 +148,16 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, struct xsched_rq_cfs *sub_cfs_rq; for_each_active_xcu(xcu, id) { - xcg->perxcu_priv[id].xcu_id = id; - xcg->perxcu_priv[id].self = xcg; - sub_cfs_rq = kzalloc(sizeof(*sub_cfs_rq), GFP_KERNEL); if (!sub_cfs_rq) { - XSCHED_ERR("Fail to alloc cfs runqueue on xcu %d\n", id); + XSCHED_ERR("Fail to alloc runqueue on xcu %d\n", id); xcg_perxcu_cfs_rq_deinit(xcg, id); return -ENOMEM; } - xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; - xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; - xcg->perxcu_priv[id].cfs_rq->min_xruntime = XSCHED_TIME_INF; - - /* Put new empty groups to the right in parent's rbtree: */ - fair_xsched_class.xse_init(&xcg->perxcu_priv[id].xse); - xcg->perxcu_priv[id].xse.is_group = true; - xcg->perxcu_priv[id].xse.parent_grp = parent_xg; - xcg->perxcu_priv[id].xse.xcu = xcu; + init_xsched_cfs_rq(sub_cfs_rq); + + /* call init_fair_xsched_group() after init_xsched_group() */ + init_fair_xsched_group(xcg, xcu, sub_cfs_rq); } xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- To prevent potential resource leaks and ensure that all resources are properly cleaned up when a task exits, this commit modifies the existing behavior so that any remaining operators are immediately cleared without waiting for their completion. Fixes: 76c15076abcb ("xsched: Add basic scheduler core support") Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/core.c | 18 ++++++------------ kernel/xsched/vstream.c | 21 ++++++++++++++------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 5c2fd2f7dbd6..b715b1401043 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -153,21 +153,15 @@ int delete_ctx(struct xsched_context *ctx) struct xsched_cu *xcu = ctx->xse.xcu; struct xsched_entity *curr_xse = xcu->xrq.curr_xse; struct xsched_entity *xse = &ctx->xse; + int pending; - if (xse_integrity_check(xse)) { - XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + if (xse_integrity_check(xse)) return -EINVAL; - } - - if (!xse->xcu) { - XSCHED_ERR("Try to delete ctx that is not attached to xcu @ %s\n", - __func__); - return -EINVAL; - } - /* Wait till context has been submitted. 
*/ - while (atomic_read(&xse->kicks_pending_cnt)) - usleep_range(100, 200); + pending = atomic_read(&xse->kicks_pending_cnt); + if (pending > 0) + XSCHED_WARN("Delete xse %d on xcu %u with pending %d kicks\n", + xse->tgid, xcu->id, pending); mutex_lock(&xcu->xcu_lock); if (curr_xse == xse) diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 80986e04d7b0..b39e97682dfb 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -69,23 +69,30 @@ static int vstream_file_create(struct vstream_info *vs) static void xsched_task_free(struct kref *kref) { struct xsched_context *ctx; - vstream_info_t *vs, *tmp; + vstream_info_t *vs, *tmp_vs; + vstream_metadata_t *vsm, *tmp_vsm; struct xsched_cu *xcu; ctx = container_of(kref, struct xsched_context, kref); xcu = ctx->xse.xcu; - /* Wait utill xse dequeues */ - while (READ_ONCE(ctx->xse.on_rq)) - usleep_range(100, 200); - mutex_lock(&xcu->ctx_list_lock); - list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { + delete_ctx(ctx); + list_for_each_entry_safe(vs, tmp_vs, &ctx->vstream_list, ctx_node) { list_del(&vs->ctx_node); + + /* delete pending kicks */ + mutex_lock(&xcu->xcu_lock); + spin_lock(&vs->stream_lock); + list_for_each_entry_safe(vsm, tmp_vsm, &vs->metadata_list, node) { + list_del(&vsm->node); + kfree(vsm); + } + spin_unlock(&vs->stream_lock); + mutex_unlock(&xcu->xcu_lock); kfree(vs); } - delete_ctx(ctx); list_del(&ctx->ctx_node); --xcu->nr_ctx; mutex_unlock(&xcu->ctx_list_lock); -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- This commit replaces a hardcoded magic number used for the maximum length of scheduler class name strings with the named constant SCHED_CLASS_MAX_LENGTH. No functional change is introduced, this is a pure cleanup for better code hygiene. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 3 +-- kernel/xsched/cgroup.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e393377781e9..e6baca2211be 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -39,6 +39,7 @@ #define XSCHED_CFS_WEIGHT_DFLT 1024 #define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 +#define SCHED_CLASS_MAX_LENGTH 4 /* * A default kick slice for RT class XSEs. @@ -494,8 +495,6 @@ void xsched_quota_refill(struct work_struct *work); #define XCUCG_SET_FILE_RETRY_COUNT 100 #define XCUCG_SET_FILE_DELAY_MS 10 -#define SCHED_CLASS_MAX_LENGTH 4 - #endif static inline u64 xs_calc_delta(u64 delta_exec, u32 base_weight, u32 weight) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 32db2afa82ac..a12a5cd86094 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -34,7 +34,7 @@ static LIST_HEAD(xcg_attach_list); static DEFINE_MUTEX(xcg_mutex); static DEFINE_MUTEX(xcu_file_show_mutex); -static const char xcu_sched_name[XSCHED_TYPE_NUM][4] = { +static const char xcu_sched_name[XSCHED_TYPE_NUM][SCHED_CLASS_MAX_LENGTH] = { [XSCHED_TYPE_RT] = "rt", [XSCHED_TYPE_CFS] = "cfs" }; -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Replace the dedicated xsched_group_is_root() helper function with a direct pointer comparison against the global root_xcg. Since the root scheduling group is a singleton and uniquely identified by root_xcg, this approach is functionally equivalent but more efficient and straightforward. All existing callers have been updated accordingly. This is a clean, non-functional refactor that enhances maintainability. Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 2 +- kernel/xsched/cgroup.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e6baca2211be..b9a159d1aba8 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -313,7 +313,7 @@ struct xsched_group { (__xse) = &(xse_parent_grp_xcu((__xse))->xse)) #define for_each_xsched_group(__xg) \ - for (; (__xg) && (__xg)->parent; (__xg) = (__xg)->parent) + for (; (__xg) != root_xcg; (__xg) = (__xg)->parent) static inline struct xsched_group_xcu_priv * xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index a12a5cd86094..e2cd8a798438 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -102,6 +102,7 @@ void init_fair_xsched_group(struct xsched_group *xg, { int id = xcu->id; + /* Ensure non-root group has a valid parent */ if (xg != root_xcg && WARN_ON(!xg->parent)) return; @@ -202,11 +203,6 @@ inline struct xsched_group *xcu_cg_from_css(struct cgroup_subsys_state *css) return css ? container_of(css, struct xsched_group, css) : NULL; } -static inline bool xsched_group_is_root(struct xsched_group *xg) -{ - return xg && !xg->parent; -} - /** * xcu_css_alloc() - Allocate and init xcu cgroup. 
* @parent_css: css of parent xcu cgroup @@ -299,7 +295,7 @@ static void xcu_css_offline(struct cgroup_subsys_state *css) cancel_work_sync(&xcg->refill_work); cancel_work_sync(&xcg->file_show_work); - if (!xsched_group_is_root(xcg)) { + if (xcg != root_xcg) { switch (xcg->sched_class) { case XSCHED_TYPE_CFS: xcu_cfs_cg_deinit(xcg); @@ -548,7 +544,7 @@ static ssize_t xcu_sched_class_write(struct kernfs_open_file *of, char *buf, xg = xcu_cg_from_css(css); /* only the first level of root can switch scheduler type */ - if (!xsched_group_is_root(xg->parent)) { + if (xg->parent != root_xcg) { css_put(css); return -EINVAL; } -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 ---------------------------------------- Refactor the xcu_cgrp_subsys cgroup destruction path to follow the same well-established lifecycle pattern used by cpu_cgrp_subsys, clearly separating concerns across the three key cgroup teardown callbacks: - offline: Handles only xsched-specific logic. No memory or structural teardown occurs here. - released: Removes the group from the xsched hierarchy and deletes the group node once all references are gone. - free: Performs final resource cleanup—cancels pending unthrottle timers, destroys deferred work items (e.g., worker tasks), and frees all allocated memory associated with the group. This separation: - Prevents use-after-free and race conditions by respecting cgroup reference semantics. - Improves maintainability and consistency with core kernel cgroup patterns. The change is functionally equivalent but significantly more robust and aligned with upstream cgroup best practices. 
Fixes: 34a49359681b ("xsched: prevent NULL deref by refcounting css and tracking offline state") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 2 -- kernel/xsched/cfs_quota.c | 15 ++------- kernel/xsched/cgroup.c | 65 ++++++++++++++++++++++----------------- 3 files changed, 39 insertions(+), 43 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index b9a159d1aba8..e4b08066c676 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -293,8 +293,6 @@ struct xsched_group { /* to control the xcu.{period, quota, shares} files shown or not */ struct cgroup_file xcu_file[NR_XCU_FILE_TYPES]; struct work_struct file_show_work; - - bool is_offline; }; #endif /* CONFIG_CGROUP_XCU */ diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index 713bd6436cb7..fb4cca6a62f4 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -24,9 +24,6 @@ static void xsched_group_throttle(struct xsched_group *xg, struct xsched_cu *xcu int xcu_id = xcu->id; struct xsched_rq_cfs *cfs_rq; - if (!xg || READ_ONCE(xg->is_offline)) - return; - lockdep_assert_held(&xcu->xcu_lock); cfs_rq = xsched_group_cfs_rq(xg, xcu_id); @@ -56,11 +53,6 @@ static void xsched_group_unthrottle(struct xsched_group *xg) for_each_active_xcu(xcu, id) { mutex_lock(&xcu->xcu_lock); - if (!xg || READ_ONCE(xg->is_offline)) { - mutex_unlock(&xcu->xcu_lock); - return; - } - cfs_rq = xsched_group_cfs_rq(xg, id); if (!cfs_rq || !cfs_rq->throttled) { mutex_unlock(&xcu->xcu_lock); @@ -87,9 +79,7 @@ static void xsched_group_unthrottle(struct xsched_group *xg) void xsched_quota_refill(struct work_struct *work) { - struct xsched_group *xg; - - xg = container_of(work, struct xsched_group, refill_work); + struct xsched_group *xg = container_of(work, struct xsched_group, refill_work); spin_lock(&xg->lock); xg->runtime = max((xg->runtime - xg->quota), (s64)0); @@ -151,8 +141,7 @@ void xsched_quota_timeout_update(struct xsched_group *xg) 
hrtimer_cancel(t); - if (!xg || READ_ONCE(xg->is_offline) || - READ_ONCE(xg->sched_class) != XSCHED_TYPE_CFS) + if (!xg) return; if (xg->quota > 0 && xg->period > 0) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index e2cd8a798438..cc09b7bac34a 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -80,7 +80,6 @@ static void init_xsched_group( INIT_LIST_HEAD(&xcg->children_groups); xsched_quota_timeout_init(xcg); INIT_WORK(&xcg->refill_work, xsched_quota_refill); - WRITE_ONCE(xcg->is_offline, false); xcg->sched_class = parent_xcg ? parent_xcg->sched_class : XSCHED_TYPE_DFLT; xcg->parent = parent_xcg; @@ -233,19 +232,19 @@ static void xcu_css_free(struct cgroup_subsys_state *css) { struct xsched_group *xcg = xcu_cg_from_css(css); + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); + cancel_work_sync(&xcg->file_show_work); + + if (xcg->sched_class == XSCHED_TYPE_CFS) + xcg_perxcu_cfs_rq_deinit(xcg, num_active_xcu); + kmem_cache_free(xsched_group_cache, xcg); } static void delay_xcu_cg_set_file_show_workfn(struct work_struct *work) { - struct xsched_group *xg; - - xg = container_of(work, struct xsched_group, file_show_work); - - if (!xg) { - XSCHED_ERR("xsched_group cannot be null @ %s", __func__); - return; - } + struct xsched_group *xg = container_of(work, struct xsched_group, file_show_work); for (int i = 0; i < XCUCG_SET_FILE_RETRY_COUNT; i++) { if (!xcu_cg_set_file_show(xg, xg->sched_class)) @@ -289,23 +288,23 @@ static void xcu_css_offline(struct cgroup_subsys_state *css) xcg = xcu_cg_from_css(css); - WRITE_ONCE(xcg->is_offline, true); - - hrtimer_cancel(&xcg->quota_timeout); - cancel_work_sync(&xcg->refill_work); - cancel_work_sync(&xcg->file_show_work); + if (xcg == root_xcg) + return; - if (xcg != root_xcg) { - switch (xcg->sched_class) { - case XSCHED_TYPE_CFS: - xcu_cfs_cg_deinit(xcg); - break; - default: - XSCHED_INFO("xcu_cgroup: deinit RT group css=0x%lx\n", - (uintptr_t)&xcg->css); - break; - } + 
switch (xcg->sched_class) { + case XSCHED_TYPE_CFS: + xcu_grp_shares_sub(xcg->parent, xcg); + break; + default: + XSCHED_INFO("xcu_cgroup: deinit RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } } + +static void xcu_css_released(struct cgroup_subsys_state *css) +{ + struct xsched_group *xcg = xcu_cg_from_css(css); list_del(&xcg->group_node); } @@ -682,22 +681,31 @@ static int xcu_stat(struct seq_file *sf, void *v) struct cgroup_subsys_state *css = seq_css(sf); struct xsched_group *xcucg = xcu_cg_from_css(css); struct xsched_rq_cfs *cfs_rq; + struct xsched_cu *xcu; u64 nr_throttled = 0; u64 throttled_time = 0; u64 exec_runtime = 0; - int xcu_id; + int id; if (xcucg->sched_class == XSCHED_TYPE_RT) { seq_printf(sf, "RT group stat is not supported @ %s.\n", __func__); return 0; } - for (xcu_id = 0; xcu_id < num_active_xcu; xcu_id++) { - cfs_rq = xsched_group_cfs_rq(xcucg, xcu_id); + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + + cfs_rq = xsched_group_cfs_rq(xcucg, id); + if (!cfs_rq) { + mutex_unlock(&xcu->xcu_lock); + continue; + } nr_throttled += cfs_rq->nr_throttled; throttled_time += cfs_rq->throttled_time; exec_runtime += - xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime; + xcucg->perxcu_priv[id].xse.cfs.sum_exec_runtime; + + mutex_unlock(&xcu->xcu_lock); } seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); @@ -754,6 +762,7 @@ struct cgroup_subsys xcu_cgrp_subsys = { .css_alloc = xcu_css_alloc, .css_online = xcu_css_online, .css_offline = xcu_css_offline, + .css_released = xcu_css_released, .css_free = xcu_css_free, .can_attach = xcu_can_attach, .cancel_attach = xcu_cancel_attach, -- 2.34.1
hulk inclusion category: cleanup bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 ---------------------------------------- Remove the standalone xcu_cfs_cg_deinit() helper and inline its functionality directly into the scheduler class switch handler in xcu_cg_set_sched_class(). This change simplifies the code by eliminating a single-use helper and ensures that CFS-specific teardown — namely, removing the group’s shares from its parent and deinitializing per-XCU runqueues — is performed precisely when the scheduling class changes. Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/cgroup.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index cc09b7bac34a..1e233a88d0f5 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -168,12 +168,6 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, return 0; } -static void xcu_cfs_cg_deinit(struct xsched_group *xcg) -{ - xcg_perxcu_cfs_rq_deinit(xcg, num_active_xcu); - xcu_grp_shares_sub(xcg->parent, xcg); -} - /** * xcu_cg_init() - Initialize non-root xsched_group structure. * @xcg: new xsched_cgroup @@ -498,7 +492,8 @@ static int xcu_cg_set_sched_class(struct xsched_group *xg, int type) /* deinit old type if necessary */ switch (xg->sched_class) { case XSCHED_TYPE_CFS: - xcu_cfs_cg_deinit(xg); + xcu_grp_shares_sub(xg->parent, xg); + xcg_perxcu_cfs_rq_deinit(xg, num_active_xcu); break; default: break; -- 2.34.1
hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Move quota_timeout hrtimer access after the NULL check on xg to avoid possible dereference of a null pointer. This ensures safe early return when xg is invalid. Fixes: aafde051ac61 ("xsched: Add support for CFS quota for cgroups") Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/xsched/cfs_quota.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index fb4cca6a62f4..591b22968024 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -137,13 +137,14 @@ void xsched_quota_timeout_init(struct xsched_group *xg) void xsched_quota_timeout_update(struct xsched_group *xg) { - struct hrtimer *t = &xg->quota_timeout; - - hrtimer_cancel(t); + struct hrtimer *t; if (!xg) return; + t = &xg->quota_timeout; + hrtimer_cancel(t); + if (xg->quota > 0 && xg->period > 0) hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); else -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/21039 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PUN... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/21039 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PUN...
participants (2)
-
Liu Kai -
patchwork bot