hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- When a task is enqueued or dequeued, its cgroup may transition between empty and non-empty states. To ensure correct hierarchical scheduling: - On enqueue: if a group's nr_running transitions from 0 to 1 and it is not throttled, enqueue its group scheduling entity (GSE) into the parent runqueue and continue propagating upward until reaching an already-active ancestor or root. - On dequeue: if a group's nr_running drops to 0, dequeue its GSE from the parent runqueue and stop propagation once a non-empty ancestor is encountered. This change ensures that only groups with runnable tasks participate in scheduling, matching the behavior of CFS group scheduling in Linux. Fixes: 024b85113850 ("xsched: Add xsched CFS class") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 10 ++-- kernel/xsched/cfs.c | 113 +++++++++++++++++------------------------ kernel/xsched/cgroup.c | 6 +-- kernel/xsched/core.c | 3 +- kernel/xsched/rt.c | 2 + 5 files changed, 58 insertions(+), 76 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index b6c8e01748b3..3cdef751e89b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -242,6 +242,7 @@ struct xsched_group_xcu_priv { struct xsched_entity xse; /* xse of this group on runqueue */ struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ + /* Statistics */ int nr_throttled; u64 throttled_time; @@ -298,12 +299,13 @@ struct xsched_group { #endif /* CONFIG_CGROUP_XCU */ #ifdef CONFIG_CGROUP_XCU -#define xcg_parent_grp_xcu(xcg) \ - ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) - #define xse_parent_grp_xcu(xse) \ (&(((xse)->parent_grp->perxcu_priv[(xse)->xcu->id]))) +#define parent_xse_of(__xse) (&(xse_parent_grp_xcu((__xse))->xse)) + +#define xsched_cfs_rq_of(xse) (xse_parent_grp_xcu((xse))->cfs_rq) + 
#define for_each_xse(__xse) \ for (; (__xse) && (__xse)->parent_grp; \ (__xse) = &(xse_parent_grp_xcu((__xse))->xse)) @@ -324,6 +326,8 @@ xse_this_grp(struct xsched_entity_cfs *xse_cfs) } #else +#define xsched_cfs_rq_of(xse) (&((xse)->xcu->xrq.cfs)) + #define for_each_xse(__xse) for (; (__xse); (__xse) = NULL) #endif /* CONFIG_CGROUP_XCU */ diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index a0e9cb83350a..3afb08def14f 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -62,14 +62,7 @@ static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt) static inline struct xsched_entity_cfs * xs_pick_first(struct xsched_rq_cfs *cfs_rq) { - struct rb_node *left; - - if (!cfs_rq) { - XSCHED_WARN("the rq cannot be NULL @ %s\n", __func__); - return NULL; - } - - left = rb_first_cached(&cfs_rq->ctx_timeline); + struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline); if (!left) return NULL; @@ -93,29 +86,11 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) xse_cfs->sum_exec_runtime += delta; } -/** - * xg_update() - Update container group's xruntime - * @gxcu: Descendant xsched group's private xcu control structure - * - * No locks required to access xsched_group_xcu_priv members, - * because only one worker thread works for one XCU. - */ -static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) +static void update_min_xruntime(struct xsched_rq_cfs *cfs_rq) { - struct xsched_entity_cfs *leftmost; - - for (; xg; xg = &xcg_parent_grp_xcu(xg)) { - xg->cfs_rq->nr_running += task_delta; + struct xsched_entity_cfs *leftmost = xs_pick_first(cfs_rq); - leftmost = xs_pick_first(xg->cfs_rq); - xg->cfs_rq->min_xruntime = leftmost ? - leftmost->xruntime : XSCHED_TIME_INF; - - if (!xg->xse.on_rq) - break; - if (!xg->self->parent) - break; - } + cfs_rq->min_xruntime = leftmost ? 
leftmost->xruntime : XSCHED_TIME_INF; } /* @@ -125,19 +100,42 @@ static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) */ static void dequeue_ctx_fair(struct xsched_entity *xse) { - int task_delta; - struct xsched_cu *xcu = xse->xcu; - struct xsched_entity_cfs *first; + struct xsched_entity *child = xse; + struct xsched_rq_cfs *rq; + + for_each_xse(child) { + if (!child->on_rq) + break; + + rq = xsched_cfs_rq_of(child); + + xs_rq_remove(&child->cfs); + child->on_rq = false; + rq->nr_running--; + + /** + * Dequeue the group's scheduling entity (GSE) from + * its parent runqueue when the group becomes empty, + * so it no longer participates in scheduling until + * new tasks arrive. + */ + if (rq->nr_running > 0) + break; + } +} + +static void place_xsched_entity(struct xsched_rq_cfs *rq, struct xsched_entity *xse) +{ struct xsched_entity_cfs *xse_cfs = &xse->cfs; - task_delta = - (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; + if (!rq) + return; - xs_rq_remove(xse_cfs); - xg_update(xse_parent_grp_xcu(xse), task_delta); + xse_cfs->cfs_rq = rq; + if (rq->min_xruntime != XSCHED_TIME_INF) + xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); - first = xs_pick_first(&xcu->xrq.cfs); - xcu->xrq.cfs.min_xruntime = (first) ? 
first->xruntime : XSCHED_TIME_INF; + xs_rq_add(xse_cfs); } /** @@ -151,40 +149,20 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { - int task_delta = 1; - struct xsched_entity_cfs *first; struct xsched_rq_cfs *rq; - struct xsched_entity_cfs *xse_cfs = &xse->cfs; + struct xsched_entity *child = xse; - rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse)->cfs_rq; - if (!rq) { - XSCHED_WARN("the parent rq this xse [%d] attached cannot be NULL @ %s\n", - xse->tgid, __func__); - return; - } + for_each_xse(child) { + if (child->on_rq) + break; - if (xse->is_group) { - struct xsched_rq_cfs *sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; + rq = xsched_cfs_rq_of(child); - if (!sub_rq) { - XSCHED_WARN("the sub_rq of this cgroup-type xse [%d] can't be NULL @ %s\n", - xse->tgid, __func__); - return; - } - task_delta = sub_rq->nr_running; + place_xsched_entity(rq, child); + child->on_rq = true; + rq->nr_running++; + update_min_xruntime(rq); } - - /* If no XSE or only empty groups */ - if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) - rq->min_xruntime = xse_cfs->xruntime; - else - xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); - - xs_rq_add(xse_cfs); - xg_update(xse_parent_grp_xcu(xse), task_delta); - - first = xs_pick_first(&xcu->xrq.cfs); - xcu->xrq.cfs.min_xruntime = (first) ? 
first->xruntime : XSCHED_TIME_INF; } static inline bool has_running_fair(struct xsched_cu *xcu) @@ -242,6 +220,7 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) void rq_init_fair(struct xsched_cu *xcu) { xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED; + xcu->xrq.cfs.min_xruntime = XSCHED_TIME_INF; } void xse_init_fair(struct xsched_entity *xse) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 425aed6b3cec..e466033dbc60 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -102,6 +102,7 @@ void xcu_cfs_root_cg_init(struct xsched_cu *xcu) root_xcg->perxcu_priv[id].xcu_id = id; root_xcg->perxcu_priv[id].self = root_xcg; root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; + root_xcg->perxcu_priv[id].xse.is_group = true; root_xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; } @@ -148,6 +149,7 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, } xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; + xcg->perxcu_priv[id].cfs_rq->min_xruntime = XSCHED_TIME_INF; xcg->perxcu_priv[id].xse.is_group = true; xcg->perxcu_priv[id].xse.xcu = xcu; @@ -156,10 +158,6 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, /* Put new empty groups to the right in parent's rbtree: */ xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; xcg->perxcu_priv[id].xse.parent_grp = parent_xg; - - mutex_lock(&xcu->xcu_lock); - enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu); - mutex_unlock(&xcu->xcu_lock); } xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 13de4bec1ba6..779e4b4e6fdd 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -127,7 +127,7 @@ void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) } if (!xse->on_rq) { - xse->on_rq = true; + xse->xcu = xcu; xse->class->enqueue_ctx(xse, xcu); XSCHED_DEBUG("Enqueue xse %d @ %s\n", xse->tgid, __func__); } @@ -144,7 +144,6 @@ void dequeue_ctx(struct 
xsched_entity *xse, struct xsched_cu *xcu) if (xse->on_rq) { xse->class->dequeue_ctx(xse); - xse->on_rq = false; XSCHED_DEBUG("Dequeue xse %d @ %s\n", xse->tgid, __func__); } } diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c index d0ab14f07d9a..0de63deef54e 100644 --- a/kernel/xsched/rt.c +++ b/kernel/xsched/rt.c @@ -48,12 +48,14 @@ static void dequeue_ctx_rt(struct xsched_entity *xse) struct xsched_cu *xcu = xse->xcu; xse_rt_del(xse); + xse->on_rq = false; xcu->xrq.rt.nr_running--; } static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) { xse_rt_add(xse, xcu); + xse->on_rq = true; xcu->xrq.rt.nr_running++; } -- 2.34.1