hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8423 -------------------------------- Virtual runtime (xruntime) should reflect the actual execution progress of a scheduling entity, and thus must be advanced only for time the entity actually spent running. The accounting is therefore tied to the two points that bracket execution: pick_next (when the entity's runtime context is established) records exec_start, and put_prev (when the entity stops executing) advances xruntime by the weighted runtime it consumed. Previously, xruntime was incorrectly updated during enqueue/dequeue, which could distort fairness accounting. This commit: - Removes xruntime updates from enqueue/dequeue paths - Centralizes xruntime advancement in put_prev_ctx_fair() - Ensures hierarchical propagation only when CONFIG_CGROUP_XCU is enabled, using a clean, unified control flow that avoids code duplication - Maintains correct behavior for both flat and group scheduling configurations The change aligns the scheduler with CFS semantics, where virtual time advances only while an entity is on the XPU. Fixes: 024b85113850 ("xsched: Add xsched CFS class") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 16 +++++--- kernel/xsched/cfs.c | 90 ++++++++++++++++++++++-------------------- kernel/xsched/cgroup.c | 1 - 3 files changed, 57 insertions(+), 50 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 53c8fa563140..b6c8e01748b3 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -297,16 +297,16 @@ struct xsched_group { }; #endif /* CONFIG_CGROUP_XCU */ -#define XSCHED_SE_OF(cfs_xse) \ - (container_of((cfs_xse), struct xsched_entity, cfs)) - #ifdef CONFIG_CGROUP_XCU #define xcg_parent_grp_xcu(xcg) \ ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) -#define xse_parent_grp_xcu(xse_cfs) \ - (&((XSCHED_SE_OF(xse_cfs) \ - ->parent_grp->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id]))) +#define xse_parent_grp_xcu(xse) \ + (&(((xse)->parent_grp->perxcu_priv[(xse)->xcu->id]))) + +#define for_each_xse(__xse) \ + for (; (__xse) && (__xse)->parent_grp; \ + (__xse) = 
&(xse_parent_grp_xcu((__xse))->xse)) static inline struct xsched_group_xcu_priv * xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) @@ -322,6 +322,10 @@ xse_this_grp(struct xsched_entity_cfs *xse_cfs) { return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL; } +#else + +#define for_each_xse(__xse) for (; (__xse); (__xse) = NULL) + #endif /* CONFIG_CGROUP_XCU */ static inline int xse_integrity_check(struct xsched_entity *xse) diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 5db58bbb917f..a0e9cb83350a 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -84,17 +84,13 @@ xs_pick_first(struct xsched_rq_cfs *cfs_rq) */ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) { - struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); - - for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { - u64 new_xrt = xse_cfs->xruntime + xs_calc_delta_fair(delta, xse_cfs->weight); + u64 new_xrt; - xs_cfs_rq_update(xse_cfs, new_xrt); - xse_cfs->sum_exec_runtime += delta; + new_xrt = xse_cfs->xruntime + + xs_calc_delta_fair(delta, xse_cfs->weight); - if (xg->self->parent == NULL) - break; - } + xs_cfs_rq_update(xse_cfs, new_xrt); + xse_cfs->sum_exec_runtime += delta; } /** @@ -106,24 +102,19 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) */ static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) { - u64 new_xrt; - struct xsched_entity_cfs *entry; + struct xsched_entity_cfs *leftmost; for (; xg; xg = &xcg_parent_grp_xcu(xg)) { xg->cfs_rq->nr_running += task_delta; - entry = xs_pick_first(xg->cfs_rq); - new_xrt = entry ? xs_calc_delta_fair(entry->xruntime, xg->xse.cfs.weight) - : XSCHED_TIME_INF; - xg->cfs_rq->min_xruntime = new_xrt; - xg->xse.cfs.xruntime = new_xrt; + leftmost = xs_pick_first(xg->cfs_rq); + xg->cfs_rq->min_xruntime = leftmost ? 
+ leftmost->xruntime : XSCHED_TIME_INF; if (!xg->xse.on_rq) break; if (!xg->self->parent) break; - - xs_cfs_rq_update(&xg->xse.cfs, new_xrt); } } @@ -143,7 +134,7 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; xs_rq_remove(xse_cfs); - xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + xg_update(xse_parent_grp_xcu(xse), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; @@ -160,27 +151,28 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { - int task_delta; + int task_delta = 1; struct xsched_entity_cfs *first; - struct xsched_rq_cfs *rq, *sub_rq; + struct xsched_rq_cfs *rq; struct xsched_entity_cfs *xse_cfs = &xse->cfs; - rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse)->cfs_rq; if (!rq) { XSCHED_WARN("the parent rq this xse [%d] attached cannot be NULL @ %s\n", xse->tgid, __func__); return; } - sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; - if (xse->is_group && !sub_rq) { - XSCHED_WARN("the sub_rq this cgroup-type xse [%d] owned cannot be NULL @ %s\n", - xse->tgid, __func__); - return; - } + if (xse->is_group) { + struct xsched_rq_cfs *sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; - task_delta = - (xse->is_group) ? 
sub_rq->nr_running : 1; + if (!sub_rq) { + XSCHED_WARN("the sub_rq of this cgroup-type xse [%d] can't be NULL @ %s\n", + xse->tgid, __func__); + return; + } + task_delta = sub_rq->nr_running; + } /* If no XSE or only empty groups */ if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) @@ -189,7 +181,7 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); xs_rq_add(xse_cfs); - xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + xg_update(xse_parent_grp_xcu(xse), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; @@ -200,22 +192,30 @@ static inline bool has_running_fair(struct xsched_cu *xcu) return !!xcu->xrq.cfs.nr_running; } +static inline struct xsched_rq_cfs * +next_cfs_rq_of(struct xsched_entity_cfs *xse) +{ +#ifdef CONFIG_CGROUP_XCU + struct xsched_entity *se = container_of(xse, struct xsched_entity, cfs); + + if (se->is_group) + return xse_this_grp_xcu(xse)->cfs_rq; +#endif + return NULL; +} + static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) { struct xsched_entity_cfs *xse; struct xsched_rq_cfs *rq = &xcu->xrq.cfs; + u64 now = ktime_get_ns(); - xse = xs_pick_first(rq); - if (!xse) - return NULL; - - for (; xse && XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) - rq = xse_this_grp_xcu(xse)->cfs_rq; + for (; rq; rq = next_cfs_rq_of(xse)) { + xse = xs_pick_first(rq); + if (!xse) + return NULL; - if (!xse) { - XSCHED_DEBUG("the xse this xcu [%u] is trying to pick is NULL @ %s\n", - xcu->id, __func__); - return NULL; + xse->exec_start = now; } return container_of(xse, struct xsched_entity, cfs); @@ -229,10 +229,14 @@ xs_should_preempt_fair(struct xsched_entity *xse) static void put_prev_ctx_fair(struct xsched_entity *xse) { - struct xsched_entity_cfs *prev = &xse->cfs; + struct xsched_entity *prev = xse; +#ifdef CONFIG_CGROUP_XCU 
xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); - xs_update(prev, xse->last_exec_runtime); +#endif + + for_each_xse(prev) + xs_update(&prev->cfs, xse->last_exec_runtime); } void rq_init_fair(struct xsched_cu *xcu) diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index e50556a82cea..425aed6b3cec 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -154,7 +154,6 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->perxcu_priv[id].xse.class = &fair_xsched_class; /* Put new empty groups to the right in parent's rbtree: */ - xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF; xcg->perxcu_priv[id].xse.cfs.weight = XSCHED_CFS_WEIGHT_DFLT; xcg->perxcu_priv[id].xse.parent_grp = parent_xg; -- 2.34.1