hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IDB5TR ----------------------------------------- NULL pointer dereferences were observed when xsched paths accessed xsched_group fields after the corresponding cgroup had entered the offline/free sequence. css_get() only protects the cgroup from being freed, but does not prevent xcu_css_offline() from running concurrently. As a result, timer callbacks, workqueues and enqueue paths could still race with offline teardown and access partially destroyed group state. Fix this properly by: - introducing an explicit xcg->is_offline flag to mark offline state - checking is_offline in quota timeout and unthrottle paths - moving group deinit and list removal strictly into the css_free() path - protecting css lifetime with css_get()/css_put() - adding NULL checks to guard against unexpected rq corruption This ensures that: - offline and free are cleanly separated - no async paths operate on an already-offline group - css lifetime is protected without relying on global locks Fixes: 43bbefc53356 ("xsched: Add XCU control group implementation and its backend in xsched CFS") Signed-off-by: Zicheng Qu <quzicheng@huawei.com> --- include/linux/xsched.h | 2 ++ kernel/xsched/cfs.c | 26 +++++++++++++++++++++++--- kernel/xsched/cfs_quota.c | 7 +++++++ kernel/xsched/cgroup.c | 33 +++++++++++++++++++-------------- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index a277c70b605b..e33c91c6d969 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -296,6 +296,8 @@ struct xsched_group { /* to control the xcu.{period, quota, shares} files shown or not */ struct cgroup_file xcu_file[NR_XCU_FILE_TYPES]; struct work_struct file_show_work; + + bool is_offline; }; #endif /* CONFIG_CGROUP_XCU */ diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 883ba6974450..ad3d1652cb9b 100644 --- a/kernel/xsched/cfs.c +++ 
b/kernel/xsched/cfs.c @@ -63,7 +63,14 @@ static inline struct xsched_entity_cfs * xs_pick_first(struct xsched_rq_cfs *cfs_rq) { struct xsched_entity_cfs *xse_cfs; - struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline); + struct rb_node *left; + + if (!cfs_rq) { + XSCHED_WARN("the rq cannot be NULL @ %s\n", __func__); + return NULL; + } + + left = rb_first_cached(&cfs_rq->ctx_timeline); if (!left) return NULL; @@ -159,12 +166,25 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { int task_delta; struct xsched_entity_cfs *first; - struct xsched_rq_cfs *rq; + struct xsched_rq_cfs *rq, *sub_rq; struct xsched_entity_cfs *xse_cfs = &xse->cfs; rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + if (!rq) { + XSCHED_WARN("the parent rq this xse [%d] attached cannot be NULL @ %s\n", + xse->tgid, __func__); + return; + } + + sub_rq = xse_this_grp_xcu(xse_cfs)->cfs_rq; + if (xse->is_group && !sub_rq) { + XSCHED_WARN("the sub_rq this cgroup-type xse [%d] owned cannot be NULL @ %s\n", + xse->tgid, __func__); + return; + } + task_delta = - (xse->is_group) ? xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running : 1; + (xse->is_group) ? 
sub_rq->nr_running : 1; /* If no XSE or only empty groups */ if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index 2e17a48c071b..a9c01a6ef388 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -26,6 +26,10 @@ static void xsched_group_unthrottle(struct xsched_group *xg) for_each_active_xcu(xcu, id) { mutex_lock(&xcu->xcu_lock); + if (READ_ONCE(xg->is_offline)) { + mutex_unlock(&xcu->xcu_lock); + return; + } if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); wake_up_interruptible(&xcu->wq_xcu_idle); @@ -107,6 +111,9 @@ void xsched_quota_timeout_update(struct xsched_group *xg) hrtimer_cancel(t); + if (READ_ONCE(xg->is_offline)) + return; + if (xg->quota > 0 && xg->period > 0) hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); else diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 859a901b0b48..f351967e6935 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -78,6 +78,7 @@ static void xcu_cg_initialize_components(struct xsched_group *xcg) INIT_LIST_HEAD(&xcg->children_groups); xsched_quota_timeout_init(xcg); INIT_WORK(&xcg->refill_work, xsched_quota_refill); + WRITE_ONCE(xcg->is_offline, false); } void xcu_cg_subsys_init(void) @@ -113,9 +114,9 @@ static void xcg_perxcu_cfs_rq_deinit(struct xsched_group *xcg, int max_id) xcu = xsched_cu_mgr[i]; mutex_lock(&xcu->xcu_lock); dequeue_ctx(&xcg->perxcu_priv[i].xse, xcu); - mutex_unlock(&xcu->xcu_lock); kfree(xcg->perxcu_priv[i].cfs_rq); xcg->perxcu_priv[i].cfs_rq = NULL; + mutex_unlock(&xcu->xcu_lock); } } @@ -242,10 +243,23 @@ static void xcu_css_free(struct cgroup_subsys_state *css) { struct xsched_group *xcg = xcu_cg_from_css(css); + if (!xsched_group_is_root(xcg)) { + switch (xcg->sched_class) { + case XSCHED_TYPE_CFS: + xcu_cfs_cg_deinit(xcg); + break; + default: + XSCHED_INFO("xcu_cgroup: deinit RT group css=0x%lx\n", + 
(uintptr_t)&xcg->css); + break; + } + } + + list_del(&xcg->group_node); + kmem_cache_free(xsched_group_cache, xcg); } - static void delay_xcu_cg_set_file_show_workfn(struct work_struct *work) { struct xsched_group *xg; @@ -298,21 +312,12 @@ static void xcu_css_offline(struct cgroup_subsys_state *css) struct xsched_group *xcg; xcg = xcu_cg_from_css(css); - if (!xsched_group_is_root(xcg)) { - switch (xcg->sched_class) { - case XSCHED_TYPE_CFS: - xcu_cfs_cg_deinit(xcg); - break; - default: - XSCHED_INFO("xcu_cgroup: deinit RT group css=0x%lx\n", - (uintptr_t)&xcg->css); - break; - } - } + + WRITE_ONCE(xcg->is_offline, true); + hrtimer_cancel(&xcg->quota_timeout); cancel_work_sync(&xcg->refill_work); cancel_work_sync(&xcg->file_show_work); - list_del(&xcg->group_node); } static void xsched_group_xse_attach(struct xsched_group *xg, -- 2.34.1