hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IDB5TR

-----------------------------------------

The previous CFS group weight calculation used a "product / share" scheme
to normalize children weights:

    weight = prod(shares_cfg_red[]) / shares_cfg_red[i]

When users configured very large shares values (u64 range), the
intermediate product (sh_prod_red) could easily overflow u64 and wrap
around to a much smaller value. As a result, all computed
children->weight values became zero:

    sh_prod_red < shares_cfg_red[i]  =>  weight = 0

This in turn left the global weight gcd (w_gcd) at zero, and finally
triggered a fatal division by zero in the last normalization step:

    weight = weight / w_gcd    (0 / 0)

This patch fixes the problem by:

1. Narrowing shares_cfg from u64 to u32 to prevent arithmetic overflow.
2. Removing the fragile product-based normalization algorithm entirely.
3. Using shares_cfg directly as the scheduling weight.
4. Introducing the xs_calc_delta() and xs_calc_delta_fair() helpers, which
   scale vruntime with mul_u64_u32_div() and treat a zero weight as 1, so
   the division can never be by zero.
5. Updating all CFS vruntime update paths to use the new safe delta
   helpers.

Because the weight is now the share itself rather than a value inversely
proportional to it, the runtime delta is no longer multiplied by the
weight; it is scaled by XSCHED_CFG_SHARE_DFLT / weight instead, so a
larger share still makes vruntime advance more slowly.

Fixes: 43bbefc53356 ("xsched: Add XCU control group implementation and its backend in xsched CFS") Signed-off-by: Zicheng Qu <quzicheng@huawei.com> --- include/linux/xsched.h | 25 ++++++++++++-- kernel/xsched/cfs.c | 5 +-- kernel/xsched/cgroup.c | 78 ++++++++++++++++++++++-------------------- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e33c91c6d969..9aca677dbf94 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -7,6 +7,7 @@ #include <linux/vstream.h> #include <linux/xcu_group.h> #include <linux/xsched_types.h> +#include <linux/math64.h> #ifndef pr_fmt #define pr_fmt(fmt) fmt @@ -268,8 +269,7 @@ struct xsched_group { int prio; /* Bandwidth setting: shares value set by user */ - u64 shares_cfg; - u64 shares_cfg_red; + u32 shares_cfg; u32 weight; u64 children_shares_sum; @@ -456,7 +456,10 @@ int delete_ctx(struct xsched_context *ctx); void xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_subsys_init(void); void xcu_cfs_root_cg_init(struct xsched_cu *xcu); -void xcu_grp_shares_update(struct xsched_group *parent); +void xcu_grp_shares_update(struct xsched_group *parent, + struct xsched_group *child, u32 shares_cfg); +void xcu_grp_shares_add(struct xsched_group *parent, struct xsched_group *child); +void xcu_grp_shares_sub(struct xsched_group *parent, struct xsched_group *child); void xsched_group_xse_detach(struct xsched_entity *xse); void xsched_quota_init(void); @@ -477,4 +480,20 @@ void xsched_quota_refill(struct work_struct *work); #endif +static inline u64 xs_calc_delta(u64 delta_exec, u32 base_weight, u32 weight) +{ + if (unlikely(weight == 0)) + weight = 1; + + if (weight == base_weight) + return delta_exec; + + return mul_u64_u32_div(delta_exec, base_weight, weight); +} + +static inline u64 xs_calc_delta_fair(u64 delta_exec, u32 weight) +{ + return xs_calc_delta(delta_exec, XSCHED_CFG_SHARE_DFLT, weight); +} + #endif /* 
!__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index ad3d1652cb9b..d2615939292f 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -89,7 +89,7 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { - u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + u64 new_xrt = xse_cfs->xruntime + xs_calc_delta_fair(delta, xse_cfs->weight); xs_cfs_rq_update(xse_cfs, new_xrt); xse_cfs->sum_exec_runtime += delta; @@ -115,7 +115,8 @@ static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) xg->cfs_rq->nr_running += task_delta; entry = xs_pick_first(xg->cfs_rq); if (entry) - new_xrt = xg->xse.cfs.sum_exec_runtime * xg->xse.cfs.weight; + new_xrt = xs_calc_delta_fair(xg->xse.cfs.sum_exec_runtime, + xg->xse.cfs.weight); else new_xrt = XSCHED_TIME_INF; diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index f351967e6935..20ac33121f81 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -164,7 +164,7 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, } xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; - xcu_grp_shares_update(parent_xg); + xcu_grp_shares_add(parent_xg, xcg); xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; xcg->quota = XSCHED_TIME_INF; xcg->runtime = 0; @@ -175,7 +175,7 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, static void xcu_cfs_cg_deinit(struct xsched_group *xcg) { xcg_perxcu_cfs_rq_deinit(xcg, num_active_xcu); - xcu_grp_shares_update(xcg->parent); + xcu_grp_shares_sub(xcg->parent, xcg); } /** @@ -593,47 +593,52 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) return ret; } -void xcu_grp_shares_update(struct xsched_group *parent) +void xcu_grp_shares_update(struct xsched_group *parent, struct xsched_group *child, u32 shares_cfg) { int id; struct xsched_cu *xcu; - struct xsched_group *children; - u64 
rem, sh_sum = 0, sh_gcd = 0, w_gcd = 0, sh_prod_red = 1; - list_for_each_entry(children, &parent->children_groups, group_node) { - if (children->sched_class == XSCHED_TYPE_CFS) - sh_gcd = gcd(sh_gcd, children->shares_cfg); - } + if (child->sched_class != XSCHED_TYPE_CFS) + return; - list_for_each_entry(children, &parent->children_groups, group_node) { - if (children->sched_class == XSCHED_TYPE_CFS) { - sh_sum += children->shares_cfg; - children->shares_cfg_red = div64_u64(children->shares_cfg, sh_gcd); - div64_u64_rem(sh_prod_red, children->shares_cfg_red, &rem); - if (rem) - sh_prod_red *= children->shares_cfg_red; - } - } + parent->children_shares_sum -= child->shares_cfg; - parent->children_shares_sum = sh_sum; + child->shares_cfg = shares_cfg; + child->weight = child->shares_cfg; - list_for_each_entry(children, &parent->children_groups, group_node) { - if (children->sched_class == XSCHED_TYPE_CFS) { - children->weight = div64_u64(sh_prod_red, children->shares_cfg_red); - w_gcd = gcd(w_gcd, children->weight); - } + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + child->perxcu_priv[id].xse.cfs.weight = child->weight; + mutex_unlock(&xcu->xcu_lock); } - list_for_each_entry(children, &parent->children_groups, group_node) { - if (children->sched_class == XSCHED_TYPE_CFS) { - children->weight = div64_u64(children->weight, w_gcd); - for_each_active_xcu(xcu, id) { - mutex_lock(&xcu->xcu_lock); - children->perxcu_priv[id].xse.cfs.weight = children->weight; - mutex_unlock(&xcu->xcu_lock); - } - } + parent->children_shares_sum += child->shares_cfg; +} + +void xcu_grp_shares_add(struct xsched_group *parent, struct xsched_group *child) +{ + int id; + struct xsched_cu *xcu; + + if (child->sched_class != XSCHED_TYPE_CFS) + return; + + child->weight = child->shares_cfg; + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + child->perxcu_priv[id].xse.cfs.weight = child->weight; + mutex_unlock(&xcu->xcu_lock); } + + parent->children_shares_sum += 
child->shares_cfg; +} + +void xcu_grp_shares_sub(struct xsched_group *parent, struct xsched_group *child) +{ + if (child->sched_class != XSCHED_TYPE_CFS) + return; + + parent->children_shares_sum -= child->shares_cfg; } static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, @@ -670,12 +675,11 @@ static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, xsched_quota_timeout_update(xcucg); break; case XCU_FILE_SHARES: - if (val < XCU_SHARES_MIN || val > U64_MAX) { + if (val < XCU_SHARES_MIN || val > U32_MAX) { ret = -EINVAL; break; } - xcucg->shares_cfg = val; - xcu_grp_shares_update(xcucg->parent); + xcu_grp_shares_update(xcucg->parent, xcucg, val); break; default: XSCHED_ERR("invalid operation %lu @ %s\n", cft->private, __func__); @@ -711,7 +715,7 @@ static int xcu_stat(struct seq_file *sf, void *v) } seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); - seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, + seq_printf(sf, "shares cfg: %u/%llu x%u\n", xcucg->shares_cfg, xcucg->parent->children_shares_sum, xcucg->weight); seq_printf(sf, "quota: %lld\n", xcucg->quota); seq_printf(sf, "used: %lld\n", xcucg->runtime); -- 2.34.1