[PATCH OLK-6.6 0/5] Soft domain improvements and bugfixes

Zhang Qiao (5):
  sched: Add cmdline sched_soft_domain switch for soft domain feature
  sched: Rework cpu.soft_domain_nr_cpu
  sched: Fix soft domain group memleak
  sched: Consider task affinity in wake_soft_domain()
  sched: Fix might sleep in atomic section issue

 kernel/sched/core.c        |  16 ++--
 kernel/sched/fair.c        |  63 +++++++-------
 kernel/sched/sched.h       |  17 +++-
 kernel/sched/soft_domain.c | 162 ++++++++++++++++++++++++++++++++-----
 4 files changed, 199 insertions(+), 59 deletions(-)

-- 
2.18.0.huawei.25

Feedback: The patches you sent to the kernel@openeuler.org mailing list have been
converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/16667
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/FFJ...

[PATCH OLK-6.6 1/5] sched: Add cmdline sched_soft_domain switch for soft domain feature

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICB7K1

--------------------------------

Add a command-line "sched_soft_domain" switch for the soft domain
feature; the switch is enabled by default.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/soft_domain.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
index 5c56428833d1..833e1d7c1073 100644
--- a/kernel/sched/soft_domain.c
+++ b/kernel/sched/soft_domain.c
@@ -17,6 +17,30 @@
 
 #include <linux/sort.h>
 
+static DEFINE_STATIC_KEY_TRUE(__soft_domain_switch);
+
+static int __init soft_domain_switch_setup(char *str)
+{
+	int val = 0;
+
+	if (kstrtoint(str, 0, &val))
+		pr_warn("sched_soft_domain parameter is error: %s\n", str);
+	else {
+		if (val == 1)
+			static_branch_enable(&__soft_domain_switch);
+		else if (val == 0)
+			static_branch_disable(&__soft_domain_switch);
+	}
+
+	return 1;
+}
+__setup("sched_soft_domain=", soft_domain_switch_setup);
+
+static bool soft_domain_enabled(void)
+{
+	return static_branch_likely(&__soft_domain_switch);
+}
+
 static DEFINE_PER_CPU(struct soft_domain *, g_sf_d);
 
 static void free_sub_soft_domain(struct soft_domain *sf_d);
@@ -87,6 +111,8 @@ static void free_soft_domain(void)
 		if (sf_d)
 			free_sub_soft_domain(sf_d);
 	}
+
+	static_branch_disable(&__soft_domain_switch);
 }
 
 void build_soft_domain(void)
@@ -95,6 +121,9 @@ void build_soft_domain(void)
 	static struct cpumask cpus;
 	int i, ret;
 
+	if (!soft_domain_enabled())
+		return;
+
 	cpumask_copy(&cpus, cpu_active_mask);
 	rcu_read_lock();
 	for_each_cpu(i, &cpus) {
@@ -349,6 +378,9 @@ int sched_group_set_soft_domain(struct task_group *tg, long val)
 {
 	int ret = 0;
 
+	if (!soft_domain_enabled())
+		return -EPERM;
+
 	if (val < -1 || val > nr_node_ids)
 		return -EINVAL;
 
-- 
2.18.0.huawei.25
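For reference, a minimal userspace model of the new boot switch: it mirrors
the parsing added above ("1" enables, "0" disables, any other value only
warns and leaves the default-enabled state). This is only an illustrative
sketch, not kernel code; strtol() stands in for kstrtoint() and the plain
boolean stands in for the static key.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool soft_domain_switch = true;     /* models DEFINE_STATIC_KEY_TRUE */

static void soft_domain_switch_setup(const char *str)
{
	char *end;
	long val = strtol(str, &end, 0);   /* stand-in for kstrtoint() */

	if (*str == '\0' || *end != '\0') {
		fprintf(stderr, "sched_soft_domain parameter is error: %s\n", str);
		return;
	}
	if (val == 1)
		soft_domain_switch = true;     /* static_branch_enable() */
	else if (val == 0)
		soft_domain_switch = false;    /* static_branch_disable() */
}

int main(void)
{
	soft_domain_switch_setup("0");     /* as when booting with sched_soft_domain=0 */
	printf("soft domain enabled: %d\n", soft_domain_switch);
	return 0;
}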

[PATCH OLK-6.6 2/5] sched: Rework cpu.soft_domain_nr_cpu

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICB7K1

--------------------------------

Rework cpu.soft_domain_nr_cpu:

1. When enabling a soft domain group, update every child cgroup's
   cpu.soft_domain_nr_cpu value as well.
2. Prohibit writing cpu.soft_domain_nr_cpu while the soft domain
   feature is disabled.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/core.c        |  7 +---
 kernel/sched/sched.h       |  1 +
 kernel/sched/soft_domain.c | 68 ++++++++++++++++++++++++++++----------
 3 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fa8c8e5853f1..1539ab766993 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11761,15 +11761,10 @@ static int cpu_soft_domain_quota_write_u64(struct cgroup_subsys_state *css,
 {
 	struct task_group *tg = css_tg(css);
 
-	if (tg->sf_ctx->policy != 0)
-		return -EINVAL;
-
 	if (val > cpumask_weight(cpumask_of_node(0)))
 		return -EINVAL;
 
-	tg->sf_ctx->nr_cpus = (int)val;
-
-	return 0;
+	return sched_group_set_soft_domain_quota(tg, val);
 }
 
 static u64 cpu_soft_domain_quota_read_u64(struct cgroup_subsys_state *css,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6b8aa78272e3..cb9f8779ffc0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3762,6 +3762,7 @@ void build_soft_domain(void);
 int init_soft_domain(struct task_group *tg);
 
 int sched_group_set_soft_domain(struct task_group *tg, long val);
+int sched_group_set_soft_domain_quota(struct task_group *tg, long val);
 
 static inline struct cpumask *soft_domain_span(unsigned long span[])
 {
diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
index 833e1d7c1073..d4433633a725 100644
--- a/kernel/sched/soft_domain.c
+++ b/kernel/sched/soft_domain.c
@@ -61,7 +61,7 @@ static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
 	sf_d->nr_available_cpus = cpumask_weight(span);
 	cpumask_copy(to_cpumask(sf_d->span), span);
 
-	for_each_cpu_and(i, sched_domain_span(sd), cpus) {
+	for_each_cpu_and(i, span, cpus) {
 		struct soft_subdomain *sub_d = NULL;
 
 		sub_d = kzalloc_node(sizeof(struct soft_subdomain) + cpumask_size(),
@@ -70,13 +70,12 @@ static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
 			free_sub_soft_domain(sf_d);
 			return -ENOMEM;
 		}
-
 		list_add_tail(&sub_d->node, &sf_d->child_domain);
-		cpumask_copy(soft_domain_span(sub_d->span), cpu_clustergroup_mask(i));
+		cpumask_and(soft_domain_span(sub_d->span), span, cpu_clustergroup_mask(i));
 		cpumask_andnot(cpus, cpus, cpu_clustergroup_mask(i));
 	}
 
-	for_each_cpu(i, sched_domain_span(sd)) {
+	for_each_cpu(i, span) {
 		rcu_assign_pointer(per_cpu(g_sf_d, i), sf_d);
 	}
 
@@ -166,6 +165,7 @@ static int subdomain_cmp(const void *a, const void *b)
 
 struct soft_domain_args {
 	int policy;
+	int nr_cpu;
 	struct cpumask *cpus;
 };
 
@@ -174,9 +174,10 @@ static int tg_set_soft_domain(struct task_group *tg, void *data)
 	struct soft_domain_args *args = (struct soft_domain_args *)data;
 
 	tg->sf_ctx->policy = args->policy;
-	if (args->policy)
+	if (args->policy) {
 		cpumask_copy(to_cpumask(tg->sf_ctx->span), args->cpus);
-	else
+		tg->sf_ctx->nr_cpus = args->nr_cpu;
+	} else
 		cpumask_clear(to_cpumask(tg->sf_ctx->span));
 
 	return 0;
@@ -193,8 +194,6 @@ static int __calc_cpu(struct task_group *tg)
 		nr_cpu = DIV_ROUND_UP_ULL(tg->cfs_bandwidth.quota, tg->cfs_bandwidth.period);
 #endif
 
-	tg->sf_ctx->nr_cpus = nr_cpu;
-
 	return nr_cpu;
 }
 
@@ -231,23 +230,36 @@ static struct soft_domain *find_idlest_llc(long policy,
 	int cpu;
 	int max_cpu = 0;
 	struct soft_domain *idlest = NULL;
+	unsigned long min_util = ULONG_MAX;
 
 	/* The user has specified the llc. */
 	if (policy > 0) {
-		cpu = cpumask_first(cpumask_of_node(policy-1));
-		idlest = rcu_dereference(per_cpu(g_sf_d, cpu));
-		return idlest;
+		for_each_cpu(cpu, cpumask_of_node(policy-1)) {
+			idlest = rcu_dereference(per_cpu(g_sf_d, cpu));
+			if (idlest != NULL)
+				break;
+		}
+
+		if (idlest && nr_cpu <= cpumask_weight(to_cpumask(idlest->span)))
+			return idlest;
+
+		return NULL;
 	}
 
 	cpumask_copy(cpus, cpu_active_mask);
 	for_each_cpu(cpu, cpus) {
 		struct soft_domain *sf_d = NULL;
-		unsigned long min_util = ULONG_MAX;
+		struct cpumask *mask;
 
 		sf_d = rcu_dereference(per_cpu(g_sf_d, cpu));
 		if (sf_d == NULL)
			continue;
 
+		mask = to_cpumask(sf_d->span);
+		cpumask_andnot(cpus, cpus, mask);
+		if (nr_cpu > cpumask_weight(mask))
+			continue;
+
 		/*
 		 * LLC selection order:
 		 * 1. When the number of idle cpus meet the requirements,
@@ -260,15 +272,13 @@ static struct soft_domain *find_idlest_llc(long policy,
 			max_cpu = sf_d->nr_available_cpus;
 			idlest = sf_d;
 		} else if (max_cpu == 0) { /* No llc meets the demand */
-			unsigned long util = sum_util(to_cpumask(sf_d->span));
+			unsigned long util = sum_util(mask);
 
 			if (idlest == NULL || util < min_util) {
 				idlest = sf_d;
 				min_util = util;
 			}
 		}
-
-		cpumask_andnot(cpus, cpus, to_cpumask(sf_d->span));
 	}
 
 	return idlest;
@@ -279,9 +289,9 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 	int cpu;
 	int ret = 0;
 	cpumask_var_t cpus;
-	int nr_cpu = __calc_cpu(tg);
 	struct soft_domain_args args;
 	struct domain_node nodes[NR_MAX_CLUSTER] = {0};
+	int nr_cpu = __calc_cpu(tg);
 
 	if (check_policy(tg, policy))
 		return -EINVAL;
@@ -315,7 +325,7 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 		cpumask_clear(cpus);
 		sort(nodes, nr, sizeof(struct domain_node), subdomain_cmp, NULL);
 
-		sf_d->nr_available_cpus -= min(sf_d->nr_available_cpus, tmp_cpu);
+		sf_d->nr_available_cpus -= tmp_cpu;
 		for (i = 0; i < nr; i++) {
 			sub_d = nodes[i].sud_d;
 			tmpmask = to_cpumask(sub_d->span);
@@ -329,12 +339,14 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 		/* 3. attach task group to softdomain. */
 		args.policy = policy;
 		args.cpus = cpus;
+		args.nr_cpu = tmp_cpu;
 		walk_tg_tree_from(tg, tg_set_soft_domain, tg_nop, &args);
 
 		/*
 		 * 4. TODO
 		 * add tg to llc domain task_groups list for load balance.
 		 */
+		tg->sf_ctx->nr_cpus = tmp_cpu;
 		tg->sf_ctx->sf_d = sf_d;
 	} else {
 		ret = -EINVAL;
@@ -357,7 +369,7 @@ static int __sched_group_unset_soft_domain(struct task_group *tg)
 	struct list_head *children = NULL;
 
 	/* If parent has set soft domain, child group can't unset itself. */
-	if (tg->parent->sf_ctx->policy != 0)
+	if (tg->parent->sf_ctx != NULL && tg->parent->sf_ctx->policy != 0)
 		return -EINVAL;
 
 	sf_d = tg->sf_ctx->sf_d;
@@ -403,3 +415,23 @@ int sched_group_set_soft_domain(struct task_group *tg, long val)
 
 	return ret;
 }
+
+int sched_group_set_soft_domain_quota(struct task_group *tg, long val)
+{
+	int ret = 0;
+
+	if (!soft_domain_enabled())
+		return -EPERM;
+
+	mutex_lock(&soft_domain_mutex);
+	if (tg->sf_ctx->policy != 0) {
+		ret = -EINVAL;
+		goto out;
+	} else
+		tg->sf_ctx->nr_cpus = (int)val;
+
+out:
+	mutex_unlock(&soft_domain_mutex);
+
+	return ret;
+}
-- 
2.18.0.huawei.25
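To make the LLC selection order above easier to follow, here is a standalone
sketch of the policy the reworked find_idlest_llc() implements: LLCs that can
never fit the group's nr_cpu are skipped, the LLC with the most still-available
CPUs wins when any LLC has free capacity, and otherwise the least-utilized LLC
is chosen. The struct fields, function name and exact comparisons are
assumptions made for illustration, not the kernel data structures.

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

struct llc {
	int nr_total_cpus;        /* cpumask_weight() of the LLC span */
	int nr_available_cpus;    /* CPUs not yet handed to a soft domain group */
	unsigned long util;       /* sum_util() over the LLC span */
};

static const struct llc *pick_idlest_llc(const struct llc *llcs, size_t n, int nr_cpu)
{
	const struct llc *idlest = NULL;
	int max_avail = 0;
	unsigned long min_util = ULONG_MAX;

	for (size_t i = 0; i < n; i++) {
		const struct llc *l = &llcs[i];

		if (nr_cpu > l->nr_total_cpus)      /* the group can never fit here */
			continue;
		if (l->nr_available_cpus >= nr_cpu && l->nr_available_cpus > max_avail) {
			max_avail = l->nr_available_cpus;   /* rule 1: most free CPUs */
			idlest = l;
		} else if (max_avail == 0 && l->util < min_util) {
			min_util = l->util;                 /* rule 2: least utilized LLC */
			idlest = l;
		}
	}
	return idlest;
}

int main(void)
{
	struct llc llcs[] = { { 16, 0, 900 }, { 16, 4, 300 }, { 16, 8, 500 } };
	const struct llc *l = pick_idlest_llc(llcs, 3, 4);

	printf("chose the LLC with %d available CPUs\n", l ? l->nr_available_cpus : -1);
	return 0;
}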

[PATCH OLK-6.6 3/5] sched: Fix soft domain group memleak

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICB7K1

--------------------------------

The soft domain context allocated in init_soft_domain() was never freed
when the task group was destroyed. Add destroy_soft_domain() and call it
from the error path of alloc_fair_sched_group() and from
unregister_fair_sched_group(). Also let a new child group inherit its
parent's soft domain settings, and unset a directly configured soft
domain from the new cpu_cgroup_css_offline() callback.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/core.c        |  8 +++++
 kernel/sched/fair.c        | 20 ++----------
 kernel/sched/sched.h       | 14 +++++++--
 kernel/sched/soft_domain.c | 62 ++++++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1539ab766993..e2080b7a9d37 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10800,6 +10800,13 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 	return 0;
 }
 
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	offline_soft_domain(tg);
+}
+
 static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
@@ -12232,6 +12239,7 @@ static struct cftype cpu_files[] = {
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
 	.css_extra_stat_show = cpu_extra_stat_show,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 21bd2ca4172d..2ef6b9063cc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -14846,22 +14846,6 @@ void free_fair_sched_group(struct task_group *tg)
 	kfree(tg->se);
 }
 
-#ifdef CONFIG_SCHED_SOFT_DOMAIN
-int init_soft_domain(struct task_group *tg)
-{
-	struct soft_domain_ctx *sf_ctx = NULL;
-
-	sf_ctx = kzalloc(sizeof(*sf_ctx) + cpumask_size(), GFP_KERNEL);
-	if (!sf_ctx)
-		return -ENOMEM;
-
-	sf_ctx->policy = 0;
-	tg->sf_ctx = sf_ctx;
-
-	return 0;
-}
-#endif
-
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct sched_entity *se;
@@ -14882,7 +14866,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	if (ret)
 		goto err;
 
-	ret = init_soft_domain(tg);
+	ret = init_soft_domain(tg, parent);
 	if (ret)
 		goto err;
 
@@ -14908,6 +14892,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	kfree(cfs_rq);
 err:
 	destroy_auto_affinity(tg);
+	destroy_soft_domain(tg);
 	return 0;
 }
 
@@ -14937,6 +14922,7 @@ void unregister_fair_sched_group(struct task_group *tg)
 
 	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
 	destroy_auto_affinity(tg);
+	destroy_soft_domain(tg);
 
 	for_each_possible_cpu(cpu) {
 		if (tg->se[cpu])
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cb9f8779ffc0..fe5821c48fed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3759,8 +3759,9 @@ bool bpf_sched_is_cpu_allowed(struct task_struct *p, int cpu);
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 void build_soft_domain(void);
 
-int init_soft_domain(struct task_group *tg);
-
+int init_soft_domain(struct task_group *tg, struct task_group *parent);
+int destroy_soft_domain(struct task_group *tg);
+void offline_soft_domain(struct task_group *tg);
 int sched_group_set_soft_domain(struct task_group *tg, long val);
 int sched_group_set_soft_domain_quota(struct task_group *tg, long val);
 
@@ -3771,7 +3772,14 @@ static inline struct cpumask *soft_domain_span(unsigned long span[])
 
 #else
 static inline void build_soft_domain(void) { }
-static inline int init_soft_domain(struct task_group *tg)
+static inline int init_soft_domain(struct task_group *tg, struct task_group *parent)
+{
+	return 0;
+}
+
+static inline void offline_soft_domain(struct task_group *tg) { }
+
+static inline int destroy_soft_domain(struct task_group *tg)
 {
 	return 0;
 }
diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
index d4433633a725..c34be1fee3e0 100644
--- a/kernel/sched/soft_domain.c
+++ b/kernel/sched/soft_domain.c
@@ -435,3 +435,65 @@ int sched_group_set_soft_domain_quota(struct task_group *tg, long val)
 
 	return ret;
 }
+
+int init_soft_domain(struct task_group *tg, struct task_group *parent)
+{
+	struct soft_domain_ctx *sf_ctx = NULL;
+	struct soft_domain_ctx *psf_ctx = NULL;
+
+	if (!soft_domain_enabled())
+		return 0;
+
+	sf_ctx = kzalloc(sizeof(*sf_ctx) + cpumask_size(), GFP_KERNEL);
+	if (!sf_ctx)
+		return -ENOMEM;
+
+	mutex_lock(&soft_domain_mutex);
+	psf_ctx = parent->sf_ctx;
+	if (psf_ctx) {
+		sf_ctx->policy = psf_ctx->policy;
+		sf_ctx->nr_cpus = psf_ctx->nr_cpus;
+		cpumask_copy(to_cpumask(sf_ctx->span), to_cpumask(psf_ctx->span));
+	}
+
+	tg->sf_ctx = sf_ctx;
+	mutex_unlock(&soft_domain_mutex);
+
+	return 0;
+}
+
+void offline_soft_domain(struct task_group *tg)
+{
+	struct soft_domain_ctx *sf_ctx = NULL;
+	struct soft_domain_ctx *psf_ctx = NULL;
+
+	if (!soft_domain_enabled())
+		return;
+
+	sf_ctx = tg->sf_ctx;
+	psf_ctx = tg->parent->sf_ctx;
+
+	if (!sf_ctx)
+		return;
+
+	mutex_lock(&soft_domain_mutex);
+	if (sf_ctx->policy != 0) {
+		/*
+		 * If the parent group has no soft domain set, this group
+		 * was configured directly by the user and must be unset here.
		 */
+		if (psf_ctx == NULL || psf_ctx->policy == 0)
+			__sched_group_unset_soft_domain(tg);
+	}
+	mutex_unlock(&soft_domain_mutex);
+}
+
+int destroy_soft_domain(struct task_group *tg)
+{
+	if (!soft_domain_enabled())
+		return 0;
+
+	kfree(tg->sf_ctx);
+
+	return 0;
+}
-- 
2.18.0.huawei.25
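The memleak fix above restructures the soft-domain lifecycle around the cgroup
callbacks: the context is allocated (and inherited from the parent) when the
group is created, unset at css_offline only if this group was configured
directly, and unconditionally freed when the group is destroyed. A simplified
userspace sketch of that pairing follows; the types, field layout and function
bodies are placeholders for illustration, not the kernel's soft_domain_ctx.

#include <stdlib.h>

struct soft_domain_ctx { int policy; int nr_cpus; };
struct task_group {
	struct task_group *parent;
	struct soft_domain_ctx *sf_ctx;
};

/* Group creation: allocate a context and inherit the parent's settings. */
static int init_ctx(struct task_group *tg, struct task_group *parent)
{
	struct soft_domain_ctx *ctx = calloc(1, sizeof(*ctx));

	if (!ctx)
		return -1;
	if (parent && parent->sf_ctx)
		*ctx = *parent->sf_ctx;
	tg->sf_ctx = ctx;
	return 0;
}

/* css_offline: only a group configured directly (parent unset) unsets itself. */
static void offline_ctx(struct task_group *tg)
{
	struct soft_domain_ctx *p = tg->parent ? tg->parent->sf_ctx : NULL;

	if (tg->sf_ctx && tg->sf_ctx->policy != 0 && (!p || p->policy == 0))
		tg->sf_ctx->policy = 0;   /* stands in for __sched_group_unset_soft_domain() */
}

/* Final teardown: always free the context so the allocation cannot leak. */
static void destroy_ctx(struct task_group *tg)
{
	free(tg->sf_ctx);
	tg->sf_ctx = NULL;
}

int main(void)
{
	struct task_group root = { 0 }, child = { 0 };

	init_ctx(&root, NULL);
	root.sf_ctx->policy = 1;      /* root configured by the user */
	child.parent = &root;
	init_ctx(&child, &root);      /* child inherits the parent's settings */

	offline_ctx(&child);          /* parent still set: child keeps the domain */
	destroy_ctx(&child);
	offline_ctx(&root);
	destroy_ctx(&root);
	return 0;
}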

[PATCH OLK-6.6 4/5] sched: Consider task affinity in wake_soft_domain()

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICB7K1

--------------------------------

For a task in a soft domain, CPU selection should use the intersection
of the soft domain span, cpu_active_mask, and the task's CPU affinity.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/fair.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ef6b9063cc5..b6ffe4117a65 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9176,22 +9176,25 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 static int wake_soft_domain(struct task_struct *p, int target)
 {
-	struct cpumask *mask = NULL;
+	struct cpumask *mask = this_cpu_cpumask_var_ptr(select_rq_mask);
 	struct soft_domain_ctx *ctx = NULL;
 
-	rcu_read_lock();
 	ctx = task_group(p)->sf_ctx;
 	if (!ctx || ctx->policy == 0)
-		goto unlock;
+		goto out;
 
-	mask = to_cpumask(ctx->span);
-	if (cpumask_test_cpu(target, mask))
-		goto unlock;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+	cpumask_and(mask, to_cpumask(ctx->span), p->select_cpus);
+#else
+	cpumask_and(mask, to_cpumask(ctx->span), p->cpus_ptr);
+#endif
+	cpumask_and(mask, mask, cpu_active_mask);
+	if (cpumask_empty(mask) || cpumask_test_cpu(target, mask))
+		goto out;
 	else
 		target = cpumask_any_distribute(mask);
 
-unlock:
-	rcu_read_unlock();
+out:
 	return target;
 }
 
@@ -9251,11 +9254,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 			new_cpu = prev_cpu;
 	}
 
-#ifdef CONFIG_SCHED_SOFT_DOMAIN
-	if (sched_feat(SOFT_DOMAIN))
-		new_cpu = prev_cpu = wake_soft_domain(p, prev_cpu);
-#endif
-
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->select_cpus);
 #else
@@ -9264,6 +9262,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	}
 
 	rcu_read_lock();
+
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+	if (sched_feat(SOFT_DOMAIN))
+		new_cpu = prev_cpu = wake_soft_domain(p, prev_cpu);
+#endif
 #ifdef CONFIG_BPF_SCHED
 	if (bpf_sched_enabled()) {
 		ctx.task = p;
-- 
2.18.0.huawei.25
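A small standalone sketch of the selection rule described in the changelog,
using plain 64-bit masks rather than the kernel cpumask API (the mask values
and the __builtin_ctzll() stand-in for cpumask_any_distribute() are assumptions
for illustration): the candidate set is the intersection of the soft domain
span, the task's allowed CPUs and cpu_active_mask; if it is empty, or already
contains the target, the target is kept, otherwise a CPU from the intersection
is picked.

#include <stdint.h>
#include <stdio.h>

static int pick_target(uint64_t soft_span, uint64_t task_allowed,
		       uint64_t cpu_active, int target)
{
	uint64_t mask = soft_span & task_allowed & cpu_active;

	if (mask == 0 || (mask & (1ULL << target)))
		return target;             /* nothing usable, or target already fits */

	/* stand-in for cpumask_any_distribute(): take the lowest set bit */
	return __builtin_ctzll(mask);
}

int main(void)
{
	/* soft domain = CPUs 0-3, task allowed on CPUs 2-5, CPUs 1-5 active */
	int cpu = pick_target(0x0Full, 0x3Cull, 0x3Eull, 7);

	printf("selected CPU %d\n", cpu);  /* 2: the first CPU inside all three masks */
	return 0;
}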

[PATCH OLK-6.6 5/5] sched: Fix might sleep in atomic section issue

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICB7K1

--------------------------------

destroy_auto_affinity() is called from an atomic section, and
smart_grid_usage_dec() might sleep, which causes a sleep-in-atomic
issue. Fix it by moving the smart_grid_usage_dec() call into the new
offline_auto_affinity() helper, invoked from the cgroup css_offline
callback in process context.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/core.c  |  1 +
 kernel/sched/fair.c  | 14 ++++++++++++--
 kernel/sched/sched.h |  2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e2080b7a9d37..349c9f5fc55d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10804,6 +10804,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
+	offline_auto_affinity(tg);
 	offline_soft_domain(tg);
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b6ffe4117a65..c4c3afa6e7b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7282,7 +7282,7 @@ int init_auto_affinity(struct task_group *tg)
 	return 0;
 }
 
-static void destroy_auto_affinity(struct task_group *tg)
+void offline_auto_affinity(struct task_group *tg)
 {
 	struct auto_affinity *auto_affi = tg->auto_affinity;
 
@@ -7294,11 +7294,21 @@ static void destroy_auto_affinity(struct task_group *tg)
 
 	if (auto_affi->period_active)
 		smart_grid_usage_dec();
+}
+
+static void destroy_auto_affinity(struct task_group *tg)
+{
+	struct auto_affinity *auto_affi = tg->auto_affinity;
+
+	if (!smart_grid_enabled())
+		return;
+
+	if (unlikely(!auto_affi))
+		return;
 
 	hrtimer_cancel(&auto_affi->period_timer);
 	sched_grid_zone_del_af(auto_affi);
 	free_affinity_domains(&auto_affi->ad);
-
 	kfree(tg->auto_affinity);
 	tg->auto_affinity = NULL;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fe5821c48fed..b418b334b804 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -573,6 +573,7 @@ static inline int sched_task_is_throttled(struct task_struct *p, int cpu)
 extern void start_auto_affinity(struct auto_affinity *auto_affi);
 extern void stop_auto_affinity(struct auto_affinity *auto_affi);
 extern int init_auto_affinity(struct task_group *tg);
+void offline_auto_affinity(struct task_group *tg);
 extern void tg_update_affinity_domains(int cpu, int online);
 extern int tg_rebuild_affinity_domains(int cpu, struct auto_affinity *auto_affi);
 
@@ -583,6 +584,7 @@ static inline int init_auto_affinity(struct task_group *tg)
 }
 
 static inline void tg_update_affinity_domains(int cpu, int online) {}
+static inline void offline_auto_affinity(struct task_group *tg) { }
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-- 
2.18.0.huawei.25
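The shape of the fix, as a simplified userspace model (names, structure and the
freeing step are placeholders, not the kernel implementation): the call that can
sleep is moved into an offline helper invoked from the cgroup css_offline
callback, which runs in process context, while the remaining teardown path that
can be reached from an atomic section avoids any sleeping calls.

#include <stdio.h>
#include <stdlib.h>

struct auto_affinity { int period_active; };
struct task_group { struct auto_affinity *auto_affinity; };

/* In the kernel this may sleep, which is why it cannot stay in the atomic path. */
static void smart_grid_usage_dec(void)
{
	printf("usage count decremented (may sleep in the real code)\n");
}

/* css_offline path: process context, sleeping is allowed here. */
static void offline_auto_affinity(struct task_group *tg)
{
	struct auto_affinity *aa = tg->auto_affinity;

	if (aa && aa->period_active)
		smart_grid_usage_dec();
}

/* Later teardown path: may run in an atomic section, so no sleeping calls. */
static void destroy_auto_affinity(struct task_group *tg)
{
	free(tg->auto_affinity);
	tg->auto_affinity = NULL;
}

int main(void)
{
	struct task_group tg = { .auto_affinity = calloc(1, sizeof(struct auto_affinity)) };

	if (!tg.auto_affinity)
		return 1;
	tg.auto_affinity->period_active = 1;
	offline_auto_affinity(&tg);     /* first: the cgroup goes offline */
	destroy_auto_affinity(&tg);     /* later: the group is actually freed */
	return 0;
}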