Patches 1-7: Introduce dynamic affinity for the CFS scheduler. Patches 8-9: Small bug fixes.
v1->v2: * move sysctl interface to fair.c
Hui Tang (7): sched: Introduce dynamic affinity for cfs scheduler cpuset: Introduce new interface for scheduler dynamic affinity sched: Adjust wakeup cpu range according CPU util dynamicly sched: Adjust cpu allowed in load balance dynamicly sched: Add statistics for scheduler dynamic affinity sched: Add cmdline for dynamic affinity config: enable CONFIG_QOS_SCHED_DYNAMIC_AFFINITY by default
zhangwei123171 (2): sched/fair: Remove invalid cpu selection logic in dynamic affinity sched/fair: Modify idle cpu judgment in dynamic affinity
arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + fs/proc/base.c | 73 ++++++++++ include/linux/sched.h | 22 +++ init/Kconfig | 10 ++ init/init_task.c | 3 + kernel/cgroup/cpuset.c | 153 ++++++++++++++++++++- kernel/fork.c | 16 +++ kernel/sched/core.c | 98 +++++++++++++ kernel/sched/debug.c | 6 + kernel/sched/fair.c | 183 +++++++++++++++++++++++++ kernel/sched/features.h | 7 + 12 files changed, 572 insertions(+), 1 deletion(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Dynamic affinity sets preferred CPUs for a task. When the utilization of the taskgroup's preferred CPUs is low, the task runs only on its preferred CPUs, which improves CPU resource locality and reduces interference between task cgroups; otherwise the task can burst beyond its preferred CPUs and use other CPUs within its allowed CPUs.
Signed-off-by: Hui Tang tanghui20@huawei.com --- init/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig index b6952df34ec3..129c320d8628 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1026,6 +1026,16 @@ config RT_GROUP_SCHED
endif #CGROUP_SCHED
+config QOS_SCHED_DYNAMIC_AFFINITY + bool "qos dynamic affinity" + depends on CPUSETS + default n + help + This feature lets you allocate preferred cpus to taskgroup. If enabled, + it will make taskgroup only to use preferred cpus when cpu utilization + of taskgroup is below threshold setted, otherwise make taskgroup to use + cpus allowed. + config SCHED_MM_CID def_bool y depends on SMP && RSEQ
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Add the 'prefer_cpus' mask and its related interfaces: a per-thread 'preferred_cpuset' procfs file and a 'preferred_cpus' file in the cgroup cpuset controller.
Signed-off-by: Hui Tang tanghui20@huawei.com --- fs/proc/base.c | 73 ++++++++++++++++++++ include/linux/sched.h | 10 +++ init/init_task.c | 3 + kernel/cgroup/cpuset.c | 150 ++++++++++++++++++++++++++++++++++++++++- kernel/fork.c | 13 ++++ kernel/sched/core.c | 95 ++++++++++++++++++++++++++ 6 files changed, 343 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c index ffd54617c354..243c15919e18 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3165,6 +3165,76 @@ static const struct file_operations proc_setgroups_operations = { }; #endif /* CONFIG_USER_NS */
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + +static int preferred_cpuset_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (p->prefer_cpus) + seq_printf(m, "%*pbl\n", cpumask_pr_args(p->prefer_cpus)); + else + seq_putc(m, '\n'); + + put_task_struct(p); + + return 0; +} + +static ssize_t preferred_cpuset_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + cpumask_var_t new_mask; + int retval; + struct inode *inode = file_inode(file); + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_put_task; + } + + retval = cpumask_parselist_user(buf, count, new_mask); + if (retval < 0) + goto out_free_cpumask; + + retval = set_prefer_cpus_ptr(p, new_mask); + if (retval < 0) + goto out_free_cpumask; + + retval = count; + +out_free_cpumask: + free_cpumask_var(new_mask); +out_put_task: + put_task_struct(p); + + return retval; +} + +static int preferred_cpuset_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, preferred_cpuset_show, inode); +} + +static const struct file_operations proc_preferred_cpuset_operations = { + .open = preferred_cpuset_open, + .write = preferred_cpuset_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3691,6 +3761,9 @@ static const struct pid_entry tid_base_stuff[] = { ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + REG("preferred_cpuset", 0644, proc_preferred_cpuset_operations), +#endif };
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched.h b/include/linux/sched.h index 3520e3fbaa91..9ae33ae2b6e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_t *prefer_cpus; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. @@ -2469,4 +2473,10 @@ static inline int sched_qos_cpu_overload(void) } #endif
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +int set_prefer_cpus_ptr(struct task_struct *p, + const struct cpumask *new_mask); +int sched_prefer_cpus_fork(struct task_struct *p, struct cpumask *mask); +void sched_prefer_cpus_free(struct task_struct *p); +#endif #endif diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b..ac0c5850f74b 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -207,6 +207,9 @@ struct task_struct init_task #ifdef CONFIG_SECURITY .security = NULL, #endif +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + .prefer_cpus = NULL, +#endif #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4749e0c86c62..01f4ff02e7b2 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -115,6 +115,9 @@ struct cpuset { /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; nodemask_t mems_allowed; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_var_t prefer_cpus; +#endif
/* effective CPUs and Memory Nodes allow to tasks */ cpumask_var_t effective_cpus; @@ -212,6 +215,9 @@ static inline bool is_prs_invalid(int prs_state) struct tmpmasks { cpumask_var_t addmask, delmask; /* For partition root */ cpumask_var_t new_cpus; /* For update_cpumasks_hier() */ +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_var_t prefer_cpus; +#endif };
static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) @@ -597,15 +603,24 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) { cpumask_var_t *pmask1, *pmask2, *pmask3; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_var_t *pmask4; +#endif
if (cs) { pmask1 = &cs->cpus_allowed; pmask2 = &cs->effective_cpus; pmask3 = &cs->subparts_cpus; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + pmask4 = &cs->prefer_cpus; +#endif } else { pmask1 = &tmp->new_cpus; pmask2 = &tmp->addmask; pmask3 = &tmp->delmask; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + pmask4 = &tmp->prefer_cpus; +#endif }
if (!zalloc_cpumask_var(pmask1, GFP_KERNEL)) @@ -616,9 +631,17 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
if (!zalloc_cpumask_var(pmask3, GFP_KERNEL)) goto free_two; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!zalloc_cpumask_var(pmask4, GFP_KERNEL)) + goto free_three; +#endif
return 0;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +free_three: + free_cpumask_var(*pmask3); +#endif free_two: free_cpumask_var(*pmask2); free_one: @@ -634,11 +657,17 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) { if (cs) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + free_cpumask_var(cs->prefer_cpus); +#endif free_cpumask_var(cs->cpus_allowed); free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->subparts_cpus); } if (tmp) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + free_cpumask_var(tmp->prefer_cpus); +#endif free_cpumask_var(tmp->new_cpus); free_cpumask_var(tmp->addmask); free_cpumask_var(tmp->delmask); @@ -662,6 +691,9 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) return NULL; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_copy(trial->prefer_cpus, cs->prefer_cpus); +#endif cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); cpumask_copy(trial->effective_cpus, cs->effective_cpus); return trial; @@ -743,6 +775,12 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) if (cur == &top_cpuset) goto out;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + ret = -EINVAL; + if (!cpumask_subset(cur->prefer_cpus, trial->cpus_allowed)) + goto out; +#endif + par = parent_cs(cur);
/* @@ -791,6 +829,66 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) return ret; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +static cpumask_var_t prefer_cpus_attach; + +static void update_tasks_prefer_cpumask(struct cpuset *cs) +{ + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&cs->css, 0, &it); + while ((task = css_task_iter_next(&it))) + set_prefer_cpus_ptr(task, cs->prefer_cpus); + css_task_iter_end(&it); +} + +/* + * update_prefer_cpumask - update the prefer_cpus mask of a cpuset and + * all tasks in it + * @cs: the cpuset to consider + * @trialcs: trial cpuset + * @buf: buffer of cpu numbers written to this cpuset + */ +static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs, + const char *buf) +{ + int retval; + + if (cs == &top_cpuset) + return -EACCES; + + /* + * An empty prefer_cpus is ok which mean that the cpuset tasks disable + * dynamic affinity feature. + * Since cpulist_parse() fails on an empty mask, we special case + * that parsing. + */ + if (!*buf) { + cpumask_clear(trialcs->prefer_cpus); + } else { + retval = cpulist_parse(buf, trialcs->prefer_cpus); + if (retval < 0) + return retval; + } + + /* Nothing to do if the cpus didn't change */ + if (cpumask_equal(cs->prefer_cpus, trialcs->prefer_cpus)) + return 0; + + if (!cpumask_subset(trialcs->prefer_cpus, cs->cpus_allowed)) + return -EINVAL; + + update_tasks_prefer_cpumask(trialcs); + + spin_lock_irq(&callback_lock); + cpumask_copy(cs->prefer_cpus, trialcs->prefer_cpus); + spin_unlock_irq(&callback_lock); + + return 0; +} +#endif + #ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). @@ -2655,6 +2753,10 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) * fail. TODO: have a better way to handle failure here */ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_copy(prefer_cpus_attach, cs->prefer_cpus); + set_prefer_cpus_ptr(task, prefer_cpus_attach); +#endif
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flags(cs, task); @@ -2762,6 +2864,9 @@ typedef enum { FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + FILE_DYNAMIC_CPULIST, +#endif } cpuset_filetype_t;
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, @@ -2892,6 +2997,11 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, case FILE_MEMLIST: retval = update_nodemask(cs, trialcs, buf); break; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + case FILE_DYNAMIC_CPULIST: + retval = update_prefer_cpumask(cs, trialcs, buf); + break; +#endif default: retval = -EINVAL; break; @@ -2939,6 +3049,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) case FILE_SUBPARTS_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus)); break; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + case FILE_DYNAMIC_CPULIST: + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->prefer_cpus)); + break; +#endif default: ret = -EINVAL; } @@ -3161,7 +3276,15 @@ static struct cftype legacy_files[] = { .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED, }, - +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + { + .name = "preferred_cpus", + .seq_show = cpuset_common_seq_show, + .write = cpuset_write_resmask, + .max_write_len = (100U + 6 * NR_CPUS), + .private = FILE_DYNAMIC_CPULIST, + }, +#endif { } /* terminate */ };
@@ -3327,6 +3450,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_copy(cs->prefer_cpus, parent->prefer_cpus); +#endif spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); @@ -3480,6 +3606,9 @@ static void cpuset_fork(struct task_struct *task) return;
set_cpus_allowed_ptr(task, current->cpus_ptr); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + set_prefer_cpus_ptr(task, current->prefer_cpus); +#endif task->mems_allowed = current->mems_allowed; return; } @@ -3526,17 +3655,26 @@ int __init cpuset_init(void) BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL)); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + BUG_ON(!alloc_cpumask_var(&top_cpuset.prefer_cpus, GFP_KERNEL)); +#endif
cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_clear(top_cpuset.prefer_cpus); +#endif
fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); top_cpuset.relax_domain_level = -1;
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + BUG_ON(!alloc_cpumask_var(&prefer_cpus_attach, GFP_KERNEL)); +#endif
return 0; } @@ -3573,6 +3711,9 @@ hotplug_update_tasks_legacy(struct cpuset *cs, struct cpumask *new_cpus, nodemask_t *new_mems, bool cpus_updated, bool mems_updated) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_t prefer_cpus; +#endif bool is_empty;
spin_lock_irq(&callback_lock); @@ -3591,6 +3732,13 @@ hotplug_update_tasks_legacy(struct cpuset *cs, if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs);
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_subset(cs->prefer_cpus, cs->cpus_allowed)) { + cpumask_and(&prefer_cpus, cs->prefer_cpus, cs->cpus_allowed); + cpumask_copy(cs->prefer_cpus, &prefer_cpus); + update_tasks_prefer_cpumask(cs); + } +#endif is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed);
diff --git a/kernel/fork.c b/kernel/fork.c index edbb16be9b39..38a2d6d026b2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -625,6 +625,9 @@ void free_task(struct task_struct *tsk) if (tsk->flags & PF_KTHREAD) free_kthread_struct(tsk); bpf_task_storage_free(tsk); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + sched_prefer_cpus_free(tsk); +#endif free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1139,6 +1142,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->seccomp.filter = NULL; #endif
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + tsk->prefer_cpus = NULL; +#endif + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); @@ -2357,6 +2364,12 @@ __latent_entropy struct task_struct *copy_process(
rt_mutex_init_task(p);
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + retval = sched_prefer_cpus_fork(p, current->prefer_cpus); + if (retval) + goto bad_fork_free; +#endif + lockdep_assert_irqs_enabled(); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a1c73dea1f77..a1cebed8dae8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -11570,6 +11570,101 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, return 0; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +int sched_prefer_cpus_fork(struct task_struct *p, struct cpumask *mask) +{ + p->prefer_cpus = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!p->prefer_cpus) + return -ENOMEM; + + if (mask) + cpumask_copy(p->prefer_cpus, mask); + else + cpumask_clear(p->prefer_cpus); + + return 0; +} + +void sched_prefer_cpus_free(struct task_struct *p) +{ + kfree(p->prefer_cpus); +} + +static void do_set_prefer_cpus(struct task_struct *p, + const struct cpumask *new_mask) +{ + struct rq *rq = task_rq(p); + bool queued, running; + + lockdep_assert_held(&p->pi_lock); + + queued = task_on_rq_queued(p); + running = task_current(rq, p); + + if (queued) { + /* + * Because __kthread_bind() calls this on blocked tasks without + * holding rq->lock. + */ + lockdep_assert_held(&rq->__lock); + dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); + } + if (running) + put_prev_task(rq, p); + + cpumask_copy(p->prefer_cpus, new_mask); + + if (queued) + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); + if (running) + set_next_task(rq, p); +} + +/* + * Change a given task's prefer CPU affinity. Prioritize migrate the thread to + * prefer cpus according to preferred bitmask. + * + * NOTE: the caller must have a valid reference to the task, the + * task must not exit() & deallocate itself prematurely. The + * call is not atomic; no spinlocks may be held. 
+ */ +static int __set_prefer_cpus_ptr(struct task_struct *p, + const struct cpumask *new_mask) +{ + struct rq_flags rf; + struct rq *rq; + int ret = 0; + + if (unlikely(!p->prefer_cpus)) + return -EINVAL; + + rq = task_rq_lock(p, &rf); + update_rq_clock(rq); + + if (cpumask_equal(p->prefer_cpus, new_mask)) + goto out; + + if (!cpumask_subset(new_mask, p->cpus_ptr)) { + ret = -EINVAL; + goto out; + } + + do_set_prefer_cpus(p, new_mask); +out: + task_rq_unlock(rq, p, &rf); + + return ret; +} + +int set_prefer_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ + if (p->sched_class != &fair_sched_class) + return 0; + + return __set_prefer_cpus_ptr(p, new_mask); +} +#endif + #ifdef CONFIG_CFS_BANDWIDTH static int cpu_max_show(struct seq_file *sf, void *v) {
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Compare the taskgroup's 'util_avg' on the preferred CPUs with the capacity of the preferred CPUs, and dynamically adjust the CPU range used when waking up a task.
Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 1 + kernel/sched/fair.c | 154 ++++++++++++++++++++++++++++++++++++++++ kernel/sched/features.h | 7 ++ 3 files changed, 162 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 9ae33ae2b6e9..de8f02515715 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1539,6 +1539,7 @@ struct task_struct {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY cpumask_t *prefer_cpus; + const cpumask_t *select_cpus; #endif
/* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8ae0d65713aa..4d8e1b75258c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -166,6 +166,15 @@ static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; #endif
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +/* + * Low utilization threshold for CPU + * + * (default: 85%), units: percentage of CPU utilization) + */ +int sysctl_sched_util_low_pct = 85; +#endif + #ifdef CONFIG_SYSCTL static struct ctl_table sched_fair_sysctls[] = { { @@ -214,6 +223,17 @@ static struct ctl_table sched_fair_sysctls[] = { .extra1 = SYSCTL_ONE_HUNDRED, .extra2 = &one_thousand, }, +#endif +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + { + .procname = "sched_util_low_pct", + .data = &sysctl_sched_util_low_pct, + .maxlen = sizeof(sysctl_sched_util_low_pct), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, #endif {} }; @@ -6981,7 +7001,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this return cpumask_first(sched_group_span(group));
/* Traverse only the allowed CPUs */ +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + for_each_cpu_and(i, sched_group_span(group), p->select_cpus) { +#else for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { +#endif struct rq *rq = cpu_rq(i);
if (!sched_core_cookie_match(rq, p)) @@ -7028,7 +7052,11 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p { int new_cpu = cpu;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_intersects(sched_domain_span(sd), p->select_cpus)) +#else if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) +#endif return prev_cpu;
/* @@ -7152,7 +7180,11 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->select_cpus)) { +#else if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { +#endif *idle_cpu = cpu; break; } @@ -7178,7 +7210,11 @@ static int select_idle_smt(struct task_struct *p, int target) { int cpu;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + for_each_cpu_and(cpu, cpu_smt_mask(target), p->select_cpus) { +#else for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { +#endif if (cpu == target) continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) @@ -7226,7 +7262,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool struct sched_domain *this_sd = NULL; u64 time = 0;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_and(cpus, sched_domain_span(sd), p->select_cpus); +#else cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); +#endif
if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg; @@ -7399,6 +7439,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) lockdep_assert_irqs_disabled();
if ((available_idle_cpu(target) || sched_idle_cpu(target)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(target, p->select_cpus) && +#endif asym_fits_cpu(task_util, util_min, util_max, target)) return target;
@@ -7407,6 +7450,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ if (prev != target && cpus_share_cache(prev, target) && (available_idle_cpu(prev) || sched_idle_cpu(prev)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(prev, p->select_cpus) && +#endif asym_fits_cpu(task_util, util_min, util_max, prev)) return prev;
@@ -7433,7 +7479,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(recent_used_cpu, p->select_cpus) && +#else cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && +#endif asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { return recent_used_cpu; } @@ -7968,6 +8018,83 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) return target; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +static inline bool prefer_cpus_valid(struct task_struct *p) +{ + return p->prefer_cpus && + !cpumask_empty(p->prefer_cpus) && + !cpumask_equal(p->prefer_cpus, p->cpus_ptr) && + cpumask_subset(p->prefer_cpus, p->cpus_ptr); +} + +static inline unsigned long taskgroup_cpu_util(struct task_group *tg, + int cpu) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + if (tg->se[cpu] && sched_feat(DA_UTIL_TASKGROUP)) + return tg->se[cpu]->avg.util_avg; +#endif + return cpu_util_cfs(cpu); +} + +/* + * set_task_select_cpus: select the cpu range for task + * @p: the task whose available cpu range will to set + * @idlest_cpu: the cpu which is the idlest in prefer cpus + * + * If sum of 'util_avg' among 'prefer_cpus' lower than the percentage + * 'sysctl_sched_util_low_pct' of 'prefer_cpus' capacity, select + * 'prefer_cpus' range for task, otherwise select 'cpus_ptr' for task. + * + * The available cpu range set to p->select_cpus. Idlest cpu in preferred cpus + * set to @idlest_cpu, which is set to wakeup cpu when fast path wakeup cpu + * without p->select_cpus. 
+ */ +static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, + int sd_flag) +{ + unsigned long util_avg_sum = 0; + unsigned long tg_capacity = 0; + long min_util = INT_MIN; + struct task_group *tg; + long spare; + int cpu; + + p->select_cpus = p->cpus_ptr; + if (!prefer_cpus_valid(p)) + return; + + rcu_read_lock(); + tg = task_group(p); + for_each_cpu(cpu, p->prefer_cpus) { + if (idlest_cpu && available_idle_cpu(cpu)) { + *idlest_cpu = cpu; + } else if (idlest_cpu) { + spare = (long)(capacity_of(cpu) - + taskgroup_cpu_util(tg, cpu)); + if (spare > min_util) { + min_util = spare; + *idlest_cpu = cpu; + } + } + + if (available_idle_cpu(cpu)) { + rcu_read_unlock(); + p->select_cpus = p->prefer_cpus; + return; + } + + util_avg_sum += taskgroup_cpu_util(tg, cpu); + tg_capacity += capacity_of(cpu); + } + rcu_read_unlock(); + + if (tg_capacity > cpumask_weight(p->prefer_cpus) && + util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) + p->select_cpus = p->prefer_cpus; +} +#endif + /* * select_task_rq_fair: Select target runqueue for the waking task in domains * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE, @@ -7988,11 +8115,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) int want_affine = 0; /* SD_flags and WF_flags share the first nibble */ int sd_flag = wake_flags & 0xF; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + int idlest_cpu = 0; +#endif
/* * required for stable ->cpus_allowed */ lockdep_assert_held(&p->pi_lock); + +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + set_task_select_cpus(p, &idlest_cpu, sd_flag); +#endif + if (wake_flags & WF_TTWU) { record_wakee(p);
@@ -8007,7 +8142,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) new_cpu = prev_cpu; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->select_cpus); +#else want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); +#endif }
rcu_read_lock(); @@ -8018,7 +8157,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + new_cpu = cpu; + if (cpu != prev_cpu && + cpumask_test_cpu(prev_cpu, p->select_cpus)) +#else if (cpu != prev_cpu) +#endif new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
sd = NULL; /* Prefer wake_affine over balance flags */ @@ -8045,6 +8190,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) } rcu_read_unlock();
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_test_cpu(new_cpu, p->select_cpus)) + new_cpu = idlest_cpu; +#endif return new_cpu; }
@@ -10468,8 +10617,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int local_group;
/* Skip over this group if it has no CPUs allowed */ +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_intersects(sched_group_span(group), + p->select_cpus)) +#else if (!cpumask_intersects(sched_group_span(group), p->cpus_ptr)) +#endif continue;
/* Skip over this group if no cookie matched */ diff --git a/kernel/sched/features.h b/kernel/sched/features.h index f770168230ae..4dd46de2f827 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -89,3 +89,10 @@ SCHED_FEAT(UTIL_EST_FASTUP, true) SCHED_FEAT(LATENCY_WARN, false)
SCHED_FEAT(HZ_BW, true) + +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +/* + * Use util_avg of bottom-Level taskgroup + */ +SCHED_FEAT(DA_UTIL_TASKGROUP, true) +#endif
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Do not allow a task to be migrated out of its preferred CPUs.
Signed-off-by: Hui Tang tanghui20@huawei.com --- kernel/sched/fair.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4d8e1b75258c..686da2f99f95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9210,7 +9210,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (kthread_is_per_cpu(p)) return 0;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + set_task_select_cpus(p, NULL, 0); + if (!cpumask_test_cpu(env->dst_cpu, p->select_cpus)) { +#else if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { +#endif int cpu;
schedstat_inc(p->stats.nr_failed_migrations_affine); @@ -9233,7 +9238,11 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/* Prevent to re-select dst_cpu via env's CPUs: */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (cpumask_test_cpu(cpu, p->select_cpus)) { +#else if (cpumask_test_cpu(cpu, p->cpus_ptr)) { +#endif env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 5 +++++ kernel/sched/debug.c | 4 ++++ kernel/sched/fair.c | 11 +++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index de8f02515715..479ee3cece5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -543,6 +543,11 @@ struct sched_statistics { #ifdef CONFIG_SCHED_CORE u64 core_forceidle_sum; #endif + +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + u64 nr_wakeups_preferred_cpus; + u64 nr_wakeups_force_preferred_cpus; +#endif #endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4c3d0d9f3db6..1fe9aefc7baf 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1039,6 +1039,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_wakeups_affine_attempts); P_SCHEDSTAT(nr_wakeups_passive); P_SCHEDSTAT(nr_wakeups_idle); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + P_SCHEDSTAT(nr_wakeups_preferred_cpus); + P_SCHEDSTAT(nr_wakeups_force_preferred_cpus); +#endif
avg_atom = p->se.sum_exec_runtime; if (nr_switches) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 686da2f99f95..45b93abc933d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8081,6 +8081,8 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, if (available_idle_cpu(cpu)) { rcu_read_unlock(); p->select_cpus = p->prefer_cpus; + if (sd_flag & SD_BALANCE_WAKE) + schedstat_inc(p->stats.nr_wakeups_preferred_cpus); return; }
@@ -8090,8 +8092,11 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, rcu_read_unlock();
if (tg_capacity > cpumask_weight(p->prefer_cpus) && - util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) + util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) { p->select_cpus = p->prefer_cpus; + if (sd_flag & SD_BALANCE_WAKE) + schedstat_inc(p->stats.nr_wakeups_preferred_cpus); + } } #endif
@@ -8191,8 +8196,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) rcu_read_unlock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - if (!cpumask_test_cpu(new_cpu, p->select_cpus)) + if (!cpumask_test_cpu(new_cpu, p->select_cpus)) { new_cpu = idlest_cpu; + schedstat_inc(p->stats.nr_wakeups_force_preferred_cpus); + } #endif return new_cpu; }
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
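Add the kernel command-line parameter 'dynamic_affinity' to control the dynamic affinity feature. The feature is disabled by default and is enabled only when this parameter is present on the kernel command line.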
Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 6 ++++++ kernel/cgroup/cpuset.c | 3 +++ kernel/fork.c | 11 +++++++---- kernel/sched/core.c | 3 +++ kernel/sched/debug.c | 6 ++++-- kernel/sched/fair.c | 13 +++++++++++++ 6 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 479ee3cece5d..fe8556ff7fb3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2484,5 +2484,11 @@ int set_prefer_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask); int sched_prefer_cpus_fork(struct task_struct *p, struct cpumask *mask); void sched_prefer_cpus_free(struct task_struct *p); + +extern struct static_key_false __dynamic_affinity_switch; +static inline bool dynamic_affinity_enabled(void) +{ + return static_branch_unlikely(&__dynamic_affinity_switch); +} #endif #endif diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 01f4ff02e7b2..cfdca8aeabda 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -858,6 +858,9 @@ static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cs == &top_cpuset) return -EACCES;
+ if (!dynamic_affinity_enabled()) + return -EPERM; + /* * An empty prefer_cpus is ok which mean that the cpuset tasks disable * dynamic affinity feature. diff --git a/kernel/fork.c b/kernel/fork.c index 38a2d6d026b2..8483e5e7d920 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -626,7 +626,8 @@ void free_task(struct task_struct *tsk) free_kthread_struct(tsk); bpf_task_storage_free(tsk); #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - sched_prefer_cpus_free(tsk); + if (dynamic_affinity_enabled()) + sched_prefer_cpus_free(tsk); #endif free_task_struct(tsk); } @@ -2365,9 +2366,11 @@ __latent_entropy struct task_struct *copy_process( rt_mutex_init_task(p);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - retval = sched_prefer_cpus_fork(p, current->prefer_cpus); - if (retval) - goto bad_fork_free; + if (dynamic_affinity_enabled()) { + retval = sched_prefer_cpus_fork(p, current->prefer_cpus); + if (retval) + goto bad_fork_free; + } #endif
lockdep_assert_irqs_enabled(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a1cebed8dae8..58c274b655ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -11635,6 +11635,9 @@ static int __set_prefer_cpus_ptr(struct task_struct *p, struct rq *rq; int ret = 0;
+ if (!dynamic_affinity_enabled()) + return -EPERM; + if (unlikely(!p->prefer_cpus)) return -EINVAL;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1fe9aefc7baf..eee2d05dc90a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1040,8 +1040,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_wakeups_passive); P_SCHEDSTAT(nr_wakeups_idle); #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - P_SCHEDSTAT(nr_wakeups_preferred_cpus); - P_SCHEDSTAT(nr_wakeups_force_preferred_cpus); + if (dynamic_affinity_enabled()) { + P_SCHEDSTAT(nr_wakeups_preferred_cpus); + P_SCHEDSTAT(nr_wakeups_force_preferred_cpus); + } #endif
avg_atom = p->se.sum_exec_runtime; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 45b93abc933d..0a554d13adbf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8019,8 +8019,21 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) }
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + +DEFINE_STATIC_KEY_FALSE(__dynamic_affinity_switch); + +static int __init dynamic_affinity_switch_setup(char *__unused) +{ + static_branch_enable(&__dynamic_affinity_switch); + return 1; +} +__setup("dynamic_affinity", dynamic_affinity_switch_setup); + static inline bool prefer_cpus_valid(struct task_struct *p) { + if (!dynamic_affinity_enabled()) + return false; + return p->prefer_cpus && !cpumask_empty(p->prefer_cpus) && !cpumask_equal(p->prefer_cpus, p->cpus_ptr) &&
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LL9S
--------------------------------
Signed-off-by: Hui Tang tanghui20@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 039f3496af78..8f9513006e1f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -166,6 +166,7 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_SCHED_MM_CID=y +CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f481c1c8b4d1..7a93d314bee2 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -188,6 +188,7 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_SCHED_MM_CID=y +CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y
From: zhangwei123171 zhangwei123171@jd.com
jingdong inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PGQ2
--------------------------------
The CPU selected by select_task_rq_fair() may not be in the task's cpuset. That case is corrected later, in the subsequent fallback process.
Dynamic affinity should not break this logic, so the selected CPU is now overridden with idlest_cpu only when an idlest CPU was actually recorded (idlest_cpu != -1). Fixes: f6cee1481527 ("sched: Adjust wakeup cpu range according CPU util dynamicly") Signed-off-by: zhangwei123171 zhangwei123171@jd.com Signed-off-by: Hui Tang tanghui20@huawei.com --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0a554d13adbf..5f1889102226 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8134,7 +8134,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) /* SD_flags and WF_flags share the first nibble */ int sd_flag = wake_flags & 0xF; #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - int idlest_cpu = 0; + int idlest_cpu = -1; #endif
/* @@ -8209,7 +8209,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) rcu_read_unlock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY - if (!cpumask_test_cpu(new_cpu, p->select_cpus)) { + if (idlest_cpu != -1 && !cpumask_test_cpu(new_cpu, p->select_cpus)) { new_cpu = idlest_cpu; schedstat_inc(p->stats.nr_wakeups_force_preferred_cpus); }
From: zhangwei123171 zhangwei123171@jd.com
jingdong inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PGQ3
--------------------------------
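In a co-location scenario, when an online container uses the dynamic affinity capability, a preferred CPU that is occupied only by offline tasks (one for which sched_idle_cpu() returns true) is now also treated as an idle CPU, in addition to CPUs for which available_idle_cpu() returns true. This is friendlier to online tasks.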
Fixes: f6cee1481527 ("sched: Adjust wakeup cpu range according CPU util dynamicly") Signed-off-by: zhangwei123171 zhangwei123171@jd.com Signed-off-by: Hui Tang tanghui20@huawei.com --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5f1889102226..7f727a53f3df 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8080,7 +8080,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, rcu_read_lock(); tg = task_group(p); for_each_cpu(cpu, p->prefer_cpus) { - if (idlest_cpu && available_idle_cpu(cpu)) { + if (idlest_cpu && (available_idle_cpu(cpu) || sched_idle_cpu(cpu))) { *idlest_cpu = cpu; } else if (idlest_cpu) { spare = (long)(capacity_of(cpu) - @@ -8091,7 +8091,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, } }
- if (available_idle_cpu(cpu)) { + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) { rcu_read_unlock(); p->select_cpus = p->prefer_cpus; if (sd_flag & SD_BALANCE_WAKE)