Some fixes for smart grid, as follows:
Hui Tang (5):
  sched: Add static key to reduce noise
  sched: fix smart grid usage count
  sched: fix WARN found by deadlock detect
  sched: Fix possible deadlock in tg_set_dynamic_affinity_mode
  sched: Fix negative count for jump label
 include/linux/sched.h          |   1 +
 include/linux/sched/grid_qos.h |  12 ++++
 kernel/cgroup/cpuset.c         |   3 +
 kernel/sched/core.c            |   9 +--
 kernel/sched/fair.c            | 148 +++++++++++++++++++++++++++++------------
 kernel/sched/grid/qos.c        |  14 ++--
 6 files changed, 132 insertions(+), 55 deletions(-)
From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A718
--------------------------------
Add a static key to reduce noise when dynamic affinity is not enabled.
This gives better performance in some cases, such as lmbench.
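For reference, the jump-label pattern being applied, reduced to a minimal
standalone sketch (illustration only; the example_* identifiers are made up,
the real names are in the diff below):

	#include <linux/jump_label.h>

	/* Key starts disabled: the test below compiles to a patched no-op. */
	static DEFINE_STATIC_KEY_FALSE(example_feature_key);

	static inline bool example_feature_used(void)
	{
		/* Almost free until the key is flipped on. */
		return static_branch_unlikely(&example_feature_key);
	}

	void example_feature_enable(void)
	{
		/* Rewrites every branch site so callers take the slow path. */
		static_branch_enable(&example_feature_key);
	}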
Fixes: 243865da2684 ("cpuset: Introduce new interface for scheduler ...")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
---
 include/linux/sched.h  |  1 +
 kernel/cgroup/cpuset.c |  3 +++
 kernel/sched/fair.c    | 32 ++++++++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3e95733..8fd8c5b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2007,6 +2007,7 @@ int set_prefer_cpus_ptr(struct task_struct *p,
 		       const struct cpumask *new_mask);
 int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig);
 void sched_prefer_cpus_free(struct task_struct *p);
+void dynamic_affinity_enable(void);
 #endif
 #ifdef CONFIG_QOS_SCHED_SMART_GRID

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d7ec7f9..ae2b1ad 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -625,6 +625,9 @@ static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs,
update_tasks_prefer_cpumask(trialcs);
+	if (!cpumask_empty(trialcs->prefer_cpus))
+		dynamic_affinity_enable();
+
 	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->prefer_cpus, trialcs->prefer_cpus);
 	spin_unlock_irq(&callback_lock);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1cd08e1..19ee03b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7131,6 +7131,29 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 }
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+
+#ifdef CONFIG_JUMP_LABEL
+static DEFINE_STATIC_KEY_FALSE(__dynamic_affinity_used);
+
+static inline bool dynamic_affinity_used(void)
+{
+	return static_branch_unlikely(&__dynamic_affinity_used);
+}
+
+void dynamic_affinity_enable(void)
+{
+	static_branch_enable_cpuslocked(&__dynamic_affinity_used);
+}
+
+#else /* CONFIG_JUMP_LABEL */
+static bool dynamic_affinity_used(void)
+{
+	return true;
+}
+
+void dynamic_affinity_enable(void) {}
+#endif
+
 /*
  * Low utilization threshold for CPU
  *
@@ -7246,7 +7269,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	time = schedstat_start_time();
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
-	set_task_select_cpus(p, &idlest_cpu, sd_flag);
+	p->select_cpus = &p->cpus_allowed;
+	if (dynamic_affinity_used())
+		set_task_select_cpus(p, &idlest_cpu, sd_flag);
 #endif
 	if (sd_flag & SD_BALANCE_WAKE) {
@@ -8272,7 +8297,10 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 		return 0;
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
-	set_task_select_cpus(p, NULL, 0);
+	p->select_cpus = &p->cpus_allowed;
+	if (dynamic_affinity_used())
+		set_task_select_cpus(p, NULL, 0);
+
 	if (!cpumask_test_cpu(env->dst_cpu, p->select_cpus)) {
 #else
 	if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7D98G
CVE: NA
----------------------------------------
smart_grid_usage_dec() should be called when freeing the task group
if the mode is auto.
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
---
 kernel/sched/fair.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 19ee03b..2cc33b7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5687,6 +5687,9 @@ static void destroy_auto_affinity(struct task_group *tg)
 {
 	struct auto_affinity *auto_affi = tg->auto_affinity;
+	if (auto_affi->mode)
+		smart_grid_usage_dec();
+
 	hrtimer_cancel(&auto_affi->period_timer);
 	free_affinity_domains(&auto_affi->ad);
From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7BQZ0
CVE: NA
----------------------------------------
The following WARNING is reported when running:

  echo 1 > /sys/fs/cgroup/cpu/cpu.dynamic_affinity_mode
[ 147.276757] WARNING: CPU: 5 PID: 1770 at kernel/cpu.c:326 lockdep_assert_cpus_held+0xac/0xd0
[ 147.279670] Kernel panic - not syncing: panic_on_warn set ...
[ 147.279670]
[ 147.282211] CPU: 5 PID: 1770 Comm: bash Kdump: loaded Not tainted 4.19
[ 147.284796] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)..
[ 147.290963] Call Trace:
[ 147.292459]  dump_stack+0xc6/0x11e
[ 147.294295]  ? lockdep_assert_cpus_held+0xa0/0xd0
[ 147.296876]  panic+0x1d6/0x46b
[ 147.298591]  ? refcount_error_report+0x2a5/0x2a5
[ 147.301131]  ? kmsg_dump_rewind_nolock+0xde/0xde
[ 147.303738]  ? sched_clock_cpu+0x18/0x1b0
[ 147.305943]  ? __warn+0x1d1/0x210
[ 147.307831]  ? lockdep_assert_cpus_held+0xac/0xd0
[ 147.310469]  __warn+0x1ec/0x210
[ 147.312271]  ? lockdep_assert_cpus_held+0xac/0xd0
[ 147.314838]  report_bug+0x1ee/0x2b0
[ 147.316798]  fixup_bug.part.4+0x37/0x80
[ 147.318946]  do_error_trap+0x21c/0x260
[ 147.321062]  ? fixup_bug.part.4+0x80/0x80
[ 147.323253]  ? check_preemption_disabled+0x34/0x1f0
[ 147.324886]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 147.326277]  ? lockdep_hardirqs_off+0x1cb/0x2b0
[ 147.327505]  ? error_entry+0x9a/0x130
[ 147.328523]  ? trace_hardirqs_off_caller+0x59/0x1a0
[ 147.329844]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 147.331124]  invalid_op+0x14/0x20
[ 147.332057]  ? vprintk_func+0x68/0x1a0
[ 147.333082]  ? lockdep_assert_cpus_held+0xac/0xd0
[ 147.334355]  ? lockdep_assert_cpus_held+0xac/0xd0
[ 147.335624]  ? static_key_slow_inc_cpuslocked+0x5a/0x230
[ 147.337079]  ? tg_set_dynamic_affinity_mode+0x4f/0x70
[ 147.338444]  ? cgroup_file_write+0x471/0x6a0
[ 147.339604]  ? cgroup_css.part.4+0x100/0x100
[ 147.340782]  ? cgroup_css.part.4+0x100/0x100
[ 147.341943]  ? kernfs_fop_write+0x2af/0x430
[ 147.343083]  ? kernfs_vma_page_mkwrite+0x230/0x230
[ 147.344401]  ? __vfs_write+0xef/0x680
[ 147.345404]  ? kernel_read+0x110/0x110
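For context: the *_cpuslocked variants of the static-key API expect the
caller to already hold the CPU hotplug read lock, which the cgroup write
path does not take. A simplified paraphrase of kernel/jump_label.c (not
this patch's code):

	void static_key_slow_inc_cpuslocked(struct static_key *key)
	{
		lockdep_assert_cpus_held();	/* the WARN in the trace above */
		/* ... update the key and patch the branch sites ... */
	}

	void static_key_slow_inc(struct static_key *key)
	{
		cpus_read_lock();		/* takes the hotplug lock itself */
		static_key_slow_inc_cpuslocked(key);
		cpus_read_unlock();
	}

Hence the fix: switch smart_grid_usage_inc()/dec() to the plain
static_key_slow_inc()/dec(), which acquire cpus_read_lock() internally.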
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2cc33b7..ee153e3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5300,12 +5300,12 @@ struct static_key __smart_grid_used;
 static void smart_grid_usage_inc(void)
 {
-	static_key_slow_inc_cpuslocked(&__smart_grid_used);
+	static_key_slow_inc(&__smart_grid_used);
 }
 static void smart_grid_usage_dec(void)
 {
-	static_key_slow_dec_cpuslocked(&__smart_grid_used);
+	static_key_slow_dec(&__smart_grid_used);
 }
static void tg_update_task_prefer_cpus(struct task_group *tg)
From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7CGD0
CVE: NA
----------------------------------------
A deadlock can occur in two situations, as follows:
The first case:
tg_set_dynamic_affinity_mode --- raw_spin_lock_irq(&auto_affi->lock);
  ->start_auto_affinity --- trigger timer
    ->tg_update_task_prefer_cpus
      ->css_task_iter_next
        ->raw_spin_unlock_irq
hrtimer_run_queues
  ->sched_auto_affi_period_timer --- try spin lock (&auto_affi->lock)
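In short, this is the classic "timer handler spins on a lock its interrupted
holder still owns" pattern. A condensed sketch of the first case, using the
names above (illustration only, not the literal code):

	/* write path, task context */
	raw_spin_lock_irq(&auto_affi->lock);
	start_auto_affinity(auto_affi);		/* arms period_timer */
	tg_update_task_prefer_cpus(tg);		/* long css task walk under the lock */
	raw_spin_unlock_irq(&auto_affi->lock);

	/* timer path, hard-IRQ context */
	static enum hrtimer_restart sched_auto_affi_period_timer(struct hrtimer *t)
	{
		unsigned long flags;

		/* spins for as long as the walk above holds the lock */
		raw_spin_lock_irqsave(&auto_affi->lock, flags);
		/* ... adjust affinity domains ... */
		raw_spin_unlock_irqrestore(&auto_affi->lock, flags);
		return HRTIMER_RESTART;
	}

The fix below therefore drops the lock from tg_set_dynamic_affinity_mode()
and takes it only around the short start/stop sections, with no task walk
under it.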
The second case is as follows:
[ 291.470810] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
[ 291.472715] rcu:     1-...0: (0 ticks this GP) idle=a6a/1/0x4000000000000002 softirq=78516/78516 fqs=5249
[ 291.475268] rcu:     (detected by 6, t=21006 jiffies, g=202169, q=9862)
[ 291.477038] Sending NMI from CPU 6 to CPUs 1:
[ 291.481268] NMI backtrace for cpu 1
[ 291.481273] CPU: 1 PID: 1923 Comm: sh Kdump: loaded Not tainted 4.19.90+ #150
[ 291.481278] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
[ 291.481281] RIP: 0010:queued_spin_lock_slowpath+0x136/0x9a0
[ 291.481289] Code: c0 74 3f 49 89 dd 48 89 dd 48 b8 00 00 00 00 00 fc ff df 49 c1 ed 03 83 e5 07 49 01 c5 83 c5 03 48 83 05 c4 66 b9 05 01 f3 90 <41> 0f b6 45 00 40 38 c5 7c 08 84 c0 0f 85 ad 07 00 00 0
[ 291.481292] RSP: 0018:ffff88801de87cd8 EFLAGS: 00000002
[ 291.481297] RAX: 0000000000000101 RBX: ffff888001be0a28 RCX: ffffffffb8090f7d
[ 291.481301] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff888001be0a28
[ 291.481304] RBP: 0000000000000003 R08: ffffed100037c146 R09: ffffed100037c146
[ 291.481307] R10: 000000001106b143 R11: ffffed100037c145 R12: 1ffff11003bd0f9c
[ 291.481311] R13: ffffed100037c145 R14: fffffbfff7a38dee R15: dffffc0000000000
[ 291.481315] FS:  00007fac4f306740(0000) GS:ffff88801de80000(0000) knlGS:0000000000000000
[ 291.481318] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 291.481321] CR2: 00007fac4f4bb650 CR3: 00000000046b6000 CR4: 00000000000006e0
[ 291.481323] Call Trace:
[ 291.481324]  <IRQ>
[ 291.481326]  ? osq_unlock+0x2a0/0x2a0
[ 291.481329]  ? check_preemption_disabled+0x4c/0x290
[ 291.481331]  ? rcu_accelerate_cbs+0x33/0xed0
[ 291.481333]  _raw_spin_lock_irqsave+0x83/0xa0
[ 291.481336]  sched_auto_affi_period_timer+0x251/0x820
[ 291.481338]  ? __remove_hrtimer+0x151/0x200
[ 291.481340]  __hrtimer_run_queues+0x39d/0xa50
[ 291.481343]  ? tg_update_affinity_domain_down+0x460/0x460
[ 291.481345]  ? enqueue_hrtimer+0x2e0/0x2e0
[ 291.481348]  ? ktime_get_update_offsets_now+0x1d7/0x2c0
[ 291.481350]  hrtimer_run_queues+0x243/0x470
[ 291.481352]  run_local_timers+0x5e/0x150
[ 291.481354]  update_process_times+0x36/0xb0
[ 291.481357]  tick_sched_handle.isra.4+0x7c/0x180
[ 291.481359]  tick_nohz_handler+0xd1/0x1d0
[ 291.481365]  smp_apic_timer_interrupt+0x12c/0x4e0
[ 291.481368]  apic_timer_interrupt+0xf/0x20
[ 291.481370]  </IRQ>
[ 291.481372]  ? smp_call_function_many+0x68c/0x840
[ 291.481375]  ? smp_call_function_many+0x6ab/0x840
[ 291.481377]  ? arch_unregister_cpu+0x60/0x60
[ 291.481379]  ? native_set_fixmap+0x100/0x180
[ 291.481381]  ? arch_unregister_cpu+0x60/0x60
[ 291.481384]  ? set_task_select_cpus+0x116/0x940
[ 291.481386]  ? smp_call_function+0x53/0xc0
[ 291.481388]  ? arch_unregister_cpu+0x60/0x60
[ 291.481390]  ? on_each_cpu+0x49/0xf0
[ 291.481393]  ? set_task_select_cpus+0x115/0x940
[ 291.481395]  ? text_poke_bp+0xff/0x180
[ 291.481397]  ? poke_int3_handler+0xc0/0xc0
[ 291.481400]  ? __set_prefer_cpus_ptr.constprop.4+0x1cd/0x900
[ 291.481402]  ? hrtick+0x1b0/0x1b0
[ 291.481404]  ? set_task_select_cpus+0x115/0x940
[ 291.481407]  ? __jump_label_transform.isra.0+0x3a1/0x470
[ 291.481409]  ? kernel_init+0x280/0x280
[ 291.481411]  ? kasan_check_read+0x1d/0x30
[ 291.481413]  ? mutex_lock+0x96/0x100
[ 291.481415]  ? __mutex_lock_slowpath+0x30/0x30
[ 291.481418]  ? arch_jump_label_transform+0x52/0x80
[ 291.481420]  ? set_task_select_cpus+0x115/0x940
[ 291.481422]  ? __jump_label_update+0x1a1/0x1e0
[ 291.481424]  ? jump_label_update+0x2ee/0x3b0
[ 291.481427]  ? static_key_slow_inc_cpuslocked+0x1c8/0x2d0
[ 291.481430]  ? start_auto_affinity+0x190/0x200
[ 291.481432]  ? tg_set_dynamic_affinity_mode+0xad/0xf0
[ 291.481435]  ? cpu_affinity_mode_write_u64+0x22/0x30
[ 291.481437]  ? cgroup_file_write+0x46f/0x660
[ 291.481439]  ? cgroup_init_cftypes+0x300/0x300
[ 291.481441]  ? __mutex_lock_slowpath+0x30/0x30
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
---
 include/linux/sched/grid_qos.h |  12 +++++
 kernel/sched/core.c            |   9 +---
 kernel/sched/fair.c            | 105 +++++++++++++++++++++++++----------------
 kernel/sched/grid/qos.c        |  14 ++++--
 4 files changed, 87 insertions(+), 53 deletions(-)
diff --git a/include/linux/sched/grid_qos.h b/include/linux/sched/grid_qos.h
index cea2bf6..23d08db 100644
--- a/include/linux/sched/grid_qos.h
+++ b/include/linux/sched/grid_qos.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_SCHED_GRID_QOS_H
 #define _LINUX_SCHED_GRID_QOS_H
 #include <linux/nodemask.h>
+#include <linux/sched.h>
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 enum sched_grid_qos_class {
@@ -61,6 +62,7 @@ struct sched_grid_qos_power {
 struct sched_grid_qos_affinity {
 	nodemask_t mem_preferred_node_mask;
+	const struct cpumask *prefer_cpus;
 };
 struct task_struct;
@@ -72,6 +74,11 @@ struct sched_grid_qos {
 	int (*affinity_set)(struct task_struct *p);
 };
+static inline int sched_qos_affinity_set(struct task_struct *p)
+{
+	return p->grid_qos->affinity_set(p);
+}
+
 int sched_grid_qos_fork(struct task_struct *p, struct task_struct *orig);
 void sched_grid_qos_free(struct task_struct *p);
@@ -88,5 +95,10 @@ sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)
 {
 	return preferred_nid;
 }
+
+static inline int sched_qos_affinity_set(struct task_struct *p)
+{
+	return 0;
+}
 #endif
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a60f58a..8a7535c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6988,9 +6988,6 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
 int tg_set_dynamic_affinity_mode(struct task_group *tg, u64 mode)
 {
 	struct auto_affinity *auto_affi = tg->auto_affinity;
-	int ret = 0;
-
-	raw_spin_lock_irq(&auto_affi->lock);
 	/* auto mode*/
 	if (mode == 1) {
@@ -6998,14 +6995,10 @@ int tg_set_dynamic_affinity_mode(struct task_group *tg, u64 mode)
 	} else if (mode == 0) {
 		stop_auto_affinity(auto_affi);
 	} else {
-		raw_spin_unlock_irq(&auto_affi->lock);
 		return -EINVAL;
 	}
-	auto_affi->mode = mode;
-	raw_spin_unlock_irq(&auto_affi->lock);
-
-	return ret;
+	return 0;
 }
 static u64 cpu_affinity_mode_read_u64(struct cgroup_subsys_state *css,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ee153e3..d2acee4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -28,9 +28,7 @@
 #include <linux/delay.h>
 #include <linux/tracehook.h>
 #endif
-#ifdef CONFIG_QOS_SCHED_SMART_GRID
 #include <linux/sched/grid_qos.h>
-#endif
 #include <trace/events/sched.h>
 /*
@@ -5293,6 +5291,7 @@ static unsigned long target_load(int cpu, int type);
 static unsigned long capacity_of(int cpu);
 static int sched_idle_cpu(int cpu);
 static unsigned long weighted_cpuload(struct rq *rq);
+static inline bool prefer_cpus_valid(struct task_struct *p);
int sysctl_affinity_adjust_delay_ms = 5000;
@@ -5308,22 +5307,29 @@ static void smart_grid_usage_dec(void)
 	static_key_slow_dec(&__smart_grid_used);
 }
-static void tg_update_task_prefer_cpus(struct task_group *tg)
+static inline struct cpumask *task_prefer_cpus(struct task_struct *p)
 {
-	struct affinity_domain *ad = &tg->auto_affinity->ad;
-	struct task_struct *task;
-	struct css_task_iter it;
+	struct affinity_domain *ad;
-	css_task_iter_start(&tg->css, 0, &it);
-	while ((task = css_task_iter_next(&it))) {
-		if (tg == &root_task_group && !task->mm)
-			continue;
+	if (!smart_grid_used())
+		return p->prefer_cpus;
-		set_prefer_cpus_ptr(task, ad->domains[ad->curr_level]);
-		/* grid_qos must not be NULL */
-		task->grid_qos->affinity_set(task);
-	}
-	css_task_iter_end(&it);
+	if (task_group(p)->auto_affinity->mode == 0)
+		return &p->cpus_allowed;
+
+	ad = &task_group(p)->auto_affinity->ad;
+	return ad->domains[ad->curr_level];
+}
+
+static inline int dynamic_affinity_mode(struct task_struct *p)
+{
+	if (!prefer_cpus_valid(p))
+		return -1;
+
+	if (smart_grid_used())
+		return task_group(p)->auto_affinity->mode == 0 ? -1 : 1;
+
+	return 0;
 }
 static void affinity_domain_up(struct task_group *tg)
@@ -5344,8 +5350,6 @@ static void affinity_domain_up(struct task_group *tg)
 	if (level == ad->dcount)
 		return;
-
-	tg_update_task_prefer_cpus(tg);
 }
 static void affinity_domain_down(struct task_group *tg)
@@ -5366,8 +5370,6 @@ static void affinity_domain_down(struct task_group *tg)
 	if (!level)
 		return;
-
-	tg_update_task_prefer_cpus(tg);
 }
 static enum hrtimer_restart sched_auto_affi_period_timer(struct hrtimer *timer)
@@ -5433,8 +5435,6 @@ static int tg_update_affinity_domain_down(struct task_group *tg, void *data)
 	if (!smart_grid_used())
 		return 0;
-	if (auto_affi->mode)
-		tg_update_task_prefer_cpus(tg);
 	return 0;
 }
@@ -5452,35 +5452,41 @@ void tg_update_affinity_domains(int cpu, int online)
 void start_auto_affinity(struct auto_affinity *auto_affi)
 {
-	struct task_group *tg = auto_affi->tg;
 	ktime_t delay_ms;
-	if (auto_affi->period_active == 1)
+	raw_spin_lock_irq(&auto_affi->lock);
+	if (auto_affi->period_active == 1) {
+		raw_spin_unlock_irq(&auto_affi->lock);
 		return;
-
-	tg_update_task_prefer_cpus(tg);
+	}
 	auto_affi->period_active = 1;
+	auto_affi->mode = 1;
 	delay_ms = ms_to_ktime(sysctl_affinity_adjust_delay_ms);
 	hrtimer_forward_now(&auto_affi->period_timer, delay_ms);
 	hrtimer_start_expires(&auto_affi->period_timer,
 			HRTIMER_MODE_ABS_PINNED);
+	raw_spin_unlock_irq(&auto_affi->lock);
+
+	smart_grid_usage_inc();
 }
 void stop_auto_affinity(struct auto_affinity *auto_affi)
 {
-	struct task_group *tg = auto_affi->tg;
 	struct affinity_domain *ad = &auto_affi->ad;
-	if (auto_affi->period_active == 0)
+	raw_spin_lock_irq(&auto_affi->lock);
+	if (auto_affi->period_active == 0) {
+		raw_spin_unlock_irq(&auto_affi->lock);
 		return;
+	}
 	hrtimer_cancel(&auto_affi->period_timer);
 	auto_affi->period_active = 0;
+	auto_affi->mode = 0;
 	ad->curr_level = ad->dcount > 0 ? ad->dcount - 1 : 0;
+	raw_spin_unlock_irq(&auto_affi->lock);
-	tg_update_task_prefer_cpus(tg);
 	smart_grid_usage_dec();
 }
@@ -5698,6 +5704,19 @@ static void destroy_auto_affinity(struct task_group *tg)
 }
 #else
 static void destroy_auto_affinity(struct task_group *tg) {}
+
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+static inline struct cpumask *task_prefer_cpus(struct task_struct *p)
+{
+	return p->prefer_cpus;
+}
+#endif
+
+static inline int dynamic_affinity_mode(struct task_struct *p)
+{
+	return 0;
+}
+
 #endif
 /**************************************************
@@ -7166,10 +7185,11 @@ int sysctl_sched_util_low_pct = 85;
 static inline bool prefer_cpus_valid(struct task_struct *p)
 {
-	return p->prefer_cpus &&
-	       !cpumask_empty(p->prefer_cpus) &&
-	       !cpumask_equal(p->prefer_cpus, &p->cpus_allowed) &&
-	       cpumask_subset(p->prefer_cpus, &p->cpus_allowed);
+	struct cpumask *prefer_cpus = task_prefer_cpus(p);
+
+	return !cpumask_empty(prefer_cpus) &&
+	       !cpumask_equal(prefer_cpus, &p->cpus_allowed) &&
+	       cpumask_subset(prefer_cpus, &p->cpus_allowed);
 }
 /*
@@ -7193,20 +7213,23 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 	long min_util = INT_MIN;
 	struct task_group *tg;
 	long spare;
-	int cpu;
+	int cpu, mode;
-	p->select_cpus = &p->cpus_allowed;
-	if (!prefer_cpus_valid(p))
+	rcu_read_lock();
+	mode = dynamic_affinity_mode(p);
+	if (mode == -1) {
+		rcu_read_unlock();
 		return;
-
-	if (smart_grid_used()) {
-		p->select_cpus = p->prefer_cpus;
+	} else if (mode == 1) {
+		p->select_cpus = task_prefer_cpus(p);
 		if (idlest_cpu)
 			*idlest_cpu = cpumask_first(p->select_cpus);
+		sched_qos_affinity_set(p);
+		rcu_read_unlock();
 		return;
 	}
-	rcu_read_lock();
+	/* manual mode */
 	tg = task_group(p);
 	for_each_cpu(cpu, p->prefer_cpus) {
 		if (unlikely(!tg->se[cpu]))
@@ -7273,7 +7296,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	p->select_cpus = &p->cpus_allowed;
-	if (dynamic_affinity_used())
+	if (dynamic_affinity_used() || smart_grid_used())
 		set_task_select_cpus(p, &idlest_cpu, sd_flag);
 #endif
@@ -8301,7 +8324,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	p->select_cpus = &p->cpus_allowed;
-	if (dynamic_affinity_used())
+	if (dynamic_affinity_used() || smart_grid_used())
 		set_task_select_cpus(p, NULL, 0);
 	if (!cpumask_test_cpu(env->dst_cpu, p->select_cpus)) {
diff --git a/kernel/sched/grid/qos.c b/kernel/sched/grid/qos.c
index 525778d..f0f10df 100644
--- a/kernel/sched/grid/qos.c
+++ b/kernel/sched/grid/qos.c
@@ -23,20 +23,26 @@
 #include <linux/sched/grid_qos.h>
 #include "internal.h"
-static int qos_affinity_set(struct task_struct *p)
+static inline int qos_affinity_set(struct task_struct *p)
 {
 	int n;
 	struct sched_grid_qos_affinity *affinity = &p->grid_qos->affinity;
-	nodes_clear(affinity->mem_preferred_node_mask);
+	if (likely(affinity->prefer_cpus == p->select_cpus))
+		return 0;
+
 	/*
 	 * We want the memory allocation to be as close to the CPU
 	 * as possible, and adjust after getting memory bandwidth usage.
 	 */
-	for (n = 0; n < nr_node_ids; n++)
-		if (cpumask_intersects(cpumask_of_node(n), p->prefer_cpus))
+	for (n = 0; n < nr_node_ids; n++) {
+		if (cpumask_intersects(cpumask_of_node(n), p->select_cpus))
 			node_set(n, affinity->mem_preferred_node_mask);
+		else
+			node_clear(n, affinity->mem_preferred_node_mask);
+	}
+	affinity->prefer_cpus = p->select_cpus;
 	return 0;
 }
From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7DA63
CVE: NA
--------------------------------
Add a mutex lock to prevent a negative count for the jump label.
[28612.530675] ------------[ cut here ]------------
[28612.532708] jump label: negative count!
[28612.535031] WARNING: CPU: 4 PID: 3899 at kernel/jump_label.c:202 __static_key_slow_dec_cpuslocked+0x204/0x240
[28612.538216] Kernel panic - not syncing: panic_on_warn set ...
[28612.538216]
[28612.540487] CPU: 4 PID: 3899 Comm: sh Kdump: loaded Not tainted
[28612.542788] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
[28612.546455] Call Trace:
[28612.547339]  dump_stack+0xc6/0x11e
[28612.548546]  ? __static_key_slow_dec_cpuslocked+0x200/0x240
[28612.550352]  panic+0x1d6/0x46b
[28612.551375]  ? refcount_error_report+0x2a5/0x2a5
[28612.552915]  ? kmsg_dump_rewind_nolock+0xde/0xde
[28612.554358]  ? sched_clock_cpu+0x18/0x1b0
[28612.555699]  ? __warn+0x1d1/0x210
[28612.556799]  ? __static_key_slow_dec_cpuslocked+0x204/0x240
[28612.558548]  __warn+0x1ec/0x210
[28612.559621]  ? __static_key_slow_dec_cpuslocked+0x204/0x240
[28612.561536]  report_bug+0x1ee/0x2b0
[28612.562706]  fixup_bug.part.4+0x37/0x80
[28612.563937]  do_error_trap+0x21c/0x260
[28612.565109]  ? fixup_bug.part.4+0x80/0x80
[28612.566453]  ? check_preemption_disabled+0x34/0x1f0
[28612.567991]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[28612.569534]  ? lockdep_hardirqs_off+0x1cb/0x2b0
[28612.570993]  ? error_entry+0x9a/0x130
[28612.572138]  ? trace_hardirqs_off_caller+0x59/0x1a0
[28612.573710]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[28612.575232]  invalid_op+0x14/0x20
[28612.576387]  ? vprintk_func+0x68/0x1a0
[28612.577827]  ? __static_key_slow_dec_cpuslocked+0x204/0x240
[28612.579662]  ? __static_key_slow_dec_cpuslocked+0x204/0x240
[28612.581781]  ? static_key_disable+0x30/0x30
[28612.583248]  ? static_key_slow_dec+0x57/0x90
[28612.584997]  ? tg_set_dynamic_affinity_mode+0x42/0x70
[28612.586714]  ? cgroup_file_write+0x471/0x6a0
[28612.588162]  ? cgroup_css.part.4+0x100/0x100
[28612.589579]  ? cgroup_css.part.4+0x100/0x100
[28612.591031]  ? kernfs_fop_write+0x2af/0x430
[28612.592625]  ? kernfs_vma_page_mkwrite+0x230/0x230
[28612.594274]  ? __vfs_write+0xef/0x680
[28612.595590]  ? kernel_read+0x110/0x110
[28612.596899]  ? check_preemption_disabled+0x34/0x1f0
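For illustration, one interleaving that can underflow the count (a sketch
inferred from the code above, not taken from the report): after the previous
patch, smart_grid_usage_inc()/dec() run outside auto_affi->lock, so nothing
orders the inc of a starting group against the dec of a concurrent stop:

	CPU0: start_auto_affinity()         CPU1: stop_auto_affinity()
	  lock; period_active = 1; unlock
	                                      lock; period_active == 1, so
	                                      period_active = 0; unlock
	                                      smart_grid_usage_dec();  /* 0 -> -1 */
	  smart_grid_usage_inc();             /* arrives too late */

Holding smart_grid_used_mutex across the whole of both functions makes each
check-and-inc/dec pair atomic with respect to the other.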
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
---
 kernel/sched/fair.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d2acee4..3041652 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5286,6 +5286,8 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 #define AUTO_AFFINITY_DEFAULT_PERIOD_MS	2000
 #define IS_DOMAIN_SET(level, mask)	((1 << (level)) & (mask))
+static DEFINE_MUTEX(smart_grid_used_mutex);
+
 static inline unsigned long cpu_util(int cpu);
 static unsigned long target_load(int cpu, int type);
 static unsigned long capacity_of(int cpu);
@@ -5454,9 +5456,11 @@ void start_auto_affinity(struct auto_affinity *auto_affi)
 {
 	ktime_t delay_ms;
+	mutex_lock(&smart_grid_used_mutex);
 	raw_spin_lock_irq(&auto_affi->lock);
 	if (auto_affi->period_active == 1) {
 		raw_spin_unlock_irq(&auto_affi->lock);
+		mutex_unlock(&smart_grid_used_mutex);
 		return;
 	}
@@ -5469,15 +5473,18 @@ void start_auto_affinity(struct auto_affinity *auto_affi)
 	raw_spin_unlock_irq(&auto_affi->lock);
 	smart_grid_usage_inc();
+	mutex_unlock(&smart_grid_used_mutex);
 }
 void stop_auto_affinity(struct auto_affinity *auto_affi)
 {
 	struct affinity_domain *ad = &auto_affi->ad;
+	mutex_lock(&smart_grid_used_mutex);
 	raw_spin_lock_irq(&auto_affi->lock);
 	if (auto_affi->period_active == 0) {
 		raw_spin_unlock_irq(&auto_affi->lock);
+		mutex_unlock(&smart_grid_used_mutex);
 		return;
 	}
@@ -5488,6 +5495,7 @@ void stop_auto_affinity(struct auto_affinity *auto_affi)
 	raw_spin_unlock_irq(&auto_affi->lock);
 	smart_grid_usage_dec();
+	mutex_unlock(&smart_grid_used_mutex);
 }
 static struct sched_group *sd_find_idlest_group(struct sched_domain *sd)
@@ -5693,7 +5701,7 @@ static void destroy_auto_affinity(struct task_group *tg)
 {
 	struct auto_affinity *auto_affi = tg->auto_affinity;
-	if (auto_affi->mode)
+	if (auto_affi->period_active)
 		smart_grid_usage_dec();
hrtimer_cancel(&auto_affi->period_timer);