hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ CVE: NA
--------------------------------
Add a sysctl to enable or disable the use of mem_sampling by NUMA balancing.
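A static key, sched_numabalancing_mem_sampling, controlled by the new numa_balancing_mem_sampling sysctl is added. When the key is set to false, NUMA balancing uses the native prot_none scheme; when it is set to true, NUMA balancing uses hardware sampling instead of the native scheme. A write to the sysctl takes effect only while the mem_sampling_access_hints static key is enabled, and disabling mem_sampling through set_mem_sampling_state() also disables this mode.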
Signed-off-by: Ze Zuo zuoze1@huawei.com --- include/linux/sched/sysctl.h | 11 +++++++ kernel/sched/core.c | 56 ++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 11 +++++++ 3 files changed, 78 insertions(+)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 943dfc67fff4..a97d73a6e426 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -137,4 +137,15 @@ int sysctl_mem_sampling_enable(struct ctl_table *table, int write, void *buffer, #define sysctl_mem_sampling_mode 0 #endif
+#define NUMA_BALANCING_HW_DISABLED 0x0 +#define NUMA_BALANCING_HW_NORMAL 0x1 + +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING +extern int sysctl_numa_balacing_hw_mode; +int sysctl_numabalancing_mem_sampling(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +#else +#define sysctl_numa_balacing_hw_mode 0 +#endif + #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 589594e1f16f..2580de7f82b9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3542,6 +3542,57 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, #endif
DEFINE_STATIC_KEY_FALSE(sched_numabalancing_mem_sampling); + +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + +int sysctl_numa_balacing_hw_mode; + +static void __set_numabalancing_mem_sampling_state(bool enabled) +{ + if (enabled) { + numa_balancing_mem_sampling_cb_register(); + static_branch_enable(&sched_numabalancing_mem_sampling); + } else { + numa_balancing_mem_sampling_cb_unregister(); + static_branch_disable(&sched_numabalancing_mem_sampling); + } +} + +void set_numabalancing_mem_sampling_state(bool enabled) +{ + if (enabled) + sysctl_numa_balacing_hw_mode = NUMA_BALANCING_HW_NORMAL; + else + sysctl_numa_balacing_hw_mode = NUMA_BALANCING_HW_DISABLED; + __set_numabalancing_mem_sampling_state(enabled); +} + +#ifdef CONFIG_PROC_SYSCTL + +int sysctl_numabalancing_mem_sampling(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = static_branch_likely(&sched_numabalancing_mem_sampling); + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + if (write && static_branch_likely(&mem_sampling_access_hints)) + set_numabalancing_mem_sampling_state(state); + + return err; +} +#endif +#endif + DEFINE_STATIC_KEY_FALSE(mem_sampling_access_hints);
#ifdef CONFIG_MEM_SAMPLING @@ -3564,6 +3615,11 @@ void set_mem_sampling_state(bool enabled) else sysctl_mem_sampling_mode = MEM_SAMPLING_DISABLED; __set_mem_sampling_state(enabled); + +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + if (!enabled) + set_numabalancing_mem_sampling_state(enabled); +#endif }
#ifdef CONFIG_PROC_SYSCTL diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 067ac4b6b095..d49a149abed3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1876,6 +1876,17 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_MEM_SAMPLING */ +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + { + .procname = "numa_balancing_mem_sampling", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numabalancing_mem_sampling, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_NUMABALANCING_MEM_SAMPLING */ { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period,