hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ CVE: NA
--------------------------------
Add sysctl to enable or disable mem_sampling actions for numa_balance.
A static key controlled by sysctl is added. When this is set to false, numa balance will use the native prot_none scheme, and when set to true, it will use hardware sampling instead of the native scheme.
Signed-off-by: Ze Zuo zuoze1@huawei.com --- mm/mem_sampling.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 3 deletions(-)
diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c index 8423b1c1fa2c..638fbf9db404 100644 --- a/mm/mem_sampling.c +++ b/mm/mem_sampling.c @@ -29,6 +29,8 @@ struct static_key_false mem_sampling_access_hints;
#define MEM_SAMPLING_DISABLED 0x0 #define MEM_SAMPLING_NORMAL 0x1 +#define NUMA_BALANCING_HW_DISABLED 0x0 +#define NUMA_BALANCING_HW_NORMAL 0x1
static int mem_sampling_override __initdata; struct mem_sampling_record_cb_list_entry { @@ -298,10 +300,60 @@ static inline enum mem_sampling_type_enum mem_sampling_get_type(void) #endif }
-DEFINE_STATIC_KEY_FALSE(mem_sampling_access_hints); - DEFINE_STATIC_KEY_FALSE(sched_numabalancing_mem_sampling);
+#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + +int sysctl_numa_balacing_hw_mode; + +static void __set_numabalancing_mem_sampling_state(bool enabled) +{ + if (enabled) { + numa_balancing_mem_sampling_cb_register(); + static_branch_enable(&sched_numabalancing_mem_sampling); + } else { + numa_balancing_mem_sampling_cb_unregister(); + static_branch_disable(&sched_numabalancing_mem_sampling); + } +} + +void set_numabalancing_mem_sampling_state(bool enabled) +{ + if (enabled) + sysctl_numa_balacing_hw_mode = NUMA_BALANCING_HW_NORMAL; + else + sysctl_numa_balacing_hw_mode = NUMA_BALANCING_HW_DISABLED; + __set_numabalancing_mem_sampling_state(enabled); +} + +#ifdef CONFIG_PROC_SYSCTL + +int sysctl_numabalancing_mem_sampling(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = static_branch_likely(&sched_numabalancing_mem_sampling); + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + if (write && static_branch_likely(&mem_sampling_access_hints)) + set_numabalancing_mem_sampling_state(state); + + return err; +} +#endif +#endif + +DEFINE_STATIC_KEY_FALSE(mem_sampling_access_hints); + int sysctl_mem_sampling_mode;
static void __set_mem_sampling_state(bool enabled) @@ -321,6 +373,11 @@ void set_mem_sampling_state(bool enabled) else sysctl_mem_sampling_mode = MEM_SAMPLING_DISABLED; __set_mem_sampling_state(enabled); + +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + if (!enabled) + set_numabalancing_mem_sampling_state(enabled); +#endif }
#ifdef CONFIG_PROC_SYSCTL @@ -355,6 +412,17 @@ static struct ctl_table ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#ifdef CONFIG_NUMABALANCING_MEM_SAMPLING + { + .procname = "numa_balancing_mem_sampling", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numabalancing_mem_sampling, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_NUMABALANCING_MEM_SAMPLING */ {} };
@@ -405,7 +473,7 @@ static int __init mem_sampling_init(void) check_mem_sampling_enable();
if (!register_sysctl_table(mem_sampling_dir_table)) { - pr_err("register page cache limit sysctl failed."); + pr_err("register mem sampling sysctl failed."); return -ENOMEM; }