
hulk inclusion category:feature bugzilla:https://gitee.com/openeuler/kernel/issues/IC8KS8 CVE: NA -------------------------------- Extend the /proc/sys/kernel/mem_sampling_enable sysctl interface to support fine-grained control over mem_sampling and its integration with NUMA balancing. Supported values: 0 - Disable mem_sampling entirely 1 - Enable mem_sampling (used by perf or other subsystems) 2 - Enable mem_sampling and allow NUMA balancing to consume access hints Although mem_sampling and perf both rely on the same hardware sampling resources (e.g. ARM SPE), perf is allowed to preempt mem_sampling: if perf starts sampling, mem_sampling will be automatically stopped to avoid resource conflicts. This ensures that tools using perf have priority, while still allowing NUMA and other kernel subsystems to use sampling data when perf is idle. The sysctl interface allows dynamic switching at runtime without reboot. Signed-off-by: Ze Zuo <zuoze1@huawei.com> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> Signed-off-by: Shuang Yan <yanshuang7@huawei.com> --- mm/mem_sampling.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c index a9832924509c..f6c84b2986f9 100644 --- a/mm/mem_sampling.c +++ b/mm/mem_sampling.c @@ -25,11 +25,16 @@ #define MEM_SAMPLING_DISABLED 0x0 #define MEM_SAMPLING_NORMAL 0x1 +#define MEM_SAMPLING_MIN_VALUE 0 +#define MEM_SAMPLING_MAX_VALUE 2 struct mem_sampling_ops_struct mem_sampling_ops; static int mem_sampling_override __initdata; static int sysctl_mem_sampling_mode; +static const int mem_sampling_min_value = MEM_SAMPLING_MIN_VALUE; +static const int mem_sampling_max_value = MEM_SAMPLING_MAX_VALUE; + /* keep track of who use the SPE */ DEFINE_PER_CPU(enum arm_spe_user_e, arm_spe_user); EXPORT_PER_CPU_SYMBOL_GPL(arm_spe_user); @@ -269,10 +274,13 @@ static void numa_balancing_mem_sampling_cb_unregister(void) } static void
set_numabalancing_mem_sampling_state(bool enabled) { - if (enabled) + if (enabled) { numa_balancing_mem_sampling_cb_register(); - else + static_branch_enable(&sched_numabalancing_mem_sampling); + } else { numa_balancing_mem_sampling_cb_unregister(); + static_branch_disable(&sched_numabalancing_mem_sampling); + } } #else static inline void set_numabalancing_mem_sampling_state(bool enabled) { } @@ -395,18 +403,40 @@ static int proc_mem_sampling_enable(struct ctl_table *table, int write, { struct ctl_table t; int err; - int state = sysctl_mem_sampling_mode; + int state = 0; + + if (static_branch_likely(&mem_sampling_access_hints)) + state = 1; + if (static_branch_likely(&sched_numabalancing_mem_sampling)) + state = 2; if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; t = *table; t.data = &state; + t.extra1 = (int *)&mem_sampling_min_value; + t.extra2 = (int *)&mem_sampling_max_value; err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (err < 0) return err; - if (write) - set_mem_sampling_state(state); + if (write) { + switch (state) { + case 0: + set_mem_sampling_state(false); + break; + case 1: + set_mem_sampling_state(false); + set_mem_sampling_state(true); + break; + case 2: + set_mem_sampling_state(true); + set_numabalancing_mem_sampling_state(true); + break; + default: + return -EINVAL; + } + } return err; } @@ -418,7 +448,7 @@ static struct ctl_table mem_sampling_sysctls[] = { .mode = 0644, .proc_handler = proc_mem_sampling_enable, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = (int *)&mem_sampling_max_value, }, {} }; -- 2.25.1