hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ CVE: NA
--------------------------------
Add sysctl to enable or disable mem_sampling actions.
A static key controlled by the sysctl is added. When it is set to false, mem_sampling disables new sampling requests and stops the hardware PMU in interrupts; otherwise it permits sampling requests and automatically continues sampling in interrupts.
Signed-off-by: Ze Zuo <zuoze1@huawei.com> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> Signed-off-by: Shuang Yan <yanshuang7@huawei.com> --- include/linux/mem_sampling.h | 8 ++++++ include/linux/sched/sysctl.h | 11 +++++++++ kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 11 +++++++++ mm/mem_sampling.c | 27 +++++++++++++++++--- 5 files changed, 102 insertions(+), 3 deletions(-)
diff --git a/include/linux/mem_sampling.h b/include/linux/mem_sampling.h index b5e5a64e3150..afe93762bf4d 100644 --- a/include/linux/mem_sampling.h +++ b/include/linux/mem_sampling.h @@ -96,4 +96,12 @@ static inline int arm_spe_enabled(void) } #endif /* CONFIG_ARM_SPE */
+extern struct static_key_false mem_sampling_access_hints; +#ifdef CONFIG_MEM_SAMPLING +extern void set_mem_sampling_state(bool enabled); +#else +static inline void set_mem_sampling_state(bool enabled) +{ +} +#endif /* CONFIG_MEM_SAMPLING */ #endif /* __MEM_SAMPLING_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 5cd5b3c579d3..943dfc67fff4 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -126,4 +126,15 @@ int sched_cluster_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif
+#define MEM_SAMPLING_DISABLED 0x0 +#define MEM_SAMPLING_NORMAL 0x1 + +#ifdef CONFIG_MEM_SAMPLING +extern int sysctl_mem_sampling_mode; +int sysctl_mem_sampling_enable(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +#else +#define sysctl_mem_sampling_mode 0 +#endif + #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8a4478fc4123..03b8d737a66f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -19,6 +19,7 @@
#include <asm/irq_regs.h> #include <asm/switch_to.h> +#include <linux/mem_sampling.h> #include <asm/tlb.h>
#include "../workqueue_internal.h" @@ -3540,6 +3541,53 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, #endif #endif
+DEFINE_STATIC_KEY_FALSE(mem_sampling_access_hints); + +#ifdef CONFIG_MEM_SAMPLING +int sysctl_mem_sampling_mode; + +static void __set_mem_sampling_state(bool enabled) +{ + if (enabled) + static_branch_enable(&mem_sampling_access_hints); + else + static_branch_disable(&mem_sampling_access_hints); +} + +void set_mem_sampling_state(bool enabled) +{ + if (!mem_sampling_ops.sampling_start) + return; + if (enabled) + sysctl_mem_sampling_mode = MEM_SAMPLING_NORMAL; + else + sysctl_mem_sampling_mode = MEM_SAMPLING_DISABLED; + __set_mem_sampling_state(enabled); +} + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_mem_sampling_enable(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = sysctl_mem_sampling_mode; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_mem_sampling_state(state); + return err; +} +#endif +#endif + #ifdef CONFIG_SCHEDSTATS
DEFINE_STATIC_KEY_FALSE(sched_schedstats); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f3f43b2def7f..067ac4b6b095 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1865,6 +1865,17 @@ static struct ctl_table kern_table[] = { }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ +#ifdef CONFIG_MEM_SAMPLING + { + .procname = "mem_sampling_enable", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_mem_sampling_enable, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_MEM_SAMPLING */ { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period, diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c index da17d74c28b3..50a2986e3689 100644 --- a/mm/mem_sampling.c +++ b/mm/mem_sampling.c @@ -2,7 +2,6 @@
#define pr_fmt(fmt) "mem_sampling: " fmt
- #include <linux/slab.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -13,6 +12,7 @@
struct mem_sampling_ops_struct mem_sampling_ops;
+static int mem_sampling_override __initdata; struct mem_sampling_record_cb_list_entry { struct list_head list; mem_sampling_record_cb_type cb; @@ -56,6 +56,8 @@ void mem_sampling_record_cb_unregister(mem_sampling_record_cb_type cb)
void mem_sampling_sched_in(struct task_struct *prev, struct task_struct *curr) { + if (!static_branch_unlikely(&mem_sampling_access_hints)) + return;
if (!mem_sampling_ops.sampling_start) return; @@ -87,7 +89,11 @@ void mem_sampling_process(struct mem_sampling_record *record_base, int nr_record } } out: - mem_sampling_ops.sampling_continue(); + /* if mem_sampling_access_hints is set to false, stop sampling */ + if (static_branch_unlikely(&mem_sampling_access_hints)) + mem_sampling_ops.sampling_continue(); + else + mem_sampling_ops.sampling_stop(); }
static inline enum mem_sampling_type_enum mem_sampling_get_type(void) @@ -99,14 +105,27 @@ static inline enum mem_sampling_type_enum mem_sampling_get_type(void) #endif }
+static void __init check_mem_sampling_enable(void) +{ + bool mem_sampling_default = false; + + /* Parsed by setup_mem_sampling. override == 1 enables, -1 disables */ + if (mem_sampling_override) + set_mem_sampling_state(mem_sampling_override == 1); + else + set_mem_sampling_state(mem_sampling_default); +} + static int __init mem_sampling_init(void) { enum mem_sampling_type_enum mem_sampling_type = mem_sampling_get_type();
switch (mem_sampling_type) { case MEM_SAMPLING_ARM_SPE: - if (!arm_spe_enabled()) + if (!arm_spe_enabled()) { + set_mem_sampling_state(false); return -ENODEV; + } mem_sampling_ops.sampling_start = arm_spe_start, mem_sampling_ops.sampling_stop = arm_spe_stop, mem_sampling_ops.sampling_continue = arm_spe_continue, @@ -117,8 +136,10 @@ static int __init mem_sampling_init(void) default: pr_info("unsupport hardware pmu type(%d), disable access hint!\n", mem_sampling_type); + set_mem_sampling_state(false); return -ENODEV; } + check_mem_sampling_enable();
pr_info("mem_sampling layer access profiling setup for NUMA Balancing and DAMON etc.\n"); return 0;