
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC8KS8
CVE: NA

--------------------------------

Introduce the concept of `arm_spe_user` to represent the different SPE
usage scenarios, separating basic SPE operations from the perf driver
implementation. This change builds on the previous patches that
refactor the SPE framework to support multiple independent SPE drivers.

Core SPE operations such as interrupt handling, enabling, and disabling
are now isolated from perf, so the SPE feature can be consumed by two
main types of users:

1. Kernel subsystems that require memory access sampling, such as NUMA
   balancing and DAMON. These users interact with SPE through an
   abstraction layer (e.g. `mem_sampling`), which starts and stops SPE
   tracing independently of perf.

2. User space via the perf subsystem, which continues to operate the
   SPE driver as before, but now through a cleaner interface.

This abstraction allows the SPE infrastructure to be shared flexibly,
without tight coupling between perf and other kernel modules and
without intrusive changes to perf internals: perf controls the SPE
driver through the same decoupled interface.

Signed-off-by: Ze Zuo <zuoze1@huawei.com>
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
 drivers/perf/arm_spe_pmu.c   | 56 +++++++++++++++++++++++++++++++++++-
 include/linux/mem_sampling.h | 19 ++++++++++++
 mm/mem_sampling.c            |  8 ++++++
 3 files changed, 82 insertions(+), 1 deletion(-)
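Note (illustration only, not part of the applied diff): a kernel-side
consumer is expected to claim the SPE unit roughly as sketched below.
mm_spe_start() is a hypothetical stand-in for the mem_sampling backend
operation wired up earlier in this series; only arm_spe_set_user() and
the SPE_USER_* values come from this patch.

	#include <linux/mem_sampling.h>

	static void example_claim_spe_for_kernel(void)
	{
		/*
		 * arm_spe_user is per-CPU state, so this must run on the
		 * CPU whose SPE unit is being claimed (e.g. from an IPI
		 * or with preemption disabled).
		 */
		arm_spe_set_user(SPE_USER_MEM_SAMPLING);
		mm_spe_start();	/* hypothetical backend op */
	}

On the interrupt path the same per-CPU flag decides who consumes the
records: spe_user_is_mem_sampling() routes them to
mem_sampling_process(), while spe_user_is_spe() keeps the existing perf
AUX path. Perf reclaims the unit in arm_spe_pmu_start() and hands it
back in arm_spe_pmu_stop(), so ownership is arbitrated per CPU.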
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 6a7221d272e4..0fc9600f14a4 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -33,6 +33,9 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+#include <linux/mem_sampling.h>
+#endif
 
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
@@ -591,13 +594,21 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
	 * If we've lost data, disable profiling and also set the PARTIAL
	 * flag to indicate that the last record is corrupted.
	 */
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_spe() && FIELD_GET(PMBSR_EL1_DL, pmbsr))
+#else
	if (FIELD_GET(PMBSR_EL1_DL, pmbsr))
+#endif
		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
					     PERF_AUX_FLAG_PARTIAL);
 
	/* Report collisions to userspace so that it can up the period */
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_spe() && FIELD_GET(PMBSR_EL1_COLL, pmbsr))
+#else
	if (FIELD_GET(PMBSR_EL1_COLL, pmbsr))
+#endif
		perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
 
	/* We only expect buffer management events */
	switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) {
@@ -630,7 +641,12 @@ arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
	ret = SPE_PMU_BUF_FAULT_ACT_FATAL;
 
 out_stop:
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_spe())
+		arm_spe_perf_aux_output_end(handle);
+#else
	arm_spe_perf_aux_output_end(handle);
+#endif
	return ret;
 }
 
@@ -640,7 +656,11 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
	struct perf_event *event = handle->event;
	enum arm_spe_pmu_buf_fault_action act;
 
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_spe() && !perf_get_aux(handle))
+#else
	if (!perf_get_aux(handle))
+#endif
		return IRQ_NONE;
 
	act = arm_spe_pmu_buf_get_fault_act(handle);
@@ -651,7 +671,12 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
	 * Ensure perf callbacks have completed, which may disable the
	 * profiling buffer in response to a TRUNCATION flag.
	 */
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_spe())
+		irq_work_run();
+#else
	irq_work_run();
+#endif
 
	switch (act) {
	case SPE_PMU_BUF_FAULT_ACT_FATAL:
@@ -671,6 +696,12 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
	 * PMBPTR might be misaligned, but we'll burn that bridge
	 * when we get to it.
	 */
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	if (spe_user_is_mem_sampling()) {
+		mem_sampling_process();
+		break;
+	}
+#endif
	if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
		arm_spe_perf_aux_output_begin(handle, event);
		isb();
@@ -766,6 +797,10 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags)
	struct hw_perf_event *hwc = &event->hw;
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
 
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	arm_spe_set_user(SPE_USER_PERF);
+#endif
+
	hwc->state = 0;
	arm_spe_perf_aux_output_begin(handle, event);
	if (hwc->state)
@@ -805,8 +840,16 @@ static void arm_spe_pmu_stop(struct perf_event *event, int flags)
	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
 
	/* If we're already stopped, then nothing to do */
-	if (hwc->state & PERF_HES_STOPPED)
+	if (hwc->state & PERF_HES_STOPPED) {
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+		/*
+		 * PERF_HES_STOPPED may be set in arm_spe_perf_aux_output_begin(),
+		 * so hand the SPE unit back to mem_sampling here as well.
+		 */
+		arm_spe_set_user(SPE_USER_MEM_SAMPLING);
+#endif
		return;
+	}
 
	/* Stop all trace generation */
	arm_spe_pmu_disable_and_drain_local();
@@ -837,6 +880,9 @@ static void arm_spe_pmu_stop(struct perf_event *event, int flags)
	}
 
	hwc->state |= PERF_HES_STOPPED;
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+	arm_spe_set_user(SPE_USER_MEM_SAMPLING);
+#endif
 }
 
 static int arm_spe_pmu_add(struct perf_event *event, int flags)
@@ -1314,6 +1360,14 @@ static int arm_spe_pmu_device_probe(struct platform_device *pdev)
	return ret;
 }
 
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+void arm_spe_set_user(enum arm_spe_user_e user)
+{
+	__this_cpu_write(arm_spe_user, user);
+	__arm_spe_pmu_reset_local();
+}
+#endif
+
 static int arm_spe_pmu_device_remove(struct platform_device *pdev)
 {
	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
diff --git a/include/linux/mem_sampling.h b/include/linux/mem_sampling.h
index 3e000a0deced..42dad6438531 100644
--- a/include/linux/mem_sampling.h
+++ b/include/linux/mem_sampling.h
@@ -28,6 +28,12 @@ enum mem_sampling_op_type {
	MEM_SAMPLING_ST = 1 << 1,
 };
 
+enum arm_spe_user_e {
+	SPE_USER_PERF,
+	SPE_USER_MEM_SAMPLING,
+};
+DECLARE_PER_CPU(enum arm_spe_user_e, arm_spe_user);
+
 struct mem_sampling_record {
	enum mem_sampling_sample_type	type;
	int err;
@@ -79,4 +85,17 @@ static inline int mm_spe_getnum_record(void) { return 0; }
 static inline struct mm_spe_buf *mm_spe_getbuf_addr(void) { return NULL; }
 static inline int mm_spe_enabled(void) { return 0; }
 #endif /* CONFIG_ARM_SPE_MEM_SAMPLING */
+
+#if IS_ENABLED(CONFIG_MEM_SAMPLING)
+void mem_sampling_process(void);
+void arm_spe_set_user(enum arm_spe_user_e user);
+static inline bool spe_user_is_spe(void)
+{
+	return __this_cpu_read(arm_spe_user) == SPE_USER_PERF;
+}
+static inline bool spe_user_is_mem_sampling(void)
+{
+	return __this_cpu_read(arm_spe_user) == SPE_USER_MEM_SAMPLING;
+}
+#endif /* CONFIG_MEM_SAMPLING */
 #endif /* __MEM_SAMPLING_H */
diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c
index 551c18452b2e..e9b2e14d28f1 100644
--- a/mm/mem_sampling.c
+++ b/mm/mem_sampling.c
@@ -21,6 +21,10 @@
 
 struct mem_sampling_ops_struct mem_sampling_ops;
 
+/* Keep track of which user currently owns the SPE. */
+DEFINE_PER_CPU(enum arm_spe_user_e, arm_spe_user);
+EXPORT_PER_CPU_SYMBOL_GPL(arm_spe_user);
+
 /*
  * Callbacks should be registered using mem_sampling_record_cb_register()
  * by NUMA, DAMON and etc during their initialisation.
@@ -103,6 +107,7 @@ static inline enum mem_sampling_type_enum mem_sampling_get_type(void)
 static int __init mem_sampling_init(void)
 {
	enum mem_sampling_type_enum mem_sampling_type = mem_sampling_get_type();
+	int cpu;
 
	switch (mem_sampling_type) {
	case MEM_SAMPLING_ARM_SPE:
@@ -121,6 +126,9 @@ static int __init mem_sampling_init(void)
		return -ENODEV;
	}
 
+	for_each_possible_cpu(cpu)
+		per_cpu(arm_spe_user, cpu) = SPE_USER_MEM_SAMPLING;
+
	return 0;
 }
 late_initcall(mem_sampling_init);
-- 
2.25.1