From: Anshuman Khandual <anshuman.khandual@arm.com>
maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8EC9K
CVE: NA
Reference: https://lore.kernel.org/lkml/20230711082455.215983-6-anshuman.khandual@arm.c...
--------------------------------
Branch stack sampling support, i.e. capturing branch records during execution in core perf, rides along with normal HW events being scheduled on the PMU. This prepares the ARMV8 PMU framework for branch stack support on relevant PMUs with the required HW implementation.
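For context, such events request branch records through the generic perf ABI by setting PERF_SAMPLE_BRANCH_STACK in the event attributes. A minimal userspace sketch (illustrative only, not part of this patch, and assuming a PMU with the required branch stack support) could look like:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_branch_sampling_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_HARDWARE;
            attr.size = sizeof(attr);
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            /* Request branch records along with each sample */
            attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                      PERF_SAMPLE_BRANCH_USER;

            /* Profile the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

The perf tool drives the same path via 'perf record -j any,u', and has_branch_stack() used throughout the changes below keys off exactly this PERF_SAMPLE_BRANCH_STACK bit.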
ARMV8 PMU hardware support for branch stack sampling is indicated via a new feature flag called 'has_branch_stack', which can be ascertained during probing. This modifies the current gate in armpmu_event_init(), which unconditionally blocks branch stack sampling based perf events, and instead allows such perf events to be initialized on PMU hardware with the required support.
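The flag is expected to be set from the PMU probe path once the hardware has been detected. Purely as a hedged sketch of that flow (the real detection logic belongs to the PMU-specific armv8pmu_branch_probe() implementation added later; brbe_present() below is a hypothetical stand-in):

    static void armv8pmu_branch_probe(struct arm_pmu *armpmu)
    {
            /* Hypothetical check standing in for real BRBE detection */
            if (brbe_present())
                    armpmu->has_branch_stack = 1;
    }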
Branch stack sampling is enabled and disabled along with regular PMU events, so the relevant hardware also needs to be driven in tandem. This adds the required function callbacks, in armv8pmu_branch_xxx() format, to drive the PMU branch stack hardware when supported. It also adds fallback stub definitions of these callbacks for PMUs without the required support.
Finally this adds a new buffer, i.e. 'struct branch_records', which can hold captured branch records during PMU IRQ processing before they are passed on to the perf ring buffer. These buffers are per-CPU and dynamically allocated only for ARMV8 PMUs with the required support. Each buffer can hold 'MAX_BRANCH_RECORDS' branch record entries.
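For scale: in the generic perf ABI, 'struct perf_branch_entry' is three 64-bit words (from, to, and one word of flag bitfields), so each per-CPU buffer costs roughly 64 * 24 = 1536 bytes plus the small 'struct perf_branch_stack' header, a modest per-CPU footprint.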
This enables PERF_ATTACH_TASK_DATA for branch stack sampling perf events, making them hold context branch records in their task_ctx_data. This will be used to stash branch records that would otherwise be lost when a given process schedules out after a short run on the CPU without an event overflow.
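The layout of that task_ctx_data payload is left to the PMU-specific backend; this patch only wires up the allocation hooks (armv8pmu_task_ctx_cache_alloc()/armv8pmu_task_ctx_cache_free(), stubbed in the header changes below). Purely as an illustration with hypothetical field names, such a context could stash:

    /*
     * Hypothetical sketch only: the real layout arrives with the
     * PMU-specific BRBE backend, not with this patch.
     */
    struct arm64_perf_task_context {
            /* Records saved by armv8pmu_branch_save() at sched out */
            struct branch_records   stored;
            /* Number of valid entries in 'stored' */
            int                     nr_stored;
    };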
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Junhao He <hejunhao3@huawei.com>
---
 arch/arm64/include/asm/perf_event.h | 47 +++++++++++++++
 arch/arm64/kernel/perf_event.c      | 90 ++++++++++++++++++++++++++++-
 drivers/perf/arm_pmu.c              | 19 +++++-
 include/linux/perf/arm_pmu.h        | 22 ++++++-
 4 files changed, 174 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 60731f602d3e..74a36ebdcd26 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -239,12 +239,59 @@
 /* PMMIR_EL1.SLOTS mask */
 #define ARMV8_PMU_SLOTS_MASK	0xff
 
+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
 #ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
 #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
+static inline void armv8pmu_branch_reset(void)
+{
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu)
+{
+}
+
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+	return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx)
+{
+}
+
+static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu)
+{
+	return 0;
+}
+
+static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu)
+{
+}
 #endif
 
 #define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cdb3d4549b3a..abf31b78208e 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -715,10 +715,16 @@ static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter
 	 */
 	armv8pmu_enable_event_counter(event);
+
+	if (has_branch_stack(event))
+		armv8pmu_branch_enable(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
+	if (has_branch_stack(event))
+		armv8pmu_branch_disable(event);
+
 	/*
 	 * Disable counter
 	 */
@@ -792,6 +798,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		/*
+		 * PMU IRQ should remain asserted until all branch records
+		 * are captured and processed into struct perf_sample_data.
+		 */
+		if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
+			armv8pmu_branch_read(cpuc, event);
+			data.br_stack = &cpuc->branches->branch_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
+
 		/*
 		 * Perf event overflow will queue the processing of the event as
 		 * an irq_work which will be taken care of in the handling of
@@ -871,6 +887,24 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
 	clear_bit(idx - 1, cpuc->used_mask);
 }
 
+static void armv8pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
+	void *task_ctx = ctx ? ctx->task_ctx_data : NULL;
+
+	if (armpmu->has_branch_stack) {
+		/* Save branch records in task_ctx on sched out */
+		if (task_ctx && !sched_in) {
+			armv8pmu_branch_save(armpmu, task_ctx);
+			return;
+		}
+
+		/* Reset branch records on sched in */
+		if (sched_in)
+			armv8pmu_branch_reset();
+	}
+}
+
 /*
  * Add an event filter to a given event.
  */
@@ -947,6 +981,9 @@ static void armv8pmu_reset(void *info)
 		pmcr |= ARMV8_PMU_PMCR_LP;
 
 	armv8pmu_pmcr_write(pmcr);
+
+	if (cpu_pmu->has_branch_stack)
+		armv8pmu_branch_reset();
 }
 
 static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -964,6 +1001,12 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
 				       &armv8_pmuv3_perf_cache_map,
 				       ARMV8_PMU_EVTYPE_EVENT);
 
+	if (has_branch_stack(event)) {
+		event->attach_state |= PERF_ATTACH_TASK_DATA;
+		if (!armv8pmu_branch_attr_valid(event))
+			return -EOPNOTSUPP;
+	}
+
 	if (armv8pmu_event_is_64bit(event))
 		event->hw.flags |= ARMPMU_EVT_64BIT;
@@ -1056,6 +1099,35 @@ static void __armv8pmu_probe_pmu(void *info)
 		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
 	else
 		cpu_pmu->reg_pmmir = 0;
+	armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+	struct branch_records __percpu *records;
+	int cpu;
+
+	records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
+	if (!records)
+		return -ENOMEM;
+
+	/*
+	 * percpu memory allocated for 'records' gets completely consumed
+	 * here, and never required to be freed up later. So permanently
+	 * losing access to this anchor i.e 'records' is acceptable.
+	 *
+	 * Otherwise this allocation handle would have to be saved up for
+	 * free_percpu() release later if required.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events_cpu;
+		struct branch_records *records_cpu;
+
+		events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+		records_cpu = per_cpu_ptr(records, cpu);
+		events_cpu->branches = records_cpu;
+	}
+	return 0;
 }
 
 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1072,7 +1144,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 	if (ret)
 		return ret;
 
-	return probe.present ? 0 : -ENODEV;
+	if (!probe.present)
+		return -ENODEV;
+
+	if (cpu_pmu->has_branch_stack) {
+		ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu);
+		if (ret)
+			return ret;
+
+		ret = branch_records_alloc(cpu_pmu);
+		if (ret) {
+			armv8pmu_task_ctx_cache_free(cpu_pmu);
+			return ret;
+		}
+	}
+	return 0;
 }
 
 static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
@@ -1097,6 +1183,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 	cpu_pmu->reset			= armv8pmu_reset;
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 	cpu_pmu->filter_match		= armv8pmu_filter_match;
+	cpu_pmu->sched_task		= armv8pmu_sched_task;
+	cpu_pmu->branch_reset		= armv8pmu_branch_reset;
 
 	cpu_pmu->name			= name;
 	cpu_pmu->map_event		= map_event;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 58e266c3dea8..3df2f9ba388c 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -313,6 +313,11 @@ armpmu_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
+	WARN_ON_ONCE(!hw_events->brbe_users);
+	hw_events->brbe_users--;
+	if (!hw_events->brbe_users)
+		hw_events->brbe_context = NULL;
+
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
 	armpmu->clear_event_idx(hw_events, event);
@@ -329,6 +334,13 @@ armpmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
+	if (event->ctx->task && hw_events->brbe_context != event->ctx) {
+		hw_events->brbe_context = event->ctx;
+		if (armpmu->branch_reset)
+			armpmu->branch_reset();
+	}
+	hw_events->brbe_users++;
+
 	/* An event following a process won't be stopped earlier */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
 		return -ENOENT;
@@ -508,8 +520,11 @@ static int armpmu_event_init(struct perf_event *event)
 	    !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
 		return -ENOENT;
 
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
+	/*
+	 * Branch stack sampling events are allowed
+	 * only on PMU which has required support.
+	 */
+	if (has_branch_stack(event) && !armpmu->has_branch_stack)
 		return -EOPNOTSUPP;
 
 	if (armpmu->map_event(event) == -ENOENT)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index f3a45200c354..2096aec5fe3f 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -41,6 +41,18 @@
 	},						\
 }
 
+/*
+ * Maximum branch record entries which could be processed
+ * for core perf branch stack sampling support, regardless
+ * of the hardware support available on a given ARM PMU.
+ */
+#define MAX_BRANCH_RECORDS 64
+
+struct branch_records {
+	struct perf_branch_stack	branch_stack;
+	struct perf_branch_entry	branch_entries[MAX_BRANCH_RECORDS];
+};
+
 /* The events for a given PMU register set. */
 struct pmu_hw_events {
 	/*
@@ -67,6 +79,11 @@ struct pmu_hw_events {
 	struct arm_pmu		*percpu_pmu;
 
 	int irq;
+
+	struct branch_records	*branches;
+	void			*brbe_context;
+	unsigned int		brbe_users;
+	unsigned long		brbe_sample_type;
 };
 
 enum armpmu_attr_groups {
@@ -98,9 +115,12 @@ struct arm_pmu {
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
 	void		(*sched_task)(struct perf_event_context *ctx, bool sched_in);
+	void		(*branch_reset)(void);
 	int		(*filter_match)(struct perf_event *event);
 	int		num_events;
-	bool		secure_access; /* 32-bit ARM only */
+	unsigned int	secure_access	: 1, /* 32-bit ARM only */
+			has_branch_stack: 1, /* 64-bit ARM only */
+			reserved	: 30;
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE	0x4000