hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ CVE: NA
--------------------------------
Use SPE (Statistical Profiling Extension) feature present in ARM processors for memory access tracking.
Each cpu core has two buffers, one for storing the raw SPE data and the other for decoded records.
Signed-off-by: Ze Zuo zuoze1@huawei.com Signed-off-by: Tong Tiangen tongtiangen@huawei.com --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/arm/Kconfig | 2 + drivers/arm/spe/Kconfig | 10 + drivers/arm/spe/Makefile | 2 + drivers/arm/spe/spe-decoder/Makefile | 2 + drivers/arm/spe/spe-decoder/arm-spe-decoder.c | 213 +++++ drivers/arm/spe/spe-decoder/arm-spe-decoder.h | 74 ++ .../arm/spe/spe-decoder/arm-spe-pkt-decoder.c | 227 +++++ .../arm/spe/spe-decoder/arm-spe-pkt-decoder.h | 153 ++++ drivers/arm/spe/spe.c | 790 ++++++++++++++++++ drivers/arm/spe/spe.h | 120 +++ drivers/perf/arm_pmu_acpi.c | 2 +- 13 files changed, 1597 insertions(+), 1 deletion(-) create mode 100644 drivers/arm/Kconfig create mode 100644 drivers/arm/spe/Kconfig create mode 100644 drivers/arm/spe/Makefile create mode 100644 drivers/arm/spe/spe-decoder/Makefile create mode 100644 drivers/arm/spe/spe-decoder/arm-spe-decoder.c create mode 100644 drivers/arm/spe/spe-decoder/arm-spe-decoder.h create mode 100644 drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.c create mode 100644 drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.h create mode 100644 drivers/arm/spe/spe.c create mode 100644 drivers/arm/spe/spe.h
diff --git a/drivers/Kconfig b/drivers/Kconfig index f765c3a688b6..840137d90167 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -244,4 +244,6 @@ source "drivers/ub/Kconfig"
source "drivers/cpuinspect/Kconfig"
+source "drivers/arm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 4e390005ded7..8264e814d3d6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -195,3 +195,4 @@ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ obj-$(CONFIG_ROH) += roh/ obj-$(CONFIG_UB) += ub/ +obj-$(CONFIG_ARM_SPE) += arm/spe/ diff --git a/drivers/arm/Kconfig b/drivers/arm/Kconfig new file mode 100644 index 000000000000..a0c7a25220cc --- /dev/null +++ b/drivers/arm/Kconfig @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +source "drivers/arm/spe/Kconfig" diff --git a/drivers/arm/spe/Kconfig b/drivers/arm/spe/Kconfig new file mode 100644 index 000000000000..5ede60666349 --- /dev/null +++ b/drivers/arm/spe/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# arm spe dirver +# +config ARM_SPE + bool "In-kernel SPE for driver for page access profiling" + depends on ARM64 && !ARM_SPE_PMU + help + Enable support for the ARMv8.2 Statistical Profiling Extension, which + provides periodic sampling of operations in the CPU pipeline. diff --git a/drivers/arm/spe/Makefile b/drivers/arm/spe/Makefile new file mode 100644 index 000000000000..46c43e5974e1 --- /dev/null +++ b/drivers/arm/spe/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARM_SPE) += spe.o spe-decoder/arm-spe-decoder.o spe-decoder/arm-spe-pkt-decoder.o diff --git a/drivers/arm/spe/spe-decoder/Makefile b/drivers/arm/spe/spe-decoder/Makefile new file mode 100644 index 000000000000..4fdae5d38186 --- /dev/null +++ b/drivers/arm/spe/spe-decoder/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y := arm-spe-decoder.o arm-spe-pkt-decoder.o diff --git a/drivers/arm/spe/spe-decoder/arm-spe-decoder.c b/drivers/arm/spe/spe-decoder/arm-spe-decoder.c new file mode 100644 index 000000000000..c9eeb74fecce --- /dev/null +++ b/drivers/arm/spe/spe-decoder/arm-spe-decoder.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arm_spe_decoder.c: ARM SPE support + * Copyright (c) 2017-2018, Arm Ltd. + * Copyright (c) 2024-2025, Huawei Technologies Ltd. + */ + +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/slab.h> + +#include "arm-spe-decoder.h" + +static u64 arm_spe_calc_ip(int index, u64 payload) +{ + u64 ns, el, val; + u32 seen_idx; + + /* Instruction virtual address or Branch target address */ + if (index == SPE_ADDR_PKT_HDR_INDEX_INS || + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { + ns = SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); + + /* Fill highest byte for EL1 or EL2 (VHE) mode */ + if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; + + /* Data access virtual address */ + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { + + /* Clean tags */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); + + /* + * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet" + * defines the data virtual address payload format, the top byte + * (bits [63:56]) is assigned as top-byte tag; so we only can + * retrieve address value from bits [55:0]. + * + * According to Documentation/arm64/memory.rst, if detects the + * specific pattern in bits [55:52] of payload which falls in + * the kernel space, should fixup the top byte and this allows + * perf tool to parse DSO symbol for data address correctly. + * + * For this reason, if detects the bits [55:52] is 0xf, will + * fill 0xff into the top byte. + */ + val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload); + if ((val & 0xf0ULL) == 0xf0ULL) + payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; + + /* Data access physical address */ + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { + /* Clean highest byte */ + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); + } else { + seen_idx = 0; + if (!(seen_idx & BIT(index))) { + seen_idx |= BIT(index); + pr_warn("ignoring unsupported address packet index: 0x%x\n", index); + } + } + + return payload; +} + + +void arm_spe_decoder_free(struct arm_spe_decoder *decoder) +{ + kfree(decoder); +} + +static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder) +{ + int ret; + + do { + if (!decoder->len) + return 0; + + ret = arm_spe_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret <= 0) { + /* Move forward for 1 byte */ + decoder->buf += 1; + decoder->len -= 1; + return -EBADMSG; + } + + decoder->buf += ret; + decoder->len -= ret; + } while (decoder->packet.type == ARM_SPE_PAD); + return 1; +} + +static int arm_spe_read_record(struct arm_spe_decoder *decoder) +{ + int err; + int idx; + u64 payload, ip; + + memset(&decoder->record, 0x0, sizeof(decoder->record)); + decoder->record.context_id = (u64)-1; + while (1) { + err = arm_spe_get_next_packet(decoder); + if (err <= 0) + return err; + + idx = decoder->packet.index; + payload = decoder->packet.payload; + + switch (decoder->packet.type) { + case ARM_SPE_TIMESTAMP: + decoder->record.timestamp = payload; + return 1; + case ARM_SPE_END: + return 1; + case ARM_SPE_ADDRESS: + ip = arm_spe_calc_ip(idx, payload); + if (idx == SPE_ADDR_PKT_HDR_INDEX_INS) + decoder->record.from_ip = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) + decoder->record.to_ip = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) + decoder->record.virt_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) + decoder->record.phys_addr = ip; + break; + case ARM_SPE_COUNTER: + if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) + decoder->record.latency = payload; + break; + case ARM_SPE_CONTEXT: + decoder->record.context_id = payload; + break; + case ARM_SPE_OP_TYPE: + if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { + if (payload & 0x1) + decoder->record.op = ARM_SPE_ST; + else + decoder->record.op = ARM_SPE_LD; + } + break; + case ARM_SPE_EVENTS: + if (payload & BIT(EV_L1D_REFILL)) + decoder->record.type |= ARM_SPE_L1D_MISS; + + if (payload & BIT(EV_L1D_ACCESS)) + decoder->record.type |= ARM_SPE_L1D_ACCESS; + + if (payload & BIT(EV_TLB_WALK)) + decoder->record.type |= ARM_SPE_TLB_MISS; + + if (payload & BIT(EV_TLB_ACCESS)) + decoder->record.type |= ARM_SPE_TLB_ACCESS; + + if (payload & BIT(EV_LLC_MISS)) + decoder->record.type |= ARM_SPE_LLC_MISS; + + if (payload & BIT(EV_LLC_ACCESS)) + decoder->record.type |= ARM_SPE_LLC_ACCESS; + + if (payload & BIT(EV_REMOTE_ACCESS)) + decoder->record.type |= ARM_SPE_REMOTE_ACCESS; + + if (payload & BIT(EV_MISPRED)) + decoder->record.type |= ARM_SPE_BRANCH_MISS; + + break; + case ARM_SPE_DATA_SOURCE: + decoder->record.source = payload; + break; + case ARM_SPE_BAD: + break; + case ARM_SPE_PAD: + break; + default: + pr_err("Get packet error!\n"); + return -1; + } + } + return 0; +} + +static bool arm_spe_decode(struct arm_spe_decoder *decoder) +{ + if (decoder->len) { + if (arm_spe_read_record(decoder) == 1) + return true; + } + return false; +} + +void arm_spe_decode_buf(const unsigned char *buf, size_t len) +{ + struct arm_spe_decoder decoder; + + decoder.buf = buf; + decoder.len = len; + + while (arm_spe_decode(&decoder)) + arm_spe_record_enqueue(&(decoder.record)); + +} +EXPORT_SYMBOL(arm_spe_decode_buf); diff --git a/drivers/arm/spe/spe-decoder/arm-spe-decoder.h b/drivers/arm/spe/spe-decoder/arm-spe-decoder.h new file mode 100644 index 000000000000..06d212c8b2cc --- /dev/null +++ b/drivers/arm/spe/spe-decoder/arm-spe-decoder.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arm_spe_decoder.h: Arm Statistical Profiling Extensions support + * Copyright (c) 2019-2020, Arm Ltd. + * Copyright (c) 2024-2025, Huawei Technologies Ltd. + */ + +#ifndef INCLUDE__ARM_SPE_DECODER_H__ +#define INCLUDE__ARM_SPE_DECODER_H__ + +#include <linux/stddef.h> + +#include "arm-spe-pkt-decoder.h" + +enum arm_spe_sample_type { + ARM_SPE_L1D_ACCESS = 1 << 0, + ARM_SPE_L1D_MISS = 1 << 1, + ARM_SPE_LLC_ACCESS = 1 << 2, + ARM_SPE_LLC_MISS = 1 << 3, + ARM_SPE_TLB_ACCESS = 1 << 4, + ARM_SPE_TLB_MISS = 1 << 5, + ARM_SPE_BRANCH_MISS = 1 << 6, + ARM_SPE_REMOTE_ACCESS = 1 << 7, +}; + +enum arm_spe_op_type { + ARM_SPE_LD = 1 << 0, + ARM_SPE_ST = 1 << 1, +}; + +enum arm_spe_neoverse_data_source { + ARM_SPE_NV_L1D = 0x0, + ARM_SPE_NV_L2 = 0x8, + ARM_SPE_NV_PEER_CORE = 0x9, + ARM_SPE_NV_LOCAL_CLUSTER = 0xa, + ARM_SPE_NV_SYS_CACHE = 0xb, + ARM_SPE_NV_PEER_CLUSTER = 0xc, + ARM_SPE_NV_REMOTE = 0xd, + ARM_SPE_NV_DRAM = 0xe, +}; + +struct arm_spe_record { + enum arm_spe_sample_type type; + int err; + u32 op; + u32 latency; + u64 from_ip; + u64 to_ip; + u64 timestamp; + u64 virt_addr; + u64 phys_addr; + u64 context_id; + u16 source; +}; + +struct arm_spe_buffer { + const unsigned char *buf; + size_t len; + u64 offset; + u64 trace_nr; +}; + +struct arm_spe_decoder { + struct arm_spe_record record; + const unsigned char *buf; + size_t len; + struct arm_spe_pkt packet; +}; + +void arm_spe_decoder_free(struct arm_spe_decoder *decoder); +void arm_spe_decode_buf(const unsigned char *buf, size_t len); +void arm_spe_record_enqueue(struct arm_spe_record *record); + +#endif diff --git a/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.c b/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.c new file mode 100644 index 000000000000..aeec43448779 --- /dev/null +++ b/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/byteorder/generic.h> + +#include "arm-spe-pkt-decoder.h" + +/* + * Extracts the field "sz" from header bits and converts to bytes: + * 00 : byte (1) + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) + */ +static unsigned int arm_spe_payload_len(unsigned char hdr) +{ + return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4); +} + +static int arm_spe_get_payload(const unsigned char *buf, size_t len, + unsigned char ext_hdr, + struct arm_spe_pkt *packet) +{ + size_t payload_len = arm_spe_payload_len(buf[ext_hdr]); + + if (len < 1 + ext_hdr + payload_len) + return ARM_SPE_NEED_MORE_BYTES; + + buf += 1 + ext_hdr; + + switch (payload_len) { + case 1: + packet->payload = *(uint8_t *)buf; + break; + case 2: + packet->payload = le16_to_cpu(*(uint16_t *)buf); + break; + case 4: + packet->payload = le32_to_cpu(*(uint32_t *)buf); + break; + case 8: + packet->payload = le64_to_cpu(*(uint64_t *)buf); + break; + default: + return ARM_SPE_BAD_PACKET; + } + + return 1 + ext_hdr + payload_len; +} + +static int arm_spe_get_pad(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_PAD; + return 1; +} + +static int arm_spe_get_alignment(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int alignment = 1 << ((buf[0] & 0xf) + 1); + + if (len < alignment) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_PAD; + return alignment - (((uintptr_t)buf) & (alignment - 1)); +} + +static int arm_spe_get_end(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_END; + return 1; +} + +static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_TIMESTAMP; + return arm_spe_get_payload(buf, len, 0, packet); +} + +static int arm_spe_get_events(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_EVENTS; + + /* we use index to identify Events with a less number of + * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, + * LLC-REFILL, and REMOTE-ACCESS events are identified if + * index > 1. + */ + packet->index = arm_spe_payload_len(buf[0]); + + return arm_spe_get_payload(buf, len, 0, packet); +} + +static int arm_spe_get_data_source(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_DATA_SOURCE; + return arm_spe_get_payload(buf, len, 0, packet); +} + +static int arm_spe_get_context(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_CONTEXT; + packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]); + return arm_spe_get_payload(buf, len, 0, packet); +} + +static int arm_spe_get_op_type(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_OP_TYPE; + packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]); + return arm_spe_get_payload(buf, len, 0, packet); +} + +static int arm_spe_get_counter(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_COUNTER; + + if (ext_hdr) + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); + else + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); + + return arm_spe_get_payload(buf, len, ext_hdr, packet); +} + +static int arm_spe_get_addr(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_ADDRESS; + + if (ext_hdr) + packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]); + else + packet->index = SPE_HDR_SHORT_INDEX(buf[0]); + + return arm_spe_get_payload(buf, len, ext_hdr, packet); +} + +static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int hdr; + unsigned char ext_hdr = 0; + + memset(packet, 0, sizeof(struct arm_spe_pkt)); + + if (!len) + return ARM_SPE_NEED_MORE_BYTES; + + hdr = buf[0]; + + if (hdr == SPE_HEADER0_PAD) + return arm_spe_get_pad(packet); + + if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */ + return arm_spe_get_end(packet); + + if (hdr == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + + if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) { + /* 16-bit extended format header */ + if (len == 1) + return ARM_SPE_BAD_PACKET; + + ext_hdr = 1; + hdr = buf[1]; + if (hdr == SPE_HEADER1_ALIGNMENT) + return arm_spe_get_alignment(buf, len, packet); + } + + /* + * The short format header's byte 0 or the extended format header's + * byte 1 has been assigned to 'hdr', which uses the same encoding for + * address packet and counter packet, so don't need to distinguish if + * it's short format or extended format and handle in once. + */ + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, ext_hdr, packet); + + if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, ext_hdr, packet); + + return ARM_SPE_BAD_PACKET; +} + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret; + + ret = arm_spe_do_get_packet(buf, len, packet); + /* put multiple consecutive PADs on the same line, up to + * the fixed-width output format of 16 bytes per line. + */ + if (ret > 0 && packet->type == ARM_SPE_PAD) { + while (ret < 16 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} diff --git a/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.h b/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.h new file mode 100644 index 000000000000..1a67b580b47f --- /dev/null +++ b/drivers/arm/spe/spe-decoder/arm-spe-pkt-decoder.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ +#define INCLUDE__ARM_SPE_PKT_DECODER_H__ + +#include <linux/stddef.h> + +#define ARM_SPE_PKT_DESC_MAX 256 +#define ARM_SPE_NEED_MORE_BYTES -1 +#define ARM_SPE_BAD_PACKET -2 +#define ARM_SPE_PKT_MAX_SZ 16 + +enum arm_spe_pkt_type { + ARM_SPE_BAD, + ARM_SPE_PAD, + ARM_SPE_END, + ARM_SPE_TIMESTAMP, + ARM_SPE_ADDRESS, + ARM_SPE_COUNTER, + ARM_SPE_CONTEXT, + ARM_SPE_OP_TYPE, + ARM_SPE_EVENTS, + ARM_SPE_DATA_SOURCE, +}; + +struct arm_spe_pkt { + enum arm_spe_pkt_type type; + unsigned char index; + uint64_t payload; +}; + +/* Short header (HEADER0) and extended header (HEADER1) */ +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_TIMESTAMP 0x71 +/* Mask for event & data source */ +#define SPE_HEADER0_MASK1 (GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0)) +#define SPE_HEADER0_EVENTS 0x42 +#define SPE_HEADER0_SOURCE 0x43 +/* Mask for context & operation */ +#define SPE_HEADER0_MASK2 GENMASK_ULL(7, 2) +#define SPE_HEADER0_CONTEXT 0x64 +#define SPE_HEADER0_OP_TYPE 0x48 +/* Mask for extended format */ +#define SPE_HEADER0_EXTENDED 0x20 +/* Mask for address & counter */ +#define SPE_HEADER0_MASK3 GENMASK_ULL(7, 3) +#define SPE_HEADER0_ADDRESS 0xb0 +#define SPE_HEADER0_COUNTER 0x98 +#define SPE_HEADER1_ALIGNMENT 0x0 + +#define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0)) +#define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \ + SPE_HDR_SHORT_INDEX(h1)) + +/* Address packet header */ +#define SPE_ADDR_PKT_HDR_INDEX_INS 0x0 +#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2 +#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3 +#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4 + +/* Address packet payload */ +#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56 +#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v) ((v) & GENMASK_ULL(55, 0)) +#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v) (((v) & GENMASK_ULL(55, 48)) >> 48) + +#define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) +#define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) +#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62) +#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56) + +#define SPE_ADDR_PKT_EL0 0 +#define SPE_ADDR_PKT_EL1 1 +#define SPE_ADDR_PKT_EL2 2 +#define SPE_ADDR_PKT_EL3 3 + +/* Context packet header */ +#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) + +/* Counter packet header */ +#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0 +#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 +#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 + +/* Event packet payload */ +enum arm_spe_events { + EV_EXCEPTION_GEN = 0, + EV_RETIRED = 1, + EV_L1D_ACCESS = 2, + EV_L1D_REFILL = 3, + EV_TLB_ACCESS = 4, + EV_TLB_WALK = 5, + EV_NOT_TAKEN = 6, + EV_MISPRED = 7, + EV_LLC_ACCESS = 8, + EV_LLC_MISS = 9, + EV_REMOTE_ACCESS = 10, + EV_ALIGNMENT = 11, + EV_PARTIAL_PREDICATE = 17, + EV_EMPTY_PREDICATE = 18, +}; + +/* Operation packet header */ +#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0)) +#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0 +#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 +#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 + +#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) + +#define SPE_OP_PKT_COND BIT(0) + +#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 + +#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) + +#define SPE_OP_PKT_AR BIT(4) +#define SPE_OP_PKT_EXCL BIT(3) +#define SPE_OP_PKT_AT BIT(2) +#define SPE_OP_PKT_ST BIT(0) + +#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) + +#define SPE_OP_PKT_SVE_SG BIT(7) +/* + * SVE effective vector length (EVL) is stored in byte 0 bits [6:4]; + * the length is rounded up to a power of two and use 32 as one step, + * so EVL calculation is: + * + * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4]) + */ +#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4)) +#define SPE_OP_PKT_SVE_PRED BIT(2) +#define SPE_OP_PKT_SVE_FP BIT(1) + +#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type); + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet); + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len); +#endif diff --git a/drivers/arm/spe/spe.c b/drivers/arm/spe/spe.c new file mode 100644 index 000000000000..726f071002dd --- /dev/null +++ b/drivers/arm/spe/spe.c @@ -0,0 +1,790 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * spe.c: Arm Statistical Profiling Extensions support + * Copyright (c) 2019-2020, Arm Ltd. + * Copyright (c) 2024-2025, Huawei Technologies Ltd. + */ + +#define PMUNAME "arm_spe" +#define DRVNAME PMUNAME "_driver" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +#include "spe-decoder/arm-spe-decoder.h" +#include "spe-decoder/arm-spe-pkt-decoder.h" +#include "spe.h" + +static long __percpu irq_dev_id; + +static struct arm_spe *spe; + +#define SPE_INIT_FAIL 0 +#define SPE_INIT_SUCC 1 +static int spe_probe_status = SPE_INIT_FAIL; + +/* Keep track of our dynamic hotplug state */ +static enum cpuhp_state arm_spe_online; + +DEFINE_PER_CPU(struct arm_spe_buf, per_cpu_spe_buf); + +static inline int arm_spe_per_buffer_alloc(int cpu) +{ + struct arm_spe_buf *spe_buf = &per_cpu(per_cpu_spe_buf, cpu); + void *alloc_base; + + if (spe_buf->base && spe_buf->record_base) + return 0; + + /* alloc spe raw data buffer */ + alloc_base = kzalloc_node(SPE_BUFFER_MAX_SIZE, GFP_KERNEL, cpu_to_node(cpu)); + if (unlikely(!alloc_base)) { + pr_err("alloc spe raw data buffer failed.\n"); + return -ENOMEM; + } + + spe_buf->base = alloc_base; + spe_buf->size = SPE_BUFFER_SIZE; + spe_buf->cur = alloc_base + SPE_BUFFER_MAX_SIZE - SPE_BUFFER_SIZE; + spe_buf->period = SPE_SAMPLE_PERIOD; + + /* alloc record buffer */ + spe_buf->record_size = SPE_RECORD_ENTRY_SIZE * SPE_RECORD_BUFFER_MAX_RECORDS; + spe_buf->record_base = kzalloc_node(spe_buf->record_size, GFP_KERNEL, cpu_to_node(cpu)); + if (unlikely(!spe_buf->record_base)) { + pr_err("alloc spe record buffer failed.\n"); + return -ENOMEM; + } + + return 0; +} + +static int arm_spe_buffer_alloc(void) +{ + int cpu, ret = 0; + cpumask_t *mask = &spe->supported_cpus; + + for_each_possible_cpu(cpu) { + if (!cpumask_test_cpu(cpu, mask)) + continue; + ret = arm_spe_per_buffer_alloc(cpu); + if (ret) + return ret; + } + return ret; +} + +static inline void arm_spe_per_buffer_free(int cpu) +{ + struct arm_spe_buf *spe_buf = &per_cpu(per_cpu_spe_buf, cpu); + + if (!spe_buf->base) + return; + + kfree(spe_buf->base); + spe_buf->cur = NULL; + spe_buf->base = NULL; + spe_buf->size = 0; + + kfree(spe_buf->record_base); + spe_buf->record_base = NULL; + spe_buf->record_size = 0; +} + +static inline void arm_spe_buffer_free(void) +{ + cpumask_t *mask = &spe->supported_cpus; + int cpu; + + for_each_possible_cpu(cpu) { + if (!cpumask_test_cpu(cpu, mask)) + continue; + arm_spe_per_buffer_free(cpu); + } +} + +static void arm_spe_buffer_init(void) +{ + u64 base, limit; + struct arm_spe_buf *spe_buf = this_cpu_ptr(&per_cpu_spe_buf); + + if (!spe_buf || !spe_buf->cur || !spe_buf->size) { + /* + * We still need to clear the limit pointer, since the + * profiler might only be disabled by virtue of a fault. + */ + limit = 0; + goto out_write_limit; + } + + base = (u64)spe_buf->cur; + limit = ((u64)spe_buf->cur + spe_buf->size) | PMBLIMITR_EL1_E; + write_sysreg_s(base, SYS_PMBPTR_EL1); + +out_write_limit: + write_sysreg_s(limit, SYS_PMBLIMITR_EL1); + +} + +static void arm_spe_disable_and_drain_local(void) +{ + /* Disable profiling at EL0 and EL1 */ + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Drain any buffered data */ + psb_csync(); + dsb(nsh); + + /* Disable the profiling buffer */ + write_sysreg_s(0, SYS_PMBLIMITR_EL1); + isb(); +} + +/* IRQ handling */ +static enum arm_spe_buf_fault_action arm_spe_buf_get_fault_act(void) +{ + const char *err_str; + u64 pmbsr; + enum arm_spe_buf_fault_action ret; + + /* + * Ensure new profiling data is visible to the CPU and any external + * aborts have been resolved. + */ + psb_csync(); + dsb(nsh); + + /* Ensure hardware updates to PMBPTR_EL1 are visible */ + isb(); + + /* Service required? */ + pmbsr = read_sysreg_s(SYS_PMBSR_EL1); + if (!FIELD_GET(PMBSR_EL1_S, pmbsr)) + return SPE_PMU_BUF_FAULT_ACT_SPURIOUS; + + /* We only expect buffer management events */ + switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) { + case PMBSR_EL1_EC_BUF: + /* Handled below */ + break; + case PMBSR_EL1_EC_FAULT_S1: + case PMBSR_EL1_EC_FAULT_S2: + err_str = "Unexpected buffer fault"; + goto out_err; + default: + err_str = "Unknown error code"; + goto out_err; + } + + /* Buffer management event */ + switch (FIELD_GET(PMBSR_EL1_BUF_BSC_MASK, pmbsr)) { + case PMBSR_EL1_BUF_BSC_FULL: + ret = SPE_PMU_BUF_FAULT_ACT_OK; + goto out_stop; + default: + err_str = "Unknown buffer status code"; + } + +out_err: + pr_err_ratelimited( + "%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n", + err_str, smp_processor_id(), pmbsr, + read_sysreg_s(SYS_PMBPTR_EL1), + read_sysreg_s(SYS_PMBLIMITR_EL1)); + ret = SPE_PMU_BUF_FAULT_ACT_FATAL; + +out_stop: + return ret; +} + +void arm_spe_stop(void) +{ + arm_spe_disable_and_drain_local(); +} + +static u64 arm_spe_to_pmsfcr(void) +{ + u64 reg = 0; + + if (spe->load_filter) + reg |= PMSFCR_EL1_LD; + + if (spe->store_filter) + reg |= PMSFCR_EL1_ST; + + if (spe->branch_filter) + reg |= PMSFCR_EL1_B; + + if (reg) + reg |= PMSFCR_EL1_FT; + + if (spe->event_filter) + reg |= PMSFCR_EL1_FE; + + if (spe->inv_event_filter) + reg |= PMSFCR_EL1_FnE; + + if (spe->min_latency) + reg |= PMSFCR_EL1_FL; + + return reg; +} + +static u64 arm_spe_to_pmsevfr(void) +{ + return spe->event_filter; +} + +static u64 arm_spe_to_pmsnevfr(void) +{ + return spe->inv_event_filter; +} + +static u64 arm_spe_to_pmslatfr(void) +{ + return spe->min_latency; +} + +static void arm_spe_sanitise_period(struct arm_spe_buf *spe_buf) +{ + u64 period = spe_buf->period; + u64 max_period = PMSIRR_EL1_INTERVAL_MASK; + + if (period < spe->min_period) + period = spe->min_period; + else if (period > max_period) + period = max_period; + else + period &= max_period; + + spe_buf->period = period; +} + +static u64 arm_spe_to_pmsirr(void) +{ + u64 reg = 0; + struct arm_spe_buf *spe_buf = this_cpu_ptr(&per_cpu_spe_buf); + + arm_spe_sanitise_period(spe_buf); + + if (spe->jitter) + reg |= 0x1; + + reg |= spe_buf->period << 8; + + return reg; +} + +static u64 arm_spe_to_pmscr(void) +{ + u64 reg = 0; + + if (spe->ts_enable) + reg |= PMSCR_EL1_TS; + + if (spe->pa_enable) + reg |= PMSCR_EL1_PA; + + if (spe->pct_enable < 0x4) + reg |= spe->pct_enable << 6; + + if (spe->exclude_user) + reg |= PMSCR_EL1_E0SPE; + + if (spe->exclude_kernel) + reg |= PMSCR_EL1_E1SPE; + + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) + reg |= PMSCR_EL1_CX; + + return reg; +} + +int arm_spe_start(void) +{ + u64 reg; + int cpu = smp_processor_id(); + + if (!cpumask_test_cpu(cpu, &spe->supported_cpus)) + return -ENOENT; + + arm_spe_buffer_init(); + + reg = arm_spe_to_pmsfcr(); + write_sysreg_s(reg, SYS_PMSFCR_EL1); + + reg = arm_spe_to_pmsevfr(); + write_sysreg_s(reg, SYS_PMSEVFR_EL1); + + if (spe->features & SPE_PMU_FEAT_INV_FILT_EVT) { + reg = arm_spe_to_pmsnevfr(); + write_sysreg_s(reg, SYS_PMSNEVFR_EL1); + } + + reg = arm_spe_to_pmslatfr(); + + write_sysreg_s(reg, SYS_PMSLATFR_EL1); + + reg = arm_spe_to_pmsirr(); + write_sysreg_s(reg, SYS_PMSIRR_EL1); + isb(); + + reg = arm_spe_to_pmscr(); + isb(); + write_sysreg_s(reg, SYS_PMSCR_EL1); + return 0; +} + +void arm_spe_continue(void) +{ + int reg; + + arm_spe_buffer_init(); + reg = arm_spe_to_pmscr(); + + isb(); + write_sysreg_s(reg, SYS_PMSCR_EL1); +} + +int arm_spe_enabled(void) +{ + return spe_probe_status == SPE_INIT_SUCC; +} + +static irqreturn_t arm_spe_irq_handler(int irq, void *dev) +{ + enum arm_spe_buf_fault_action act; + struct arm_spe_buf *spe_buf = this_cpu_ptr(&per_cpu_spe_buf); + + act = arm_spe_buf_get_fault_act(); + + switch (act) { + case SPE_PMU_BUF_FAULT_ACT_FATAL: + /* + * If a fatal exception occurred then leaving the profiling + * buffer enabled is a recipe waiting to happen. Since + * fatal faults don't always imply truncation, make sure + * that the profiling buffer is disabled explicitly before + * clearing the syndrome register. + */ + arm_spe_disable_and_drain_local(); + break; + case SPE_PMU_BUF_FAULT_ACT_OK: + spe_buf->nr_records = 0; + arm_spe_decode_buf(spe_buf->cur, spe_buf->size); + break; + + case SPE_PMU_BUF_FAULT_ACT_SPURIOUS: + /* We've seen you before, but GCC has the memory of a sieve. */ + arm_spe_stop(); + break; + } + + /* The buffer pointers are now sane, so resume profiling. */ + write_sysreg_s(0, SYS_PMBSR_EL1); + return IRQ_HANDLED; +} + + +static void __arm_spe_dev_probe(void *data) +{ + int fld; + u64 reg; + + fld = cpuid_feature_extract_unsigned_field( + read_cpuid(ID_AA64DFR0_EL1), ID_AA64DFR0_EL1_PMSVer_SHIFT); + if (!fld) { + pr_err("unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n", + fld, smp_processor_id()); + return; + } + spe->pmsver = (u16)fld; + + /* Read PMBIDR first to determine whether or not we have access */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (FIELD_GET(PMBIDR_EL1_P, reg)) { + pr_err("profiling buffer owned by higher exception level\n"); + return; + } + + /* Minimum alignment. If it's out-of-range, then fail the probe */ + fld = FIELD_GET(PMBIDR_EL1_ALIGN, reg); + spe->align = 1 << fld; + if (spe->align > SZ_2K) { + pr_err("unsupported PMBIDR.Align [%d] on CPU %d\n", fld, + smp_processor_id()); + return; + } + + /* It's now safe to read PMSIDR and figure out what we've got */ + reg = read_sysreg_s(SYS_PMSIDR_EL1); + if (FIELD_GET(PMSIDR_EL1_FE, reg)) + spe->features |= SPE_PMU_FEAT_FILT_EVT; + + if (FIELD_GET(PMSIDR_EL1_FnE, reg)) + spe->features |= SPE_PMU_FEAT_INV_FILT_EVT; + + if (FIELD_GET(PMSIDR_EL1_FT, reg)) + spe->features |= SPE_PMU_FEAT_FILT_TYP; + + if (FIELD_GET(PMSIDR_EL1_FL, reg)) + spe->features |= SPE_PMU_FEAT_FILT_LAT; + + if (FIELD_GET(PMSIDR_EL1_ARCHINST, reg)) + spe->features |= SPE_PMU_FEAT_ARCH_INST; + + if (FIELD_GET(PMSIDR_EL1_LDS, reg)) + spe->features |= SPE_PMU_FEAT_LDS; + + if (FIELD_GET(PMSIDR_EL1_ERND, reg)) + spe->features |= SPE_PMU_FEAT_ERND; + + /* This field has a spaced out encoding, so just use a look-up */ + fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); + switch (fld) { + case PMSIDR_EL1_INTERVAL_256: + spe->min_period = 256; + break; + case PMSIDR_EL1_INTERVAL_512: + spe->min_period = 512; + break; + case PMSIDR_EL1_INTERVAL_768: + spe->min_period = 768; + break; + case PMSIDR_EL1_INTERVAL_1024: + spe->min_period = 1024; + break; + case PMSIDR_EL1_INTERVAL_1536: + spe->min_period = 1536; + break; + case PMSIDR_EL1_INTERVAL_2048: + spe->min_period = 2048; + break; + case PMSIDR_EL1_INTERVAL_3072: + spe->min_period = 3072; + break; + case PMSIDR_EL1_INTERVAL_4096: + spe->min_period = 4096; + break; + default: + pr_warn("unknown PMSIDR_EL1.Interval [%d]; assuming 8\n", fld); + fallthrough; + } + + /* Maximum record size. If it's out-of-range, then fail the probe */ + fld = FIELD_GET(PMSIDR_EL1_MAXSIZE, reg); + spe->max_record_sz = 1 << fld; + if (spe->max_record_sz > SZ_2K || spe->max_record_sz < 16) { + pr_err("unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n", fld, + smp_processor_id()); + return; + } + + fld = FIELD_GET(PMSIDR_EL1_COUNTSIZE, reg); + switch (fld) { + case PMSIDR_EL1_COUNTSIZE_12_BIT_SAT: + spe->counter_sz = 12; + break; + case PMSIDR_EL1_COUNTSIZE_16_BIT_SAT: + spe->counter_sz = 16; + break; + default: + pr_warn("unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n", fld); + fallthrough; + } + + pr_info("probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, min_period %u, align %u, features 0x%llx]\n", + spe->pmsver - 1, cpumask_pr_args(&spe->supported_cpus), + spe->max_record_sz, spe->min_period, spe->align, spe->features); + + spe->features |= SPE_PMU_FEAT_DEV_PROBED; +} + +static void __arm_spe_reset_local(void) +{ + /* + * This is probably overkill, as we have no idea where we're + * draining any buffered data to... + */ + arm_spe_disable_and_drain_local(); + + /* Reset the buffer base pointer */ + write_sysreg_s(0, SYS_PMBPTR_EL1); + isb(); + + /* Clear any pending management interrupts */ + write_sysreg_s(0, SYS_PMBSR_EL1); + isb(); +} + +static void __arm_spe_setup_one(void) +{ + __arm_spe_reset_local(); + enable_percpu_irq(spe->irq, IRQ_TYPE_NONE); +} + +static void __arm_spe_stop_one(void) +{ + disable_percpu_irq(spe->irq); + __arm_spe_reset_local(); +} + +static int arm_spe_cpu_startup(unsigned int cpu, struct hlist_node *node) +{ + struct arm_spe *spe; + + spe = hlist_entry_safe(node, struct arm_spe, hotplug_node); + if (!cpumask_test_cpu(cpu, &spe->supported_cpus)) + return 0; + + /* Alloc per cpu spe buffer */ + arm_spe_per_buffer_alloc(cpu); + + /* Reset pmu and enable irq */ + __arm_spe_setup_one(); + + return 0; +} + +static int arm_spe_cpu_teardown(unsigned int cpu, struct hlist_node *node) +{ + struct arm_spe *spe; + + spe = hlist_entry_safe(node, struct arm_spe, hotplug_node); + if (!cpumask_test_cpu(cpu, &spe->supported_cpus)) + return 0; + + /* Disable irq and reset pmu */ + __arm_spe_stop_one(); + + /* Release per cpu spe buffer */ + arm_spe_per_buffer_free(cpu); + + return 0; +} + +static int arm_spe_dev_init(void) +{ + int ret; + cpumask_t *mask = &spe->supported_cpus; + + + /* Make sure we probe the hardware on a relevant CPU */ + ret = smp_call_function_any(mask, __arm_spe_dev_probe, NULL, 1); + if (ret || !(spe->features & SPE_PMU_FEAT_DEV_PROBED)) + return -ENXIO; + + /* Request our PPIs (note that the IRQ is still disabled) */ + ret = request_percpu_irq(spe->irq, arm_spe_irq_handler, DRVNAME, + &irq_dev_id); + if (ret) + return ret; + + /* + * Register our hotplug notifier now so we don't miss any events. + * This will enable the IRQ for any supported CPUs that are already + * up. + */ + ret = cpuhp_state_add_instance(arm_spe_online, + &spe->hotplug_node); + if (ret) + free_percpu_irq(spe->irq, &irq_dev_id); + + return ret; +} + +static void arm_spe_dev_teardown(void) +{ + arm_spe_buffer_free(); + cpuhp_state_remove_instance(arm_spe_online, &spe->hotplug_node); + free_percpu_irq(spe->irq, &irq_dev_id); +} + +static const struct of_device_id arm_spe_of_match[] = { + { .compatible = "arm,statistical-profiling-extension-v1", + .data = (void *)1 }, + { /* Sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, arm_spe_of_match); + +static const struct platform_device_id arm_spe_match[] = { + { ARMV8_SPE_PDEV_NAME, 0 }, + {} +}; +MODULE_DEVICE_TABLE(platform, arm_spe_match); + +/* Driver and device probing */ +static int arm_spe_irq_probe(void) +{ + struct platform_device *pdev = spe->pdev; + int irq = platform_get_irq(pdev, 0); + + if (irq < 0) + return -ENXIO; + + if (!irq_is_percpu(irq)) { + dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq); + return -EINVAL; + } + + if (irq_get_percpu_devid_partition(irq, &spe->supported_cpus)) { + dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq); + return -EINVAL; + } + + spe->irq = irq; + return 0; +} + +static void arm_spe_sample_para_init(void) +{ + spe->sample_period = SPE_SAMPLE_PERIOD; + spe->jitter = 1; + spe->load_filter = 1; + spe->store_filter = 1; + spe->branch_filter = 0; + spe->inv_event_filter = 0; + spe->event_filter = 0x2; + + spe->ts_enable = 1; + spe->pa_enable = 1; + spe->pct_enable = 0; + + spe->exclude_user = 1; + spe->exclude_kernel = 0; + + spe->min_latency = 120; +} + +void arm_spe_record_enqueue(struct arm_spe_record *record) +{ + struct arm_spe_buf *spe_buf = this_cpu_ptr(&per_cpu_spe_buf); + struct arm_spe_record *record_tail; + + if (spe_buf->nr_records >= SPE_RECORD_BUFFER_MAX_RECORDS) { + pr_err("nr_records exceeded!\n"); + return; + } + + record_tail = spe_buf->record_base + + spe_buf->nr_records * SPE_RECORD_ENTRY_SIZE; + *record_tail = *(struct arm_spe_record *)record; + spe_buf->nr_records++; + +} + +static int arm_spe_device_probe(struct platform_device *pdev) +{ + + int ret; + struct device *dev; + /* + * If kernelspace is unmapped when running at EL0, then the SPE + * buffer will fault and prematurely terminate the AUX session. + */ + if (arm64_kernel_unmapped_at_el0()) { + dev_warn_once(dev, "buffer inaccessible. Try passing "kpti=off" on the kernel command line\n"); + return -EPERM; + } + + if (!pdev) { + pr_err("pdev is NULL!\n"); + return -ENODEV; + } + + dev = &pdev->dev; + if (!dev) { + pr_err("dev is NULL!\n"); + return -ENODEV; + } + + spe = devm_kzalloc(dev, sizeof(*spe), GFP_KERNEL); + if (!spe) + return -ENOMEM; + + spe->pdev = pdev; + platform_set_drvdata(pdev, spe); + + ret = arm_spe_irq_probe(); + if (ret) + goto out_free; + + ret = arm_spe_dev_init(); + if (ret) + goto out_free; + + /* + * Ensure that all CPUs that support SPE can apply for the cache + * area, with each CPU defaulting to 4K * 2. Failure to do so will + * result in the inability to collect SPE data in kernel mode. + */ + ret = arm_spe_buffer_alloc(); + if (ret) + goto out_teardown; + + arm_spe_sample_para_init(); + + spe_probe_status = SPE_INIT_SUCC; + + return 0; + +out_teardown: + arm_spe_dev_teardown(); +out_free: + kfree(spe); + return ret; +} + +static int arm_spe_device_remove(struct platform_device *pdev) +{ + arm_spe_dev_teardown(); + return 0; +} + +static struct platform_driver arm_spe_driver = { + .id_table = arm_spe_match, + .driver = { + .name = DRVNAME, + .of_match_table = of_match_ptr(arm_spe_of_match), + .suppress_bind_attrs = true, + }, + .probe = arm_spe_device_probe, + .remove = arm_spe_device_remove, +}; + +static int __init arm_spe_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME, + arm_spe_cpu_startup, + arm_spe_cpu_teardown); + if (ret < 0) + return ret; + arm_spe_online = ret; + + ret = platform_driver_register(&arm_spe_driver); + + if (ret) + cpuhp_remove_multi_state(arm_spe_online); + + return ret; +} + +static void __exit arm_spe_exit(void) +{ + /* + * TODO: Find a clean way to disable SPE so that SPE + * can be used for perf. + */ + platform_driver_unregister(&arm_spe_driver); + cpuhp_remove_multi_state(arm_spe_online); + arm_spe_buffer_free(); +} + +module_init(arm_spe_init); +module_exit(arm_spe_exit); diff --git a/drivers/arm/spe/spe.h b/drivers/arm/spe/spe.h new file mode 100644 index 000000000000..4348d15581dd --- /dev/null +++ b/drivers/arm/spe/spe.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * spe.h: Arm Statistical Profiling Extensions support + * Copyright (c) 2019-2020, Arm Ltd. + * Copyright (c) 2024-2025, Huawei Technologies Ltd. + */ + +#ifndef __SPE_H +#define __SPE_H + +#define SPE_BUFFER_MAX_SIZE (PAGE_SIZE) +#define SPE_BUFFER_SIZE (PAGE_SIZE / 32) + +#define SPE_SAMPLE_PERIOD 1024 + +#define SPE_RECORD_BUFFER_MAX_RECORDS (100) +#define SPE_RECORD_ENTRY_SIZE sizeof(struct arm_spe_record) + +#define SPE_PMU_FEAT_FILT_EVT (1UL << 0) +#define SPE_PMU_FEAT_FILT_TYP (1UL << 1) +#define SPE_PMU_FEAT_FILT_LAT (1UL << 2) +#define SPE_PMU_FEAT_ARCH_INST (1UL << 3) +#define SPE_PMU_FEAT_LDS (1UL << 4) +#define SPE_PMU_FEAT_ERND (1UL << 5) +#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) +#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) +#define PMBLIMITR_EL1_E GENMASK(0, 0) +#define PMBSR_EL1_S GENMASK(17, 17) +#define PMBSR_EL1_EC GENMASK(31, 26) +#define PMBSR_EL1_EC_BUF UL(0b000000) +#define PMBSR_EL1_EC_FAULT_S1 UL(0b100100) +#define PMBSR_EL1_EC_FAULT_S2 UL(0b100101) +#define PMBSR_EL1_MSS_MASK GENMASK(15, 0) +#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK +#define PMBSR_EL1_BUF_BSC_FULL 0x1UL +#define PMSFCR_EL1_LD GENMASK(17, 17) +#define PMSFCR_EL1_ST GENMASK(18, 18) +#define PMSFCR_EL1_B GENMASK(16, 16) +#define PMSFCR_EL1_FnE GENMASK(3, 3) +#define PMSFCR_EL1_FT GENMASK(1, 1) +#define PMSFCR_EL1_FE GENMASK(0, 0) +#define PMSFCR_EL1_FL GENMASK(2, 2) +#define PMSIRR_EL1_INTERVAL_MASK GENMASK(31, 8) +#define PMSCR_EL1_TS GENMASK(5, 5) +#define PMSCR_EL1_PA GENMASK(4, 4) +#define PMSCR_EL1_CX GENMASK(3, 3) +#define PMSCR_EL1_E1SPE GENMASK(1, 1) +#define PMSCR_EL1_E0SPE GENMASK(0, 0) +#define ID_AA64DFR0_EL1_PMSVer_SHIFT 32 +#define PMBIDR_EL1_P GENMASK(4, 4) +#define PMBIDR_EL1_ALIGN GENMASK(3, 0) +#define PMSIDR_EL1_FE GENMASK(0, 0) +#define PMSIDR_EL1_FnE GENMASK(6, 6) +#define PMSIDR_EL1_FT GENMASK(1, 1) +#define PMSIDR_EL1_ARCHINST GENMASK(3, 3) +#define PMSIDR_EL1_LDS GENMASK(4, 4) +#define PMSIDR_EL1_ERND GENMASK(5, 5) +#define PMSIDR_EL1_INTERVAL GENMASK(11, 8) +#define PMSIDR_EL1_INTERVAL_256 UL(0b0000) +#define PMSIDR_EL1_INTERVAL_512 UL(0b0010) +#define PMSIDR_EL1_INTERVAL_768 UL(0b0011) +#define PMSIDR_EL1_INTERVAL_1024 UL(0b0100) +#define PMSIDR_EL1_INTERVAL_1536 UL(0b0101) +#define PMSIDR_EL1_INTERVAL_2048 UL(0b0110) +#define PMSIDR_EL1_INTERVAL_3072 UL(0b0111) +#define PMSIDR_EL1_INTERVAL_4096 UL(0b1000) +#define PMSIDR_EL1_MAXSIZE GENMASK(15, 12) +#define PMSIDR_EL1_COUNTSIZE GENMASK(19, 16) +#define PMSIDR_EL1_COUNTSIZE_12_BIT_SAT UL(0b0010) +#define PMSIDR_EL1_COUNTSIZE_16_BIT_SAT UL(0b0011) +#define PMSIDR_EL1_FL GENMASK(2, 2) +#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) +#define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) + +enum arm_spe_buf_fault_action { + SPE_PMU_BUF_FAULT_ACT_SPURIOUS, + SPE_PMU_BUF_FAULT_ACT_FATAL, + SPE_PMU_BUF_FAULT_ACT_OK, +}; + +struct arm_spe { + struct pmu pmu; + struct platform_device *pdev; + cpumask_t supported_cpus; + struct hlist_node hotplug_node; + int irq; /* PPI */ + u16 pmsver; + u16 min_period; + u16 counter_sz; + u64 features; + u16 max_record_sz; + u16 align; + u64 sample_period; + local64_t period_left; + bool jitter; + bool load_filter; + bool store_filter; + bool branch_filter; + u64 inv_event_filter; + u16 min_latency; + u64 event_filter; + bool ts_enable; + bool pa_enable; + u8 pct_enable; + bool exclude_user; + bool exclude_kernel; +}; + +struct arm_spe_buf { + void *cur; /* for spe raw data buffer */ + int size; + int period; + void *base; + + void *record_base; /* for spe record buffer */ + int record_size; + int nr_records; +}; + +#endif /* __SPE_H */ diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 0d284fda87ac..b6b7998a5f9f 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -124,7 +124,7 @@ arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len, return ret; }
-#if IS_ENABLED(CONFIG_ARM_SPE_PMU) +#if IS_ENABLED(CONFIG_ARM_SPE_PMU) || IS_ENABLED(CONFIG_ARM_SPE) static struct resource spe_resources[] = { { /* irq */