
hulk inclusion
category:feature
bugzilla:https://gitee.com/openeuler/kernel/issues/IC8KS8
CVE: NA

--------------------------------

Currently, the boost SPE feature detects devices based on implementor
and part_num, but the collected data is not yet utilized by other
kernel modules. This patch enhances the boost SPE functionality.

This feature enables richer memory access profiling and improves the
potential for cross-module usage of SPE-collected data.

Signed-off-by: Ze Zuo <zuoze1@huawei.com>
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
Review notes (this area is ignored when the patch is applied):

 - arm_spe_to_htpg(): the PAEN and RMPAFLEN enable bits were written as
   "1 < SHIFT", a comparison that evaluates to 1 and therefore sets
   bit 0 instead of bit 57/58. They are now "1ULL << SHIFT".
 - arm_spe_to_htpg(): pa_flt_pt and pa_flt_mask are u32 fields; they are
   widened to u64 before shifting so that the 30-bit shift of
   pa_flt_mask is not truncated to 32 bits.
 - arm_spe_read_record(): boost_spe_addr[] has 8 entries and is indexed
   with boost_spe_idx++ without a bounds check visible in this patch.
   NOTE(review): confirm that the record is reset for every sample and
   that at most 8 boost_spe address packets can occur per record.
 - "struct boost_spe_contol" looks like a misspelling of "control"; the
   name is left untouched here so that dependent code keeps building.

 drivers/arm/mm_monitor/mm_spe.c               | 71 +++++++++++++++++++
 drivers/arm/mm_monitor/mm_spe.h               | 35 ++++++++-
 .../mm_monitor/spe-decoder/arm-spe-decoder.c  | 12 ++++
 .../mm_monitor/spe-decoder/arm-spe-decoder.h  |  3 +
 .../spe-decoder/arm-spe-pkt-decoder.h         |  5 ++
 5 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/arm/mm_monitor/mm_spe.c b/drivers/arm/mm_monitor/mm_spe.c
index f5c3668bb656..daf92539690f 100644
--- a/drivers/arm/mm_monitor/mm_spe.c
+++ b/drivers/arm/mm_monitor/mm_spe.c
@@ -159,6 +159,12 @@ static void mm_spe_disable_and_drain_local(void)
 	/* Disable the profiling buffer */
 	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
 	isb();
+
+	/* Disable boost_spe profiling */
+	if (spe->support_boost_spe) {
+		write_sysreg_s(0, SYS_OMHTPG_EL1);
+		isb();
+	}
 }
 
 static u64 mm_spe_to_pmsfcr(void)
@@ -189,6 +195,39 @@ static u64 mm_spe_to_pmsfcr(void)
 	return reg;
 }
 
+static u64 arm_spe_to_htpg(void)
+{
+	u64 reg = 0;
+	struct boost_spe_contol *boost_spe = &spe->boost_spe;
+
+	if (boost_spe->rmt_acc_en)
+		reg |= SYS_OMHTPG_EL1_RMEN;
+
+	if (boost_spe->boost_spe_en_cfg < 0x4)
+		reg |= boost_spe->boost_spe_en_cfg;
+
+	if (boost_spe->record_sel)
+		reg |= SYS_OMHTPG_EL1_REC_SEL;
+
+	if (boost_spe->pop_uop_sel)
+		reg |= SYS_OMHTPG_EL1_POP_UOP_SEL;
+
+	if (boost_spe->sft_cfg < 0x4)
+		reg |= boost_spe->sft_cfg << SYS_OMHTPG_EL1_SFT_CFG_SHIFT;
+
+	if (boost_spe->boost_spe_pa_flt_en || boost_spe->rmt_acc_pa_flt_en) {
+		reg |= 1ULL << SYS_OMHTPG_EL1_PAEN_SHIFT;
+		reg |= 1ULL << SYS_OMHTPG_EL1_RMPAFLEN_SHIFT;
+
+		if (boost_spe->pa_flt_pt < 0x8000000 && boost_spe->pa_flt_mask < 0x8000000) {
+			reg |= (u64)boost_spe->pa_flt_pt << SYS_OMHTPG_EL1_PAFL_SHIFT;
+			reg |= (u64)boost_spe->pa_flt_mask << SYS_OMHTPG_EL1_PAFLMK_SHIFT;
+		}
+	}
+
+	return reg;
+}
+
 static u64 mm_spe_to_pmsevfr(void)
 {
 	return spe->event_filter;
@@ -291,6 +330,13 @@ int mm_spe_start(void)
 	reg = mm_spe_to_pmscr();
 	isb();
 	write_sysreg_s(reg, SYS_PMSCR_EL1);
+
+	if (spe->support_boost_spe) {
+		reg = arm_spe_to_htpg();
+		isb();
+		write_sysreg_s(reg, SYS_OMHTPG_EL1);
+	}
+
 	return 0;
 }
 
@@ -357,8 +403,30 @@ static const struct platform_device_id mm_spe_match[] = {
 };
 MODULE_DEVICE_TABLE(platform, mm_spe_match);
 
+static void arm_spe_boost_spe_para_init(void)
+{
+	struct boost_spe_contol *boost_spe = &spe->boost_spe;
+
+	boost_spe->record_sel = 1;
+	boost_spe->pop_uop_sel = 0;
+	boost_spe->rmt_acc_pa_flt_en = 0;
+	boost_spe->rmt_acc_en = 1;
+	boost_spe->boost_spe_pa_flt_en = 0;
+	boost_spe->pa_flt_pt = 0;
+	boost_spe->pa_flt_mask = 0;
+	boost_spe->sft_cfg = 0;
+	boost_spe->boost_spe_en_cfg = 0x3;
+}
+
 static void mm_spe_sample_para_init(void)
 {
+	u64 implementor = read_cpuid_implementor();
+	u64 part_num = read_cpuid_part_number();
+
+	/* Does this CPU support boost_spe sampling? */
+	if (implementor == ARM_CPU_IMP_HISI && part_num == 0xd06)
+		spe->support_boost_spe = true;
+
 	spe->sample_period = SPE_SAMPLE_PERIOD;
 	spe->jitter = 1;
 	spe->load_filter = 1;
@@ -375,6 +443,9 @@ static void mm_spe_sample_para_init(void)
 	spe->exclude_kernel = 0;
 
 	spe->min_latency = 120;
+
+	if (spe->support_boost_spe)
+		arm_spe_boost_spe_para_init();
 }
 
 void mm_spe_record_enqueue(struct arm_spe_record *record)
diff --git a/drivers/arm/mm_monitor/mm_spe.h b/drivers/arm/mm_monitor/mm_spe.h
index bd0a1574a1b0..5ffc11cb951a 100644
--- a/drivers/arm/mm_monitor/mm_spe.h
+++ b/drivers/arm/mm_monitor/mm_spe.h
@@ -4,7 +4,7 @@
 #define __SPE_H
 
 #define SPE_BUFFER_MAX_SIZE	(PAGE_SIZE)
-#define SPE_BUFFER_SIZE		(PAGE_SIZE / 32)
+#define SPE_BUFFER_SIZE		(PAGE_SIZE / 16)
 
 #define SPE_SAMPLE_PERIOD	1024
 
@@ -12,11 +12,43 @@
 #define SPE_RECORD_ENTRY_SIZE	sizeof(struct mem_sampling_record)
 #define ARMV8_SPE_MEM_SAMPLING_PDEV_NAME "arm,mm_spe,spe-v1"
 
+/* boost_spe sampling controls */
+#define SYS_OMHTPG_EL1			sys_reg(3, 0, 15, 8, 2)
+#define SYS_OMHTPG_EL1_RMCF_SHIFT	0
+#define SYS_OMHTPG_EL1_RMCF_MASK	0x3UL
+#define SYS_OMHTPG_EL1_RMEN		GENMASK(2, 2)
+#define SYS_OMHTPG_EL1_RMEN_SHIFT	2
+#define SYS_OMHTPG_EL1_PAFL		GENMASK(3, 3)
+#define SYS_OMHTPG_EL1_PAFL_SHIFT	3
+#define SYS_OMHTPG_EL1_PAFL_MASK	0x7FFFFFFUL
+#define SYS_OMHTPG_EL1_PAFLMK_SHIFT	30
+#define SYS_OMHTPG_EL1_PAFLMK_MASK	0x7FFFFFFUL
+#define SYS_OMHTPG_EL1_PAEN_SHIFT	57
+
+#define SYS_OMHTPG_EL1_RMPAFLEN_SHIFT	58
+#define SYS_OMHTPG_EL1_POP_UOP_SEL	GENMASK(59, 59)
+#define SYS_OMHTPG_EL1_SFT_CFG_SHIFT	60
+#define SYS_OMHTPG_EL1_SFT_CFG_MASK	0x3UL
+#define SYS_OMHTPG_EL1_REC_SEL		GENMASK(62, 62)
+
+struct boost_spe_contol {
+	u32 boost_spe_en_cfg;
+	u32 pa_flt_pt;
+	u32 pa_flt_mask;
+	u64 sft_cfg;
+	bool boost_spe_pa_flt_en;
+	bool rmt_acc_en;
+	bool rmt_acc_pa_flt_en;
+	bool pop_uop_sel;
+	bool record_sel;
+};
+
 struct mm_spe {
 	struct pmu pmu;
 	struct platform_device *pdev;
 	cpumask_t supported_cpus;
 	struct hlist_node hotplug_node;
+	struct boost_spe_contol boost_spe;
 	int irq; /* PPI */
 	u16 pmsver;
 	u16 min_period;
@@ -38,6 +70,7 @@ struct mm_spe {
 	u8 pct_enable;
 	bool exclude_user;
 	bool exclude_kernel;
+	bool support_boost_spe;
 };
 
 struct mm_spe_buf {
diff --git a/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.c b/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.c
index d84d01f8bb07..1394d377c061 100644
--- a/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.c
+++ b/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.c
@@ -61,6 +61,13 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
 	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
 		/* Clean highest byte */
 		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+	/* Boost_spe hot data access physical address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_BOOST_SPE_DATA_PHYS) {
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_BOOST_SPE(payload);
+	/* Remote Data access physical address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_REMOTE_DATA_PHYS) {
+		/* Clean highest byte */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 	} else {
 		seen_idx = 0;
 		if (!(seen_idx & BIT(index))) {
@@ -132,6 +139,11 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 				decoder->record.virt_addr = ip;
 			else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS)
 				decoder->record.phys_addr = ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_BOOST_SPE_DATA_PHYS)
+				decoder->record.boost_spe_addr[decoder->record.boost_spe_idx++]
+					= ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_REMOTE_DATA_PHYS)
+				decoder->record.remote_addr = ip;
 			break;
 		case ARM_SPE_COUNTER:
 			if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
diff --git a/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.h b/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.h
index 3af4a15107f0..3ccc32de8afc 100644
--- a/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.h
+++ b/drivers/arm/mm_monitor/spe-decoder/arm-spe-decoder.h
@@ -49,6 +49,9 @@ struct arm_spe_record {
 	u64 virt_addr;
 	u64 phys_addr;
 	u64 context_id;
+	u64 boost_spe_addr[8];
+	u64 remote_addr;
+	u16 boost_spe_idx;
 	u16 source;
 };
 
diff --git a/drivers/arm/mm_monitor/spe-decoder/arm-spe-pkt-decoder.h b/drivers/arm/mm_monitor/spe-decoder/arm-spe-pkt-decoder.h
index 1a67b580b47f..873c3590e4a8 100644
--- a/drivers/arm/mm_monitor/spe-decoder/arm-spe-pkt-decoder.h
+++ b/drivers/arm/mm_monitor/spe-decoder/arm-spe-pkt-decoder.h
@@ -63,6 +63,8 @@ struct arm_spe_pkt {
 #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT		0x2
 #define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS		0x3
 #define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH		0x4
+#define SPE_ADDR_PKT_HDR_INDEX_BOOST_SPE_DATA_PHYS	0x6
+#define SPE_ADDR_PKT_HDR_INDEX_REMOTE_DATA_PHYS		0x7
 
 /* Address packet payload */
 #define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT			56
@@ -79,6 +81,9 @@ struct arm_spe_pkt {
 #define SPE_ADDR_PKT_EL2				2
 #define SPE_ADDR_PKT_EL3				3
 
+/* Boost_spe address packet payload */
+#define SPE_ADDR_PKT_ADDR_GET_BYTES_BOOST_SPE(v)	((v) & GENMASK_ULL(52, 12))
+
 /* Context packet header */
 #define SPE_CTX_PKT_HDR_INDEX(h)			((h) & GENMASK_ULL(1, 0))
 
-- 
2.25.1