
From: Yicong Yang <yangyicong@hisilicon.com>

driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICFKG8

----------------------------------------------------------------------

HIP12 provides hardware metric sampling with adjacent counters
counter_2n and counter_2n+1. Overflow of counter_2n+1 raises an
interrupt, while overflow of counter_2n loads the initial values,
which are stored in the dedicated registers reload_counter_2n and
reload_counter_2n+1, into both counters.

With the ability above, software only needs to perform sampling while
handling the interrupt of counter_2n+1 and to configure different
values for reload_counter_2n and reload_counter_2n+1, which realizes
hardware metric sampling.

For example,

  perf record -e '\
  {armv8_pmuv3_0/cpu_cycles,period=1000000,hw_metric=1/, \
   armv8_pmuv3_0/inst_retired,period=800000,hw_metric=1/}:u' \
  -- <workload>

The above command will only take samples when IPC < 800000 / 1000000,
since the interrupt only fires when cpu_cycles reaches 1000000 while
inst_retired is still below 800000.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Yushan Wang <wangyushan@hisilicon.com>
Signed-off-by: Qizhi Zhang <zhangqizhi3@h-partners.com>
---
 drivers/perf/arm_pmu.c   |   6 ++
 drivers/perf/arm_pmuv3.c | 177 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 181 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 57cd2d1a9a18..5621bbc828af 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -422,6 +422,12 @@ validate_group(struct perf_event *event)
 	 */
 	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 
+	/*
+	 * Clear percpu_pmu so that the PMU driver can tell that
+	 * get_event_idx() is being called for group validation only.
+	 */
+	fake_pmu.percpu_pmu = NULL;
+
 	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
 
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index baa700ab5e03..f3ab7f9cba86 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -319,6 +319,9 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
 #define ATTR_CFG_FLD_threshold_CFG	config1	/* PMEVTYPER.TH */
 #define ATTR_CFG_FLD_threshold_LO	5
 #define ATTR_CFG_FLD_threshold_HI	16
+#define ATTR_CFG_FLD_hw_metric_CFG	config2
+#define ATTR_CFG_FLD_hw_metric_LO	0
+#define ATTR_CFG_FLD_hw_metric_HI	0
 
 GEN_PMU_FORMAT_ATTR(event);
 GEN_PMU_FORMAT_ATTR(long);
@@ -326,6 +329,7 @@ GEN_PMU_FORMAT_ATTR(rdpmc);
 GEN_PMU_FORMAT_ATTR(threshold_count);
 GEN_PMU_FORMAT_ATTR(threshold_compare);
 GEN_PMU_FORMAT_ATTR(threshold);
+GEN_PMU_FORMAT_ATTR(hw_metric);
 
 static int sysctl_perf_user_access __read_mostly;
 
@@ -358,6 +362,27 @@ static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
 	return (th_compare << 1) | th_count;
 }
 
+static inline bool armv8pmu_event_is_hw_metric(struct perf_event *event)
+{
+	return ATTR_CFG_GET_FLD(&event->attr, hw_metric);
+}
+
+static bool armpmu_support_hisi_hw_metric(void)
+{
+	static const struct midr_range hip12_cpus[] = {
+		MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+		{ }
+	};
+
+	/*
+	 * The hw metric feature requires access to EL1 registers which a
+	 * virtual machine has no privilege to touch and would panic on.
+	 * Thus, this feature is disabled for virtual machines.
+	 */
+	return is_midr_in_range_list(read_cpuid_id(), hip12_cpus) &&
+	       is_kernel_in_hyp_mode();
+}
+
 static struct attribute *armv8_pmuv3_format_attrs[] = {
 	&format_attr_event.attr,
 	&format_attr_long.attr,
@@ -365,11 +390,23 @@ static struct attribute *armv8_pmuv3_format_attrs[] = {
 	&format_attr_threshold.attr,
 	&format_attr_threshold_compare.attr,
 	&format_attr_threshold_count.attr,
+	&format_attr_hw_metric.attr,
 	NULL,
 };
 
+static umode_t
+armv8pmu_format_attr_is_visible(struct kobject *kobj,
+				struct attribute *attr, int unused)
+{
+	if (attr == &format_attr_hw_metric.attr && !armpmu_support_hisi_hw_metric())
+		return 0;
+
+	return attr->mode;
+}
+
 static const struct attribute_group armv8_pmuv3_format_attr_group = {
 	.name = "format",
+	.is_visible = armv8pmu_format_attr_is_visible,
 	.attrs = armv8_pmuv3_format_attrs,
 };
 
@@ -603,6 +640,39 @@ static void armv8pmu_write_evcntr(int idx, u64 value)
 	write_pmevcntrn(counter, value);
 }
 
+static inline void armv8pmu_write_reload_counter(struct perf_event *event,
+						 u64 value)
+{
+	/* Need to be event->hw.idx - 1 since counter 0 is PMCCNTR_EL0 */
+	int idx = event->hw.idx - 1;
+
+#define HW_METRIC_RELOAD_CNTR(n)	sys_reg(3, 3, 15, 3, (2 + n))
+#define write_hw_metric_reload_cntr(_value, _n)				\
+	do {								\
+		switch (_n) {						\
+		case 0:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(0)); break; \
+		case 1:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(1)); break; \
+		case 2:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(2)); break; \
+		case 3:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(3)); break; \
+		case 4:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(4)); break; \
+		case 5:							\
+			write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(5)); break; \
+		default:						\
+			WARN(1, "Invalid hw_metric reload counter index\n");	\
+			dev_err(event->pmu->dev, "event is 0x%lx index is %x\n",\
+				event->hw.config_base, event->hw.idx);	\
+		}							\
+	} while (0)
+	write_hw_metric_reload_cntr(value, idx);
+#undef write_hw_metric_reload_cntr
+#undef HW_METRIC_RELOAD_CNTR
+}
+
 static void armv8pmu_write_hw_counter(struct perf_event *event,
 				      u64 value)
 {
@@ -614,6 +684,9 @@ static void armv8pmu_write_hw_counter(struct perf_event *event,
 	} else {
 		armv8pmu_write_evcntr(idx, value);
 	}
+
+	if (armv8pmu_event_is_hw_metric(event))
+		armv8pmu_write_reload_counter(event, value);
 }
 
 static void armv8pmu_write_counter(struct perf_event *event, u64 value)
@@ -688,6 +761,36 @@ static void armv8pmu_enable_counter(u32 mask)
 	write_pmcntenset(mask);
 }
 
+#define HISI_DTU_CTLR_EL1		sys_reg(3, 0, 15, 8, 4)
+#define HISI_DTU_CTLR_EL1_CHK_GROUP0	BIT(15)
+
+static inline void armv8pmu_enable_hw_metric(struct perf_event *event, bool enable)
+{
+	int idx = event->hw.idx;
+	u64 reg;
+
+	/*
+	 * Configure the chicken bit on leader event enabling.
+	 */
+	if (event != event->group_leader)
+		return;
+
+	/* Convert the idx since we only use general counters, counter 0 is
+	 * used for PMCCNTR_EL0.
+	 */
+	idx -= 1;
+
+	reg = read_sysreg_s(HISI_DTU_CTLR_EL1);
+	if (enable)
+		reg |= HISI_DTU_CTLR_EL1_CHK_GROUP0 << (idx >> 1);
+	else
+		reg &= ~(HISI_DTU_CTLR_EL1_CHK_GROUP0 << (idx >> 1));
+
+	write_sysreg_s(reg, HISI_DTU_CTLR_EL1);
+
+	reg = read_sysreg_s(HISI_DTU_CTLR_EL1);
+}
+
 static void armv8pmu_enable_event_counter(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -696,8 +799,12 @@ static void armv8pmu_enable_event_counter(struct perf_event *event)
 	kvm_set_pmu_events(mask, attr);
 
 	/* We rely on the hypervisor switch code to enable guest counters */
-	if (!kvm_pmu_counter_deferred(attr))
+	if (!kvm_pmu_counter_deferred(attr)) {
 		armv8pmu_enable_counter(mask);
+
+		if (armv8pmu_event_is_hw_metric(event))
+			armv8pmu_enable_hw_metric(event, true);
+	}
 }
 
 static void armv8pmu_disable_counter(u32 mask)
@@ -718,8 +825,12 @@ static void armv8pmu_disable_event_counter(struct perf_event *event)
 	kvm_clr_pmu_events(mask);
 
 	/* We rely on the hypervisor switch code to disable guest counters */
-	if (!kvm_pmu_counter_deferred(attr))
+	if (!kvm_pmu_counter_deferred(attr)) {
 		armv8pmu_disable_counter(mask);
+
+		if (armv8pmu_event_is_hw_metric(event))
+			armv8pmu_enable_hw_metric(event, false);
+	}
 }
 
 static void armv8pmu_enable_intens(u32 mask)
@@ -1005,6 +1116,59 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
 	return -EAGAIN;
 }
 
+static int armv8pmu_check_hw_metric_event(struct pmu_hw_events *cpuc,
+					  struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	int hw_metric_cnt = 0;
+
+	if (cpuc->percpu_pmu) {
+		for_each_sibling_event(sibling, leader) {
+			if (armv8pmu_event_is_hw_metric(sibling))
+				hw_metric_cnt++;
+		}
+
+		if (hw_metric_cnt != 1)
+			return -EINVAL;
+	} else {
+		if (event == leader)
+			return 0;
+
+		if (!armv8pmu_event_is_hw_metric(leader))
+			return -EINVAL;
+
+		for_each_sibling_event(sibling, leader) {
+			if (armv8pmu_event_is_hw_metric(sibling))
+				hw_metric_cnt++;
+		}
+
+		if (hw_metric_cnt > 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int armv8pmu_get_hw_metric_event_idx(struct pmu_hw_events *cpuc,
+					    struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct perf_event *leader = event->group_leader;
+	int leader_idx;
+
+	if (armv8pmu_check_hw_metric_event(cpuc, event))
+		return -EINVAL;
+
+	if (event == leader || leader->hw.idx < 1)
+		return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
+
+	leader_idx = leader->hw.idx;
+	if (cpuc->events[leader_idx - 1])
+		return -EAGAIN;
+
+	return leader_idx - 1;
+}
+
 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
 				  struct perf_event *event)
 {
@@ -1012,6 +1176,12 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
+	if (armv8pmu_event_is_hw_metric(event))
+		return armv8pmu_get_hw_metric_event_idx(cpuc, event);
+	else if (event != event->group_leader &&
+		 armv8pmu_event_is_hw_metric(event->group_leader))
+		return -EINVAL;
+
 	/* Always prefer to place a cycle counter into the cycle counter. */
 	if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
 	    !armv8pmu_event_get_threshold(&event->attr)) {
@@ -1235,6 +1405,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
 	if (armv8pmu_event_is_64bit(event))
 		event->hw.flags |= ARMPMU_EVT_64BIT;
 
+	if (armv8pmu_event_is_hw_metric(event) && !armpmu_support_hisi_hw_metric())
+		return -EOPNOTSUPP;
+
 	/*
 	 * User events must be allocated into a single counter, and so
 	 * must not be chained.
-- 
2.33.0
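
For reference, below is a rough user-space sketch (not part of the patch) of how the same
event group could be opened directly through perf_event_open(2), with hw_metric mapped to
config2 bit 0 as defined by the new format attribute. The sysfs path, the PMU instance name
armv8_pmuv3_0 and the raw event numbers 0x11 (cpu_cycles) and 0x08 (inst_retired) are
illustrative assumptions taken from the perf-record example above; error handling is minimal.

/* Illustrative sketch only; assumptions are noted in the comments. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

static int pmu_type(void)
{
	/* Assumed sysfs location of the dynamic PMU type for armv8_pmuv3_0 */
	FILE *f = fopen("/sys/bus/event_source/devices/armv8_pmuv3_0/type", "r");
	int type = -1;

	if (f) {
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
	}
	return type;
}

static int open_event(int type, __u64 evt_num, __u64 period, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = evt_num;		/* PMUv3 event number ("event" format field) */
	attr.config2 = 1;		/* hw_metric=1: config2 bit 0 per this patch */
	attr.sample_period = period;
	attr.exclude_kernel = 1;	/* the ':u' modifier in the example */
	attr.exclude_hv = 1;

	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	int type = pmu_type();
	int leader, sibling;

	if (type < 0)
		return 1;

	leader = open_event(type, 0x11, 1000000, -1);		/* cpu_cycles, group leader */
	sibling = open_event(type, 0x08, 800000, leader);	/* inst_retired, sibling */

	/* mmap the leader's ring buffer, enable the group and run the workload here */

	return (leader < 0 || sibling < 0) ? 1 : 0;
}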