From: Like Xu <like.xu@linux.intel.com>
mainline inclusion
from mainline-v5.4
commit 3ca270fc9edb258d5bfa271bcf851614e9e6e7d4
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5SDUS
CVE: NA
-------------
Currently, perf_event_period() is used by user tools via ioctl. Based on the naming convention, export perf_event_period() for kernel users (such as KVM), who may recalibrate the event period for their assigned counters according to their requirements.
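For reference, the existing user-space path looks roughly like the sketch below (illustration only, not part of this patch; the helper name is made up and the fd is assumed to come from an earlier perf_event_open() call):

  /* Update the sample period of an already-opened event via ioctl. */
  #include <linux/perf_event.h>
  #include <sys/ioctl.h>
  #include <stdint.h>

  static int set_period(int fd, uint64_t new_period)
  {
          /* PERF_EVENT_IOC_PERIOD takes a pointer to the new u64 period. */
          return ioctl(fd, PERF_EVENT_IOC_PERIOD, &new_period);
  }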
perf_event_period() is an external accessor, just like perf_event_{en,dis}able(), and should thus use perf_event_ctx_lock().
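As an illustration of the new in-kernel interface, a minimal sketch of a caller (the wrapper below is hypothetical and not part of this patch):

  /* Sketch of a kernel-side user such as KVM recalibrating the period of
   * an event it owns. perf_event_period() takes the event's ctx lock
   * internally, mirroring perf_event_enable()/perf_event_disable().
   */
  #include <linux/perf_event.h>

  static int kvm_adjust_period(struct perf_event *event, u64 new_period)
  {
          /* 0 on success; -EINVAL for non-sampling events, a zero period,
           * or a period the PMU rejects. */
          return perf_event_period(event, new_period);
  }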
Suggested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 include/linux/perf_event.h |  5 +++++
 kernel/events/core.c       | 28 +++++++++++++++++++++-------
 2 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 41bc1d3b311f..514962a49c62 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1336,6 +1336,7 @@ extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
 extern int perf_event_account_interrupt(struct perf_event *event);
+extern int perf_event_period(struct perf_event *event, u64 value);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1408,6 +1409,10 @@ static inline void perf_event_disable(struct perf_event *event) { }
 static inline int __perf_event_disable(void *info) { return -1; }
 static inline void perf_event_task_tick(void) { }
 static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
+static inline int perf_event_period(struct perf_event *event, u64 value)
+{
+	return -EINVAL;
+}
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 616cccf9c835..b1e4beb6931c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5034,16 +5034,11 @@ static int perf_event_check_period(struct perf_event *event, u64 value)
 	return event->pmu->check_period(event, value);
 }
 
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
+static int _perf_event_period(struct perf_event *event, u64 value)
 {
-	u64 value;
-
 	if (!is_sampling_event(event))
 		return -EINVAL;
 
-	if (copy_from_user(&value, arg, sizeof(value)))
-		return -EFAULT;
-
 	if (!value)
 		return -EINVAL;
 
@@ -5058,6 +5053,19 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	return 0;
 }
 
+int perf_event_period(struct perf_event *event, u64 value)
+{
+	struct perf_event_context *ctx;
+	int ret;
+
+	ctx = perf_event_ctx_lock(event);
+	ret = _perf_event_period(event, value);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(perf_event_period);
+
 static const struct file_operations perf_fops;
 
 static inline int perf_fget_light(int fd, struct fd *p)
@@ -5101,8 +5109,14 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 		return _perf_event_refresh(event, arg);
 
 	case PERF_EVENT_IOC_PERIOD:
-		return perf_event_period(event, (u64 __user *)arg);
-
+	{
+		u64 value;
+
+		if (copy_from_user(&value, (u64 __user *)arg, sizeof(value)))
+			return -EFAULT;
+
+		return _perf_event_period(event, value);
+	}
 	case PERF_EVENT_IOC_ID:
 	{
 		u64 id = primary_event_id(event);
From: Eric Hankland <ehankland@google.com>
mainline inclusion
from mainline-v5.5
commit 4400cf546b4bb62d49198f6642add01bf6e9b34d
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5SDUS
CVE: NA
-------------
Correct the logic in intel_pmu_set_msr() for fixed and general purpose counters. It was recently changed to set pmc->counter without taking into account the value of pmc_read_counter(), which is incorrect if the counter is currently running and non-zero; this change restores the old logic, which accounted for the value of currently running counters.
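A worked illustration of why the read-modify-write form is needed (a stand-alone sketch with made-up numbers; the fake_* model is only a simplified stand-in for how pmc_read_counter() adds the backing perf event's count to pmc->counter):

  #include <assert.h>
  #include <stdint.h>

  struct fake_pmc { uint64_t counter; uint64_t hw_delta; };

  /* Mimics pmc_read_counter(): last synchronized value plus whatever
   * the backing perf event has counted since then. */
  static uint64_t fake_read_counter(struct fake_pmc *pmc)
  {
          return pmc->counter + pmc->hw_delta;
  }

  int main(void)
  {
          struct fake_pmc pmc = { .counter = 100, .hw_delta = 20 };
          uint64_t data = 5000;   /* value written by the guest */

          /* The restored logic: fold in the in-flight delta ... */
          pmc.counter += data - fake_read_counter(&pmc);

          /* ... so a later read observes exactly the written value,
           * instead of data + 20 as with a plain assignment. */
          assert(fake_read_counter(&pmc) == data);
          return 0;
  }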
Signed-off-by: Eric Hankland <ehankland@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 arch/x86/kvm/pmu_intel.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index c3f103e2b08e..2bdc8a5775ee 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -241,13 +241,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
-			if (msr_info->host_initiated)
-				pmc->counter = data;
-			else
-				pmc->counter = (s32)data;
+			if (!msr_info->host_initiated)
+				data = (s64)(s32)data;
+			pmc->counter += data - pmc_read_counter(pmc);
 			return 0;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
-			pmc->counter = data;
+			pmc->counter += data - pmc_read_counter(pmc);
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			if (data == pmc->eventsel)
From: Eric Hankland <ehankland@google.com>
mainline inclusion
from mainline-v5.6
commit 168d918f2643d7d3f0240e768d40b4f8aba3540a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5SDUS
CVE: NA
-------------
The sample_period of a counter tracks when that counter will overflow and set global status/trigger a PMI. However, this currently only gets set when the initial counter is created or when a counter is resumed; update the sample period after a wrmsr so that running counters accurately reflect their new value.
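A small worked example of the period computation introduced below (illustration only; the 48-bit mask is an assumed counter width, the real value comes from pmc_bitmask()):

  #include <assert.h>
  #include <stdint.h>

  #define FAKE_PMC_BITMASK 0xffffffffffffULL  /* assumed 48-bit counter */

  /* Mirrors get_sample_period(): events remaining until overflow. */
  static uint64_t fake_sample_period(uint64_t counter_value)
  {
          uint64_t sample_period = (-counter_value) & FAKE_PMC_BITMASK;

          if (!sample_period)
                  sample_period = FAKE_PMC_BITMASK + 1;
          return sample_period;
  }

  int main(void)
  {
          /* Two ticks away from overflow -> PMI after two more events. */
          assert(fake_sample_period(0xfffffffffffeULL) == 2);
          /* A zero counter maps to a full wrap, never to a zero period. */
          assert(fake_sample_period(0) == FAKE_PMC_BITMASK + 1);
          return 0;
  }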
Signed-off-by: Eric Hankland <ehankland@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
[Yanan Wang: Adapt the code to linux v4.19]
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 arch/x86/kvm/pmu.c       | 2 +-
 arch/x86/kvm/pmu.h       | 9 +++++++++
 arch/x86/kvm/pmu_intel.c | 6 ++++++
 3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index acc8d217f656..4fe0bfcf0304 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -113,7 +113,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 		.config = config,
 	};
 
-	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+	attr.sample_period = get_sample_period(pmc, pmc->counter);
 
 	if (in_tx)
 		attr.config |= HSW_IN_TX;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 22dff661145a..273d4146fb28 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -103,6 +103,15 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
 	return NULL;
 }
 
+static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+	u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+	if (!sample_period)
+		sample_period = pmc_bitmask(pmc) + 1;
+	return sample_period;
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 2bdc8a5775ee..29479bc27294 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -244,9 +244,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if (!msr_info->host_initiated)
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
+			if (pmc->perf_event)
+				perf_event_period(pmc->perf_event,
+						  get_sample_period(pmc, data));
 			return 0;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
+			if (pmc->perf_event)
+				perf_event_period(pmc->perf_event,
+						  get_sample_period(pmc, data));
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			if (data == pmc->eventsel)
From: Like Xu <likexu@tencent.com>
mainline inclusion
from mainline-v5.18
commit 75189d1de1b377e580ebd2d2c55914631eac9c64
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5SDUS
CVE: NA
-------------
The NMI watchdog is one of kernel developers' favorite features, but it does not work in an AMD guest even with vPMU enabled; worse, the system misrepresents this capability via /proc.
This is a PMC emulation error. KVM does not pass the latest valid value to the perf_event in time while the guest NMI watchdog is running, so the perf_event corresponding to the watchdog counter falls back to a stale state at some point after the first guest NMI injection, forcing the hardware register PMC0 to be constantly written with 0x800000000001.
Meanwhile, the running counter should accurately reflect its new value, based on the latest coordinated pmc->counter (from the vPMC's point of view) rather than on the value written directly by the guest.
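To make the distinction concrete, a stand-alone sketch with made-up numbers (the 48-bit mask and the delta are assumptions; the point is only that the two derivations diverge once a running event has accumulated counts, and the patch consistently feeds pmc->counter to get_sample_period(), as pmc_reprogram_counter() already does at creation time):

  #include <assert.h>
  #include <stdint.h>

  #define FAKE_PMC_BITMASK 0xffffffffffffULL  /* assumed 48-bit counter */

  /* Mirrors get_sample_period() from the earlier patch. */
  static uint64_t fake_sample_period(uint64_t value)
  {
          uint64_t p = (-value) & FAKE_PMC_BITMASK;
          return p ? p : FAKE_PMC_BITMASK + 1;
  }

  int main(void)
  {
          uint64_t data = (-65536ULL) & FAKE_PMC_BITMASK; /* guest-written value  */
          uint64_t hw_delta = 300;            /* already counted by the perf event */
          uint64_t counter = data - hw_delta; /* pmc->counter after the += update  */

          /* The period derived from the coordinated pmc->counter ... */
          uint64_t from_counter = fake_sample_period(counter);
          /* ... differs from one derived from the raw guest value.   */
          uint64_t from_data = fake_sample_period(data);

          assert(from_counter == 65536 + hw_delta);
          assert(from_data == 65536);
          assert(from_counter != from_data);
          return 0;
  }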
Fixes: 168d918f2643 ("KVM: x86: Adjust counter sample period after a wrmsr")
Reported-by: Dongli Cao <caodongli@kingsoft.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20220409015226.38619-1-likexu@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
[Yanan Wang: Adapt the code to linux v4.19]
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 arch/x86/kvm/pmu.h       | 9 +++++++++
 arch/x86/kvm/pmu_amd.c   | 1 +
 arch/x86/kvm/pmu_intel.c | 8 ++------
 3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 273d4146fb28..22710c16f548 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -112,6 +112,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
 	return sample_period;
 }
 
+static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event)
+		return;
+
+	perf_event_period(pmc->perf_event,
+			  get_sample_period(pmc, pmc->counter));
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 41dff881e0f0..437bbaea20e6 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -242,6 +242,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 	if (pmc) {
 		pmc->counter += data - pmc_read_counter(pmc);
+		pmc_update_sample_period(pmc);
 		return 0;
 	}
 	/* MSR_EVNTSELn */
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 29479bc27294..a4298a8c4765 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -244,15 +244,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if (!msr_info->host_initiated)
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event)
-				perf_event_period(pmc->perf_event,
-						  get_sample_period(pmc, data));
+			pmc_update_sample_period(pmc);
 			return 0;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event)
-				perf_event_period(pmc->perf_event,
-						  get_sample_period(pmc, data));
+			pmc_update_sample_period(pmc);
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			if (data == pmc->eventsel)