From: chenjiajun <chenjiajun8@huawei.com>
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I919BF
CVE: NA
------------------------------------------
Export vcpu_stat via debugfs for x86. It contains the x86 KVM exit counters. The file is located at /sys/kernel/debug/kvm/vcpu_stat, and each line of it collects the various KVM exit counters for one vCPU. With vcpu_stat, only a single file needs to be opened to track virtual machine performance, which is more convenient.
Signed-off-by: chenjiajun <chenjiajun8@huawei.com>
Signed-off-by: liangtian <liangtian13@huawei.com>
---
 arch/x86/include/asm/kvm_host.h | 12 +++++
 arch/x86/kvm/vmx/vmx.c          |  5 +++
 arch/x86/kvm/x86.c              | 79 +++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c             | 43 ++++++++++++++++--
 4 files changed, 135 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24200a9d2d09..d5508e7133a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1519,6 +1519,18 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 cr_exits; + u64 msr_rd_exits; + u64 msr_wr_exits; + u64 apic_wr_exits; + u64 ept_vio_exits; + u64 ept_mis_exits; + u64 pause_exits; + u64 steal; + u64 st_max; + u64 utime; + u64 stime; + u64 gtime; u64 nested_run; u64 directed_yield_attempted; u64 directed_yield_successful; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9bba5352582c..2ceab7c8d189 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5448,6 +5448,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) exit_qualification = vmx_get_exit_qual(vcpu); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; + vcpu->stat.cr_exits++; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ val = kvm_register_read(vcpu, reg); @@ -5673,6 +5674,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu) */ u32 offset = exit_qualification & 0xff0;
+ vcpu->stat.apic_wr_exits++; kvm_apic_write_nodecode(vcpu, offset); return 1; } @@ -5741,6 +5743,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) u64 error_code;
exit_qualification = vmx_get_exit_qual(vcpu); + vcpu->stat.ept_vio_exits++;
/* * EPT violation happened while executing iret from NMI, @@ -5800,6 +5803,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) * nGPA here instead of the required GPA. */ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + vcpu->stat.ept_mis_exits++; if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); @@ -5918,6 +5922,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) */ static int handle_pause(struct kvm_vcpu *vcpu) { + vcpu->stat.pause_exits++; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0d86c92ce323..78bb7ea573ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -244,6 +244,42 @@ EXPORT_SYMBOL_GPL(host_arch_capabilities); #ifdef CONFIG_ARCH_VCPU_STAT /* debugfs entries of Detail For vcpu stat EXtension */ struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + DFX_STAT("pid", pid), + DFX_STAT("pf_fixed", pf_fixed), + DFX_STAT("pf_guest", pf_guest), + DFX_STAT("tlb_flush", tlb_flush), + DFX_STAT("invlpg", invlpg), + DFX_STAT("exits", exits), + DFX_STAT("io_exits", io_exits), + DFX_STAT("mmio_exits", mmio_exits), + DFX_STAT("signal_exits", signal_exits), + DFX_STAT("irq_window", irq_window_exits), + DFX_STAT("nmi_window", nmi_window_exits), + DFX_STAT("halt_exits", halt_exits), + DFX_STAT("halt_successful_poll", halt_successful_poll), + DFX_STAT("halt_attempted_poll", halt_attempted_poll), + DFX_STAT("halt_wakeup", halt_wakeup), + DFX_STAT("request_irq", request_irq_exits), + DFX_STAT("irq_exits", irq_exits), + DFX_STAT("host_state_reload", host_state_reload), + DFX_STAT("fpu_reload", fpu_reload), + DFX_STAT("insn_emulation", insn_emulation), + DFX_STAT("insn_emulation_fail", insn_emulation_fail), + DFX_STAT("hypercalls", hypercalls), + DFX_STAT("irq_injections", irq_injections), + DFX_STAT("nmi_injections", nmi_injections), + DFX_STAT("cr_exits", cr_exits), + DFX_STAT("msr_rd_exits", msr_rd_exits), + DFX_STAT("msr_wr_exits", msr_wr_exits), + DFX_STAT("apic_wr_exits", apic_wr_exits), + DFX_STAT("ept_vio_exits", ept_vio_exits), + DFX_STAT("ept_mis_exits", ept_mis_exits), + DFX_STAT("pause_exits", pause_exits), + DFX_STAT("steal", steal), + DFX_STAT("st_max", st_max), + DFX_STAT("utime", utime), + DFX_STAT("stime", stime), + DFX_STAT("gtime", gtime), { NULL } }; #endif @@ -310,6 +346,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, preemption_other), STATS_DESC_IBOOLEAN(VCPU, guest_mode), STATS_DESC_COUNTER(VCPU, notify_window_exits), +#ifdef 
CONFIG_ARCH_VCPU_STAT + STATS_DESC_DFX_COUNTER(DFX, vcpu_stat), +#endif };
const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -2062,6 +2101,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) u64 data; int r;
+ vcpu->stat.msr_rd_exits++; r = kvm_get_msr_with_filter(vcpu, ecx, &data);
if (!r) { @@ -2087,6 +2127,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) u64 data = kvm_read_edx_eax(vcpu); int r;
+ vcpu->stat.msr_wr_exits++; r = kvm_set_msr_with_filter(vcpu, ecx, data);
if (!r) { @@ -3500,6 +3541,28 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) kvm_hv_vcpu_purge_flush_tlb(vcpu); }
+#ifdef CONFIG_ARCH_VCPU_STAT +static u64 accumulate_stat_steal_time(u64 *last_steal) +{ + u64 delta; + + if (*last_steal == 0) + delta = 0; + else + delta = current->sched_info.run_delay - *last_steal; + + *last_steal = current->sched_info.run_delay; + return delta; +} + +static void update_stat_steal_time(struct kvm_vcpu *vcpu) +{ + u64 delta; + + delta = accumulate_stat_steal_time(&vcpu->stat.steal); + vcpu->stat.st_max = max(vcpu->stat.st_max, delta); +} +#endif
static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) { @@ -3532,6 +3595,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) u64 steal; u32 version;
+#ifdef CONFIG_ARCH_VCPU_STAT + update_stat_steal_time(vcpu); +#endif if (kvm_xen_msr_enabled(vcpu->kvm)) { kvm_xen_runstate_set_running(vcpu); return; @@ -10880,6 +10946,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_from_vapic(vcpu);
r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath); +#ifdef CONFIG_ARCH_VCPU_STAT + vcpu->stat.utime = current->utime; + vcpu->stat.stime = current->stime; + vcpu->stat.gtime = current->gtime; +#endif + return r;
cancel_injection: @@ -13654,6 +13726,13 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+#ifdef CONFIG_ARCH_VCPU_STAT +void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->st_max = 0; +} +#endif + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2d66956bf994..e8bae436646d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5886,6 +5886,35 @@ __weak void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) #define DFX_MAX_VCPU 1024 #define DFX_MAX_VCPU_STAT_SIZE 1024
+/* + * copy of seq_buf_alloc of kernel, kernel not export it + */ +static void *dfx_seq_buf_alloc(unsigned long size) +{ + return kvmalloc(size, GFP_KERNEL_ACCOUNT); +} + +static void dfx_seq_buf_free(const void *buf) +{ + kvfree(buf); +} + +static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr) +{ + char *buf; + size_t size; + + size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE; + buf = dfx_seq_buf_alloc(size); + if (!buf) + return -ENOMEM; + if (p->buf) + dfx_seq_buf_free(p->buf); + p->buf = buf; + p->size = size; + return 0; +} + static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) { struct kvm *kvm; @@ -5903,6 +5932,14 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) } mutex_unlock(&kvm_lock); vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU); + if (!vcpu_nr) { + seq_putc(p, '\n'); + return 0; + } + + if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr)) + return -ENOMEM; + vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat)); if (!vcpu_stats) return -ENOMEM; @@ -5912,7 +5949,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) kvm_for_each_vcpu(i, vcpu, kvm) { if (index >= vcpu_nr) break; - memcpy(vcpu_stats + index, &vcpu->stat, + memcpy(vcpu_stats + index, &(vcpu->stat), sizeof(struct kvm_vcpu_stat)); kvm_arch_vcpu_stat_reset(&vcpu->stat); ++index; @@ -5943,9 +5980,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
static int dfx_vcpu_stats_open(struct inode *inode, struct file *file) { - size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1); - - return single_open_size(file, __dfx_vcpu_stats_get, NULL, size); + return single_open(file, __dfx_vcpu_stats_get, NULL); }
static const struct file_operations dfx_stat_fops = {