From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
Export vcpu_stat via debugfs for x86; it contains the x86 KVM exit statistics. The path of vcpu_stat is /sys/kernel/debug/kvm/vcpu_stat, and each line of vcpu_stat is a collection of the various KVM exit counters for one vcpu. With vcpu_stat, only a single file needs to be opened to monitor the performance of virtual machines, which is more convenient.
Signed-off-by: Feng Lin linfeng23@huawei.com Signed-off-by: chenjiajun chenjiajun8@huawei.com --- arch/x86/include/asm/kvm_host.h | 12 ++++++ arch/x86/kvm/vmx/vmx.c | 10 +++++ arch/x86/kvm/x86.c | 65 +++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 57 +++++++++++++++++++++++------ 4 files changed, 132 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index adbe88e4be12..730ffde044b4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1058,6 +1058,18 @@ struct kvm_vcpu_stat { u64 req_event; u64 halt_poll_success_ns; u64 halt_poll_fail_ns; + u64 cr_exits; + u64 msr_rd_exits; + u64 msr_wr_exits; + u64 apic_wr_exits; + u64 ept_vio_exits; + u64 ept_mis_exits; + u64 pause_exits; + u64 steal; + u64 st_max; + u64 utime; + u64 stime; + u64 gtime; };
struct x86_instruction_info; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b9751..053359a7cf00 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -364,6 +364,11 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->st_max = 0; +} + void vmx_vmexit(void);
#define vmx_insn_failed(fmt...) \ @@ -4996,6 +5001,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) exit_qualification = vmx_get_exit_qual(vcpu); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; + vcpu->stat.cr_exits++; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ val = kvm_register_readl(vcpu, reg); @@ -5240,6 +5246,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu) u32 offset = exit_qualification & 0xfff;
/* APIC-write VM exit is trap-like and thus no need to adjust IP */ + vcpu->stat.apic_wr_exits++; kvm_apic_write_nodecode(vcpu, offset); return 1; } @@ -5308,6 +5315,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) u64 error_code;
exit_qualification = vmx_get_exit_qual(vcpu); + vcpu->stat.ept_vio_exits++;
/* * EPT violation happened while executing iret from NMI, @@ -5366,6 +5374,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) * nGPA here instead of the required GPA. */ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + vcpu->stat.ept_mis_exits++; if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); @@ -5480,6 +5489,7 @@ static void vmx_enable_tdp(void) */ static int handle_pause(struct kvm_vcpu *vcpu) { + vcpu->stat.pause_exits++; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1943bb8c5403..06ecae7718bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -242,11 +242,48 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VM_STAT("largepages", lpages, .mode = 0444), VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444), VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions), + { "vcpu_stat", 0, KVM_STAT_DFX }, { NULL } };
/* debugfs entries of Detail For vcpu stat EXtension */ struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + DFX_STAT("pid", pid), + DFX_STAT("pf_fixed", pf_fixed), + DFX_STAT("pf_guest", pf_guest), + DFX_STAT("tlb_flush", tlb_flush), + DFX_STAT("invlpg", invlpg), + DFX_STAT("exits", exits), + DFX_STAT("io_exits", io_exits), + DFX_STAT("mmio_exits", mmio_exits), + DFX_STAT("signal_exits", signal_exits), + DFX_STAT("irq_window", irq_window_exits), + DFX_STAT("nmi_window", nmi_window_exits), + DFX_STAT("halt_exits", halt_exits), + DFX_STAT("halt_successful_poll", halt_successful_poll), + DFX_STAT("halt_attempted_poll", halt_attempted_poll), + DFX_STAT("halt_wakeup", halt_wakeup), + DFX_STAT("request_irq", request_irq_exits), + DFX_STAT("irq_exits", irq_exits), + DFX_STAT("host_state_reload", host_state_reload), + DFX_STAT("fpu_reload", fpu_reload), + DFX_STAT("insn_emulation", insn_emulation), + DFX_STAT("insn_emulation_fail", insn_emulation_fail), + DFX_STAT("hypercalls", hypercalls), + DFX_STAT("irq_injections", irq_injections), + DFX_STAT("nmi_injections", nmi_injections), + DFX_STAT("cr_exits", cr_exits), + DFX_STAT("msr_rd_exits", msr_rd_exits), + DFX_STAT("msr_wr_exits", msr_wr_exits), + DFX_STAT("apic_wr_exits", apic_wr_exits), + DFX_STAT("ept_vio_exits", ept_vio_exits), + DFX_STAT("ept_mis_exits", ept_mis_exits), + DFX_STAT("pause_exits", pause_exits), + DFX_STAT("steal", steal), + DFX_STAT("st_max", st_max), + DFX_STAT("utime", utime), + DFX_STAT("stime", stime), + DFX_STAT("gtime", gtime), { NULL } };
@@ -1718,6 +1755,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) u64 data; int r;
+ vcpu->stat.msr_rd_exits++; r = kvm_get_msr(vcpu, ecx, &data);
/* MSR read failed? See if we should ask user space */ @@ -1747,6 +1785,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) u64 data = kvm_read_edx_eax(vcpu); int r;
+ vcpu->stat.msr_wr_exits++; r = kvm_set_msr(vcpu, ecx, data);
/* MSR write failed? See if we should ask user space */ @@ -2953,11 +2992,33 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.tlb_flush_guest(vcpu); }
+static u64 accumulate_stat_steal_time(u64 *last_steal) +{ + u64 delta; + + if (*last_steal == 0) + delta = 0; + else + delta = current->sched_info.run_delay - *last_steal; + + *last_steal = current->sched_info.run_delay; + return delta; +} + +static void update_stat_steal_time(struct kvm_vcpu *vcpu) +{ + u64 delta; + + delta = accumulate_stat_steal_time(&vcpu->stat.steal); + vcpu->stat.st_max = max(vcpu->stat.st_max, delta); +} + static void record_steal_time(struct kvm_vcpu *vcpu) { struct kvm_host_map map; struct kvm_steal_time *st;
+ update_stat_steal_time(vcpu); if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return;
@@ -9030,6 +9091,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath); + vcpu->stat.utime = current->utime; + vcpu->stat.stime = current->stime; + vcpu->stat.gtime = current->gtime; + return r;
cancel_injection: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 944d0ea2295e..6ad1da625520 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4623,6 +4623,35 @@ void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_s #define DFX_MAX_VCPU 1024 #define DFX_MAX_VCPU_STAT_SIZE 1024
+/* + * copy of seq_buf_alloc of kernel, kernel not export it + */ +static void *dfx_seq_buf_alloc(unsigned long size) +{ + return kvmalloc(size, GFP_KERNEL_ACCOUNT); +} + +static void dfx_seq_buf_free(const void *buf) +{ + kvfree(buf); +} + +static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr) +{ + char *buf; + size_t size; + + size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE; + buf = dfx_seq_buf_alloc(size); + if (!buf) + return -ENOMEM; + if (p->buf) + dfx_seq_buf_free(p->buf); + p->buf = buf; + p->size = size; + return 0; +} + static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) { struct kvm *kvm; @@ -4634,28 +4663,35 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - kvm_for_each_vcpu(i, vcpu, kvm) + kvm_for_each_vcpu(i, vcpu, kvm) { vcpu_nr++; + } mutex_unlock(&kvm_lock); - vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU); - vcpu_stats = kzalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat), - GFP_KERNEL); + if (!vcpu_nr) { + seq_putc(p, '\n'); + return 0; + } + + if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr)) + return -ENOMEM; + + vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat)); if (!vcpu_stats) return -ENOMEM;
mutex_lock(&kvm_lock); - list_for_each_entry(kvm, &vm_list, vm_list) + list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (index >= vcpu_nr) break; - memcpy(vcpu_stats + index, &vcpu->stat, + memcpy(vcpu_stats + index, &(vcpu->stat), sizeof(struct kvm_vcpu_stat)); kvm_arch_vcpu_stat_reset(&vcpu->stat); ++index; } + } mutex_unlock(&kvm_lock); - for (i = 0; i < vcpu_nr; i++) { for (dp = dfx_debugfs_entries; dp->name; ++dp) { switch (dp->dfx_kind) { @@ -4673,16 +4709,13 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) } seq_putc(p, '\n'); } - - kzfree(vcpu_stats); + vfree(vcpu_stats); return 0; }
static int dfx_vcpu_stats_open(struct inode *inode, struct file *file) { - size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1); - - return single_open_size(file, __dfx_vcpu_stats_get, NULL, size); + return single_open(file, __dfx_vcpu_stats_get, NULL); }
static const struct file_operations dfx_stat_fops = {