From: chenjiajun <chenjiajun8@huawei.com>
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I919BF
CVE: NA
------------------------------------------
Export vcpu_stat via debugfs for x86, covering the x86 KVM exit items.
The new entry is /sys/kernel/debug/kvm/vcpu_stat, and each line of its
output holds the collected exit counters for one vcpu. With vcpu_stat,
only a single file needs to be opened to monitor the performance of all
virtual machines, which is more convenient than walking the per-VM
debugfs entries.
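For illustration only, one line of output might look as follows
(hypothetical values; the column order follows the dfx_debugfs_entries[]
table added below, starting with pid and ending with gtime):

  4496 1565 0 130 ... 118 278 1892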
Signed-off-by: chenjiajun <chenjiajun8@huawei.com>
Signed-off-by: liangtian <liangtian13@huawei.com>
---
 arch/x86/include/asm/kvm_host.h | 12 +++++
 arch/x86/kvm/vmx/vmx.c          |  5 +++
 arch/x86/kvm/x86.c              | 79 +++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c             | 46 ++++++++++++++++---
 4 files changed, 137 insertions(+), 5 deletions(-)
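[Not part of the patch: a minimal userspace reader sketch, assuming only
what the commit message states -- the file lives at
/sys/kernel/debug/kvm/vcpu_stat and prints one line of counters per
vcpu. File name and everything else here is illustrative.]

/* vcpu_stat_tail.c - hypothetical helper, for illustration only */
#include <stdio.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/sys/kernel/debug/kvm/vcpu_stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))	/* one vcpu per line */
		fputs(line, stdout);
	fclose(f);
	return 0;
}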
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 24200a9d2d09..d5508e7133a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1519,6 +1519,18 @@ struct kvm_vcpu_stat {
 	u64 irq_injections;
 	u64 nmi_injections;
 	u64 req_event;
+	u64 cr_exits;
+	u64 msr_rd_exits;
+	u64 msr_wr_exits;
+	u64 apic_wr_exits;
+	u64 ept_vio_exits;
+	u64 ept_mis_exits;
+	u64 pause_exits;
+	u64 steal;
+	u64 st_max;
+	u64 utime;
+	u64 stime;
+	u64 gtime;
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bba5352582c..2ceab7c8d189 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5448,6 +5448,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	exit_qualification = vmx_get_exit_qual(vcpu);
 	cr = exit_qualification & 15;
 	reg = (exit_qualification >> 8) & 15;
+	vcpu->stat.cr_exits++;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
 		val = kvm_register_read(vcpu, reg);
@@ -5673,6 +5674,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu)
 	 */
 	u32 offset = exit_qualification & 0xff0;

+	vcpu->stat.apic_wr_exits++;
 	kvm_apic_write_nodecode(vcpu, offset);
 	return 1;
 }
@@ -5741,6 +5743,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	u64 error_code;

 	exit_qualification = vmx_get_exit_qual(vcpu);
+	vcpu->stat.ept_vio_exits++;

 	/*
 	 * EPT violation happened while executing iret from NMI,
@@ -5800,6 +5803,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	 * nGPA here instead of the required GPA.
 	 */
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	vcpu->stat.ept_mis_exits++;
 	if (!is_guest_mode(vcpu) &&
 	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		trace_kvm_fast_mmio(gpa);
@@ -5918,6 +5922,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.pause_exits++;
 	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0d86c92ce323..78bb7ea573ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -244,6 +244,42 @@ EXPORT_SYMBOL_GPL(host_arch_capabilities);
 #ifdef CONFIG_ARCH_VCPU_STAT
 /* debugfs entries of Detail For vcpu stat EXtension */
 struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = {
+	DFX_STAT("pid", pid),
+	DFX_STAT("pf_fixed", pf_fixed),
+	DFX_STAT("pf_guest", pf_guest),
+	DFX_STAT("tlb_flush", tlb_flush),
+	DFX_STAT("invlpg", invlpg),
+	DFX_STAT("exits", exits),
+	DFX_STAT("io_exits", io_exits),
+	DFX_STAT("mmio_exits", mmio_exits),
+	DFX_STAT("signal_exits", signal_exits),
+	DFX_STAT("irq_window", irq_window_exits),
+	DFX_STAT("nmi_window", nmi_window_exits),
+	DFX_STAT("halt_exits", halt_exits),
+	DFX_STAT("halt_successful_poll", halt_successful_poll),
+	DFX_STAT("halt_attempted_poll", halt_attempted_poll),
+	DFX_STAT("halt_wakeup", halt_wakeup),
+	DFX_STAT("request_irq", request_irq_exits),
+	DFX_STAT("irq_exits", irq_exits),
+	DFX_STAT("host_state_reload", host_state_reload),
+	DFX_STAT("fpu_reload", fpu_reload),
+	DFX_STAT("insn_emulation", insn_emulation),
+	DFX_STAT("insn_emulation_fail", insn_emulation_fail),
+	DFX_STAT("hypercalls", hypercalls),
+	DFX_STAT("irq_injections", irq_injections),
+	DFX_STAT("nmi_injections", nmi_injections),
+	DFX_STAT("cr_exits", cr_exits),
+	DFX_STAT("msr_rd_exits", msr_rd_exits),
+	DFX_STAT("msr_wr_exits", msr_wr_exits),
+	DFX_STAT("apic_wr_exits", apic_wr_exits),
+	DFX_STAT("ept_vio_exits", ept_vio_exits),
+	DFX_STAT("ept_mis_exits", ept_mis_exits),
+	DFX_STAT("pause_exits", pause_exits),
+	DFX_STAT("steal", steal),
+	DFX_STAT("st_max", st_max),
+	DFX_STAT("utime", utime),
+	DFX_STAT("stime", stime),
+	DFX_STAT("gtime", gtime),
 	{ NULL }
 };
 #endif
@@ -310,6 +346,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, preemption_other),
 	STATS_DESC_IBOOLEAN(VCPU, guest_mode),
 	STATS_DESC_COUNTER(VCPU, notify_window_exits),
+#ifdef CONFIG_ARCH_VCPU_STAT
+	STATS_DESC_DFX_COUNTER(DFX, vcpu_stat),
+#endif
 };

 const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -2062,6 +2101,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 	u64 data;
 	int r;

+	vcpu->stat.msr_rd_exits++;
 	r = kvm_get_msr_with_filter(vcpu, ecx, &data);

 	if (!r) {
@@ -2087,6 +2127,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 	u64 data = kvm_read_edx_eax(vcpu);
 	int r;

+	vcpu->stat.msr_wr_exits++;
 	r = kvm_set_msr_with_filter(vcpu, ecx, data);

 	if (!r) {
@@ -3500,6 +3541,28 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	kvm_hv_vcpu_purge_flush_tlb(vcpu);
 }

+#ifdef CONFIG_ARCH_VCPU_STAT
+static u64 accumulate_stat_steal_time(u64 *last_steal)
+{
+	u64 delta;
+
+	if (*last_steal == 0)
+		delta = 0;
+	else
+		delta = current->sched_info.run_delay - *last_steal;
+
+	*last_steal = current->sched_info.run_delay;
+	return delta;
+}
+
+static void update_stat_steal_time(struct kvm_vcpu *vcpu)
+{
+	u64 delta;
+
+	delta = accumulate_stat_steal_time(&vcpu->stat.steal);
+	vcpu->stat.st_max = max(vcpu->stat.st_max, delta);
+}
+#endif

 static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
@@ -3532,6 +3595,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	u64 steal;
 	u32 version;

+#ifdef CONFIG_ARCH_VCPU_STAT
+	update_stat_steal_time(vcpu);
+#endif
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
 		return;
@@ -10880,6 +10946,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_lapic_sync_from_vapic(vcpu);

 	r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
+#ifdef CONFIG_ARCH_VCPU_STAT
+	vcpu->stat.utime = current->utime;
+	vcpu->stat.stime = current->stime;
+	vcpu->stat.gtime = current->gtime;
+#endif
+
 	return r;

 cancel_injection:
@@ -13654,6 +13726,13 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);

+#ifdef CONFIG_ARCH_VCPU_STAT
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
+{
+	vcpu_stat->st_max = 0;
+}
+#endif
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ce76c43d88e7..683aa6946dec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5886,6 +5886,35 @@ __weak void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
 #define DFX_MAX_VCPU 1024
 #define DFX_MAX_VCPU_STAT_SIZE 1024

+/*
+ * Local copy of the kernel's seq_buf_alloc(), which is not exported.
+ */
+static void *dfx_seq_buf_alloc(unsigned long size)
+{
+	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
+}
+
+static void dfx_seq_buf_free(const void *buf)
+{
+	kvfree(buf);
+}
+
+static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr)
+{
+	char *buf;
+	size_t size;
+
+	size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE;
+	buf = dfx_seq_buf_alloc(size);
+	if (!buf)
+		return -ENOMEM;
+	if (p->buf)
+		dfx_seq_buf_free(p->buf);
+	p->buf = buf;
+	p->size = size;
+	return 0;
+}
+
 static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
 {
 	struct kvm *kvm;
@@ -5903,20 +5932,29 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
 	}
 	mutex_unlock(&kvm_lock);
 	vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU);
+	if (!vcpu_nr) {
+		seq_putc(p, '\n');
+		return 0;
+	}
+
+	if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr))
+		return -ENOMEM;
+
 	vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat));
 	if (!vcpu_stats)
 		return -ENOMEM;

 	mutex_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (index >= vcpu_nr)
 				break;
-			memcpy(vcpu_stats + index, &vcpu->stat,
+			memcpy(vcpu_stats + index, &(vcpu->stat),
			       sizeof(struct kvm_vcpu_stat));
 			kvm_arch_vcpu_stat_reset(&vcpu->stat);
 			++index;
 		}
+	}
 	mutex_unlock(&kvm_lock);

 	for (i = 0; i < vcpu_nr; i++) {
@@ -5943,9 +5981,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)

 static int dfx_vcpu_stats_open(struct inode *inode, struct file *file)
 {
-	size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1);
-
-	return single_open_size(file, __dfx_vcpu_stats_get, NULL, size);
+	return single_open(file, __dfx_vcpu_stats_get, NULL);
 }

 static const struct file_operations dfx_stat_fops = {