From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
Export kvm vcpu info in debugfs entry /sys/kernel/debug/kvm/vcpu_stat, this entry contains kvm exits and cputime items now. And then, we can get all kvm vcpu info through this debugfs instead of one entry per kvm exits items, which will greatly facilitates the collection of performance status. Currently, only support x86 and aarch64.
-- ChangeList: v3: modify kzfree in patches
v2: modify architecture-dependent st_max to avoid compile err in other arches
v1: kvm: debugfs: support vcpu_stat debugfs
chenjiajun (4): kvm: debugfs: Export vcpu stat via debugfs kvm: debugfs: export remaining aarch64 kvm exit reasons to debugfs kvm: debugfs: aarch64 export cpu time related items to debugfs kvm: debugfs: Export x86 kvm exits to vcpu_stat
arch/arm64/include/asm/kvm_host.h | 21 ++++ arch/arm64/kvm/arm.c | 23 +++++ arch/arm64/kvm/guest.c | 33 +++++++ arch/arm64/kvm/handle_exit.c | 8 ++ arch/arm64/kvm/hyp/include/hyp/switch.h | 1 + arch/arm64/kvm/mmu.c | 1 + arch/arm64/kvm/sys_regs.c | 11 +++ arch/x86/include/asm/kvm_host.h | 13 +++ arch/x86/kvm/vmx/vmx.c | 10 ++ arch/x86/kvm/x86.c | 70 +++++++++++++ include/linux/kvm_host.h | 19 ++++ virt/kvm/kvm_main.c | 125 ++++++++++++++++++++++++ 12 files changed, 335 insertions(+)
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
This patch create debugfs entry for vcpu stat. The entry path is /sys/kernel/debug/kvm/vcpu_stat. And vcpu_stat contains partial kvm exits items of vcpu, include: pid, hvc_exit_stat, wfe_exit_stat, wfi_exit_stat, mmio_exit_user, mmio_exit_kernel, exits
Currently, The maximum vcpu limit is 1024.
From this vcpu_stat, user can get the number of these kvm exits items over a period of time, which is helpful to monitor the virtual machine.
Signed-off-by: Zenghui Yu yuzenghui@huawei.com Signed-off-by: chenjiajun chenjiajun8@huawei.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/guest.c | 13 +++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 5 ++ include/linux/kvm_host.h | 17 +++++++ virt/kvm/kvm_main.c | 85 +++++++++++++++++++++++++++++++ 6 files changed, 122 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0cd9f0f75c13..1ba35b90bdb4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -454,6 +454,7 @@ struct kvm_vm_stat { };
struct kvm_vcpu_stat { + u64 pid; u64 halt_successful_poll; u64 halt_attempted_poll; u64 halt_poll_success_ns; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca..c584b0fb7692 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -42,6 +42,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("exits", exits), VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + { "vcpu_stat", 0, KVM_STAT_DFX }, + { NULL } +}; + +/* debugfs entries of Detail For vcpu stat EXtension */ +struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + DFX_STAT("pid", pid), + DFX_STAT("hvc_exit_stat", hvc_exit_stat), + DFX_STAT("wfe_exit_stat", wfe_exit_stat), + DFX_STAT("wfi_exit_stat", wfi_exit_stat), + DFX_STAT("mmio_exit_user", mmio_exit_user), + DFX_STAT("mmio_exit_kernel", mmio_exit_kernel), + DFX_STAT("exits", exits), { NULL } };
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7e5f33a0d0e2..adbe88e4be12 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1028,6 +1028,7 @@ struct kvm_vm_stat { };
struct kvm_vcpu_stat { + u64 pid; u64 pf_fixed; u64 pf_guest; u64 tlb_flush; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e545a8a613b1..1943bb8c5403 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -245,6 +245,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } };
+/* debugfs entries of Detail For vcpu stat EXtension */ +struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + { NULL } +}; + u64 __read_mostly host_xcr0; u64 __read_mostly supported_xcr0; EXPORT_SYMBOL_GPL(supported_xcr0); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7f2e2a09ebbd..b0eddb2a0da4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1151,6 +1151,7 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) enum kvm_stat_kind { KVM_STAT_VM, KVM_STAT_VCPU, + KVM_STAT_DFX, /* Detail For vcpu stat EXtension */ };
struct kvm_stat_data { @@ -1172,10 +1173,26 @@ struct kvm_stats_debugfs_item { { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ } #define VCPU_STAT(n, x, ...) \ { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ } +#define DFX_STAT(n, x, ...) \ + { n, offsetof(struct kvm_vcpu_stat, x), DFX_STAT_U64, ## __VA_ARGS__ }
extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir;
+enum dfx_stat_kind { + DFX_STAT_U64, + DFX_STAT_CPUTIME, +}; + +/* Detail For vcpu stat EXtension debugfs item */ +struct dfx_kvm_stats_debugfs_item { + const char *name; + int offset; + enum dfx_stat_kind dfx_kind; + struct dentry *dentry; +}; +extern struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[]; + #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c4..001571199aac 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -152,6 +152,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms;
+/* debugfs entries of Detail For vcpu stat EXtension */ +__weak struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + { NULL } +}; + __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, unsigned long start, unsigned long end) { @@ -3230,6 +3235,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (oldpid) synchronize_rcu(); put_pid(oldpid); +#if defined(CONFIG_X86) || defined(CONFIG_ARM64) + vcpu->stat.pid = current->pid; +#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */ } r = kvm_arch_vcpu_ioctl_run(vcpu); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); @@ -4488,6 +4496,9 @@ static int kvm_stat_data_get(void *data, u64 *val) r = kvm_get_stat_per_vcpu(stat_data->kvm, stat_data->dbgfs_item->offset, val); break; + case KVM_STAT_DFX: + r = -ENOSYS; + break; }
return r; @@ -4510,6 +4521,9 @@ static int kvm_stat_data_clear(void *data, u64 val) r = kvm_clear_stat_per_vcpu(stat_data->kvm, stat_data->dbgfs_item->offset); break; + case KVM_STAT_DFX: + r = -ENOSYS; + break; }
return r; @@ -4602,9 +4616,80 @@ static int vcpu_stat_clear(void *_offset, u64 val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, "%llu\n");
+#define DFX_MAX_VCPU 1024 +#define DFX_MAX_VCPU_STAT_SIZE 1024 + +static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) +{ + struct kvm *kvm; + struct kvm_vcpu *vcpu; + struct kvm_vcpu_stat *vcpu_stats; + struct dfx_kvm_stats_debugfs_item *dp; + int vcpu_nr = 0; + int i, index = 0; + + mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) + vcpu_nr++; + mutex_unlock(&kvm_lock); + + vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU); + vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat)); + if (!vcpu_stats) + return -ENOMEM; + + mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) { + if (index >= vcpu_nr) + break; + memcpy(vcpu_stats + index, &vcpu->stat, + sizeof(struct kvm_vcpu_stat)); + ++index; + } + mutex_unlock(&kvm_lock); + + for (i = 0; i < vcpu_nr; i++) { + for (dp = dfx_debugfs_entries; dp->name; ++dp) { + switch (dp->dfx_kind) { + case DFX_STAT_U64: + seq_put_decimal_ull(p, " ", + *(u64 *)((void *)&vcpu_stats[i] + dp->offset)); + break; + case DFX_STAT_CPUTIME: + pr_warn("DFX_STAT_CPUTIME not supported currently!"); + break; + default: + pr_warn("Bad dfx_kind in dfx_debugfs_entries!"); + break; + } + } + seq_putc(p, '\n'); + } + + vfree(vcpu_stats); + return 0; +} + +static int dfx_vcpu_stats_open(struct inode *inode, struct file *file) +{ + size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1); + + return single_open_size(file, __dfx_vcpu_stats_get, NULL, size); +} + +static const struct file_operations dfx_stat_fops = { + .open = dfx_vcpu_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static const struct file_operations *stat_fops[] = { [KVM_STAT_VCPU] = &vcpu_stat_fops, [KVM_STAT_VM] = &vm_stat_fops, + [KVM_STAT_DFX] = &dfx_stat_fops, };
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
This patch export remaining aarch64 exit items to vcpu_stat via debugfs. The items include: fp_asimd_exit_stat, irq_exit_stat, sys64_exit_stat, mabt_exit_stat, fail_entry_exit_stat, internal_error_exit_stat, unknown_ec_exit_stat, cp15_32_exit_stat, cp15_64_exit_stat, cp14_mr_exit_stat, cp14_ls_exit_stat, cp14_64_exit_stat, smc_exit_stat, sve_exit_stat, debug_exit_stat
Signed-off-by: Biaoxiang Ye yebiaoxiang@huawei.com Signed-off-by: Zengruan Ye yezengruan@huawei.com Signed-off-by: chenjiajun chenjiajun8@huawei.com --- arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++ arch/arm64/kvm/guest.c | 15 +++++++++++++++ arch/arm64/kvm/handle_exit.c | 8 ++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 1 + arch/arm64/kvm/mmu.c | 1 + arch/arm64/kvm/sys_regs.c | 11 +++++++++++ 6 files changed, 51 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1ba35b90bdb4..db5992ecb207 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -467,6 +467,21 @@ struct kvm_vcpu_stat { u64 mmio_exit_user; u64 mmio_exit_kernel; u64 exits; + u64 fp_asimd_exit_stat; + u64 irq_exit_stat; + u64 sys64_exit_stat; + u64 mabt_exit_stat; + u64 fail_entry_exit_stat; + u64 internal_error_exit_stat; + u64 unknown_ec_exit_stat; + u64 cp15_32_exit_stat; + u64 cp15_64_exit_stat; + u64 cp14_mr_exit_stat; + u64 cp14_ls_exit_stat; + u64 cp14_64_exit_stat; + u64 smc_exit_stat; + u64 sve_exit_stat; + u64 debug_exit_stat; };
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c584b0fb7692..0a9096ba41a4 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -55,6 +55,21 @@ struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { DFX_STAT("mmio_exit_user", mmio_exit_user), DFX_STAT("mmio_exit_kernel", mmio_exit_kernel), DFX_STAT("exits", exits), + DFX_STAT("fp_asimd_exit_stat", fp_asimd_exit_stat), + DFX_STAT("irq_exit_stat", irq_exit_stat), + DFX_STAT("sys64_exit_stat", sys64_exit_stat), + DFX_STAT("mabt_exit_stat", mabt_exit_stat), + DFX_STAT("fail_entry_exit_stat", fail_entry_exit_stat), + DFX_STAT("internal_error_exit_stat", internal_error_exit_stat), + DFX_STAT("unknown_ec_exit_stat", unknown_ec_exit_stat), + DFX_STAT("cp15_32_exit_stat", cp15_32_exit_stat), + DFX_STAT("cp15_64_exit_stat", cp15_64_exit_stat), + DFX_STAT("cp14_mr_exit_stat", cp14_mr_exit_stat), + DFX_STAT("cp14_ls_exit_stat", cp14_ls_exit_stat), + DFX_STAT("cp14_64_exit_stat", cp14_64_exit_stat), + DFX_STAT("smc_exit_stat", smc_exit_stat), + DFX_STAT("sve_exit_stat", sve_exit_stat), + DFX_STAT("debug_exit_stat", debug_exit_stat), { NULL } };
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad..7199dd851454 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -62,6 +62,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, 0, ~0UL); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + vcpu->stat.smc_exit_stat++; return 1; }
@@ -124,6 +125,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = esr; + vcpu->stat.debug_exit_stat++;
switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: @@ -152,6 +154,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) esr, esr_get_class_string(esr));
kvm_inject_undefined(vcpu); + vcpu->stat.unknown_ec_exit_stat++; return 1; }
@@ -159,6 +162,7 @@ static int handle_sve(struct kvm_vcpu *vcpu) { /* Until SVE is supported for guests: */ kvm_inject_undefined(vcpu); + vcpu->stat.sve_exit_stat++; return 1; }
@@ -262,6 +266,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
switch (exception_index) { case ARM_EXCEPTION_IRQ: + vcpu->stat.irq_exit_stat++; return 1; case ARM_EXCEPTION_EL1_SERROR: return 1; @@ -273,6 +278,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) * is pre-empted by kvm_reboot()'s shutdown call. */ run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->stat.fail_entry_exit_stat++; return 0; case ARM_EXCEPTION_IL: /* @@ -280,11 +286,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) * have been corrupted somehow. Give up. */ run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->stat.fail_entry_exit_stat++; return -EINVAL; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->stat.internal_error_exit_stat++; return 0; } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1f875a8f20c4..4ef2dfe8186b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -223,6 +223,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) esr_ec != ESR_ELx_EC_SVE) return false;
+ vcpu->stat.fp_asimd_exit_stat++; /* Don't handle SVE traps for non-SVE vcpus here: */ if (!sve_guest) if (esr_ec != ESR_ELx_EC_FP_ASIMD) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 75814a02d189..d6c20d899575 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -763,6 +763,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); + vcpu->stat.mabt_exit_stat++;
if (fault_status == FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1..be13a66b50ed 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2156,6 +2156,8 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); + vcpu->stat.cp14_ls_exit_stat++; + return 1; }
@@ -2326,21 +2328,29 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) { + vcpu->stat.cp15_64_exit_stat++; + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); }
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) { + vcpu->stat.cp15_32_exit_stat++; + return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); }
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) { + vcpu->stat.cp14_64_exit_stat++; + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); }
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) { + vcpu->stat.cp14_mr_exit_stat++; + return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); }
@@ -2398,6 +2408,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) int ret;
trace_kvm_handle_sys_reg(esr); + vcpu->stat.sys64_exit_stat++;
params.is_aarch32 = false; params.is_32bit = false;
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
This patch export cpu time related items to vcpu_stat. Contain: steal, st_max, utime, stime, gtime
The definitions of these items are: steal: cpu time VCPU waits for PCPU while it is servicing another VCPU st_max: max scheduling delay utime: cpu time in userspace stime: cpu time in sys gtime: cpu time in guest
Through these items, user can get many cpu usage info of vcpu, such as: CPU Usage of Guest = gtime_delta / delta_cputime CPU Usage of Hyp = (utime_delta - gtime_delta + stime_delta) / delta_cputime CPU Usage of Steal = steal_delta / delta_cputime Max Scheduling Delay = st_max
Signed-off-by: liangpeng liangpeng10@huawei.com Signed-off-by: chenjiajun chenjiajun8@huawei.com --- arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/arm64/kvm/arm.c | 23 +++++++++++++++++++++++ arch/arm64/kvm/guest.c | 5 +++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 5 +++++ 5 files changed, 40 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index db5992ecb207..71b5391ec547 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -482,6 +482,11 @@ struct kvm_vcpu_stat { u64 smc_exit_stat; u64 sve_exit_stat; u64 debug_exit_stat; + u64 steal; + u64 st_max; + u64 utime; + u64 stime; + u64 gtime; };
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0ffb019ca8b..bf77c2f51f65 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -343,6 +343,20 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) preempt_enable(); }
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->st_max = 0; +} + +static void update_steal_time(struct kvm_vcpu *vcpu) +{ + u64 delta; + + delta = current->sched_info.run_delay - vcpu->stat.steal; + vcpu->stat.steal = current->sched_info.run_delay; + vcpu->stat.st_max = max(vcpu->stat.st_max, delta); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_s2_mmu *mmu; @@ -376,6 +390,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) else vcpu_set_wfx_traps(vcpu);
+ update_steal_time(vcpu); if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); } @@ -649,6 +664,13 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) } }
+static void update_vcpu_stat_time(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->utime = current->utime; + vcpu_stat->stime = current->stime; + vcpu_stat->gtime = current->gtime; +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -844,6 +866,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) }
ret = handle_exit(vcpu, ret); + update_vcpu_stat_time(&vcpu->stat); }
/* Tell userspace about in-kernel device output levels */ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 0a9096ba41a4..9f678decc8e5 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -70,6 +70,11 @@ struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { DFX_STAT("smc_exit_stat", smc_exit_stat), DFX_STAT("sve_exit_stat", sve_exit_stat), DFX_STAT("debug_exit_stat", debug_exit_stat), + DFX_STAT("steal", steal), + DFX_STAT("st_max", st_max), + DFX_STAT("utime", utime), + DFX_STAT("stime", stime), + DFX_STAT("gtime", gtime), { NULL } };
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0eddb2a0da4..9a88fad973c5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1481,6 +1481,8 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat); + typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 001571199aac..8581f3a14da9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4616,6 +4616,10 @@ static int vcpu_stat_clear(void *_offset, u64 val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, "%llu\n");
+void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ +} + #define DFX_MAX_VCPU 1024 #define DFX_MAX_VCPU_STAT_SIZE 1024
@@ -4646,6 +4650,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) break; memcpy(vcpu_stats + index, &vcpu->stat, sizeof(struct kvm_vcpu_stat)); + kvm_arch_vcpu_stat_reset(&vcpu->stat); ++index; } mutex_unlock(&kvm_lock);
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
Export vcpu_stat via debugfs for x86, which contains x86 kvm exits items. The path of the vcpu_stat is /sys/kernel/debug/kvm/vcpu_stat, and each line of vcpu_stat is a collection of various kvm exits for a vcpu. And through vcpu_stat, we only need to open one file to tail performance of virtual machine, which is more convenient.
Signed-off-by: Feng Lin linfeng23@huawei.com Signed-off-by: chenjiajun chenjiajun8@huawei.com --- arch/x86/include/asm/kvm_host.h | 12 ++++++ arch/x86/kvm/vmx/vmx.c | 10 +++++ arch/x86/kvm/x86.c | 65 +++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 51 ++++++++++++++++++++++---- 4 files changed, 130 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index adbe88e4be12..730ffde044b4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1058,6 +1058,18 @@ struct kvm_vcpu_stat { u64 req_event; u64 halt_poll_success_ns; u64 halt_poll_fail_ns; + u64 cr_exits; + u64 msr_rd_exits; + u64 msr_wr_exits; + u64 apic_wr_exits; + u64 ept_vio_exits; + u64 ept_mis_exits; + u64 pause_exits; + u64 steal; + u64 st_max; + u64 utime; + u64 stime; + u64 gtime; };
struct x86_instruction_info; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b9751..053359a7cf00 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -364,6 +364,11 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->st_max = 0; +} + void vmx_vmexit(void);
#define vmx_insn_failed(fmt...) \ @@ -4996,6 +5001,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) exit_qualification = vmx_get_exit_qual(vcpu); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; + vcpu->stat.cr_exits++; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ val = kvm_register_readl(vcpu, reg); @@ -5240,6 +5246,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu) u32 offset = exit_qualification & 0xfff;
/* APIC-write VM exit is trap-like and thus no need to adjust IP */ + vcpu->stat.apic_wr_exits++; kvm_apic_write_nodecode(vcpu, offset); return 1; } @@ -5308,6 +5315,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) u64 error_code;
exit_qualification = vmx_get_exit_qual(vcpu); + vcpu->stat.ept_vio_exits++;
/* * EPT violation happened while executing iret from NMI, @@ -5366,6 +5374,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) * nGPA here instead of the required GPA. */ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + vcpu->stat.ept_mis_exits++; if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); @@ -5480,6 +5489,7 @@ static void vmx_enable_tdp(void) */ static int handle_pause(struct kvm_vcpu *vcpu) { + vcpu->stat.pause_exits++; if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1943bb8c5403..06ecae7718bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -242,11 +242,48 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VM_STAT("largepages", lpages, .mode = 0444), VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444), VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions), + { "vcpu_stat", 0, KVM_STAT_DFX }, { NULL } };
/* debugfs entries of Detail For vcpu stat EXtension */ struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + DFX_STAT("pid", pid), + DFX_STAT("pf_fixed", pf_fixed), + DFX_STAT("pf_guest", pf_guest), + DFX_STAT("tlb_flush", tlb_flush), + DFX_STAT("invlpg", invlpg), + DFX_STAT("exits", exits), + DFX_STAT("io_exits", io_exits), + DFX_STAT("mmio_exits", mmio_exits), + DFX_STAT("signal_exits", signal_exits), + DFX_STAT("irq_window", irq_window_exits), + DFX_STAT("nmi_window", nmi_window_exits), + DFX_STAT("halt_exits", halt_exits), + DFX_STAT("halt_successful_poll", halt_successful_poll), + DFX_STAT("halt_attempted_poll", halt_attempted_poll), + DFX_STAT("halt_wakeup", halt_wakeup), + DFX_STAT("request_irq", request_irq_exits), + DFX_STAT("irq_exits", irq_exits), + DFX_STAT("host_state_reload", host_state_reload), + DFX_STAT("fpu_reload", fpu_reload), + DFX_STAT("insn_emulation", insn_emulation), + DFX_STAT("insn_emulation_fail", insn_emulation_fail), + DFX_STAT("hypercalls", hypercalls), + DFX_STAT("irq_injections", irq_injections), + DFX_STAT("nmi_injections", nmi_injections), + DFX_STAT("cr_exits", cr_exits), + DFX_STAT("msr_rd_exits", msr_rd_exits), + DFX_STAT("msr_wr_exits", msr_wr_exits), + DFX_STAT("apic_wr_exits", apic_wr_exits), + DFX_STAT("ept_vio_exits", ept_vio_exits), + DFX_STAT("ept_mis_exits", ept_mis_exits), + DFX_STAT("pause_exits", pause_exits), + DFX_STAT("steal", steal), + DFX_STAT("st_max", st_max), + DFX_STAT("utime", utime), + DFX_STAT("stime", stime), + DFX_STAT("gtime", gtime), { NULL } };
@@ -1718,6 +1755,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) u64 data; int r;
+ vcpu->stat.msr_rd_exits++; r = kvm_get_msr(vcpu, ecx, &data);
/* MSR read failed? See if we should ask user space */ @@ -1747,6 +1785,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) u64 data = kvm_read_edx_eax(vcpu); int r;
+ vcpu->stat.msr_wr_exits++; r = kvm_set_msr(vcpu, ecx, data);
/* MSR write failed? See if we should ask user space */ @@ -2953,11 +2992,33 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.tlb_flush_guest(vcpu); }
+static u64 accumulate_stat_steal_time(u64 *last_steal) +{ + u64 delta; + + if (*last_steal == 0) + delta = 0; + else + delta = current->sched_info.run_delay - *last_steal; + + *last_steal = current->sched_info.run_delay; + return delta; +} + +static void update_stat_steal_time(struct kvm_vcpu *vcpu) +{ + u64 delta; + + delta = accumulate_stat_steal_time(&vcpu->stat.steal); + vcpu->stat.st_max = max(vcpu->stat.st_max, delta); +} + static void record_steal_time(struct kvm_vcpu *vcpu) { struct kvm_host_map map; struct kvm_steal_time *st;
+ update_stat_steal_time(vcpu); if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return;
@@ -9030,6 +9091,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath); + vcpu->stat.utime = current->utime; + vcpu->stat.stime = current->stime; + vcpu->stat.gtime = current->gtime; + return r;
cancel_injection: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8581f3a14da9..2d55e1e0fb66 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4623,6 +4623,35 @@ void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_s #define DFX_MAX_VCPU 1024 #define DFX_MAX_VCPU_STAT_SIZE 1024
+/* + * copy of seq_buf_alloc of kernel, kernel not export it + */ +static void *dfx_seq_buf_alloc(unsigned long size) +{ + return kvmalloc(size, GFP_KERNEL_ACCOUNT); +} + +static void dfx_seq_buf_free(const void *buf) +{ + kvfree(buf); +} + +static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr) +{ + char *buf; + size_t size; + + size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE; + buf = dfx_seq_buf_alloc(size); + if (!buf) + return -ENOMEM; + if (p->buf) + dfx_seq_buf_free(p->buf); + p->buf = buf; + p->size = size; + return 0; +} + static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) { struct kvm *kvm; @@ -4634,27 +4663,35 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - kvm_for_each_vcpu(i, vcpu, kvm) + kvm_for_each_vcpu(i, vcpu, kvm) { vcpu_nr++; + } mutex_unlock(&kvm_lock); - vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU); + if (!vcpu_nr) { + seq_putc(p, '\n'); + return 0; + } + + if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr)) + return -ENOMEM; + vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat)); if (!vcpu_stats) return -ENOMEM;
mutex_lock(&kvm_lock); - list_for_each_entry(kvm, &vm_list, vm_list) + list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (index >= vcpu_nr) break; - memcpy(vcpu_stats + index, &vcpu->stat, + memcpy(vcpu_stats + index, &(vcpu->stat), sizeof(struct kvm_vcpu_stat)); kvm_arch_vcpu_stat_reset(&vcpu->stat); ++index; } + } mutex_unlock(&kvm_lock); - for (i = 0; i < vcpu_nr; i++) { for (dp = dfx_debugfs_entries; dp->name; ++dp) { switch (dp->dfx_kind) { @@ -4679,9 +4716,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
static int dfx_vcpu_stats_open(struct inode *inode, struct file *file) { - size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1); - - return single_open_size(file, __dfx_vcpu_stats_get, NULL, size); + return single_open(file, __dfx_vcpu_stats_get, NULL); }
static const struct file_operations dfx_stat_fops = {
Reviewed-by: Xiangyou Xie xiexiangyou@huawei.com
On 2020/12/23 9:37, Jiajun Chen wrote:
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
Export kvm vcpu info in debugfs entry /sys/kernel/debug/kvm/vcpu_stat, this entry contains kvm exits and cputime items now. And then, we can get all kvm vcpu info through this debugfs instead of one entry per kvm exits items, which will greatly facilitates the collection of performance status. Currently, only support x86 and aarch64.
-- ChangeList: v3: modify kzfree in patches
v2: modify architecture-dependent st_max to avoid compile err in other arches
v1: kvm: debugfs: support vcpu_stat debugfs
chenjiajun (4): kvm: debugfs: Export vcpu stat via debugfs kvm: debugfs: export remaining aarch64 kvm exit reasons to debugfs kvm: debugfs: aarch64 export cpu time related items to debugfs kvm: debugfs: Export x86 kvm exits to vcpu_stat
arch/arm64/include/asm/kvm_host.h | 21 ++++ arch/arm64/kvm/arm.c | 23 +++++ arch/arm64/kvm/guest.c | 33 +++++++ arch/arm64/kvm/handle_exit.c | 8 ++ arch/arm64/kvm/hyp/include/hyp/switch.h | 1 + arch/arm64/kvm/mmu.c | 1 + arch/arm64/kvm/sys_regs.c | 11 +++ arch/x86/include/asm/kvm_host.h | 13 +++ arch/x86/kvm/vmx/vmx.c | 10 ++ arch/x86/kvm/x86.c | 70 +++++++++++++ include/linux/kvm_host.h | 19 ++++ virt/kvm/kvm_main.c | 125 ++++++++++++++++++++++++ 12 files changed, 335 insertions(+)
For this series, Acked-by: Xie XiuQi xiexiuqi@huawei.com
On 2020/12/23 9:37, Jiajun Chen wrote:
From: chenjiajun chenjiajun8@huawei.com
virt inclusion category: feature bugzilla: 46853 CVE: NA
Export kvm vcpu info in debugfs entry /sys/kernel/debug/kvm/vcpu_stat, this entry contains kvm exits and cputime items now. And then, we can get all kvm vcpu info through this debugfs instead of one entry per kvm exits items, which will greatly facilitates the collection of performance status. Currently, only support x86 and aarch64.
-- ChangeList: v3: modify kzfree in patches
v2: modify architecture-dependent st_max to avoid compile err in other arches
v1: kvm: debugfs: support vcpu_stat debugfs
chenjiajun (4): kvm: debugfs: Export vcpu stat via debugfs kvm: debugfs: export remaining aarch64 kvm exit reasons to debugfs kvm: debugfs: aarch64 export cpu time related items to debugfs kvm: debugfs: Export x86 kvm exits to vcpu_stat
arch/arm64/include/asm/kvm_host.h | 21 ++++ arch/arm64/kvm/arm.c | 23 +++++ arch/arm64/kvm/guest.c | 33 +++++++ arch/arm64/kvm/handle_exit.c | 8 ++ arch/arm64/kvm/hyp/include/hyp/switch.h | 1 + arch/arm64/kvm/mmu.c | 1 + arch/arm64/kvm/sys_regs.c | 11 +++ arch/x86/include/asm/kvm_host.h | 13 +++ arch/x86/kvm/vmx/vmx.c | 10 ++ arch/x86/kvm/x86.c | 70 +++++++++++++ include/linux/kvm_host.h | 19 ++++ virt/kvm/kvm_main.c | 125 ++++++++++++++++++++++++ 12 files changed, 335 insertions(+)