From: chenjiajun <chenjiajun8@huawei.com>
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I919BF CVE: NA
--------------------------------------------
This patch creates a debugfs entry for vCPU statistics at /sys/kernel/debug/kvm/vcpu_stat. The entry reports a subset of the per-vCPU KVM exit counters, one line per vCPU: pid, hvc_exit_stat, wfe_exit_stat, wfi_exit_stat, mmio_exit_user, mmio_exit_kernel, signal_exits and exits.
Currently, at most 1024 vCPUs are reported.
By sampling vcpu_stat periodically, users can see how many of these KVM exits occurred over a given period, which is helpful for monitoring virtual machines.
Signed-off-by: chenjiajun chenjiajun8@huawei.com Signed-off-by: liangtian liangtian13@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/Kconfig | 9 +++ arch/arm64/kvm/guest.c | 20 +++++- arch/x86/configs/openeuler_defconfig | 1 + arch/x86/include/asm/kvm_host.h | 4 ++ arch/x86/kvm/Kconfig | 9 +++ arch/x86/kvm/x86.c | 7 ++ include/linux/kvm_host.h | 40 +++++++++++ include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 93 ++++++++++++++++++++++++++ 11 files changed, 185 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 0e664de987e8..62c0715598fe 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -7681,6 +7681,7 @@ CONFIG_BTREE=y CONFIG_INTERVAL_TREE=y CONFIG_INTERVAL_TREE_SPAN_ITER=y CONFIG_XARRAY_MULTI=y +CONFIG_ARCH_VCPU_STAT=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index af06ccb7ee34..f50d645a8b9c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -897,6 +897,7 @@ struct kvm_vm_stat { };
struct kvm_vcpu_stat { + u64 pid; struct kvm_vcpu_stat_generic generic; u64 hvc_exit_stat; u64 wfe_exit_stat; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 83c1e09be42e..2e203836c4ef 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -71,4 +71,13 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config ARCH_VCPU_STAT + bool "export kvm exits to vcpu_stat" + depends on KVM + default n + help + Say Y here to enable kvm exits counting + + If unsure, say N. + endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index a1710e5fa72b..944cb5e1b541 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -29,6 +29,21 @@
#include "trace.h"
+#ifdef CONFIG_ARCH_VCPU_STAT +/* debugfs entries of Detail For vcpu stat EXtension */ +struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + DFX_STAT("pid", pid), + DFX_STAT("hvc_exit_stat", hvc_exit_stat), + DFX_STAT("wfe_exit_stat", wfe_exit_stat), + DFX_STAT("wfi_exit_stat", wfi_exit_stat), + DFX_STAT("mmio_exit_user", mmio_exit_user), + DFX_STAT("mmio_exit_kernel", mmio_exit_kernel), + DFX_STAT("signal_exits", signal_exits), + DFX_STAT("exits", exits), + { NULL } +}; +#endif + const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; @@ -50,7 +65,10 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, mmio_exit_user), STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, signal_exits), - STATS_DESC_COUNTER(VCPU, exits) + STATS_DESC_COUNTER(VCPU, exits), +#ifdef CONFIG_ARCH_VCPU_STAT + STATS_DESC_DFX_COUNTER(DFX, vcpu_stat) +#endif };
const struct kvm_stats_header kvm_vcpu_stats_header = { diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 68bf866e2483..46c4d59b569b 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -741,6 +741,7 @@ CONFIG_HAVE_KVM_NO_POLL=y CONFIG_KVM_XFER_TO_GUEST_WORK=y CONFIG_HAVE_KVM_PM_NOTIFIER=y CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y +CONFIG_ARCH_VCPU_STAT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_KVM_INTEL=m diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fb9f5fa96cc9..24200a9d2d09 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1486,6 +1486,7 @@ struct kvm_vm_stat { };
struct kvm_vcpu_stat { + u64 pid; struct kvm_vcpu_stat_generic generic; u64 pf_taken; u64 pf_fixed; @@ -1505,6 +1506,9 @@ struct kvm_vcpu_stat { u64 nmi_window_exits; u64 l1d_flush; u64 halt_exits; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_wakeup; u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ed90f148140d..7843a99a342c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -154,4 +154,13 @@ config KVM_PROVE_MMU config KVM_EXTERNAL_WRITE_TRACKING bool
+config ARCH_VCPU_STAT + bool "export kvm exits to vcpu_stat" + depends on KVM + default n + help + Say Y here to enable kvm exits counting + + If unsure, say N. + endif # VIRTUALIZATION diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e179db7c17da..0d86c92ce323 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -241,6 +241,13 @@ EXPORT_SYMBOL_GPL(host_xss); u64 __read_mostly host_arch_capabilities; EXPORT_SYMBOL_GPL(host_arch_capabilities);
+#ifdef CONFIG_ARCH_VCPU_STAT +/* debugfs entries of Detail For vcpu stat EXtension */ +struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + { NULL } +}; +#endif + const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), STATS_DESC_COUNTER(VM, mmu_shadow_zapped), diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb6c6109fdca..f4779f1b90fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1820,6 +1820,16 @@ struct _kvm_stats_desc { }, \ .name = #stat, \ } +#ifdef CONFIG_ARCH_VCPU_STAT +#define DFX_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ + { \ + { \ + STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ + .offset = 0 \ + }, \ + .name = #stat, \ + } +#endif /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) @@ -1839,6 +1849,11 @@ struct _kvm_stats_desc { #define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST, \ unit, base, exponent, sz, 0) +#ifdef CONFIG_ARCH_VCPU_STAT +#define STATS_DESC_DFX(SCOPE, name, unit, base, exponent) \ + STATS_DESC(SCOPE, name, KVM_STATS_TYPE_DFX, \ + unit, base, exponent, 1, 0) +#endif
/* Cumulative counter, read/write */ #define STATS_DESC_COUNTER(SCOPE, name) \ @@ -1861,6 +1876,12 @@ struct _kvm_stats_desc { #define STATS_DESC_PBOOLEAN(SCOPE, name) \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ KVM_STATS_BASE_POW10, 0) +#ifdef CONFIG_ARCH_VCPU_STAT +/* Dfx vcpu stat value, read/write */ +#define STATS_DESC_DFX_COUNTER(SCOPE, name) \ + STATS_DESC_DFX(SCOPE, name, KVM_STATS_UNIT_NONE, \ + KVM_STATS_BASE_POW10, 0) +#endif
/* Cumulative time in nanosecond */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ @@ -1897,6 +1918,25 @@ struct _kvm_stats_desc {
extern struct dentry *kvm_debugfs_dir;
+#ifdef CONFIG_ARCH_VCPU_STAT +enum dfx_stat_kind { + DFX_STAT_U64, + DFX_STAT_CPUTIME, +}; + +#define DFX_STAT(n, x, ...) \ + { n, offsetof(struct kvm_vcpu_stat, x), DFX_STAT_U64, ## __VA_ARGS__ } + +/* Detail For vcpu stat EXtension debugfs item */ +struct dfx_kvm_stats_debugfs_item { + const char *name; + int offset; + enum dfx_stat_kind dfx_kind; + struct dentry *dentry; +}; +extern struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[]; +#endif + ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct _kvm_stats_desc *desc, void *stats, size_t size_stats, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 13065dd96132..1548909b4bad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2175,6 +2175,7 @@ struct kvm_stats_header { #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_DFX (0x5 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST
#define KVM_STATS_UNIT_SHIFT 4 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 87c6658bb16b..fc6c7ad11c9c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -154,6 +154,13 @@ static unsigned long long kvm_active_vms;
static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
+#ifdef CONFIG_ARCH_VCPU_STAT +/* debugfs entries of Detail For vcpu stat EXtension */ +__weak struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { + { NULL } +}; +#endif + __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) { } @@ -4161,6 +4168,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (oldpid) synchronize_rcu(); put_pid(oldpid); +#if defined(CONFIG_ARCH_VCPU_STAT) + vcpu->stat.pid = current->pid; +#endif /* defined(CONFIG_ARCH_VCPU_STAT) */ } r = kvm_arch_vcpu_ioctl_run(vcpu); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); @@ -5868,6 +5878,83 @@ static int vcpu_stat_clear(void *_offset, u64 val) return 0; }
+#ifdef CONFIG_ARCH_VCPU_STAT +__weak void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ +} + +#define DFX_MAX_VCPU 1024 +#define DFX_MAX_VCPU_STAT_SIZE 1024 + +static int __dfx_vcpu_stats_get(struct seq_file *p, void *v) +{ + struct kvm *kvm; + struct kvm_vcpu *vcpu; + struct kvm_vcpu_stat *vcpu_stats; + struct dfx_kvm_stats_debugfs_item *dp; + int vcpu_nr = 0; + unsigned long i = 0; + int index = 0; + + mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_nr++; + } + mutex_unlock(&kvm_lock); + vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU); + vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat)); + if (!vcpu_stats) + return -ENOMEM; + + mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) { + if (index >= vcpu_nr) + break; + memcpy(vcpu_stats + index, &vcpu->stat, + sizeof(struct kvm_vcpu_stat)); + ++index; + } + mutex_unlock(&kvm_lock); + + for (i = 0; i < vcpu_nr; i++) { + for (dp = dfx_debugfs_entries; dp->name; ++dp) { + switch (dp->dfx_kind) { + case DFX_STAT_U64: + seq_put_decimal_ull(p, " ", + *(u64 *)((void *)&vcpu_stats[i] + dp->offset)); + break; + case DFX_STAT_CPUTIME: + pr_warn("DFX_STAT_CPUTIME not supported currently!"); + break; + default: + pr_warn("Bad dfx_kind in dfx_debugfs_entries!"); + break; + } + } + seq_putc(p, '\n'); + } + + vfree(vcpu_stats); + return 0; +} + +static int dfx_vcpu_stats_open(struct inode *inode, struct file *file) +{ + size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1); + + return single_open_size(file, __dfx_vcpu_stats_get, NULL, size); +} + +static const struct file_operations dfx_stat_fops = { + .open = dfx_vcpu_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n"); @@ -5932,7 +6019,13 @@ 
static void kvm_init_debug(void)
for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { pdesc = &kvm_vm_stats_desc[i]; +#ifdef CONFIG_ARCH_VCPU_STAT + if ((pdesc->desc.flags & KVM_STATS_TYPE_MASK) == KVM_STATS_TYPE_DFX) + fops = &dfx_stat_fops; + else if (kvm_stats_debugfs_mode(pdesc) & 0222) +#else if (kvm_stats_debugfs_mode(pdesc) & 0222) +#endif fops = &vm_stat_fops; else fops = &vm_stat_readonly_fops;