Xiangyou Xie (1):
  sched/idle: introduce smart halt polling
chenjiajun (1):
  kvm: debugfs: Export vcpu stat via debugfs
 arch/arm64/Kconfig                      |   6 ++
 arch/arm64/configs/openeuler_defconfig  |   3 +
 arch/arm64/include/asm/kvm_host.h       |  21 ++++
 arch/arm64/include/asm/thread_info.h    |   2 +
 arch/arm64/kernel/process.c             |   4 +
 arch/arm64/kvm/arm.c                    |  32 +++++-
 arch/arm64/kvm/guest.c                  |  40 +++++++-
 arch/arm64/kvm/handle_exit.c            |   8 ++
 arch/arm64/kvm/hyp/include/hyp/switch.h |   2 +
 arch/arm64/kvm/mmu.c                    |   1 +
 arch/arm64/kvm/sys_regs.c               |   9 ++
 arch/x86/Kconfig                        |   3 +
 arch/x86/include/asm/kvm_host.h         |  14 +++
 arch/x86/kvm/vmx/vmx.c                  |   8 ++
 arch/x86/kvm/x86.c                      |  86 +++++++++++++++-
 drivers/cpuidle/Kconfig                 |   4 +-
 drivers/cpuidle/cpuidle-haltpoll.c      |  98 +++++++++++++++--
 drivers/cpuidle/governors/haltpoll.c    |   6 +-
 drivers/cpuidle/poll_state.c            |   3 +
 include/asm-generic/kvm_para.h          |   2 +-
 include/linux/kernel.h                  |   1 +
 include/linux/kvm_host.h                |  44 ++++++++
 include/uapi/linux/kvm.h                |   1 +
 kernel/sched/idle.c                     |  30 +++++-
 kernel/sysctl.c                         |   7 ++
 virt/kvm/kvm_main.c                     | 129 ++++++++++++++++++++++++
 26 files changed, 541 insertions(+), 23 deletions(-)
From: Xiangyou Xie <xiexiangyou@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I912VN
-------------------------------------------------
In the guest, poll for a while before entering real idle. If TIF_NEED_RESCHED is set for the current task during polling, break out of the polling loop immediately.
The polling time, poll_threshold_ns, can be adjusted via sysctl to avoid 100% CPU usage on the host; tune it according to the workload's requirements.
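For example, assuming the sysctl below lands in kern_table (so it appears under /proc/sys/kernel/), a hypothetical 200 us threshold would be set like this:

cat /proc/sys/kernel/halt_poll_threshold
0
echo 200000 > /proc/sys/kernel/halt_poll_threshold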
This optimization requires _TIF_POLLING_NRFLAG support, which also reduces the overhead of the ttwu IPI. Wakeup response delay drops from 4us to 1us. ------------------------------------------------------------------
arm64: Add some definitions of kvm_para*
Currently, ARM does not support the kvm_para* interfaces of KVM_GUEST. Provide stub definitions of the kvm_para* functions, even though they are only simple returns.
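For reference, the generic fallbacks in include/asm-generic/kvm_para.h are just constant returns along these lines (a sketch, not the full header):

static inline bool kvm_check_and_clear_guest_paused(void)
{
	return false;
}

static inline unsigned int kvm_arch_para_features(void)
{
	return 0;
}

static inline unsigned int kvm_arch_para_hints(void)
{
	return 0;
}

static inline bool kvm_para_available(void)
{
	return false;
}
------------------------------------------------------------------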
cpuidle: haltpoll: Only check boot_option_idle_override in x86
boot_option_idle_override is defined only on x86/ia64. Since haltpoll now supports both x86 and arm64, check boot_option_idle_override only on x86. ------------------------------------------------------------------
ARM: cpuidle: Add support for cpuidle-haltpoll driver for ARM
Add support for the cpuidle-haltpoll driver on ARM, allowing arm64 to use the cpuidle-haltpoll driver. ------------------------------------------------------------------
config: enable CONFIG_CPU_IDLE_GOV_HALTPOLL and CONFIG_HALTPOLL_CPUIDLE for arm
Enable haltpoll by default to improve performance. x86 is already supported; now provide it on ARM as well. ------------------------------------------------------------------
arm64: Optimize ttwu IPI
When waking up a task on a remote CPU that shares the LLC, we can simply set the need_resched flag to wake a CPU that is polling in idle. This wakeup does not require an IPI.
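A minimal sketch of the mechanism (not the exact scheduler code -- the in-tree path uses set_nr_if_polling() with atomic operations, and wake_polling_cpu() here is a hypothetical helper):

/*
 * If the remote idle task advertises TIF_POLLING_NRFLAG, setting
 * TIF_NEED_RESCHED is enough: the poll loop observes the flag and
 * exits, so no IPI is sent.  Otherwise fall back to the IPI path.
 */
static bool wake_polling_cpu(struct task_struct *idle_task)
{
	if (test_tsk_thread_flag(idle_task, TIF_POLLING_NRFLAG)) {
		set_tsk_need_resched(idle_task);	/* seen by the poll loop */
		return true;				/* IPI avoided */
	}
	return false;					/* caller must send an IPI */
}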
The prerequisite is that the architecture supports _TIF_POLLING_NRFLAG. ------------------------------------------------------------------
cpuidle: add cpuidle-haltpoll driver module parameter
To preserve energy efficiency, haltpoll is disabled by default. In performance-sensitive scenarios, haltpoll can be enabled as follows:
echo Y > /sys/module/cpuidle_haltpoll/parameters/force
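Since the parameter is registered with mode 0644, the current state can be read back to verify:

cat /sys/module/cpuidle_haltpoll/parameters/force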
Signed-off-by: liangtian <liangtian13@huawei.com>
---
 arch/arm64/Kconfig                     |  3 +
 arch/arm64/configs/openeuler_defconfig |  3 +
 arch/arm64/include/asm/thread_info.h   |  2 +
 arch/arm64/kernel/process.c            |  4 ++
 drivers/cpuidle/Kconfig                |  4 +-
 drivers/cpuidle/cpuidle-haltpoll.c     | 98 ++++++++++++++++++++++----
 drivers/cpuidle/governors/haltpoll.c   |  6 +-
 drivers/cpuidle/poll_state.c           |  3 +
 include/asm-generic/kvm_para.h         |  2 +-
 include/linux/kernel.h                 |  1 +
 kernel/sched/idle.c                    | 30 +++++++-
 kernel/sysctl.c                        |  7 ++
 12 files changed, 143 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 85ac1e83f747..83612218295d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -413,6 +413,9 @@ config KASAN_SHADOW_OFFSET
 config UNWIND_TABLES
 	bool
 
+config ARCH_HAS_CPU_RELAX
+	def_bool y
+
 source "arch/arm64/Kconfig.platforms"
 
 source "kernel/livepatch/Kconfig"
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 2ddea5999010..276ebd171f90 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -612,10 +612,13 @@ CONFIG_CPU_IDLE=y
 # CONFIG_CPU_IDLE_GOV_LADDER is not set
 CONFIG_CPU_IDLE_GOV_MENU=y
 CONFIG_CPU_IDLE_GOV_TEO=y
+CONFIG_CPU_IDLE_GOV_HALTPOLL=y
 
 #
 # ARM CPU Idle Drivers
 #
+CONFIG_ARM_CPUIDLE=y
+CONFIG_HALTPOLL_CPUIDLE=y
 # CONFIG_ARM_PSCI_CPUIDLE is not set
 # end of ARM CPU Idle Drivers
 # end of CPU Idle
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index a2596f942500..5cc94a855f4a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -72,6 +72,7 @@ void arch_setup_new_exec(void);
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
 #define TIF_SECCOMP		11	/* syscall secure computing */
 #define TIF_SYSCALL_EMU		12	/* syscall emulation active */
+#define TIF_POLLING_NRFLAG	16	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_FREEZE		19
 #define TIF_RESTORE_SIGMASK	20
@@ -101,6 +102,7 @@ void arch_setup_new_exec(void);
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_32BIT_AARCH64	(1 << TIF_32BIT_AARCH64)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 068e5bb2661b..e1e51ed94736 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -68,6 +68,10 @@ EXPORT_SYMBOL(__stack_chk_guard);
 void (*pm_power_off)(void);
 EXPORT_SYMBOL_GPL(pm_power_off);
 
+#if defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+EXPORT_SYMBOL(arch_cpu_idle);
+#endif
+
 #ifdef CONFIG_HOTPLUG_CPU
 void __noreturn arch_cpu_idle_dead(void)
 {
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index cac5997dca50..65a60235836e 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -35,7 +35,7 @@ config CPU_IDLE_GOV_TEO
 
 config CPU_IDLE_GOV_HALTPOLL
 	bool "Haltpoll governor (for virtualized systems)"
-	depends on KVM_GUEST
+	depends on KVM_GUEST || ARM64
 	help
 	  This governor implements haltpoll idle state selection, to be
 	  used in conjunction with the haltpoll cpuidle driver, allowing
@@ -73,7 +73,7 @@ endmenu
 
 config HALTPOLL_CPUIDLE
 	tristate "Halt poll cpuidle driver"
-	depends on X86 && KVM_GUEST
+	depends on (X86 && KVM_GUEST) || ARM64
 	select CPU_IDLE_GOV_HALTPOLL
 	default y
 	help
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index e66df22f9695..7f56eea713b5 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -18,9 +18,17 @@
 #include <linux/kvm_para.h>
 #include <linux/cpuidle_haltpoll.h>
-static bool force __read_mostly;
-module_param(force, bool, 0444);
-MODULE_PARM_DESC(force, "Load unconditionally");
+static bool force;
+MODULE_PARM_DESC(force, "bool, enable haltpoll driver");
+static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp);
+static int register_haltpoll_driver(void);
+static void unregister_haltpoll_driver(void);
+
+static const struct kernel_param_ops enable_haltpoll_ops = {
+	.set = enable_haltpoll_driver,
+	.get = param_get_bool,
+};
+module_param_cb(force, &enable_haltpoll_ops, &force, 0644);
 
 static struct cpuidle_device __percpu *haltpoll_cpuidle_devices;
 static enum cpuhp_state haltpoll_hp_state;
@@ -36,6 +44,42 @@ static int default_enter_idle(struct cpuidle_device *dev,
 	return index;
 }
+
+static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp)
+{
+#ifdef CONFIG_ARM64
+	int ret;
+	bool do_enable;
+
+	if (!val)
+		return 0;
+
+	ret = strtobool(val, &do_enable);
+
+	if (ret || force == do_enable)
+		return ret;
+
+	if (do_enable) {
+		ret = register_haltpoll_driver();
+
+		if (!ret) {
+			pr_info("Enable haltpoll driver.\n");
+			force = 1;
+		} else {
+			pr_err("Fail to enable haltpoll driver.\n");
+		}
+	} else {
+		unregister_haltpoll_driver();
+		force = 0;
+		pr_info("Unregister haltpoll driver.\n");
+	}
+
+	return ret;
+#else
+	return -1;
+#endif
+}
+
 static struct cpuidle_driver haltpoll_driver = {
 	.name = "haltpoll",
 	.governor = "haltpoll",
@@ -84,32 +128,30 @@ static int haltpoll_cpu_offline(unsigned int cpu)
 	return 0;
 }
 
-static void haltpoll_uninit(void)
-{
-	if (haltpoll_hp_state)
-		cpuhp_remove_state(haltpoll_hp_state);
-	cpuidle_unregister_driver(&haltpoll_driver);
-	free_percpu(haltpoll_cpuidle_devices);
-	haltpoll_cpuidle_devices = NULL;
+static bool haltpoll_want(void)
+{
+	return kvm_para_has_hint(KVM_HINTS_REALTIME);
 }
 
-static bool haltpoll_want(void)
+static void haltpoll_uninit(void)
 {
-	return kvm_para_has_hint(KVM_HINTS_REALTIME) || force;
+	unregister_haltpoll_driver();
 }
 
-static int __init haltpoll_init(void)
+static int register_haltpoll_driver(void)
 {
 	int ret;
 	struct cpuidle_driver *drv = &haltpoll_driver;
 
+#ifdef CONFIG_X86
 	/* Do not load haltpoll if idle= is passed */
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
-	if (!kvm_para_available() || !haltpoll_want())
+	if (!force && (!kvm_para_available() || !haltpoll_want()))
 		return -ENODEV;
+#endif
 
 	cpuidle_poll_state_init(drv);
@@ -135,9 +177,35 @@ static int __init haltpoll_init(void)
 	return ret;
 }
 
+static void unregister_haltpoll_driver(void)
+{
+	if (haltpoll_hp_state)
+		cpuhp_remove_state(haltpoll_hp_state);
+	cpuidle_unregister_driver(&haltpoll_driver);
+
+	free_percpu(haltpoll_cpuidle_devices);
+	haltpoll_cpuidle_devices = NULL;
+
+}
+
+static int __init haltpoll_init(void)
+{
+	int ret = 0;
+#ifdef CONFIG_X86
+	/* Do not load haltpoll if idle= is passed */
+	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+		return -ENODEV;
+#endif
+	if (force || (haltpoll_want() && kvm_para_available()))
+		ret = register_haltpoll_driver();
+
+	return ret;
+}
+
 static void __exit haltpoll_exit(void)
 {
-	haltpoll_uninit();
+	if (haltpoll_cpuidle_devices)
+		haltpoll_uninit();
 }
 module_init(haltpoll_init);
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 1dff3a52917d..71c41cba3e96 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -40,6 +40,10 @@ module_param(guest_halt_poll_grow_start, uint, 0644);
 static bool guest_halt_poll_allow_shrink __read_mostly = true;
 module_param(guest_halt_poll_allow_shrink, bool, 0644);
 
+static bool enable __read_mostly = true;
+module_param(enable, bool, 0444);
+MODULE_PARM_DESC(enable, "Load unconditionally");
+
 /**
  * haltpoll_select - selects the next idle state to enter
  * @drv: cpuidle driver containing state data
@@ -143,7 +147,7 @@ static struct cpuidle_governor haltpoll_governor = {
 
 static int __init init_haltpoll(void)
 {
-	if (kvm_para_available())
+	if (enable)
 		return cpuidle_register_governor(&haltpoll_governor);
 
 	return 0;
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 9b6d90a72601..b939e2e25e3d 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -7,6 +7,9 @@
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/idle.h>
+#ifdef CONFIG_ARM64
+#include <linux/cpu.h>
+#endif
 
 #define POLL_IDLE_RELAX_COUNT	200
 
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
index 728e5c5706c4..a0a4cf1cda9a 100644
--- a/include/asm-generic/kvm_para.h
+++ b/include/asm-generic/kvm_para.h
@@ -4,7 +4,7 @@
 
 #include <uapi/asm-generic/kvm_para.h>
 
-
+#define KVM_HINTS_REALTIME	0
 /*
  * This function is used by architectures that support kvm to avoid issuing
  * false soft lockup messages.
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cee8fe87e9f4..d0ac98b7d656 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -220,6 +220,7 @@ extern void bust_spinlocks(int yes);
 extern int root_mountflags;
 
 extern bool early_boot_irqs_disabled;
+extern unsigned long poll_threshold_ns;
 
 /*
  * Values used for system_state. Ordering of the states must not be changed
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 5007b25c5bc6..d09a1ff1c87a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -10,6 +10,12 @@
 /* Linker adds these: start and end of __cpuidle functions */
 extern char __cpuidle_text_start[], __cpuidle_text_end[];
 
+/*
+ * Poll_threshold_ns indicates the maximum polling time before
+ * entering real idle.
+ */
+unsigned long poll_threshold_ns;
+
 /**
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
@@ -49,6 +55,24 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
 __setup("hlt", cpu_idle_nopoll_setup);
 #endif
 
+static void smart_idle_poll(void)
+{
+	unsigned long poll_duration = poll_threshold_ns;
+	ktime_t cur, stop;
+
+	if (!poll_duration)
+		return;
+
+	stop = ktime_add_ns(ktime_get(), poll_duration);
+
+	do {
+		cpu_relax();
+		if (tif_need_resched())
+			break;
+		cur = ktime_get();
+	} while (ktime_before(cur, stop));
+}
+
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	instrumentation_begin();
@@ -56,6 +80,7 @@ static noinline int __cpuidle cpu_idle_poll(void)
 	stop_critical_timings();
 	ct_cpuidle_enter();
 
+	smart_idle_poll();
 	raw_local_irq_enable();
 	while (!tif_need_resched() &&
 	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
@@ -237,6 +262,7 @@ static void cpuidle_idle_call(void)
 static void do_idle(void)
 {
 	int cpu = smp_processor_id();
+	unsigned long idle_poll_flag = poll_threshold_ns;
 
 	/*
	 * Check if we need to update blocked load
@@ -275,9 +301,11 @@ static void do_idle(void)
	 * broadcast device expired for us, we don't want to go deep
	 * idle as we know that the IPI is going to arrive right away.
	 */
-	if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+	if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
+	    idle_poll_flag) {
 		tick_nohz_idle_restart_tick();
 		cpu_idle_poll();
+		idle_poll_flag = 0;
 	} else {
 		cpuidle_idle_call();
 	}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e84df0818517..cdc117677647 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2025,6 +2025,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "halt_poll_threshold",
+		.data		= &poll_threshold_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 #ifdef CONFIG_TREE_RCU
 	{
 		.procname	= "panic_on_rcu_stall",
From: chenjiajun <chenjiajun8@huawei.com>
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I919BF
CVE: NA
This patch creates a debugfs entry for vcpu stats. The entry path is /sys/kernel/debug/kvm/vcpu_stat, and vcpu_stat contains a subset of the kvm exit items for each vcpu, including: pid, hvc_exit_stat, wfe_exit_stat, wfi_exit_stat, mmio_exit_user, mmio_exit_kernel, exits
Currently, the maximum vcpu limit is 1024.
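For example, reading the file yields one line per vcpu (made-up values shown; the columns follow the order of dfx_debugfs_entries below, pid first):

cat /sys/kernel/debug/kvm/vcpu_stat
3245 120 18 4 0 2 0 144 ...
3246  98 11 2 1 0 0 112 ...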
From this vcpu_stat, users can get the counts of these kvm exit items over a period of time, which is helpful for monitoring the virtual machine. ------------------------------------------------------------------------
kvm: debugfs: export remaining aarch64 kvm exit reasons to debugfs
This patch exports the remaining aarch64 exit items to vcpu_stat via debugfs. The items include: fp_asimd_exit_stat, irq_exit_stat, sys64_exit_stat, mabt_exit_stat, fail_entry_exit_stat, internal_error_exit_stat, unknown_ec_exit_stat, cp15_32_exit_stat, cp15_64_exit_stat, cp14_mr_exit_stat, cp14_ls_exit_stat, cp14_64_exit_stat, smc_exit_stat, sve_exit_stat, debug_exit_stat ------------------------------------------------------------------------
kvm: debugfs: aarch64 export cpu time related items to debugfs
This patch exports CPU time related items to vcpu_stat: steal, st_max, utime, stime, gtime
The definitions of these items are:
  steal:  CPU time the VCPU spends waiting for a PCPU while it is servicing another VCPU
  st_max: maximum scheduling delay
  utime:  CPU time in userspace
  stime:  CPU time in the kernel
  gtime:  CPU time in the guest
Through these items, users can derive various CPU usage figures for a vcpu, such as:
  CPU Usage of Guest = gtime_delta / delta_cputime
  CPU Usage of Hyp   = (utime_delta - gtime_delta + stime_delta) / delta_cputime
  CPU Usage of Steal = steal_delta / delta_cputime
  Max Scheduling Delay = st_max
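A quick worked example with made-up numbers: sampling vcpu_stat twice, 1000 ms of cputime apart, with gtime_delta = 800 ms, utime_delta = 850 ms, stime_delta = 100 ms and steal_delta = 50 ms gives a guest usage of 800/1000 = 80%, a hyp usage of (850 - 800 + 100)/1000 = 15%, and a steal share of 50/1000 = 5%.
------------------------------------------------------------------------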
kvm: debugfs: Export x86 kvm exits to vcpu_stat
Export vcpu_stat via debugfs for x86, containing the x86 kvm exit items. The path of vcpu_stat is /sys/kernel/debug/kvm/vcpu_stat, and each line of vcpu_stat is a collection of the various kvm exits for one vcpu. Through vcpu_stat, we only need to open a single file to track the performance of a virtual machine, which is more convenient.
At present, there is a flaw in the debugfs statistics of KVM exits: only invocations of the exit-handling functions in kvm_vmx_exit_handlers are counted. The kvm exits handled in vmx_exit_handlers_fastpath were omitted, so the EXIT_REASON_MSR_WRITE statistics could sometimes show a large numerical error. ------------------------------------------------------------------------
kvm: debugfs: add EXIT_REASON_PREEMPTION_TIMER to vcpu_stat
Export EXIT_REASON_PREEMPTION_TIMER kvm exits to the vcpu_stat debugfs entry. Add a new column to vcpu_stat, providing preemption timer status to virtualization monitoring tools. ------------------------------------------------------------------------
x86: KVM: Fix the bug that WAITmax cannot be updated in real time
Since the reset function was placed in the kvm_intel module instead of the kvm module, it could not override the weak function in kvm_main.c, so st_max on x86 was never refreshed. The solution is to define the reset function in x86.c, within the kvm module.
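In other words (condensing the two definitions involved; __weak resolution happens when each object is linked, so a strong definition living in kvm_intel.ko can never replace the weak one already linked into kvm.ko):

/* virt/kvm/kvm_main.c, kvm module: weak fallback */
void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
{
}

/*
 * arch/x86/kvm/x86.c, also in the kvm module: the strong definition is
 * now visible at link time and properly overrides the weak fallback.
 */
void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
{
	vcpu_stat->st_max = 0;	/* reset the peak scheduling delay on each read */
}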
Signed-off-by: liangtian <liangtian13@huawei.com>
---
 arch/arm64/Kconfig                      |   3 +
 arch/arm64/include/asm/kvm_host.h       |  21 ++++
 arch/arm64/kvm/arm.c                    |  32 +++++-
 arch/arm64/kvm/guest.c                  |  40 +++++++-
 arch/arm64/kvm/handle_exit.c            |   8 ++
 arch/arm64/kvm/hyp/include/hyp/switch.h |   2 +
 arch/arm64/kvm/mmu.c                    |   1 +
 arch/arm64/kvm/sys_regs.c               |   9 ++
 arch/x86/Kconfig                        |   3 +
 arch/x86/include/asm/kvm_host.h         |  17 ++++
 arch/x86/kvm/vmx/vmx.c                  |   8 ++
 arch/x86/kvm/x86.c                      |  86 +++++++++++++++-
 include/linux/kvm_host.h                |  44 ++++++++
 include/uapi/linux/kvm.h                |   1 +
 virt/kvm/kvm_main.c                     | 129 ++++++++++++++++++++++++
 15 files changed, 401 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 83612218295d..a37e7510063d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -416,6 +416,9 @@ config UNWIND_TABLES
 config ARCH_HAS_CPU_RELAX
 	def_bool y
 
+config ARCH_VCPU_STAT
+	def_bool y
+
 source "arch/arm64/Kconfig.platforms"
 
 source "kernel/livepatch/Kconfig"
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index af06ccb7ee34..6991437c11bc 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -897,6 +897,7 @@ struct kvm_vm_stat {
 };
 
 struct kvm_vcpu_stat {
+	u64 pid;
 	struct kvm_vcpu_stat_generic generic;
 	u64 hvc_exit_stat;
 	u64 wfe_exit_stat;
@@ -905,6 +906,26 @@ struct kvm_vcpu_stat {
 	u64 mmio_exit_kernel;
 	u64 signal_exits;
 	u64 exits;
+	u64 fp_asimd_exit_stat;
+	u64 irq_exit_stat;
+	u64 sys64_exit_stat;
+	u64 mabt_exit_stat;
+	u64 fail_entry_exit_stat;
+	u64 internal_error_exit_stat;
+	u64 unknown_ec_exit_stat;
+	u64 cp15_32_exit_stat;
+	u64 cp15_64_exit_stat;
+	u64 cp14_mr_exit_stat;
+	u64 cp14_ls_exit_stat;
+	u64 cp14_64_exit_stat;
+	u64 smc_exit_stat;
+	u64 sve_exit_stat;
+	u64 debug_exit_stat;
+	u64 steal;
+	u64 st_max;
+	u64 utime;
+	u64 stime;
+	u64 gtime;
 };
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4866b3f7b4ea..d9f603c00583 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -421,6 +421,22 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 }
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
+{
+	vcpu_stat->st_max = 0;
+}
+
+static void update_steal_time(struct kvm_vcpu *vcpu)
+{
+	u64 delta;
+
+	delta = current->sched_info.run_delay - vcpu->stat.steal;
+	vcpu->stat.steal = current->sched_info.run_delay;
+	vcpu->stat.st_max = max(vcpu->stat.st_max, delta);
+}
+#endif
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvm_s2_mmu *mmu;
@@ -458,7 +474,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu_clear_wfx_traps(vcpu);
 	else
 		vcpu_set_wfx_traps(vcpu);
-
+#ifdef ARCH_HAS_CPU_RELAX
+	update_steal_time(vcpu);
+#endif
 	if (vcpu_has_ptrauth(vcpu))
 		vcpu_ptrauth_disable(vcpu);
 	kvm_arch_vcpu_load_debug_state_flags(vcpu);
@@ -828,6 +846,15 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
 	return !kvm_supports_32bit_el0();
 }
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+static void update_vcpu_stat_time(struct kvm_vcpu_stat *vcpu_stat)
+{
+	vcpu_stat->utime = current->utime;
+	vcpu_stat->stime = current->stime;
+	vcpu_stat->gtime = current->gtime;
+}
+#endif
+
 /**
  * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
  * @vcpu: The VCPU pointer
@@ -1070,6 +1097,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		}
 
 		ret = handle_exit(vcpu, ret);
+#ifdef CONFIG_ARCH_VCPU_STAT
+		update_vcpu_stat_time(&vcpu->stat);
+#endif
 	}
 
 	/* Tell userspace about in-kernel device output levels */
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index a1710e5fa72b..89e33337ade9 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -29,6 +29,41 @@
 
 #include "trace.h"
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+/* debugfs entries of Detail For vcpu stat EXtension */
+struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = {
+	DFX_STAT("pid", pid),
+	DFX_STAT("hvc_exit_stat", hvc_exit_stat),
+	DFX_STAT("wfe_exit_stat", wfe_exit_stat),
+	DFX_STAT("wfi_exit_stat", wfi_exit_stat),
+	DFX_STAT("mmio_exit_user", mmio_exit_user),
+	DFX_STAT("mmio_exit_kernel", mmio_exit_kernel),
+	DFX_STAT("signal_exits", signal_exits),
+	DFX_STAT("exits", exits),
+	DFX_STAT("fp_asimd_exit_stat", fp_asimd_exit_stat),
+	DFX_STAT("irq_exit_stat", irq_exit_stat),
+	DFX_STAT("sys64_exit_stat", sys64_exit_stat),
+	DFX_STAT("mabt_exit_stat", mabt_exit_stat),
+	DFX_STAT("fail_entry_exit_stat", fail_entry_exit_stat),
+	DFX_STAT("internal_error_exit_stat", internal_error_exit_stat),
+	DFX_STAT("unknown_ec_exit_stat", unknown_ec_exit_stat),
+	DFX_STAT("cp15_32_exit_stat", cp15_32_exit_stat),
+	DFX_STAT("cp15_64_exit_stat", cp15_64_exit_stat),
+	DFX_STAT("cp14_mr_exit_stat", cp14_mr_exit_stat),
+	DFX_STAT("cp14_ls_exit_stat", cp14_ls_exit_stat),
+	DFX_STAT("cp14_64_exit_stat", cp14_64_exit_stat),
+	DFX_STAT("smc_exit_stat", smc_exit_stat),
+	DFX_STAT("sve_exit_stat", sve_exit_stat),
+	DFX_STAT("debug_exit_stat", debug_exit_stat),
+	DFX_STAT("steal", steal),
+	DFX_STAT("st_max", st_max),
+	DFX_STAT("utime", utime),
+	DFX_STAT("stime", stime),
+	DFX_STAT("gtime", gtime),
+	{ NULL }
+};
+#endif
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS()
 };
@@ -50,7 +85,10 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
 	STATS_DESC_COUNTER(VCPU, signal_exits),
-	STATS_DESC_COUNTER(VCPU, exits)
+	STATS_DESC_COUNTER(VCPU, exits),
+#ifdef CONFIG_ARCH_VCPU_STAT
+	STATS_DESC_DFX_COUNTER(DFX, vcpu_stat)
+#endif
 };
 const struct kvm_stats_header kvm_vcpu_stats_header = {
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 617ae6dea5d5..90959b8b6228 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -73,6 +73,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
 	 */
 	if (kvm_vcpu_hvc_get_imm(vcpu)) {
 		vcpu_set_reg(vcpu, 0, ~0UL);
+		vcpu->stat.smc_exit_stat++;
 		return 1;
 	}
@@ -172,6 +173,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 	run->debug.arch.hsr = lower_32_bits(esr);
 	run->debug.arch.hsr_high = upper_32_bits(esr);
 	run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
+	vcpu->stat.debug_exit_stat++;
 
 	switch (ESR_ELx_EC(esr)) {
 	case ESR_ELx_EC_WATCHPT_LOW:
@@ -193,6 +195,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 		      esr, esr_get_class_string(esr));
 
 	kvm_inject_undefined(vcpu);
+	vcpu->stat.unknown_ec_exit_stat++;
 	return 1;
 }
@@ -203,6 +206,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 static int handle_sve(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_undefined(vcpu);
+	vcpu->stat.sve_exit_stat++;
 	return 1;
 }
@@ -335,6 +339,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 
 	switch (exception_index) {
 	case ARM_EXCEPTION_IRQ:
+		vcpu->stat.irq_exit_stat++;
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
 		return 1;
@@ -346,6 +351,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 * is pre-emptied by kvm_reboot()'s shutdown call.
 		 */
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		vcpu->stat.fail_entry_exit_stat++;
 		return 0;
 	case ARM_EXCEPTION_IL:
 		/*
@@ -353,11 +359,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 * have been corrupted somehow. Give up.
 		 */
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		vcpu->stat.fail_entry_exit_stat++;
 		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d", exception_index);
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->stat.internal_error_exit_stat++;
 		return 0;
 	}
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 657320f453e6..97a54576f2d1 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -326,8 +326,10 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	/* Only handle traps the vCPU can support here: */
 	switch (esr_ec) {
 	case ESR_ELx_EC_FP_ASIMD:
+		vcpu->stat.fp_asimd_exit_stat++;
 		break;
 	case ESR_ELx_EC_SVE:
+		vcpu->stat.sve_exit_stat++;
 		if (!sve_guest)
 			return false;
 		break;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 482280fe22d7..121a3d90240d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1416,6 +1416,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	write_fault = kvm_is_write_fault(vcpu);
 	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
 	VM_BUG_ON(write_fault && exec_fault);
+	vcpu->stat.mabt_exit_stat++;
 
 	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9424fa7351bf..a83a43eb116f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2840,6 +2840,8 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_undefined(vcpu);
+	vcpu->stat.cp14_ls_exit_stat++;
+
 	return 1;
 }
@@ -3116,12 +3118,15 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
 
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.cp15_64_exit_stat++;
+
 	return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs));
 }
 
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
 {
 	struct sys_reg_params params;
+	vcpu->stat.cp15_32_exit_stat++;
 
 	params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
@@ -3139,12 +3144,15 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
 
 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.cp14_64_exit_stat++;
+
 	return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs));
 }
 
 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
 {
 	struct sys_reg_params params;
+	vcpu->stat.cp14_mr_exit_stat++;
 
 	params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
@@ -3244,6 +3252,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 	int Rt = kvm_vcpu_sys_get_rt(vcpu);
 
 	trace_kvm_handle_sys_reg(esr);
+	vcpu->stat.sys64_exit_stat++;
 	if (__check_nv_sr_forward(vcpu))
 		return 1;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 54c10fdcf5d9..68f5019fed8d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -375,6 +375,9 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_VCPU_STAT
+	def_bool y
+
 config AUDIT_ARCH
 	def_bool y if X86_64
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fb9f5fa96cc9..6cfca75c0001 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1486,6 +1486,7 @@ struct kvm_vm_stat {
 };
 
 struct kvm_vcpu_stat {
+	u64 pid;
 	struct kvm_vcpu_stat_generic generic;
 	u64 pf_taken;
 	u64 pf_fixed;
@@ -1505,6 +1506,9 @@ struct kvm_vcpu_stat {
 	u64 nmi_window_exits;
 	u64 l1d_flush;
 	u64 halt_exits;
+	u64 halt_successful_poll;
+	u64 halt_attempted_poll;
+	u64 halt_wakeup;
 	u64 request_irq_exits;
 	u64 irq_exits;
 	u64 host_state_reload;
@@ -1515,6 +1519,19 @@ struct kvm_vcpu_stat {
 	u64 irq_injections;
 	u64 nmi_injections;
 	u64 req_event;
+	u64 cr_exits;
+	u64 msr_rd_exits;
+	u64 msr_wr_exits;
+	u64 apic_wr_exits;
+	u64 ept_vio_exits;
+	u64 ept_mis_exits;
+	u64 pause_exits;
+	u64 steal;
+	u64 st_max;
+	u64 utime;
+	u64 stime;
+	u64 gtime;
+	u64 preemption_timer_exits;
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9bba5352582c..9a65021ca339 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5448,6 +5448,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	exit_qualification = vmx_get_exit_qual(vcpu);
 	cr = exit_qualification & 15;
 	reg = (exit_qualification >> 8) & 15;
+	vcpu->stat.cr_exits++;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
 		val = kvm_register_read(vcpu, reg);
@@ -5673,6 +5674,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu)
 	 */
 	u32 offset = exit_qualification & 0xff0;
 
+	vcpu->stat.apic_wr_exits++;
 	kvm_apic_write_nodecode(vcpu, offset);
 	return 1;
 }
@@ -5741,6 +5743,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	u64 error_code;
 
 	exit_qualification = vmx_get_exit_qual(vcpu);
+	vcpu->stat.ept_vio_exits++;
 
 	/*
	 * EPT violation happened while executing iret from NMI,
@@ -5800,6 +5803,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
	 * nGPA here instead of the required GPA.
	 */
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	vcpu->stat.ept_mis_exits++;
 	if (!is_guest_mode(vcpu) &&
	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		trace_kvm_fast_mmio(gpa);
@@ -5918,6 +5922,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.pause_exits++;
 	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);
@@ -6007,6 +6012,7 @@ static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
 
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
+	++vcpu->stat.preemption_timer_exits;
 	handle_fastpath_preemption_timer(vcpu);
 	return 1;
 }
@@ -7211,8 +7217,10 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
 	switch (to_vmx(vcpu)->exit_reason.basic) {
 	case EXIT_REASON_MSR_WRITE:
+		++vcpu->stat.msr_wr_exits;
 		return handle_fastpath_set_msr_irqoff(vcpu);
 	case EXIT_REASON_PREEMPTION_TIMER:
+		++vcpu->stat.preemption_timer_exits;
 		return handle_fastpath_preemption_timer(vcpu);
 	default:
 		return EXIT_FASTPATH_NONE;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e179db7c17da..1d0523711fee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -241,6 +241,50 @@ EXPORT_SYMBOL_GPL(host_xss);
 u64 __read_mostly host_arch_capabilities;
 EXPORT_SYMBOL_GPL(host_arch_capabilities);
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+/* debugfs entries of Detail For vcpu stat EXtension */
+struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = {
+	DFX_STAT("pid", pid),
+	DFX_STAT("pf_fixed", pf_fixed),
+	DFX_STAT("pf_guest", pf_guest),
+	DFX_STAT("tlb_flush", tlb_flush),
+	DFX_STAT("invlpg", invlpg),
+	DFX_STAT("exits", exits),
+	DFX_STAT("io_exits", io_exits),
+	DFX_STAT("mmio_exits", mmio_exits),
+	DFX_STAT("signal_exits", signal_exits),
+	DFX_STAT("irq_window", irq_window_exits),
+	DFX_STAT("nmi_window", nmi_window_exits),
+	DFX_STAT("halt_exits", halt_exits),
+	DFX_STAT("halt_successful_poll", halt_successful_poll),
+	DFX_STAT("halt_attempted_poll", halt_attempted_poll),
+	DFX_STAT("halt_wakeup", halt_wakeup),
+	DFX_STAT("request_irq", request_irq_exits),
+	DFX_STAT("irq_exits", irq_exits),
+	DFX_STAT("host_state_reload", host_state_reload),
+	DFX_STAT("fpu_reload", fpu_reload),
+	DFX_STAT("insn_emulation", insn_emulation),
+	DFX_STAT("insn_emulation_fail", insn_emulation_fail),
+	DFX_STAT("hypercalls", hypercalls),
+	DFX_STAT("irq_injections", irq_injections),
+	DFX_STAT("nmi_injections", nmi_injections),
+	DFX_STAT("cr_exits", cr_exits),
+	DFX_STAT("msr_rd_exits", msr_rd_exits),
+	DFX_STAT("msr_wr_exits", msr_wr_exits),
+	DFX_STAT("apic_wr_exits", apic_wr_exits),
+	DFX_STAT("ept_vio_exits", ept_vio_exits),
+	DFX_STAT("ept_mis_exits", ept_mis_exits),
+	DFX_STAT("pause_exits", pause_exits),
+	DFX_STAT("steal", steal),
+	DFX_STAT("st_max", st_max),
+	DFX_STAT("utime", utime),
+	DFX_STAT("stime", stime),
+	DFX_STAT("gtime", gtime),
+	DFX_STAT("preemption_timer_exits", preemption_timer_exits),
+	{ NULL }
+};
+#endif
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS(),
 	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -303,6 +347,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, preemption_other),
 	STATS_DESC_IBOOLEAN(VCPU, guest_mode),
 	STATS_DESC_COUNTER(VCPU, notify_window_exits),
+#ifdef CONFIG_ARCH_VCPU_STAT
+	STATS_DESC_DFX_COUNTER(DFX, vcpu_stat),
+#endif
 };
 const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -2055,6 +2102,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 	u64 data;
 	int r;
 
+	vcpu->stat.msr_rd_exits++;
 	r = kvm_get_msr_with_filter(vcpu, ecx, &data);
 
 	if (!r) {
@@ -2080,6 +2128,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 	u64 data = kvm_read_edx_eax(vcpu);
 	int r;
 
+	vcpu->stat.msr_wr_exits++;
 	r = kvm_set_msr_with_filter(vcpu, ecx, data);
 
 	if (!r) {
@@ -3493,6 +3542,28 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	kvm_hv_vcpu_purge_flush_tlb(vcpu);
 }
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+static u64 accumulate_stat_steal_time(u64 *last_steal)
+{
+	u64 delta;
+
+	if (*last_steal == 0)
+		delta = 0;
+	else
+		delta = current->sched_info.run_delay - *last_steal;
+
+	*last_steal = current->sched_info.run_delay;
+	return delta;
+}
+
+static void update_stat_steal_time(struct kvm_vcpu *vcpu)
+{
+	u64 delta;
+
+	delta = accumulate_stat_steal_time(&vcpu->stat.steal);
+	vcpu->stat.st_max = max(vcpu->stat.st_max, delta);
+}
+#endif
 
 static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
@@ -3524,7 +3595,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
 	u64 steal;
 	u32 version;
-
+#ifdef CONFIG_ARCH_VCPU_STAT
+	update_stat_steal_time(vcpu);
+#endif
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
 		return;
@@ -10873,6 +10946,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_lapic_sync_from_vapic(vcpu);
 
 	r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
+#ifdef CONFIG_ARCH_VCPU_STAT
+	vcpu->stat.utime = current->utime;
+	vcpu->stat.stime = current->stime;
+	vcpu->stat.gtime = current->gtime;
+#endif
+
 	return r;
 
 cancel_injection:
@@ -13647,6 +13726,11 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
+{
+	vcpu_stat->st_max = 0;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fb6c6109fdca..328ec923c657 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1820,6 +1820,16 @@ struct _kvm_stats_desc {
 	},							       \
 	.name = #stat,						       \
 }
+#ifdef CONFIG_ARCH_VCPU_STAT
+#define DFX_STATS_DESC(stat, type, unit, base, exp, sz, bsz)	       \
+{								       \
+	{							       \
+		STATS_DESC_COMMON(type, unit, base, exp, sz, bsz),     \
+		.offset = 0					       \
+	},							       \
+	.name = #stat,						       \
+}
+#endif
 /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
 #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz)	       \
 	SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
@@ -1839,6 +1849,11 @@ struct _kvm_stats_desc {
 #define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz)    \
 	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST,	       \
 		   unit, base, exponent, sz, 0)
+#ifdef CONFIG_ARCH_VCPU_STAT
+#define STATS_DESC_DFX(SCOPE, name, unit, base, exponent)	       \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_DFX,		       \
+		   unit, base, exponent, 1, 0)
+#endif
 
 /* Cumulative counter, read/write */
 #define STATS_DESC_COUNTER(SCOPE, name)				       \
@@ -1861,6 +1876,12 @@ struct _kvm_stats_desc {
 #define STATS_DESC_PBOOLEAN(SCOPE, name)			       \
 	STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN,	       \
 			KVM_STATS_BASE_POW10, 0)
+#ifdef CONFIG_ARCH_VCPU_STAT
+/* Dfx vcpu stat value, read/write */
+#define STATS_DESC_DFX_COUNTER(SCOPE, name)			       \
+	STATS_DESC_DFX(SCOPE, name, KVM_STATS_UNIT_NONE,	       \
+		       KVM_STATS_BASE_POW10, 0)
+#endif
 
 /* Cumulative time in nanosecond */
 #define STATS_DESC_TIME_NSEC(SCOPE, name)			       \
@@ -1897,6 +1918,25 @@ struct _kvm_stats_desc {
 
 extern struct dentry *kvm_debugfs_dir;
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+enum dfx_stat_kind {
+	DFX_STAT_U64,
+	DFX_STAT_CPUTIME,
+};
+
+#define DFX_STAT(n, x, ...)					       \
+	{ n, offsetof(struct kvm_vcpu_stat, x), DFX_STAT_U64, ## __VA_ARGS__ }
+
+/* Detail For vcpu stat EXtension debugfs item */
+struct dfx_kvm_stats_debugfs_item {
+	const char *name;
+	int offset;
+	enum dfx_stat_kind dfx_kind;
+	struct dentry *dentry;
+};
+extern struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[];
+#endif
+
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       const struct _kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
@@ -2281,6 +2321,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat);
+#endif
+
 typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
 
 int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 13065dd96132..1548909b4bad 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2175,6 +2175,7 @@ struct kvm_stats_header {
 #define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_LOG_HIST		(0x4 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_DFX		(0x5 << KVM_STATS_TYPE_SHIFT)
 #define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST
 
 #define KVM_STATS_UNIT_SHIFT		4
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 87c6658bb16b..8c86cf8fc35a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -154,6 +154,13 @@ static unsigned long long kvm_active_vms;
 
 static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
 
+#ifdef CONFIG_ARCH_VCPU_STAT
+/* debugfs entries of Detail For vcpu stat EXtension */
+__weak struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = {
+	{ NULL }
+};
+#endif
+
 __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
 {
 }
@@ -4161,6 +4168,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 			if (oldpid)
 				synchronize_rcu();
 			put_pid(oldpid);
+#if defined(CONFIG_ARCH_VCPU_STAT)
+			vcpu->stat.pid = current->pid;
+#endif /* defined(CONFIG_ARCH_VCPU_STAT) */
 		}
 		r = kvm_arch_vcpu_ioctl_run(vcpu);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
@@ -5868,6 +5878,119 @@ static int vcpu_stat_clear(void *_offset, u64 val)
 	return 0;
 }
+#ifdef CONFIG_ARCH_VCPU_STAT
+void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
+{
+}
+
+#define DFX_MAX_VCPU		1024
+#define DFX_MAX_VCPU_STAT_SIZE	1024
+
+/*
+ * copy of seq_buf_alloc of kernel, kernel not export it
+ */
+static void *dfx_seq_buf_alloc(unsigned long size)
+{
+	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
+}
+
+static void dfx_seq_buf_free(const void *buf)
+{
+	kvfree(buf);
+}
+
+static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr)
+{
+	char *buf;
+	size_t size;
+
+	size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE;
+	buf = dfx_seq_buf_alloc(size);
+	if (!buf)
+		return -ENOMEM;
+	if (p->buf)
+		dfx_seq_buf_free(p->buf);
+	p->buf = buf;
+	p->size = size;
+	return 0;
+}
+
+static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_stat *vcpu_stats;
+	struct dfx_kvm_stats_debugfs_item *dp;
+	int vcpu_nr = 0;
+	unsigned long i = 0;
+	int index = 0;
+
+	mutex_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			vcpu_nr++;
+		}
+	mutex_unlock(&kvm_lock);
+	vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU);
+	if (!vcpu_nr) {
+		seq_putc(p, '\n');
+		return 0;
+	}
+
+	if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr))
+		return -ENOMEM;
+
+	vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat));
+	if (!vcpu_stats)
+		return -ENOMEM;
+
+	mutex_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (index >= vcpu_nr)
+				break;
+			memcpy(vcpu_stats + index, &(vcpu->stat),
+			       sizeof(struct kvm_vcpu_stat));
+			kvm_arch_vcpu_stat_reset(&vcpu->stat);
+			++index;
+		}
+	}
+	mutex_unlock(&kvm_lock);
+	for (i = 0; i < vcpu_nr; i++) {
+		for (dp = dfx_debugfs_entries; dp->name; ++dp) {
+			switch (dp->dfx_kind) {
+			case DFX_STAT_U64:
+				seq_put_decimal_ull(p, " ",
+					*(u64 *)((void *)&vcpu_stats[i] + dp->offset));
+				break;
+			case DFX_STAT_CPUTIME:
+				pr_warn("DFX_STAT_CPUTIME not supported currently!");
+				break;
+			default:
+				pr_warn("Bad dfx_kind in dfx_debugfs_entries!");
+				break;
+			}
+		}
+		seq_putc(p, '\n');
+	}
+
+	vfree(vcpu_stats);
+	return 0;
+}
+
+static int dfx_vcpu_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __dfx_vcpu_stats_get, NULL);
+}
+
+static const struct file_operations dfx_stat_fops = {
+	.open = dfx_vcpu_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+#endif
+
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
			"%llu\n");
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL,
			"%llu\n");
@@ -5943,7 +6066,13 @@ static void kvm_init_debug(void)
 
 	for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
 		pdesc = &kvm_vcpu_stats_desc[i];
+#ifdef CONFIG_ARCH_VCPU_STAT
+		if ((pdesc->desc.flags & KVM_STATS_TYPE_MASK) == KVM_STATS_TYPE_DFX)
+			fops = &dfx_stat_fops;
+		else if (kvm_stats_debugfs_mode(pdesc) & 0222)
+#else
 		if (kvm_stats_debugfs_mode(pdesc) & 0222)
+#endif
			fops = &vcpu_stat_fops;
		else
			fops = &vcpu_stat_readonly_fops;
Feedback: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/4508
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...