From: Zengruan Ye <yezengruan@huawei.com>
virt inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8WMFU
CVE: NA
--------------------------------
Support the vcpu_is_preempted() functionality under KVM/arm64. This enhances lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system), since busy-waiting for a preempted vCPU hurts system performance far more than yielding early does.
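The guest side of this is consumed by the generic locking slow paths. As a rough sketch of the pattern (a simplified, hypothetical spin_on_owner(), modeled on mutex_spin_on_owner() in kernel/locking/mutex.c, not code from this patch):

	/*
	 * Illustrative sketch only: keep spinning while the lock owner is
	 * running on a CPU, but give up as soon as the owner's vCPU has
	 * been preempted by the host -- no amount of spinning can make
	 * progress until that vCPU is scheduled in again.
	 */
	static bool spin_on_owner(struct task_struct *owner)
	{
		while (READ_ONCE(owner->on_cpu)) {
			if (need_resched() || vcpu_is_preempted(task_cpu(owner)))
				return false;	/* yield instead of busy-waiting */
			cpu_relax();
		}
		return true;
	}

Without PV sched support, the static call falls back to __native_vcpu_is_preempted(), which always returns false, so bare-metal spinning behaviour is unchanged.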
unix benchmark result:
  host:  kernel 4.19.87, HiSilicon Kunpeng920, 8 CPUs
  guest: kernel 4.19.87, 16 vCPUs
test-case                                |    after-patch    |   before-patch
-----------------------------------------+-------------------+------------------
Dhrystone 2 using register variables     |  338955728.5 lps  |  339266319.5 lps
Double-Precision Whetstone               |     30634.9 MWIPS |     30884.4 MWIPS
Execl Throughput                         |      6753.2 lps   |      3580.1 lps
File Copy 1024 bufsize 2000 maxblocks    |    490048.0 KBps  |    313282.3 KBps
File Copy 256 bufsize 500 maxblocks      |    129662.5 KBps  |     83550.7 KBps
File Copy 4096 bufsize 8000 maxblocks    |   1552551.5 KBps  |    814327.0 KBps
Pipe Throughput                          |   8976422.5 lps   |   9048628.4 lps
Pipe-based Context Switching             |    258641.7 lps   |    252925.9 lps
Process Creation                         |      5312.2 lps   |      4507.9 lps
Shell Scripts (1 concurrent)             |      8704.2 lpm   |      6720.9 lpm
Shell Scripts (8 concurrent)             |      1708.8 lpm   |       607.2 lpm
System Call Overhead                     |   3714444.7 lps   |   3746386.8 lps
-----------------------------------------+-------------------+------------------
System Benchmarks Index Score            |      2270.6       |      1679.2
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
Signed-off-by: lishusen <lishusen2@huawei.com>
---
 arch/arm64/include/asm/kvm_host.h      |   2 +
 arch/arm64/include/asm/paravirt.h      |   5 ++
 arch/arm64/include/asm/pvsched-abi.h   |   2 +
 arch/arm64/kernel/paravirt-spinlocks.c |   2 +
 arch/arm64/kernel/paravirt.c           | 108 +++++++++++++++++++++++++
 arch/arm64/kvm/arm.c                   |   2 +
 arch/arm64/kvm/hypercalls.c            |   4 +
 include/linux/arm-smccc.h              |   2 +
 include/linux/cpuhotplug.h             |   1 +
 9 files changed, 128 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e256b091f9c6..f4d25426aef3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -596,10 +596,12 @@ struct kvm_vcpu_arch {
 		gpa_t base;
 	} steal;
 
+#ifdef CONFIG_PARAVIRT_SCHED
 	/* Guest PV sched state */
 	struct {
 		gpa_t base;
 	} pvsched;
+#endif
 
 	/* Per-vcpu CCSIDR override or NULL */
 	u32 *ccsidr;
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 53282dc429f8..5ccead71bf87 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -20,6 +20,9 @@ static inline u64 paravirt_steal_clock(int cpu)
 
 int __init pv_time_init(void);
 
+#ifdef CONFIG_PARAVIRT_SCHED
+int __init pv_sched_init(void);
+
 __visible bool __native_vcpu_is_preempted(int cpu);
 DECLARE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted);
 
@@ -27,10 +30,12 @@ static inline bool pv_vcpu_is_preempted(int cpu)
 {
 	return static_call(pv_vcpu_preempted)(cpu);
 }
+#endif /* CONFIG_PARAVIRT_SCHED */
 
 #else
 
 #define pv_time_init() do {} while (0)
+#define pv_sched_init() do {} while (0)
 
 #endif // CONFIG_PARAVIRT
 
diff --git a/arch/arm64/include/asm/pvsched-abi.h b/arch/arm64/include/asm/pvsched-abi.h
index 80e50e7a1a31..cc8e27dd266a 100644
--- a/arch/arm64/include/asm/pvsched-abi.h
+++ b/arch/arm64/include/asm/pvsched-abi.h
@@ -7,10 +7,12 @@
 #ifndef __ASM_PVSCHED_ABI_H
 #define __ASM_PVSCHED_ABI_H
 
+#ifdef CONFIG_PARAVIRT_SCHED
 struct pvsched_vcpu_state {
 	__le32 preempted;
 	/* Structure must be 64 byte aligned, pad to that size */
 	u8 padding[60];
 } __packed;
+#endif /* CONFIG_PARAVIRT_SCHED */
 
 #endif
diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c
index 3566897070d9..f402e7e6c301 100644
--- a/arch/arm64/kernel/paravirt-spinlocks.c
+++ b/arch/arm64/kernel/paravirt-spinlocks.c
@@ -4,6 +4,7 @@
  * Author: Zengruan Ye <yezengruan@huawei.com>
  */
 
+#ifdef CONFIG_PARAVIRT_SCHED
 #include <linux/static_call.h>
 #include <linux/spinlock.h>
 #include <asm/paravirt.h>
@@ -14,3 +15,4 @@ __visible bool __native_vcpu_is_preempted(int cpu)
 }
 
 DEFINE_STATIC_CALL(pv_vcpu_preempted, __native_vcpu_is_preempted);
+#endif /* CONFIG_PARAVIRT_SCHED */
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index aa718d6a9274..53bb6fa76366 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -22,6 +22,7 @@
 
 #include <asm/paravirt.h>
 #include <asm/pvclock-abi.h>
+#include <asm/pvsched-abi.h>
 #include <asm/smp_plat.h>
 
 struct static_key paravirt_steal_enabled;
@@ -174,3 +175,110 @@ int __init pv_time_init(void)
 
 	return 0;
 }
+
+#ifdef CONFIG_PARAVIRT_SCHED
+DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) __aligned(64);
+EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region);
+
+static bool kvm_vcpu_is_preempted(int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+	u32 preempted;
+
+	reg = &per_cpu(pvsched_vcpu_region, cpu);
+	if (!reg) {
+		pr_warn_once("PV sched enabled but not configured for cpu %d\n",
+			     cpu);
+		return false;
+	}
+
+	preempted = le32_to_cpu(READ_ONCE(reg->preempted));
+
+	return !!preempted;
+}
+
+static int pvsched_vcpu_state_dying_cpu(unsigned int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+	struct arm_smccc_res res;
+
+	reg = this_cpu_ptr(&pvsched_vcpu_region);
+	if (!reg)
+		return -EFAULT;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE, &res);
+	memset(reg, 0, sizeof(*reg));
+
+	return 0;
+}
+
+static int init_pvsched_vcpu_state(unsigned int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+	struct arm_smccc_res res;
+
+	reg = this_cpu_ptr(&pvsched_vcpu_region);
+	if (!reg)
+		return -EFAULT;
+
+	/* Pass the memory address to host via hypercall */
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_INIT,
+			     virt_to_phys(reg), &res);
+
+	return 0;
+}
+
+static int kvm_arm_init_pvsched(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
+				"hypervisor/arm/pvsched:starting",
+				init_pvsched_vcpu_state,
+				pvsched_vcpu_state_dying_cpu);
+
+	if (ret < 0) {
+		pr_warn("PV sched init failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static bool has_kvm_pvsched(void)
+{
+	struct arm_smccc_res res;
+
+	/* To detect the presence of PV sched support we require SMCCC 1.1+ */
+	if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_HV_PV_SCHED_FEATURES, &res);
+
+	return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_sched_init(void)
+{
+	int ret;
+
+	if (is_hyp_mode_available())
+		return 0;
+
+	if (!has_kvm_pvsched()) {
+		pr_warn("PV sched is not available\n");
+		return 0;
+	}
+
+	ret = kvm_arm_init_pvsched();
+	if (ret)
+		return ret;
+
+	static_call_update(pv_vcpu_preempted, kvm_vcpu_is_preempted);
+	pr_info("using PV sched preempted\n");
+
+	return 0;
+}
+early_initcall(pv_sched_init);
+#endif /* CONFIG_PARAVIRT_SCHED */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c3b47357ade5..a6435fa24611 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1377,6 +1377,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 
 	spin_unlock(&vcpu->arch.mp_state_lock);
 
+	kvm_arm_pvsched_vcpu_init(&vcpu->arch);
+
 	return 0;
 }
 
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 48bf2e0b7a1d..4c9fc5df5142 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -332,9 +332,11 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
 				     &smccc_feat->std_hyp_bmap))
 				val[0] = SMCCC_RET_SUCCESS;
 			break;
+#ifdef CONFIG_PARAVIRT_SCHED
 		case ARM_SMCCC_HV_PV_SCHED_FEATURES:
 			val[0] = SMCCC_RET_SUCCESS;
 			break;
+#endif /* CONFIG_PARAVIRT_SCHED */
 		}
 		break;
 	case ARM_SMCCC_HV_PV_TIME_FEATURES:
@@ -345,6 +347,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
 		if (gpa != INVALID_GPA)
 			val[0] = gpa;
 		break;
+#ifdef CONFIG_PARAVIRT_SCHED
 	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
 		val[0] = kvm_hypercall_pvsched_features(vcpu);
 		break;
@@ -359,6 +362,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
 		vcpu->arch.pvsched.base = INVALID_GPA;
 		val[0] = SMCCC_RET_SUCCESS;
 		break;
+#endif /* CONFIG_PARAVIRT_SCHED */
 	case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
 		val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
 		val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index daab363d31ae..7acb9898fbe1 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -577,6 +577,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 		method;							\
 	})
 
+#ifdef CONFIG_PARAVIRT_SCHED
 /* Paravirtualised sched calls */
 #define ARM_SMCCC_HV_PV_SCHED_FEATURES				\
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
@@ -595,6 +596,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 			   ARM_SMCCC_SMC_64,				\
 			   ARM_SMCCC_OWNER_STANDARD_HYP,		\
 			   0x92)
+#endif /* CONFIG_PARAVIRT_SCHED */
 
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 624d4a38c358..f94a1b8e34e0 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -190,6 +190,7 @@ enum cpuhp_state {
 	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
 	CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
+	CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
 	CPUHP_AP_ARM64_ISNDEP_STARTING,