-----Original Message----- From: yezengruan Sent: Tuesday, August 4, 2020 3:42 PM To: Xiexiuqi xiexiuqi@huawei.com; Guohanjun (Hanjun Guo) guohanjun@huawei.com Cc: Wanghaibin (D) wanghaibin.wang@huawei.com; Fanhenglong fanhenglong@huawei.com; yezengruan yezengruan@huawei.com; Zhanghailiang zhang.zhanghailiang@huawei.com; kernel.openeuler kernel.openeuler@huawei.com; Chenzhendong (alex) alex.chen@huawei.com; virt@openeuler.org; Xiexiangyou xiexiangyou@huawei.com; yuzenghui yuzenghui@huawei.com Subject: [PATCH hulk-4.19-next v3 12/17] KVM: arm64: Support the vCPU preemption check
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
Support the vcpu_is_preempted() functionality under KVM/arm64. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding.
unix benchmark result:
  host:  kernel 4.19.87, HiSilicon Kunpeng920, 8 CPUs
  guest: kernel 4.19.87, 16 vCPUs
test-case                              | after-patch       | before-patch
---------------------------------------+-------------------+-----------------
Dhrystone 2 using register variables   | 338955728.5 lps   | 339266319.5 lps
Double-Precision Whetstone             | 30634.9 MWIPS     | 30884.4 MWIPS
Execl Throughput                       | 6753.2 lps        | 3580.1 lps
File Copy 1024 bufsize 2000 maxblocks  | 490048.0 KBps     | 313282.3 KBps
File Copy 256 bufsize 500 maxblocks    | 129662.5 KBps     | 83550.7 KBps
File Copy 4096 bufsize 8000 maxblocks  | 1552551.5 KBps    | 814327.0 KBps
Pipe Throughput                        | 8976422.5 lps     | 9048628.4 lps
Pipe-based Context Switching           | 258641.7 lps      | 252925.9 lps
Process Creation                       | 5312.2 lps        | 4507.9 lps
Shell Scripts (1 concurrent)           | 8704.2 lpm        | 6720.9 lpm
Shell Scripts (8 concurrent)           | 1708.8 lpm        | 607.2 lpm
System Call Overhead                   | 3714444.7 lps     | 3746386.8 lps
---------------------------------------+-------------------+-----------------
System Benchmarks Index Score          | 2270.6            | 1679.2
Signed-off-by: Zengruan Ye yezengruan@huawei.com
 arch/arm64/include/asm/paravirt.h |   8 ++-
 arch/arm64/kernel/paravirt.c      | 116 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c         |   2 +
 include/linux/cpuhotplug.h        |   1 +
 4 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index ff266c66c..62e9ba70f 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -27,12 +27,18 @@ static inline u64 paravirt_steal_clock(int cpu) return pv_ops.time.steal_clock(cpu); }
+int __init pv_sched_init(void);
__visible bool __native_vcpu_is_preempted(int cpu); static inline bool pv_vcpu_is_preempted(int cpu) { return pv_ops.sched.vcpu_is_preempted(cpu); }
-#endif +#else
+#define pv_sched_init() do {} while (0)
+#endif /* CONFIG_PARAVIRT */
#endif diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 3a410dbaa..834819c80 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -13,10 +13,18 @@
- Author: Stefano Stabellini stefano.stabellini@eu.citrix.com
*/
+#define pr_fmt(fmt) "arm-pv: " fmt
+#include <linux/arm-smccc.h> +#include <linux/cpuhotplug.h> #include <linux/export.h> +#include <linux/io.h> #include <linux/jump_label.h> +#include <linux/printk.h> +#include <linux/psci.h> #include <linux/types.h> #include <asm/paravirt.h> +#include <asm/pvsched-abi.h>
struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -25,3 +33,111 @@ struct paravirt_patch_template pv_ops = { .sched.vcpu_is_preempted = __native_vcpu_is_preempted, }; EXPORT_SYMBOL_GPL(pv_ops);
+DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) +__aligned(64); EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region);
+static bool kvm_vcpu_is_preempted(int cpu) {
- struct pvsched_vcpu_state *reg;
- u32 preempted;
- reg = &per_cpu(pvsched_vcpu_region, cpu);
- if (!reg) {
pr_warn_once("PV sched enabled but not configured for
cpu %d\n",
cpu);
return false;
- }
- preempted = le32_to_cpu(READ_ONCE(reg->preempted));
- return !!preempted;
+}
+static int pvsched_vcpu_state_dying_cpu(unsigned int cpu) {
- struct pvsched_vcpu_state *reg;
- reg = this_cpu_ptr(&pvsched_vcpu_region);
- if (!reg)
return -EFAULT;
- memset(reg, 0, sizeof(*reg));
- return 0;
+}
+static int init_pvsched_vcpu_state(unsigned int cpu) {
- struct pvsched_vcpu_state *reg;
- struct arm_smccc_res res;
- reg = this_cpu_ptr(&pvsched_vcpu_region);
- if (!reg)
return -EFAULT;
- /* Pass the memory address to host via hypercall */
- arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA,
virt_to_phys(reg), &res);
- return 0;
+}
+static int kvm_arm_init_pvsched(void) +{
- int ret;
- ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
"hypervisor/arm/pvsched:starting",
init_pvsched_vcpu_state,
pvsched_vcpu_state_dying_cpu);
- if (ret < 0) {
pr_warn("PV sched init failed\n");
return ret;
- }
- return 0;
+}
+static bool has_kvm_pvsched(void) +{
- struct arm_smccc_res res;
- /* To detect the presence of PV sched support we require SMCCC 1.1+
*/
- if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
return false;
- arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_HV_PV_SCHED_FEATURES, &res);
- if (res.a0 != SMCCC_RET_SUCCESS)
return false;
- arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_FEATURES,
ARM_SMCCC_HV_PV_SCHED_IPA, &res);
这里为啥需要判断两个feature是否支持?理论上只需要判断ARM_SMCCC_HV_PV_SCHED_FEATURES就可以了吧? 因为这个特性需要你这里的两处都要支持才行吧?
- return (res.a0 == SMCCC_RET_SUCCESS);
+}
+int __init pv_sched_init(void) +{
没有检查返回值,改成void?
- int ret;
- if (is_hyp_mode_available())
return 0;
- if (!has_kvm_pvsched()) {
pr_warn("PV sched is not available\n");
return 0;
- }
- ret = kvm_arm_init_pvsched();
- if (ret)
return ret;
- pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted;
- pr_info("using PV sched preempted\n");
- return 0;
+} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 155b8a61f..b3569d16a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -360,6 +360,8 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash();
- pv_sched_init();
#ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d67c00351..0244b684d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -134,6 +134,7 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING,
- CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
这处修改不会导致kabi改变?还是把宏定义放在最后?
CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, -- 2.19.1