This patch set aims to support the vCPU preempted check and PV qspinlock under KVM/arm64.
Christoffer Dall (1):
  KVM: arm/arm64: Factor out hypercall handling from PSCI code

Qian Cai (1):
  arm64/spinlock: fix a -Wunused-function warning

Steven Price (3):
  KVM: Implement kvm_put_guest()
  arm/arm64: Provide a wrapper for SMCCC 1.1 calls
  arm/arm64: Make use of the SMCCC 1.1 wrapper

Waiman Long (1):
  locking/osq: Use optimized spinning loop for arm64

Wanpeng Li (2):
  KVM: Boost vCPUs that are delivering interrupts
  KVM: Check preempted_in_kernel for involuntary preemption

Zengruan Ye (12):
  arm/paravirt: Use a single ops structure
  KVM: arm64: Document PV-sched interface
  KVM: arm64: Implement PV_SCHED_FEATURES call
  KVM: arm64: Support pvsched preempted via shared structure
  KVM: arm64: Add interface to support vCPU preempted check
  KVM: arm64: Support the vCPU preemption check
  KVM: arm64: Add SMCCC PV-sched to kick cpu
  KVM: arm64: Implement PV_SCHED_KICK_CPU call
  KVM: arm64: Add interface to support PV qspinlock
  KVM: arm64: Enable PV qspinlock
  KVM: arm64: Add tracepoints for PV qspinlock
  arm64: defconfig: add CONFIG_PARAVIRT_SPINLOCKS in default
 Documentation/virtual/kvm/arm/pvsched.txt   |  72 ++++++++
 arch/arm/include/asm/kvm_host.h             |  25 +++
 arch/arm/include/asm/paravirt.h             |   9 +-
 arch/arm/kernel/paravirt.c                  |   4 +-
 arch/arm/kvm/Makefile                       |   2 +-
 arch/arm/kvm/handle_exit.c                  |   2 +-
 arch/arm/mm/proc-v7-bugs.c                  |  13 +-
 arch/arm64/Kconfig                          |  14 ++
 arch/arm64/configs/euleros_defconfig        |   1 +
 arch/arm64/configs/hulk_defconfig           |   1 +
 arch/arm64/configs/openeuler_defconfig      |   1 +
 arch/arm64/include/asm/kvm_host.h           |  20 +++
 arch/arm64/include/asm/paravirt.h           |  63 ++++++-
 arch/arm64/include/asm/pvsched-abi.h        |  16 ++
 arch/arm64/include/asm/qspinlock.h          |  15 +-
 arch/arm64/include/asm/qspinlock_paravirt.h |  12 ++
 arch/arm64/include/asm/spinlock.h           |  26 +++
 arch/arm64/kernel/Makefile                  |   3 +-
 arch/arm64/kernel/alternative.c             |   5 +-
 arch/arm64/kernel/cpu_errata.c              |  82 +++------
 arch/arm64/kernel/paravirt-spinlocks.c      |  18 ++
 arch/arm64/kernel/paravirt.c                | 188 +++++++++++++++++++-
 arch/arm64/kernel/setup.c                   |   2 +
 arch/arm64/kernel/trace-paravirt.h          |  66 +++++++
 arch/arm64/kvm/Makefile                     |   2 +
 arch/arm64/kvm/handle_exit.c                |   5 +-
 arch/s390/kvm/interrupt.c                   |   2 +-
 drivers/xen/time.c                          |   4 +
 include/kvm/arm_hypercalls.h                |  43 +++++
 include/kvm/arm_psci.h                      |   2 +-
 include/linux/arm-smccc.h                   |  71 ++++++++
 include/linux/cpuhotplug.h                  |   1 +
 include/linux/kvm_host.h                    |  23 +++
 include/linux/kvm_types.h                   |   2 +
 kernel/locking/osq_lock.c                   |  23 ++-
 virt/kvm/arm/arm.c                          |  12 +-
 virt/kvm/arm/hypercalls.c                   |  72 ++++++++
 virt/kvm/arm/psci.c                         |  76 +-------
 virt/kvm/arm/pvsched.c                      |  83 +++++++++
 virt/kvm/arm/trace.h                        |  18 ++
 virt/kvm/kvm_main.c                         |  17 +-
 41 files changed, 939 insertions(+), 177 deletions(-)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
 create mode 100644 arch/arm64/include/asm/pvsched-abi.h
 create mode 100644 arch/arm64/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/arm64/kernel/paravirt-spinlocks.c
 create mode 100644 arch/arm64/kernel/trace-paravirt.h
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
 create mode 100644 virt/kvm/arm/pvsched.c
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Combine the paravirt ops structures into a single structure, keeping the original structure as a sub-structure.
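For readers following the series: the point of this consolidation is that later patches only have to add another sub-structure to the template instead of exporting yet another global. A rough sketch of where this ends up (struct pv_sched_ops and pv_vcpu_is_preempted() are only introduced by later patches in this series; the snippet is illustrative):

    struct pv_sched_ops {
            bool (*vcpu_is_preempted)(int cpu);
    };

    struct paravirt_patch_template {
            struct pv_time_ops  time;
            struct pv_sched_ops sched;      /* added later in this series */
    };

    /* Callers keep going through the single exported pv_ops instance. */
    static inline bool pv_vcpu_is_preempted(int cpu)
    {
            return pv_ops.sched.vcpu_is_preempted(cpu);
    }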
Signed-off-by: Zengruan Ye yezengruan@huawei.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm/include/asm/paravirt.h   | 9 +++++++--
 arch/arm/kernel/paravirt.c        | 4 ++--
 arch/arm64/include/asm/paravirt.h | 9 +++++++--
 arch/arm64/kernel/paravirt.c      | 4 ++--
 drivers/xen/time.c                | 4 ++++
 5 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h index d51e5cd31d01..cdbf02d9c1d4 100644 --- a/arch/arm/include/asm/paravirt.h +++ b/arch/arm/include/asm/paravirt.h @@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; struct pv_time_ops { unsigned long long (*steal_clock)(int cpu); }; -extern struct pv_time_ops pv_time_ops; + +struct paravirt_patch_template { + struct pv_time_ops time; +}; + +extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu) { - return pv_time_ops.steal_clock(cpu); + return pv_ops.time.steal_clock(cpu); } #endif
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c index 53f371ed4568..75c158b0353f 100644 --- a/arch/arm/kernel/paravirt.c +++ b/arch/arm/kernel/paravirt.c @@ -21,5 +21,5 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops pv_time_ops; -EXPORT_SYMBOL_GPL(pv_time_ops); +struct paravirt_patch_template pv_ops; +EXPORT_SYMBOL_GPL(pv_ops); diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index bb5dcea42003..799d9dd6f7cc 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled; struct pv_time_ops { unsigned long long (*steal_clock)(int cpu); }; -extern struct pv_time_ops pv_time_ops; + +struct paravirt_patch_template { + struct pv_time_ops time; +}; + +extern struct paravirt_patch_template pv_ops;
static inline u64 paravirt_steal_clock(int cpu) { - return pv_time_ops.steal_clock(cpu); + return pv_ops.time.steal_clock(cpu); } #endif
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 53f371ed4568..75c158b0353f 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -21,5 +21,5 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops pv_time_ops; -EXPORT_SYMBOL_GPL(pv_time_ops); +struct paravirt_patch_template pv_ops; +EXPORT_SYMBOL_GPL(pv_ops); diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 3e741cd1409c..2952ac253715 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -175,7 +175,11 @@ void __init xen_time_setup_guest(void) xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_runstate_update_flag);
+#ifdef CONFIG_ARM64 + pv_ops.time.steal_clock = xen_steal_clock; +#else pv_time_ops.steal_clock = xen_steal_clock; +#endif
static_key_slow_inc(¶virt_steal_enabled); if (xen_runstate_remote)
From: Christoffer Dall christoffer.dall@arm.com
mainline inclusion
from mainline-v5.8-rc5
commit 55009c6ed2d24fc0f5521ab2482f145d269389ea
category: feature
bugzilla: NA
CVE: NA
--------------------------------
We currently intertwine the KVM PSCI implementation with the general dispatch of hypercall handling, which makes perfect sense because PSCI is the only category of hypercalls we support.
However, as we are about to support additional hypercalls, factor out this functionality into a separate hypercall handler file.
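For context, the guest's HVC exit keeps funnelling through kvm_hvc_call_handler(); only the include changes are visible in the handle_exit hunks below. The handler, now living in virt/kvm/arm/hypercalls.c, falls back to kvm_psci_call() for anything that is not an SMCCC arch call, so the dispatch is roughly (simplified sketch, not a hunk of this patch):

    static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
    {
            int ret = kvm_hvc_call_handler(vcpu);

            if (ret < 0) {
                    /* Unknown call: report "not supported" to the guest. */
                    vcpu_set_reg(vcpu, 0, ~0UL);
                    return 1;
            }

            return ret;
    }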
Signed-off-by: Christoffer Dall christoffer.dall@arm.com
[steven.price@arm.com: rebased]
Reviewed-by: Andrew Jones drjones@redhat.com
Signed-off-by: Steven Price steven.price@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm/kvm/Makefile        |  2 +-
 arch/arm/kvm/handle_exit.c   |  2 +-
 arch/arm64/kvm/Makefile      |  1 +
 arch/arm64/kvm/handle_exit.c |  4 +-
 include/kvm/arm_hypercalls.h | 43 ++++++++++++++++++++
 include/kvm/arm_psci.h       |  2 +-
 virt/kvm/arm/hypercalls.c    | 51 ++++++++++++++++++++++++
 virt/kvm/arm/psci.c          | 76 +-----------------------------------
 8 files changed, 102 insertions(+), 79 deletions(-)
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 48de846f2246..5994f3b3d375 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += handle_exit.o guest.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o obj-y += $(KVM)/arm/aarch32.o
obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 910bd8dabb3c..404c67a06a2f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -21,7 +21,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_mmu.h> -#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> #include <trace/events/kvm.h>
#include "trace.h" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index fd930cdeb16c..d17040add48b 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7e144991e5c3..d86dfd2d61d7 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -22,8 +22,6 @@ #include <linux/kvm.h> #include <linux/kvm_host.h>
-#include <kvm/arm_psci.h> - #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> @@ -33,6 +31,8 @@ #include <asm/debug-monitors.h> #include <asm/traps.h>
+#include <kvm/arm_hypercalls.h> + #define CREATE_TRACE_POINTS #include "trace.h"
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h new file mode 100644 index 000000000000..0e2509d27910 --- /dev/null +++ b/include/kvm/arm_hypercalls.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. */ + +#ifndef __KVM_ARM_HYPERCALLS_H +#define __KVM_ARM_HYPERCALLS_H + +#include <asm/kvm_emulate.h> + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); + +static inline u32 smccc_get_function(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 0); +} + +static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 1); +} + +static inline unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 2); +} + +static inline unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 3); +} + +static inline void smccc_set_retval(struct kvm_vcpu *vcpu, + unsigned long a0, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + vcpu_set_reg(vcpu, 0, a0); + vcpu_set_reg(vcpu, 1, a1); + vcpu_set_reg(vcpu, 2, a2); + vcpu_set_reg(vcpu, 3, a3); +} + +#endif diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 4b1548129fa2..ac8cd19d2456 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -51,7 +51,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) }
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); +int kvm_psci_call(struct kvm_vcpu *vcpu);
struct kvm_one_reg;
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c new file mode 100644 index 000000000000..738df465b1d1 --- /dev/null +++ b/virt/kvm/arm/hypercalls.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + u32 val = SMCCC_RET_NOT_SUPPORTED; + u32 feature; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + feature = smccc_get_arg1(vcpu); + switch(feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + if (kvm_arm_harden_branch_predictor()) + val = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + } + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 34d08ee63747..155197d8f6bd 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -26,6 +26,7 @@ #include <asm/kvm_host.h>
#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h>
/* * This is an implementation of the Power State Coordination Interface @@ -34,38 +35,6 @@
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
-static u32 smccc_get_function(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 0); -} - -static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 1); -} - -static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 2); -} - -static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) -{ - return vcpu_get_reg(vcpu, 3); -} - -static void smccc_set_retval(struct kvm_vcpu *vcpu, - unsigned long a0, - unsigned long a1, - unsigned long a2, - unsigned long a3) -{ - vcpu_set_reg(vcpu, 0, a0); - vcpu_set_reg(vcpu, 1, a1); - vcpu_set_reg(vcpu, 2, a2); - vcpu_set_reg(vcpu, 3, a3); -} - static unsigned long psci_affinity_mask(unsigned long affinity_level) { if (affinity_level <= 3) @@ -384,7 +353,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * Errors: * -EINVAL: Unrecognized PSCI function */ -static int kvm_psci_call(struct kvm_vcpu *vcpu) +int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu, vcpu->kvm)) { case KVM_ARM_PSCI_1_0: @@ -398,47 +367,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu) }; }
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) -{ - u32 func_id = smccc_get_function(vcpu); - u32 val = SMCCC_RET_NOT_SUPPORTED; - u32 feature; - - switch (func_id) { - case ARM_SMCCC_VERSION_FUNC_ID: - val = ARM_SMCCC_VERSION_1_1; - break; - case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - feature = smccc_get_arg1(vcpu); - switch(feature) { - case ARM_SMCCC_ARCH_WORKAROUND_1: - if (kvm_arm_harden_branch_predictor()) - val = SMCCC_RET_SUCCESS; - break; - case ARM_SMCCC_ARCH_WORKAROUND_2: - switch (kvm_arm_have_ssbd()) { - case KVM_SSBD_FORCE_DISABLE: - case KVM_SSBD_UNKNOWN: - break; - case KVM_SSBD_KERNEL: - val = SMCCC_RET_SUCCESS; - break; - case KVM_SSBD_FORCE_ENABLE: - case KVM_SSBD_MITIGATED: - val = SMCCC_RET_NOT_REQUIRED; - break; - } - break; - } - break; - default: - return kvm_psci_call(vcpu); - } - - smccc_set_retval(vcpu, val, 0, 0, 0); - return 1; -} - int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { return 1; /* PSCI version */
From: Steven Price steven.price@arm.com
mainline inclusion
from mainline-v5.8-rc5
commit cac0f1b7285eaaf9a186c618c3a7304d82ed5493
category: feature
bugzilla: NA
CVE: NA
--------------------------------
kvm_put_guest() is analogous to put_user() - it writes a single value to the guest physical address. The implementation is built upon put_user() and so it has the same single copy atomic properties.
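A later patch in this series (virt/kvm/arm/pvsched.c) uses this helper to update a vCPU's preempted flag in guest memory. A minimal sketch of that usage pattern (the wrapper name is illustrative):

    /* Write a 32-bit little-endian value into guest memory at @gpa. */
    static int write_guest_flag(struct kvm *kvm, gpa_t gpa, u32 value)
    {
            __le32 val_le = cpu_to_le32(value);

            /* 0 on success, -EFAULT if @gpa is not covered by a memslot. */
            return kvm_put_guest(kvm, gpa, val_le, u32);
    }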
Signed-off-by: Steven Price steven.price@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/kvm_host.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5086384730cb..ed4fa9a347ed 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -727,6 +727,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); + +#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + type __user *__uaddr = (type __user *)(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = put_user(value, __uaddr); \ + if (!__ret) \ + mark_page_dirty(kvm, gfn); \ + __ret; \ +}) + +#define kvm_put_guest(kvm, gpa, value, type) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), (value), type); \ +}) + int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
From: Steven Price steven.price@arm.com
mainline inclusion
from mainline-v5.8-rc5
commit 541625ac47ce9d0835efaee0fcbaa251b0000a37
category: feature
bugzilla: NA
CVE: NA
--------------------------------
SMCCC 1.1 calls may use either HVC or SMC depending on the PSCI conduit. Rather than coding this in every call site, provide a macro which uses the correct instruction. The macro also handles the case where no conduit is configured or available, returning a "not supported" error in res, and it returns the conduit used for the call.

This allows us to remove some duplicated code and will be useful later when adding paravirtualized time hypervisor calls.
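As an illustration (the next patch converts the real call sites), a caller now looks roughly like this instead of switching on psci_ops.conduit itself:

    struct arm_smccc_res res;
    int conduit;

    conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                   ARM_SMCCC_ARCH_WORKAROUND_1, &res);
    if (conduit == PSCI_CONDUIT_NONE || (int)res.a0 < 0)
            return;         /* no conduit, or the feature is not supported */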
Signed-off-by: Steven Price steven.price@arm.com
Acked-by: Will Deacon will@kernel.org
Signed-off-by: Marc Zyngier maz@kernel.org
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/arm-smccc.h | 45 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 18863d56273c..78b5fc59b2eb 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -311,5 +311,50 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2
+/* + * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED. + * Used when the SMCCC conduit is not defined. The empty asm statement + * avoids compiler warnings about unused variables. + */ +#define __fail_smccc_1_1(...) \ + do { \ + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ + asm ("" __constraints(__count_args(__VA_ARGS__))); \ + if (___res) \ + ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ + } while (0) + +/* + * arm_smccc_1_1_invoke() - make an SMCCC v1.1 compliant call + * + * This is a variadic macro taking one to eight source arguments, and + * an optional return structure. + * + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This macro will make either an HVC call or an SMC call depending on the + * current SMCCC conduit. If no valid conduit is available then -1 + * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied). + * + * The return value also provides the conduit that was used. + */ +#define arm_smccc_1_1_invoke(...) ({ \ + int method = psci_ops.conduit; \ + switch (method) { \ + case PSCI_CONDUIT_HVC: \ + arm_smccc_1_1_hvc(__VA_ARGS__); \ + break; \ + case PSCI_CONDUIT_SMC: \ + arm_smccc_1_1_smc(__VA_ARGS__); \ + break; \ + default: \ + __fail_smccc_1_1(__VA_ARGS__); \ + method = PSCI_CONDUIT_NONE; \ + break; \ + } \ + method; \ + }) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/
From: Steven Price steven.price@arm.com
mainline inclusion
from mainline-v5.8-rc5
commit ce4d5ca2b9dd5d85944eb93c1bbf9eb11b7a907d
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Rather than directly choosing which function to use based on psci_ops.conduit, use the new arm_smccc_1_1 wrapper instead.
In some cases we still need to do some operations based on the conduit, but the code duplication is removed.
No functional change.
Signed-off-by: Steven Price steven.price@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm/mm/proc-v7-bugs.c     | 13 +++---
 arch/arm64/kernel/cpu_errata.c | 82 ++++++++++++----------------------
 2 files changed, 34 insertions(+), 61 deletions(-)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index a6554fdb56c5..ac63a31347c1 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -81,12 +81,13 @@ static void cpu_v7_spectre_init(void) if (psci_ops.smccc_version == SMCCC_VERSION_1_0) break;
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + return; + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_hvc_switch_mm; @@ -94,10 +95,6 @@ static void cpu_v7_spectre_init(void) break;
case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_smc_switch_mm; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7522163c18f7..d7107ecf3ee1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -217,40 +217,31 @@ static int detect_harden_bp_fw(void) if (psci_ops.smccc_version == SMCCC_VERSION_1_0) return -1;
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + break; + default: + return -1; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - default: - return -1; - } + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break;
case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; - break; - default: - return -1; - } + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; break;
default: @@ -340,6 +331,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
void arm64_set_ssbd_mitigation(bool state) { + int conduit; + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { pr_info_once("SSBD disabled by kernel configuration\n"); return; @@ -353,19 +346,9 @@ void arm64_set_ssbd_mitigation(bool state) return; }
- switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; - - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; - - default: - WARN_ON_ONCE(1); - break; - } + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, + NULL); + WARN_ON_ONCE(conduit == PSCI_CONDUIT_NONE); }
static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, @@ -375,6 +358,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, bool required = true; s32 val; bool this_cpu_safe = false; + int conduit;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -399,18 +383,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, return false; }
- switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; - - default: + if (conduit == PSCI_CONDUIT_NONE) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) __ssb_safe = false;
From: Waiman Long longman@redhat.com
mainline inclusion
from mainline-v5.8-rc5
commit f5bfdc8e3947a7ae489cf8ae9cfd6b3fb357b952
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Arm64 has a more optimized spinning loop (atomic_cond_read_acquire) using wfe for spinlock that can boost performance of sibling threads by putting the current cpu to a wait state that is broken only when the monitored variable changes or an external event happens.
OSQ has a more complicated spinning loop. Besides the lock value, it also checks for need_resched() and vcpu_is_preempted(). The check for need_resched() is not a problem as it is only set by the tick interrupt handler. That will be detected by the spinning cpu right after iret.
The vcpu_is_preempted() check, however, is a problem as changes to the preempt state of the previous node will not affect the wait state. For ARM64, vcpu_is_preempted is not currently defined and so is a no-op. Will has indicated that he is planning to para-virtualize wfe instead of defining vcpu_is_preempted for PV support. So just add a comment in arch/arm64/include/asm/spinlock.h to indicate that vcpu_is_preempted() should not be defined as suggested.
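To make the before/after shape of the loop explicit (a simplified restatement of the change below, not the literal hunk):

    /* Old: pure polling; re-evaluates the bail-out checks every iteration. */
    while (!READ_ONCE(node->locked)) {
            if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
                    goto unqueue;
            cpu_relax();
    }

    /*
     * New: event-based wait. On arm64 this can sit in WFE and only
     * re-evaluates the condition when an event (e.g. the need_resched()
     * IPI) wakes it, so a polling-based vcpu_is_preempted() would go
     * unnoticed - hence keeping it defined as false on arm64.
     */
    if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
                              vcpu_is_preempted(node_cpu(node->prev))))
            return true;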
On a 2-socket 56-core 224-thread ARM64 system, a kernel mutex locking microbenchmark was run for 10s with and without the patch. The performance numbers before patch were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 316/123,143/2,121,269
Threads = 224, Total Rate = 2,757 kop/s; Percpu Rate = 12 kop/s
After patch, the numbers were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 334/147,836/1,304,787
Threads = 224, Total Rate = 3,311 kop/s; Percpu Rate = 15 kop/s
So there was about 20% performance improvement.
Signed-off-by: Waiman Long longman@redhat.com
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org
Acked-by: Will Deacon will@kernel.org
Link: https://lkml.kernel.org/r/20200113150735.21956-1-longman@redhat.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm64/include/asm/spinlock.h |  9 +++++++++
 kernel/locking/osq_lock.c         | 23 ++++++++++-------------
 2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 38116008d18b..1210e34b4c08 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -22,4 +22,13 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb()
+/* + * Changing this will break osq_lock() thanks to the call inside + * smp_cond_load_relaxed(). + * + * See: + * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a... + */ +#define vcpu_is_preempted(cpu) false + #endif /* __ASM_SPINLOCK_H */ diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 6ef600aa0f47..1f7734949ac8 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -134,20 +134,17 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */
- while (!READ_ONCE(node->locked)) { - /* - * If we need to reschedule bail... so we can block. - * Use vcpu_is_preempted() to avoid waiting for a preempted - * lock holder: - */ - if (need_resched() || vcpu_is_preempted(node_cpu(node->prev))) - goto unqueue; - - cpu_relax(); - } - return true; + /* + * Wait to acquire the lock or cancelation. Note that need_resched() + * will come with an IPI, which will wake smp_cond_load_relaxed() if it + * is implemented with a monitor-wait. vcpu_is_preempted() relies on + * polling, be careful. + */ + if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() || + vcpu_is_preempted(node_cpu(node->prev)))) + return true;
-unqueue: + /* unqueue */ /* * Step - A -- stabilize @prev *
From: Qian Cai cai@lca.pw
mainline inclusion
from mainline-v5.8-rc5
commit 345d52c184dc7de98cff63f1bfa6f90e9db19809
category: bugfix
bugzilla: NA
CVE: NA
--------------------------------
The commit f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64") introduced a warning from Clang because vcpu_is_preempted() is compiled away,
kernel/locking/osq_lock.c:25:19: warning: unused function 'node_cpu'
[-Wunused-function]
static inline int node_cpu(struct optimistic_spin_node *node)
                  ^
1 warning generated.
Fix it by converting vcpu_is_preempted() to a static inline function: the macro simply expanded to false and discarded its argument, so node_cpu() was never referenced, whereas the static inline keeps the reference while still always returning false.
Fixes: f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64")
Signed-off-by: Qian Cai cai@lca.pw
Acked-by: Waiman Long longman@redhat.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm64/include/asm/spinlock.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 1210e34b4c08..a9dec081beca 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -29,6 +29,10 @@ * See: * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a... */ -#define vcpu_is_preempted(cpu) false +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return false; +}
#endif /* __ASM_SPINLOCK_H */
From: Wanpeng Li wanpengli@tencent.com
mainline inclusion
from mainline-v5.8-rc5
commit d73eb57b80b98ae147e4e6a7d9877c2ba175f972
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Inspired by commit 9cac38dd5d (KVM/s390: Set preempted flag during vcpu wakeup and interrupt delivery), we want to also boost not just lock holders but also vCPUs that are delivering interrupts. Most smp_call_function_many calls are synchronous, so the IPI target vCPUs are also good yield candidates. This patch introduces vcpu->ready to boost vCPUs during wakeup and interrupt delivery time; unlike s390 we do not reuse vcpu->preempted so that voluntarily preempted vCPUs are taken into account by kvm_vcpu_on_spin, but vmx_vcpu_pi_put is not affected (VT-d PI handles voluntary preemption separately, in pi_pre_block).
Testing on 80 HT 2 socket Xeon Skylake server, with 80 vCPUs VM 80GB RAM: ebizzy -M
        vanilla     boosting    improved
 1VM     21443       23520          9%
 2VM      2800        8000        180%
 3VM      1800        3100         72%
Testing on my Haswell desktop 8 HT, with 8 vCPUs VM 8GB RAM, two VMs, one running ebizzy -M, the other running 'stress --cpu 2':
w/ boosting + w/o pv sched yield(vanilla)
        vanilla     boosting    improved
         1570         4000        155%
w/ boosting + w/ pv sched yield(vanilla)
        vanilla     boosting    improved
         1844         5157        179%
w/o boosting, perf top in VM:
 72.33%  [kernel]        [k] smp_call_function_many
  4.22%  [kernel]        [k] call_function_i
  3.71%  [kernel]        [k] async_page_fault
w/ boosting, perf top in VM:
 38.43%  [kernel]        [k] smp_call_function_many
  6.31%  [kernel]        [k] async_page_fault
  6.13%  libc-2.23.so    [.] __memcpy_avx_unaligned
  4.88%  [kernel]        [k] call_function_interrupt
Cc: Paolo Bonzini pbonzini@redhat.com
Cc: Radim Krčmář rkrcmar@redhat.com
Cc: Christian Borntraeger borntraeger@de.ibm.com
Cc: Paul Mackerras paulus@ozlabs.org
Cc: Marc Zyngier maz@kernel.org
Signed-off-by: Wanpeng Li wanpengli@tencent.com
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/s390/kvm/interrupt.c |  2 +-
 include/linux/kvm_host.h  |  1 +
 virt/kvm/kvm_main.c       | 12 ++++++++----
 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 3515f2b55eb9..5ada5d8d80ee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1144,7 +1144,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) * The vcpu gave up the cpu voluntarily, mark it as a good * yield-candidate. */ - vcpu->preempted = true; + vcpu->ready = true; swake_up_one(&vcpu->wq); vcpu->stat.halt_wakeup++; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed4fa9a347ed..cec751bf6084 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -300,6 +300,7 @@ struct kvm_vcpu { } spin_loop; #endif bool preempted; + bool ready; struct kvm_vcpu_arch arch; struct dentry *debugfs_dentry; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c2554daa18ad..782c078b73ad 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -333,6 +333,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; + vcpu->ready = false;
r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -2433,6 +2434,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) wqp = kvm_arch_vcpu_wq(vcpu); if (swq_has_sleeper(wqp)) { swake_up_one(wqp); + WRITE_ONCE(vcpu->ready, true); ++vcpu->stat.halt_wakeup; return true; } @@ -2569,7 +2571,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; } else if (pass && i > last_boosted_vcpu) break; - if (!READ_ONCE(vcpu->preempted)) + if (!READ_ONCE(vcpu->ready)) continue; if (vcpu == me) continue; @@ -4285,8 +4287,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
- if (vcpu->preempted) - vcpu->preempted = false; + vcpu->preempted = false; + WRITE_ONCE(vcpu->ready, false);
kvm_arch_sched_in(vcpu, cpu);
@@ -4298,8 +4300,10 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
- if (current->state == TASK_RUNNING) + if (current->state == TASK_RUNNING) { vcpu->preempted = true; + WRITE_ONCE(vcpu->ready, true); + } kvm_arch_vcpu_put(vcpu); }
From: Wanpeng Li wanpengli@tencent.com
mainline inclusion
from mainline-v5.8-rc5
commit 046ddeed0461b5d270470c253cbb321103d048b6
category: feature
bugzilla: NA
CVE: NA
--------------------------------
preempted_in_kernel is updated in preempt_notifier when involuntary preemption occurs; it can be stale when the voluntarily preempted vCPUs are taken into account by the kvm_vcpu_on_spin() loop. This patch lets it just check preempted_in_kernel for involuntary preemption.
Cc: Paolo Bonzini pbonzini@redhat.com
Cc: Radim Krčmář rkrcmar@redhat.com
Signed-off-by: Wanpeng Li wanpengli@tencent.com
Signed-off-by: Paolo Bonzini pbonzini@redhat.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 virt/kvm/kvm_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 782c078b73ad..e3bb1b225ecc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2577,7 +2577,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) continue; - if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) + if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && + !kvm_arch_vcpu_in_kernel(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; @@ -4287,7 +4288,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
- vcpu->preempted = false; + WRITE_ONCE(vcpu->preempted, false); WRITE_ONCE(vcpu->ready, false);
kvm_arch_sched_in(vcpu, cpu); @@ -4301,7 +4302,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
if (current->state == TASK_RUNNING) { - vcpu->preempted = true; + WRITE_ONCE(vcpu->preempted, true); WRITE_ONCE(vcpu->ready, true); } kvm_arch_vcpu_put(vcpu);
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Introduce a paravirtualized sched (PV-sched) interface for KVM/arm64.
A hypercall interface is provided for the guest to interrogate the hypervisor's support for this interface and the location of the shared memory structures.
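As the new document states, the guest must first probe for PV_SCHED through the SMCCC 1.1 ARCH_FEATURES mechanism. A later patch in this series implements exactly this probe in has_kvm_pvsched(); a minimal sketch:

    static bool pv_sched_supported(void)
    {
            struct arm_smccc_res res;

            /* ARCH_FEATURES itself requires an SMCCC 1.1 conduit. */
            if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
                    return false;

            arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                 ARM_SMCCC_HV_PV_SCHED_FEATURES, &res);

            return res.a0 == SMCCC_RET_SUCCESS;
    }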
Signed-off-by: Zengruan Ye yezengruan@huawei.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 Documentation/virtual/kvm/arm/pvsched.txt | 56 +++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt new file mode 100644 index 000000000000..1d5aefc290ec --- /dev/null +++ b/Documentation/virtual/kvm/arm/pvsched.txt @@ -0,0 +1,56 @@ +Paravirtualized sched support for arm64 +======================================= + +KVM/arm64 provides some hypervisor service calls to support a paravirtualized +sched. + +Some SMCCC compatible hypercalls are defined: + +* PV_SCHED_FEATURES: 0xC5000090 +* PV_SCHED_IPA_INIT: 0xC5000091 +* PV_SCHED_IPA_RELEASE: 0xC5000092 + +The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1 +ARCH_FEATURES mechanism before calling it. + +PV_SCHED_FEATURES + ============= ======== ========== + Function ID: (uint32) 0xC5000090 + PV_call_id: (uint32) The function to query for support. + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant + PV-sched feature is supported by the hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_INIT + ============= ======== ========== + Function ID: (uint32) 0xC5000091 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is shared to the + hypervisor. + ============= ======== ========== + +PV_SCHED_IPA_RELEASE + ============= ======== ========== + Function ID: (uint32) 0xC5000092 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of + this vCPU's PV data structure is released. + ============= ======== ========== + +PV sched state +-------------- + +The structure pointed to by the PV_SCHED_IPA hypercall is as follows: + ++-----------+-------------+-------------+-----------------------------------+ +| Field | Byte Length | Byte Offset | Description | ++===========+=============+=============+===================================+ +| preempted | 4 | 0 | Indicates that the vCPU that owns | +| | | | this struct is running or not. | +| | | | Non-zero values mean the vCPU has | +| | | | been preempted. Zero means the | +| | | | vCPU is not preempted. | ++-----------+-------------+-------------+-----------------------------------+ + +The preempted field will be updated to 0 by the hypervisor prior to scheduling +a vCPU. When the vCPU is scheduled out, the preempted field will be updated +to 1 by the hypervisor.
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
This provides a mechanism for querying which paravirtualized sched features are available in this hypervisor.
Add some SMCCC compatible hypercalls for PV sched features:
  PV_SCHED_FEATURES:    0xC5000090
  PV_SCHED_IPA_INIT:    0xC5000091
  PV_SCHED_IPA_RELEASE: 0xC5000092
Also add the header file which defines the ABI for the paravirtualized sched features we're about to add.
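Once the interface has been discovered, the guest can ask whether an individual PV-sched call is implemented by invoking PV_SCHED_FEATURES with the call ID as its argument. A minimal guest-side sketch using the constants added by this patch:

    /* Returns true if the host implements the given PV-sched call. */
    static bool pv_sched_call_supported(u32 pv_call_id)
    {
            struct arm_smccc_res res;

            arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_FEATURES,
                                 pv_call_id, &res);

            return res.a0 == SMCCC_RET_SUCCESS;
    }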
Signed-off-by: Zengruan Ye yezengruan@huawei.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm/include/asm/kvm_host.h      |  6 ++++++
 arch/arm64/include/asm/kvm_host.h    |  2 ++
 arch/arm64/include/asm/pvsched-abi.h | 16 ++++++++++++++++
 arch/arm64/kvm/Makefile              |  1 +
 include/linux/arm-smccc.h            | 20 ++++++++++++++++++++
 virt/kvm/arm/hypercalls.c            |  6 ++++++
 virt/kvm/arm/pvsched.c               | 23 +++++++++++++++++++++
 7 files changed, 74 insertions(+)
 create mode 100644 arch/arm64/include/asm/pvsched-abi.h
 create mode 100644 virt/kvm/arm/pvsched.c
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 661bd0344254..8f9a966a726f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__
+#include <linux/arm-smccc.h> #include <linux/types.h> #include <linux/kvm_types.h> #include <asm/cputype.h> @@ -302,6 +303,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void);
+static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed48ecf88c6d..452ed37f598e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -430,6 +430,8 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void);
+int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu); + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/pvsched-abi.h b/arch/arm64/include/asm/pvsched-abi.h new file mode 100644 index 000000000000..80e50e7a1a31 --- /dev/null +++ b/arch/arm64/include/asm/pvsched-abi.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye yezengruan@huawei.com + */ + +#ifndef __ASM_PVSCHED_ABI_H +#define __ASM_PVSCHED_ABI_H + +struct pvsched_vcpu_state { + __le32 preempted; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[60]; +} __packed; + +#endif diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d17040add48b..258f9fd91ba5 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,6 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvsched.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 78b5fc59b2eb..a378a1d53fb9 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -54,6 +54,7 @@ #define ARM_SMCCC_OWNER_SIP 2 #define ARM_SMCCC_OWNER_OEM 3 #define ARM_SMCCC_OWNER_STANDARD 4 +#define ARM_SMCCC_OWNER_STANDARD_HYP 5 #define ARM_SMCCC_OWNER_TRUSTED_APP 48 #define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 #define ARM_SMCCC_OWNER_TRUSTED_OS 50 @@ -356,5 +357,24 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ })
+/* Paravirtualised sched calls */ +#define ARM_SMCCC_HV_PV_SCHED_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x90) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_INIT \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x91) + +#define ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x92) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c index 738df465b1d1..780240bdeb31 100644 --- a/virt/kvm/arm/hypercalls.c +++ b/virt/kvm/arm/hypercalls.c @@ -40,8 +40,14 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val = SMCCC_RET_SUCCESS; + break; } break; + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val = kvm_hypercall_pvsched_features(vcpu); + break; default: return kvm_psci_call(vcpu); } diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c new file mode 100644 index 000000000000..40b56e01fc5d --- /dev/null +++ b/virt/kvm/arm/pvsched.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye yezengruan@huawei.com + */ + +#include <linux/arm-smccc.h> + +#include <kvm/arm_hypercalls.h> + +int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + int val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_SCHED_FEATURES: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +}
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Implement the service call for configuring a shared structure between a vCPU and the hypervisor, in which the hypervisor can tell the vCPU whether it is running or not.
Signed-off-by: Zengruan Ye yezengruan@huawei.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm/include/asm/kvm_host.h   | 14 ++++++++++++++
 arch/arm64/include/asm/kvm_host.h | 16 ++++++++++++++++
 include/linux/kvm_types.h         |  2 ++
 virt/kvm/arm/arm.c                |  8 ++++++++
 virt/kvm/arm/hypercalls.c         | 12 ++++++++++++
 virt/kvm/arm/pvsched.c            | 32 +++++++++++++++++++++++++++++++
 6 files changed, 84 insertions(+)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8f9a966a726f..ead428a9368e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -303,6 +303,20 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void);
+static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ +} + +static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} + +static inline void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, + u32 preempted) +{ +} + static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) { return SMCCC_RET_NOT_SUPPORTED; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 452ed37f598e..11420fddaa82 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -324,6 +324,11 @@ struct kvm_vcpu_arch { /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; + + /* Guest PV sched state */ + struct { + gpa_t base; + } pvsched; };
/* vcpu_arch flags field values: */ @@ -430,6 +435,17 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void);
+static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->pvsched.base = GPA_INVALID; +} + +static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->pvsched.base != GPA_INVALID); +} + +void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted); int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index a38729c8296f..458ec5b0ed06 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -49,6 +49,8 @@ typedef unsigned long gva_t; typedef u64 gpa_t; typedef u64 gfn_t;
+#define GPA_INVALID (~(gpa_t)0) + typedef unsigned long hva_t; typedef u64 hpa_t; typedef u64 hfn_t; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2fdb8ac0f921..42dd9a2875d0 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -373,6 +373,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_arm_reset_debug_ptr(vcpu);
+ kvm_arm_pvsched_vcpu_init(&vcpu->arch); + return kvm_vgic_vcpu_init(vcpu); }
@@ -423,6 +425,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_set_wfe_traps(vcpu);
update_steal_time(vcpu); + + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 0); }
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -436,6 +441,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL); + + if (kvm_arm_is_pvsched_enabled(&vcpu->arch)) + kvm_update_pvsched_preempted(vcpu, 1); }
static void vcpu_power_off(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c index 780240bdeb31..f708b54d1450 100644 --- a/virt/kvm/arm/hypercalls.c +++ b/virt/kvm/arm/hypercalls.c @@ -14,6 +14,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) u32 func_id = smccc_get_function(vcpu); u32 val = SMCCC_RET_NOT_SUPPORTED; u32 feature; + gpa_t gpa;
switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: @@ -48,6 +49,17 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) case ARM_SMCCC_HV_PV_SCHED_FEATURES: val = kvm_hypercall_pvsched_features(vcpu); break; + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + gpa = smccc_get_arg1(vcpu); + if (gpa != GPA_INVALID) { + vcpu->arch.pvsched.base = gpa; + val = SMCCC_RET_SUCCESS; + } + break; + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + vcpu->arch.pvsched.base = GPA_INVALID; + val = SMCCC_RET_SUCCESS; + break; default: return kvm_psci_call(vcpu); } diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c index 40b56e01fc5d..8a1302a52464 100644 --- a/virt/kvm/arm/pvsched.c +++ b/virt/kvm/arm/pvsched.c @@ -5,9 +5,39 @@ */
#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/pvsched-abi.h>
#include <kvm/arm_hypercalls.h>
+void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted) +{ + __le32 preempted_le; + u64 offset; + int idx; + u64 base = vcpu->arch.pvsched.base; + struct kvm *kvm = vcpu->kvm; + + if (base == GPA_INVALID) + return; + + preempted_le = cpu_to_le32(preempted); + + /* + * This function is called from atomic context, so we need to + * disable page faults. + */ + pagefault_disable(); + + idx = srcu_read_lock(&kvm->srcu); + offset = offsetof(struct pvsched_vcpu_state, preempted); + kvm_put_guest(kvm, base + offset, preempted_le, u32); + srcu_read_unlock(&kvm->srcu, idx); + + pagefault_enable(); +} + int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) { u32 feature = smccc_get_arg1(vcpu); @@ -15,6 +45,8 @@ int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
switch (feature) { case ARM_SMCCC_HV_PV_SCHED_FEATURES: + case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: + case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: val = SMCCC_RET_SUCCESS; break; }
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
This is to fix some lock holder preemption issues. Some other lock implementations do a spin loop before acquiring the lock itself. The kernel already has an interface, bool vcpu_is_preempted(int cpu), which takes a CPU number and returns true if that CPU is preempted. The kernel can then break out of such spin loops based on the return value of vcpu_is_preempted().

As the kernel already uses this interface, let's support it on arm64.
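For reference, this is the kind of loop that benefits; a simplified sketch of a spin-wait bailing out when the owner's vCPU is preempted (the lock fields here are hypothetical, modelled on the existing osq_lock() user of the interface):

    /*
     * Spin until the owner releases the lock, but give up early if the
     * CPU we are waiting on has had its vCPU preempted by the host.
     */
    while (!READ_ONCE(lock->released)) {
            if (need_resched() || vcpu_is_preempted(lock->owner_cpu))
                    return false;   /* let the caller block instead */
            cpu_relax();
    }
    return true;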
Signed-off-by: Zengruan Ye yezengruan@huawei.com
Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 arch/arm64/include/asm/paravirt.h      | 12 ++++++++++++
 arch/arm64/include/asm/spinlock.h      | 10 ++++++++++
 arch/arm64/kernel/Makefile             |  2 +-
 arch/arm64/kernel/paravirt-spinlocks.c | 13 +++++++++++++
 arch/arm64/kernel/paravirt.c           |  4 +++-
 5 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kernel/paravirt-spinlocks.c
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 799d9dd6f7cc..ff266c66c527 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -11,8 +11,13 @@ struct pv_time_ops { unsigned long long (*steal_clock)(int cpu); };
+struct pv_sched_ops { + bool (*vcpu_is_preempted)(int cpu); +}; + struct paravirt_patch_template { struct pv_time_ops time; + struct pv_sched_ops sched; };
extern struct paravirt_patch_template pv_ops; @@ -21,6 +26,13 @@ static inline u64 paravirt_steal_clock(int cpu) { return pv_ops.time.steal_clock(cpu); } + +__visible bool __native_vcpu_is_preempted(int cpu); +static inline bool pv_vcpu_is_preempted(int cpu) +{ + return pv_ops.sched.vcpu_is_preempted(cpu); +} + #endif
#endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index a9dec081beca..4a668995014c 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -18,6 +18,7 @@
#include <asm/qrwlock.h> #include <asm/qspinlock.h> +#include <asm/paravirt.h>
/* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() @@ -30,9 +31,18 @@ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a... */ #define vcpu_is_preempted vcpu_is_preempted +#ifdef CONFIG_PARAVIRT +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_vcpu_is_preempted(cpu); +} + +#else + static inline bool vcpu_is_preempted(int cpu) { return false; } +#endif /* CONFIG_PARAVIRT */
#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 54d0b1d38a4e..73bc1db45839 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -48,7 +48,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ERR_RECOV) += ras.o arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o -arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o +arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..fd733eb02d42 --- /dev/null +++ b/arch/arm64/kernel/paravirt-spinlocks.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye yezengruan@huawei.com + */ + +#include <linux/spinlock.h> +#include <asm/paravirt.h> + +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 75c158b0353f..3a410dbaad75 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -21,5 +21,7 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled;
-struct paravirt_patch_template pv_ops; +struct paravirt_patch_template pv_ops = { + .sched.vcpu_is_preempted = __native_vcpu_is_preempted, +}; EXPORT_SYMBOL_GPL(pv_ops);
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion
category: feature
bugzilla: NA
CVE: NA
--------------------------------
Support the vcpu_is_preempted() functionality under KVM/arm64. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding.
unix benchmark result:
  host:  kernel 4.19.87, HiSilicon Kunpeng920, 8 CPUs
  guest: kernel 4.19.87, 16 vCPUs
  test-case                               | after-patch       | before-patch
  ----------------------------------------+-------------------+------------------
  Dhrystone 2 using register variables    | 338955728.5 lps   | 339266319.5 lps
  Double-Precision Whetstone              |     30634.9 MWIPS |     30884.4 MWIPS
  Execl Throughput                        |      6753.2 lps   |      3580.1 lps
  File Copy 1024 bufsize 2000 maxblocks   |    490048.0 KBps  |    313282.3 KBps
  File Copy 256 bufsize 500 maxblocks     |    129662.5 KBps  |     83550.7 KBps
  File Copy 4096 bufsize 8000 maxblocks   |   1552551.5 KBps  |    814327.0 KBps
  Pipe Throughput                         |   8976422.5 lps   |   9048628.4 lps
  Pipe-based Context Switching            |    258641.7 lps   |    252925.9 lps
  Process Creation                        |      5312.2 lps   |      4507.9 lps
  Shell Scripts (1 concurrent)            |      8704.2 lpm   |      6720.9 lpm
  Shell Scripts (8 concurrent)            |      1708.8 lpm   |       607.2 lpm
  System Call Overhead                    |   3714444.7 lps   |   3746386.8 lps
  ----------------------------------------+-------------------+------------------
  System Benchmarks Index Score           |      2270.6       |      1679.2
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/paravirt.h | 8 ++- arch/arm64/kernel/paravirt.c | 112 ++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 2 + include/linux/cpuhotplug.h | 1 + 4 files changed, 122 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index ff266c66c527..62e9ba70f4e6 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -27,12 +27,18 @@ static inline u64 paravirt_steal_clock(int cpu) return pv_ops.time.steal_clock(cpu); }
+int __init pv_sched_init(void); + __visible bool __native_vcpu_is_preempted(int cpu); static inline bool pv_vcpu_is_preempted(int cpu) { return pv_ops.sched.vcpu_is_preempted(cpu); }
-#endif +#else + +#define pv_sched_init() do {} while (0) + +#endif /* CONFIG_PARAVIRT */
#endif diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 3a410dbaad75..23709083a5c2 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -13,10 +13,18 @@ * Author: Stefano Stabellini stefano.stabellini@eu.citrix.com */
+#define pr_fmt(fmt) "arm-pv: " fmt + +#include <linux/arm-smccc.h> +#include <linux/cpuhotplug.h> #include <linux/export.h> +#include <linux/io.h> #include <linux/jump_label.h> +#include <linux/printk.h> +#include <linux/psci.h> #include <linux/types.h> #include <asm/paravirt.h> +#include <asm/pvsched-abi.h>
struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -25,3 +33,107 @@ struct paravirt_patch_template pv_ops = { .sched.vcpu_is_preempted = __native_vcpu_is_preempted, }; EXPORT_SYMBOL_GPL(pv_ops); + +DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) __aligned(64); +EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region); + +static bool kvm_vcpu_is_preempted(int cpu) +{ + struct pvsched_vcpu_state *reg; + u32 preempted; + + reg = &per_cpu(pvsched_vcpu_region, cpu); + if (!reg) { + pr_warn_once("PV sched enabled but not configured for cpu %d\n", + cpu); + return false; + } + + preempted = le32_to_cpu(READ_ONCE(reg->preempted)); + + return !!preempted; +} + +static int pvsched_vcpu_state_dying_cpu(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE, &res); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_pvsched_vcpu_state(unsigned int cpu) +{ + struct pvsched_vcpu_state *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&pvsched_vcpu_region); + if (!reg) + return -EFAULT; + + /* Pass the memory address to host via hypercall */ + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA_INIT, + virt_to_phys(reg), &res); + + return 0; +} + +static int kvm_arm_init_pvsched(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING, + "hypervisor/arm/pvsched:starting", + init_pvsched_vcpu_state, + pvsched_vcpu_state_dying_cpu); + + if (ret < 0) { + pr_warn("PV sched init failed\n"); + return ret; + } + + return 0; +} + +static bool has_kvm_pvsched(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV sched support we require SMCCC 1.1+ */ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_SCHED_FEATURES, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +int __init pv_sched_init(void) +{ + int ret; + + if (is_hyp_mode_available()) + return 0; + + if (!has_kvm_pvsched()) { + pr_warn("PV sched is not available\n"); + return 0; + } + + ret = kvm_arm_init_pvsched(); + if (ret) + return ret; + + pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted; + pr_info("using PV sched preempted\n"); + + return 0; +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 155b8a61f98c..b3569d16af19 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -360,6 +360,8 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash();
+ pv_sched_init(); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d5e14f55b7a4..f7767e58d1c5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -134,6 +134,7 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_KVM_PVSCHED_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING,
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
A new hypercall interface function is provided for the guest to kick a vCPU that is sleeping in WFI state.
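For reference, the guest reaches this hypercall through the SMCCC 1.1 wrapper; a minimal sketch of a guest-side helper (the function name is illustrative, the call itself mirrors the one added later in this series):

	#include <linux/arm-smccc.h>

	/* Ask the hypervisor to wake the vCPU identified by 'cpu' out of WFI. */
	static void pv_sched_kick_cpu(int cpu)
	{
		struct arm_smccc_res res;

		arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res);
	}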
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/virtual/kvm/arm/pvsched.txt | 16 ++++++++++++++++ include/linux/arm-smccc.h | 6 ++++++ 2 files changed, 22 insertions(+)
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt index 1d5aefc290ec..788e63e32b8b 100644 --- a/Documentation/virtual/kvm/arm/pvsched.txt +++ b/Documentation/virtual/kvm/arm/pvsched.txt @@ -9,6 +9,7 @@ Some SMCCC compatible hypercalls are defined: * PV_SCHED_FEATURES: 0xC5000090 * PV_SCHED_IPA_INIT: 0xC5000091 * PV_SCHED_IPA_RELEASE: 0xC5000092 +* PV_SCHED_KICK_CPU: 0xC5000093
The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES mechanism before calling it. @@ -36,6 +37,13 @@ PV_SCHED_IPA_RELEASE this vCPU's PV data structure is released. ============= ======== ==========
+PV_SCHED_KICK_CPU + ============= ======== ========== + Function ID: (uint32) 0xC5000093 + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the vCPU is + kicked by the hypervisor. + ============= ======== ========== + PV sched state --------------
@@ -54,3 +62,11 @@ The structure pointed to by the PV_SCHED_IPA hypercall is as follows: The preempted field will be updated to 0 by the hypervisor prior to scheduling a vCPU. When the vCPU is scheduled out, the preempted field will be updated to 1 by the hypervisor. + +A vCPU of a paravirtualized guest that is busywaiting in guest kernel mode for +an event to occur (ex: a spinlock to become available) can execute WFI +instruction once it has busy-waited for more than a threshold time-interval. +Execution of WFI instruction would cause the hypervisor to put the vCPU to sleep +until occurrence of an appropriate event. Another vCPU of the same guest can +wakeup the sleeping vCPU by issuing PV_SCHED_KICK_CPU hypercall, specifying CPU +id (reg1) of the vCPU to be woken up. diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a378a1d53fb9..5b5f52ca6ac8 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -376,5 +376,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, ARM_SMCCC_OWNER_STANDARD_HYP, \ 0x92)
+#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x93) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
Implement the service call for waking up a vCPU that is sleeping in WFI state.
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/handle_exit.c | 1 + virt/kvm/arm/arm.c | 4 +++- virt/kvm/arm/hypercalls.c | 3 +++ virt/kvm/arm/pvsched.c | 25 +++++++++++++++++++++++++ 6 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index ead428a9368e..597f9532d937 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -322,6 +322,11 @@ static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) return SMCCC_RET_NOT_SUPPORTED; }
+static inline int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 11420fddaa82..354bd5b9f179 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -327,6 +327,7 @@ struct kvm_vcpu_arch {
/* Guest PV sched state */ struct { + bool pv_unhalted; gpa_t base; } pvsched; }; @@ -447,6 +448,7 @@ static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted); int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu); +int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu);
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index d86dfd2d61d7..f5feef3c1e88 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -108,6 +108,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; + vcpu->arch.pvsched.pv_unhalted = false; kvm_vcpu_block(vcpu); kvm_clear_request(KVM_REQ_UNHALT, vcpu); } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 42dd9a2875d0..3c484da5aa3e 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -493,7 +493,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); - return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) + bool pv_unhalted = v->arch.pvsched.pv_unhalted; + + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v) || pv_unhalted) && !v->arch.power_off && !v->arch.pause); }
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c index f708b54d1450..7a6c5f18dff2 100644 --- a/virt/kvm/arm/hypercalls.c +++ b/virt/kvm/arm/hypercalls.c @@ -60,6 +60,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) vcpu->arch.pvsched.base = GPA_INVALID; val = SMCCC_RET_SUCCESS; break; + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: + val = kvm_pvsched_kick_vcpu(vcpu); + break; default: return kvm_psci_call(vcpu); } diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c index 8a1302a52464..f8f8da90d29c 100644 --- a/virt/kvm/arm/pvsched.c +++ b/virt/kvm/arm/pvsched.c @@ -38,6 +38,30 @@ void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted) pagefault_enable(); }
+int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) +{ + unsigned int vcpu_idx; + int val = SMCCC_RET_NOT_SUPPORTED; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *target = NULL; + + vcpu_idx = smccc_get_arg1(vcpu); + target = kvm_get_vcpu(kvm, vcpu_idx); + if (!target) + goto out; + + target->arch.pvsched.pv_unhalted = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, target); + kvm_vcpu_kick(target); + if (READ_ONCE(target->ready)) + kvm_vcpu_yield_to(target); + + val = SMCCC_RET_SUCCESS; + +out: + return val; +} + int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) { u32 feature = smccc_get_arg1(vcpu); @@ -47,6 +71,7 @@ int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu) case ARM_SMCCC_HV_PV_SCHED_FEATURES: case ARM_SMCCC_HV_PV_SCHED_IPA_INIT: case ARM_SMCCC_HV_PV_SCHED_IPA_RELEASE: + case ARM_SMCCC_HV_PV_SCHED_KICK_CPU: val = SMCCC_RET_SUCCESS; break; }
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
The generic kernel code already uses this interface, so let's provide an arm64 implementation of it.
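The .wait/.kick hooks added here follow the contract the generic pvqspinlock code expects: wait(ptr, val) parks the calling vCPU while *ptr still equals val, and kick(cpu) wakes a vCPU parked in wait(). A rough illustration of a waiter/waker pair (simplified, with illustrative type and field names; not the actual generic slowpath):

	enum { VCPU_RUNNING = 0, VCPU_HALTED = 1 };

	struct pv_node_example {
		u8 state;	/* VCPU_RUNNING or VCPU_HALTED */
		int cpu;	/* CPU id of the waiter */
	};

	/* Waiter: publish that we are about to halt, then park until kicked. */
	static void waiter_park(struct pv_node_example *node)
	{
		WRITE_ONCE(node->state, VCPU_HALTED);
		pv_wait(&node->state, VCPU_HALTED);
	}

	/* Waker: mark the waiter runnable again, then kick its vCPU. */
	static void waker_unpark(struct pv_node_example *node)
	{
		smp_store_release(&node->state, VCPU_RUNNING);
		pv_kick(node->cpu);
	}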
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/Kconfig | 14 ++++++++++ arch/arm64/include/asm/paravirt.h | 29 +++++++++++++++++++++ arch/arm64/include/asm/qspinlock.h | 15 ++++++++--- arch/arm64/include/asm/qspinlock_paravirt.h | 12 +++++++++ arch/arm64/include/asm/spinlock.h | 3 +++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/alternative.c | 5 ++-- arch/arm64/kernel/paravirt-spinlocks.c | 5 ++++ arch/arm64/kernel/paravirt.c | 4 +++ 9 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/include/asm/qspinlock_paravirt.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8842402e302d..0e71b6819334 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -817,6 +817,7 @@ config NODES_SHIFT config NUMA_AWARE_SPINLOCKS bool "Numa-aware spinlocks" depends on NUMA && QUEUED_SPINLOCKS + depends on PARAVIRT_SPINLOCKS default n help Introduce NUMA (Non Uniform Memory Access) awareness into @@ -901,6 +902,19 @@ config PARAVIRT under a hypervisor, potentially improving performance significantly over full virtualization.
+config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP + help + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + It has a minimal impact on native kernels and gives a nice performance + benefit on paravirtualized KVM kernels. + + If you are unsure how to answer this question, answer Y. + config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 62e9ba70f4e6..256e3f9df184 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -12,6 +12,12 @@ struct pv_time_ops { };
struct pv_sched_ops { + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + void (*queued_spin_unlock)(struct qspinlock *lock); + + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); + bool (*vcpu_is_preempted)(int cpu); };
@@ -35,6 +41,29 @@ static inline bool pv_vcpu_is_preempted(int cpu) return pv_ops.sched.vcpu_is_preempted(cpu); }
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +bool pv_is_native_spin_unlock(void); +static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + return pv_ops.sched.queued_spin_lock_slowpath(lock, val); +} + +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + return pv_ops.sched.queued_spin_unlock(lock); +} + +static inline void pv_wait(u8 *ptr, u8 val) +{ + return pv_ops.sched.wait(ptr, val); +} + +static inline void pv_kick(int cpu) +{ + return pv_ops.sched.kick(cpu); +} +#endif /* SMP && PARAVIRT_SPINLOCKS */ + #else
#define pv_sched_init() do {} while (0) diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h index fbe176fd4b3f..0022d446aa64 100644 --- a/arch/arm64/include/asm/qspinlock.h +++ b/arch/arm64/include/asm/qspinlock.h @@ -2,12 +2,19 @@ #ifndef _ASM_ARM64_QSPINLOCK_H #define _ASM_ARM64_QSPINLOCK_H
-#ifdef CONFIG_NUMA_AWARE_SPINLOCKS #include <asm-generic/qspinlock_types.h> +#include <asm/paravirt.h> + +#define _Q_PENDING_LOOPS (1 << 9)
+#ifdef CONFIG_NUMA_AWARE_SPINLOCKS extern void __cna_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#endif + +#ifdef CONFIG_PARAVIRT_SPINLOCKS extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void (*cna_queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
#define queued_spin_unlock queued_spin_unlock /** @@ -23,12 +30,12 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock)
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - cna_queued_spin_lock_slowpath(lock, val); + pv_queued_spin_lock_slowpath(lock, val); }
static inline void queued_spin_unlock(struct qspinlock *lock) { - native_queued_spin_unlock(lock); + pv_queued_spin_unlock(lock); } #endif
diff --git a/arch/arm64/include/asm/qspinlock_paravirt.h b/arch/arm64/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..eba4be28fbb9 --- /dev/null +++ b/arch/arm64/include/asm/qspinlock_paravirt.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye yezengruan@huawei.com + */ + +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +#endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 4a668995014c..17778794080a 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -20,6 +20,9 @@ #include <asm/qspinlock.h> #include <asm/paravirt.h>
+/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) + /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb()
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 73bc1db45839..ac110f1b6c27 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -49,6 +49,7 @@ arm64-obj-$(CONFIG_ARM64_ERR_RECOV) += ras.o arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt-spinlocks.o +arm64-obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o paravirt-spinlocks.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8511fc3b94bb..af5585389550 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -26,6 +26,7 @@ #include <asm/cpufeature.h> #include <asm/insn.h> #include <asm/sections.h> +#include <asm/paravirt.h> #include <linux/stop_machine.h>
#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) @@ -291,9 +292,9 @@ void __init apply_boot_alternatives(void) */ if ((numa_spinlock_flag == 1) || (numa_spinlock_flag == 0 && nr_node_ids > 1 && - cna_queued_spin_lock_slowpath == + pv_ops.sched.queued_spin_lock_slowpath == native_queued_spin_lock_slowpath)) { - cna_queued_spin_lock_slowpath = + pv_ops.sched.queued_spin_lock_slowpath = __cna_queued_spin_lock_slowpath; } #endif diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c index fd733eb02d42..3cb43f9e6a1c 100644 --- a/arch/arm64/kernel/paravirt-spinlocks.c +++ b/arch/arm64/kernel/paravirt-spinlocks.c @@ -11,3 +11,8 @@ __visible bool __native_vcpu_is_preempted(int cpu) { return false; } + +bool pv_is_native_spin_unlock(void) +{ + return false; +} diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 23709083a5c2..3edbcf89009e 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -30,6 +30,10 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled;
struct paravirt_patch_template pv_ops = { +#ifdef CONFIG_PARAVIRT_SPINLOCKS + .sched.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, + .sched.queued_spin_unlock = native_queued_spin_unlock, +#endif .sched.vcpu_is_preempted = __native_vcpu_is_preempted, }; EXPORT_SYMBOL_GPL(pv_ops);
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
Linux kernel builds were run in a KVM guest on a HiSilicon Kunpeng920 system. VM guests were set up with 32, 48 and 64 vCPUs on the 32 physical CPUs. The kernel build (make -j<n>, where <n> is the number of vCPUs available) was done in a VM with unpinned vCPUs, three times per configuration, with the best time selected. The build times of the original Linux 4.19.87 kernel and of the pvqspinlock kernel with various numbers of vCPUs are as follows:
Kernel        32 vCPUs    48 vCPUs    60 vCPUs
----------    --------    --------    --------
4.19.87       342.336s    602.048s    950.340s
pvqspinlock   341.366s    376.135s    437.037s
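Note that the PV qspinlock slowpath is opt-in: the guest has to be booted with the arm_pvspin parameter (handled by the early_param() added below), and it stays disabled on single-CPU guests regardless. An illustrative guest kernel command line:

	console=ttyAMA0 root=/dev/vda rw arm_pvspin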
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/paravirt.h | 5 +++ arch/arm64/kernel/paravirt.c | 60 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+)
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 256e3f9df184..10ec0610ad60 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -42,6 +42,7 @@ static inline bool pv_vcpu_is_preempted(int cpu) }
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +void __init pv_qspinlock_init(void); bool pv_is_native_spin_unlock(void); static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { @@ -62,6 +63,10 @@ static inline void pv_kick(int cpu) { return pv_ops.sched.kick(cpu); } +#else + +#define pv_qspinlock_init() do {} while (0) + #endif /* SMP && PARAVIRT_SPINLOCKS */
#else diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 3edbcf89009e..f2376b597848 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -25,6 +25,7 @@ #include <linux/types.h> #include <asm/paravirt.h> #include <asm/pvsched-abi.h> +#include <asm/qspinlock_paravirt.h>
struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -120,6 +121,63 @@ static bool has_kvm_pvsched(void) return (res.a0 == SMCCC_RET_SUCCESS); }
+#ifdef CONFIG_PARAVIRT_SPINLOCKS +static bool arm_pvspin = false; + +/* Kick a cpu by its cpuid. Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res); +} + +static void kvm_wait(u8 *ptr, u8 val) +{ + unsigned long flags; + + if (in_nmi()) + return; + + local_irq_save(flags); + + if (READ_ONCE(*ptr) != val) + goto out; + + dsb(sy); + wfi(); + +out: + local_irq_restore(flags); +} + +void __init pv_qspinlock_init(void) +{ + /* Don't use the PV qspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + arm_pvspin = false; + + if (!arm_pvspin) { + pr_info("PV qspinlocks disabled\n"); + return; + } + pr_info("PV qspinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.sched.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.sched.queued_spin_unlock = __pv_queued_spin_unlock; + pv_ops.sched.wait = kvm_wait; + pv_ops.sched.kick = kvm_kick_cpu; +} + +static __init int arm_parse_pvspin(char *arg) +{ + arm_pvspin = true; + return 0; +} +early_param("arm_pvspin", arm_parse_pvspin); +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + int __init pv_sched_init(void) { int ret; @@ -139,5 +197,7 @@ int __init pv_sched_init(void) pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted; pr_info("using PV sched preempted\n");
+ pv_qspinlock_init(); + return 0; }
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
Add tracepoints for PV qspinlock
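Once applied, the new guest-side events can be enabled through tracefs as usual; for example (paths assume tracefs is mounted at the conventional /sys/kernel/debug/tracing location):

	echo 1 > /sys/kernel/debug/tracing/events/paravirt/kvm_kick_cpu/enable
	echo 1 > /sys/kernel/debug/tracing/events/paravirt/kvm_wait/enable
	cat /sys/kernel/debug/tracing/trace_pipe

The host-side kvm_pvsched_kick_vcpu event is enabled the same way under events/kvm/.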
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/paravirt.c | 6 +++ arch/arm64/kernel/trace-paravirt.h | 66 ++++++++++++++++++++++++++++++ virt/kvm/arm/pvsched.c | 3 ++ virt/kvm/arm/trace.h | 18 ++++++++ 4 files changed, 93 insertions(+) create mode 100644 arch/arm64/kernel/trace-paravirt.h
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index f2376b597848..cf5d19874e5b 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -27,6 +27,9 @@ #include <asm/pvsched-abi.h> #include <asm/qspinlock_paravirt.h>
+#define CREATE_TRACE_POINTS +#include "trace-paravirt.h" + struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled;
@@ -130,6 +133,8 @@ static void kvm_kick_cpu(int cpu) struct arm_smccc_res res;
arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res); + + trace_kvm_kick_cpu("kvm kick cpu", smp_processor_id(), cpu); }
static void kvm_wait(u8 *ptr, u8 val) @@ -146,6 +151,7 @@ static void kvm_wait(u8 *ptr, u8 val)
dsb(sy); wfi(); + trace_kvm_wait("kvm wait wfi", smp_processor_id());
out: local_irq_restore(flags); diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h new file mode 100644 index 000000000000..2d76272f39ae --- /dev/null +++ b/arch/arm64/kernel/trace-paravirt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2019 Huawei Technologies Co., Ltd + * Author: Zengruan Ye yezengruan@huawei.com + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM paravirt + +#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PARAVIRT_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(kvm_kick_cpu, + TP_PROTO(const char *name, int cpu, int target), + TP_ARGS(name, cpu, target), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + __field(int, target) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + __entry->target = target; + ), + + TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d", + __get_str(name), + __entry->cpu, + __entry->target + ) +); + +TRACE_EVENT(kvm_wait, + TP_PROTO(const char *name, int cpu), + TP_ARGS(name, cpu), + + TP_STRUCT__entry( + __string(name, name) + __field(int, cpu) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->cpu = cpu; + ), + + TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi", + __get_str(name), + __entry->cpu + ) +); + +#endif /* _TRACE_PARAVIRT_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../../arch/arm64/kernel/ +#define TRACE_INCLUDE_FILE trace-paravirt + +#include <trace/define_trace.h> diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c index f8f8da90d29c..194a3af86217 100644 --- a/virt/kvm/arm/pvsched.c +++ b/virt/kvm/arm/pvsched.c @@ -11,6 +11,8 @@
#include <kvm/arm_hypercalls.h>
+#include "trace.h" + void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted) { __le32 preempted_le; @@ -57,6 +59,7 @@ int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu) kvm_vcpu_yield_to(target);
val = SMCCC_RET_SUCCESS; + trace_kvm_pvsched_kick_vcpu(vcpu->vcpu_id, target->vcpu_id);
out: return val; diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 674fda1eb18d..4aea348252f9 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -263,6 +263,24 @@ TRACE_EVENT(kvm_timer_update_irq, __entry->vcpu_id, __entry->irq, __entry->level) );
+TRACE_EVENT(kvm_pvsched_kick_vcpu, + TP_PROTO(int vcpu_id, int target_vcpu_id), + TP_ARGS(vcpu_id, target_vcpu_id), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, target_vcpu_id) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->target_vcpu_id = target_vcpu_id; + ), + + TP_printk("PV qspinlock: vcpu %d kick target vcpu %d", + __entry->vcpu_id, __entry->target_vcpu_id) +); + #endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
From: Zengruan Ye yezengruan@huawei.com
euleros inclusion category: feature bugzilla: NA CVE: NA
--------------------------------
arm64: defconfig: add CONFIG_PARAVIRT_SPINLOCKS to the arm64 defconfigs (enabled by default in openeuler_defconfig).
Signed-off-by: Zengruan Ye yezengruan@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/configs/euleros_defconfig | 1 + arch/arm64/configs/hulk_defconfig | 1 + arch/arm64/configs/openeuler_defconfig | 1 + 3 files changed, 3 insertions(+)
diff --git a/arch/arm64/configs/euleros_defconfig b/arch/arm64/configs/euleros_defconfig index 65e898e77973..12ef6ab39fbf 100644 --- a/arch/arm64/configs/euleros_defconfig +++ b/arch/arm64/configs/euleros_defconfig @@ -440,6 +440,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_SECCOMP=y CONFIG_PARAVIRT=y +# CONFIG_PARAVIRT_SPINLOCKS is not set CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig index ac6cbd6870a5..10c520d0bd4a 100644 --- a/arch/arm64/configs/hulk_defconfig +++ b/arch/arm64/configs/hulk_defconfig @@ -440,6 +440,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_SECCOMP=y CONFIG_PARAVIRT=y +# CONFIG_PARAVIRT_SPINLOCKS is not set CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 44a7d1661576..1034f92c7681 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -443,6 +443,7 @@ CONFIG_SYS_SUPPORTS_HUGETLBFS=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_SECCOMP=y CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y