This patch set aims to support the vCPU preempted check under KVM/arm64.
Christoffer Dall (1):
  KVM: arm/arm64: Factor out hypercall handling from PSCI code

Qian Cai (1):
  arm64/spinlock: fix a -Wunused-function warning

Steven Price (3):
  KVM: Implement kvm_put_guest()
  arm/arm64: Provide a wrapper for SMCCC 1.1 calls
  arm/arm64: Make use of the SMCCC 1.1 wrapper

Waiman Long (1):
  locking/osq: Use optimized spinning loop for arm64

Wanpeng Li (2):
  KVM: Boost vCPUs that are delivering interrupts
  KVM: Check preempted_in_kernel for involuntary preemption

Zengruan Ye (5):
  KVM: arm64: Document PV-sched interface
  KVM: arm64: Implement PV_SCHED_FEATURES call
  KVM: arm64: Support pvsched preempted via shared structure
  KVM: arm64: Add interface to support vCPU preempted check
  KVM: arm64: Support the vCPU preemption check

 Documentation/virtual/kvm/arm/pvsched.txt |  56 +++++++++++
 arch/arm/include/asm/kvm_host.h           |  20 ++++
 arch/arm/kvm/Makefile                     |   2 +-
 arch/arm/kvm/handle_exit.c                |   2 +-
 arch/arm/mm/proc-v7-bugs.c                |  13 +--
 arch/arm64/include/asm/kvm_host.h         |  18 ++++
 arch/arm64/include/asm/paravirt.h         |  25 ++++-
 arch/arm64/include/asm/pvsched-abi.h      |  16 +++
 arch/arm64/include/asm/spinlock.h         |  23 +++++
 arch/arm64/kernel/Makefile                |   2 +-
 arch/arm64/kernel/cpu_errata.c            |  82 ++++++---------
 arch/arm64/kernel/paravirt-spinlocks.c    |  13 +++
 arch/arm64/kernel/paravirt.c              | 117 ++++++++++++++++++++++
 arch/arm64/kernel/setup.c                 |   2 +
 arch/arm64/kvm/Makefile                   |   2 +
 arch/arm64/kvm/handle_exit.c              |   4 +-
 arch/s390/kvm/interrupt.c                 |   2 +-
 include/kvm/arm_hypercalls.h              |  43 ++++++++
 include/kvm/arm_psci.h                    |   2 +-
 include/linux/arm-smccc.h                 |  65 ++++++++++++
 include/linux/kvm_host.h                  |  23 +++++
 include/linux/kvm_types.h                 |   2 +
 kernel/locking/osq_lock.c                 |  23 ++---
 virt/kvm/arm/arm.c                        |   8 ++
 virt/kvm/arm/hypercalls.c                 |  69 +++++++++++++
 virt/kvm/arm/psci.c                       |  76 +------------
 virt/kvm/arm/pvsched.c                    |  55 ++++++++++
 virt/kvm/kvm_main.c                       |  17 ++--
 28 files changed, 620 insertions(+), 162 deletions(-)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
 create mode 100644 arch/arm64/include/asm/pvsched-abi.h
 create mode 100644 arch/arm64/kernel/paravirt-spinlocks.c
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
 create mode 100644 virt/kvm/arm/pvsched.c
From: Christoffer Dall <christoffer.dall@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit 55009c6ed2d24fc0f5521ab2482f145d269389ea
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
We currently intertwine the KVM PSCI implementation with the general dispatch of hypercall handling, which makes perfect sense because PSCI is the only category of hypercalls we support.
However, as we are about to support additional hypercalls, factor out this functionality into a separate hypercall handler file.
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
[steven.price@arm.com: rebased]
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 arch/arm/kvm/Makefile        |  2 +-
 arch/arm/kvm/handle_exit.c   |  2 +-
 arch/arm64/kvm/Makefile      |  1 +
 arch/arm64/kvm/handle_exit.c |  4 +-
 include/kvm/arm_hypercalls.h | 43 ++++++++++++++++
 include/kvm/arm_psci.h       |  2 +-
 virt/kvm/arm/hypercalls.c    | 51 ++++++++++++++++++++
 virt/kvm/arm/psci.c          | 76 +-----------------------------
 8 files changed, 102 insertions(+), 79 deletions(-)
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 48de846f2246..5994f3b3d375 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += handle_exit.o guest.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
 obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
 obj-y += $(KVM)/arm/aarch32.o
 
 obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 910bd8dabb3c..404c67a06a2f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -21,7 +21,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 #include <trace/events/kvm.h>
 
 #include "trace.h"
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index fd930cdeb16c..d17040add48b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7e144991e5c3..d86dfd2d61d7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -22,8 +22,6 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
-#include <kvm/arm_psci.h>
-
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/kvm_asm.h>
@@ -33,6 +31,8 @@
 #include <asm/debug-monitors.h>
 #include <asm/traps.h>
 
+#include <kvm/arm_hypercalls.h>
+
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
new file mode 100644
index 000000000000..0e2509d27910
--- /dev/null
+++ b/include/kvm/arm_hypercalls.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __KVM_ARM_HYPERCALLS_H
+#define __KVM_ARM_HYPERCALLS_H
+
+#include <asm/kvm_emulate.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+
+static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
+{
+        return vcpu_get_reg(vcpu, 0);
+}
+
+static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
+{
+        return vcpu_get_reg(vcpu, 1);
+}
+
+static inline unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
+{
+        return vcpu_get_reg(vcpu, 2);
+}
+
+static inline unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
+{
+        return vcpu_get_reg(vcpu, 3);
+}
+
+static inline void smccc_set_retval(struct kvm_vcpu *vcpu,
+                                    unsigned long a0,
+                                    unsigned long a1,
+                                    unsigned long a2,
+                                    unsigned long a3)
+{
+        vcpu_set_reg(vcpu, 0, a0);
+        vcpu_set_reg(vcpu, 1, a1);
+        vcpu_set_reg(vcpu, 2, a2);
+        vcpu_set_reg(vcpu, 3, a3);
+}
+
+#endif
diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index 4b1548129fa2..ac8cd19d2456 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -51,7 +51,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
 }
 
 
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 struct kvm_one_reg;
 
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
new file mode 100644
index 000000000000..d59c9381e237
--- /dev/null
+++ b/virt/kvm/arm/hypercalls.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+        u32 func_id = smccc_get_function(vcpu);
+        u32 val = SMCCC_RET_NOT_SUPPORTED;
+        u32 feature;
+
+        switch (func_id) {
+        case ARM_SMCCC_VERSION_FUNC_ID:
+                val = ARM_SMCCC_VERSION_1_1;
+                break;
+        case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+                feature = smccc_get_arg1(vcpu);
+                switch (feature) {
+                case ARM_SMCCC_ARCH_WORKAROUND_1:
+                        if (kvm_arm_harden_branch_predictor())
+                                val = SMCCC_RET_SUCCESS;
+                        break;
+                case ARM_SMCCC_ARCH_WORKAROUND_2:
+                        switch (kvm_arm_have_ssbd()) {
+                        case KVM_SSBD_FORCE_DISABLE:
+                        case KVM_SSBD_UNKNOWN:
+                                break;
+                        case KVM_SSBD_KERNEL:
+                                val = SMCCC_RET_SUCCESS;
+                                break;
+                        case KVM_SSBD_FORCE_ENABLE:
+                        case KVM_SSBD_MITIGATED:
+                                val = SMCCC_RET_NOT_REQUIRED;
+                                break;
+                        }
+                        break;
+                }
+                break;
+        default:
+                return kvm_psci_call(vcpu);
+        }
+
+        smccc_set_retval(vcpu, val, 0, 0, 0);
+        return 1;
+}
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 34d08ee63747..155197d8f6bd 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -26,6 +26,7 @@
 #include <asm/kvm_host.h>
 
 #include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 
 /*
  * This is an implementation of the Power State Coordination Interface
@@ -34,38 +35,6 @@
 
 #define AFFINITY_MASK(level)    ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
 
-static u32 smccc_get_function(struct kvm_vcpu *vcpu)
-{
-        return vcpu_get_reg(vcpu, 0);
-}
-
-static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
-{
-        return vcpu_get_reg(vcpu, 1);
-}
-
-static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
-{
-        return vcpu_get_reg(vcpu, 2);
-}
-
-static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
-{
-        return vcpu_get_reg(vcpu, 3);
-}
-
-static void smccc_set_retval(struct kvm_vcpu *vcpu,
-                             unsigned long a0,
-                             unsigned long a1,
-                             unsigned long a2,
-                             unsigned long a3)
-{
-        vcpu_set_reg(vcpu, 0, a0);
-        vcpu_set_reg(vcpu, 1, a1);
-        vcpu_set_reg(vcpu, 2, a2);
-        vcpu_set_reg(vcpu, 3, a3);
-}
-
 static unsigned long psci_affinity_mask(unsigned long affinity_level)
 {
         if (affinity_level <= 3)
@@ -384,7 +353,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
  * Errors:
  * -EINVAL: Unrecognized PSCI function
  */
-static int kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
 {
         switch (kvm_psci_version(vcpu, vcpu->kvm)) {
         case KVM_ARM_PSCI_1_0:
@@ -398,47 +367,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
         };
 }
 
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
-{
-        u32 func_id = smccc_get_function(vcpu);
-        u32 val = SMCCC_RET_NOT_SUPPORTED;
-        u32 feature;
-
-        switch (func_id) {
-        case ARM_SMCCC_VERSION_FUNC_ID:
-                val = ARM_SMCCC_VERSION_1_1;
-                break;
-        case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
-                feature = smccc_get_arg1(vcpu);
-                switch(feature) {
-                case ARM_SMCCC_ARCH_WORKAROUND_1:
-                        if (kvm_arm_harden_branch_predictor())
-                                val = SMCCC_RET_SUCCESS;
-                        break;
-                case ARM_SMCCC_ARCH_WORKAROUND_2:
-                        switch (kvm_arm_have_ssbd()) {
-                        case KVM_SSBD_FORCE_DISABLE:
-                        case KVM_SSBD_UNKNOWN:
-                                break;
-                        case KVM_SSBD_KERNEL:
-                                val = SMCCC_RET_SUCCESS;
-                                break;
-                        case KVM_SSBD_FORCE_ENABLE:
-                        case KVM_SSBD_MITIGATED:
-                                val = SMCCC_RET_NOT_REQUIRED;
-                                break;
-                        }
-                        break;
-                }
-                break;
-        default:
-                return kvm_psci_call(vcpu);
-        }
-
-        smccc_set_retval(vcpu, val, 0, 0, 0);
-        return 1;
-}
-
 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
 {
         return 1;               /* PSCI version */
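The value of this split is easiest to see with a sketch. The code below is
illustrative only and not part of the patch: PV_EXAMPLE_FEATURES is an
invented SMCCC function ID, and the function is a simplified variant of the
dispatcher above, showing where a new hypercall slots in while PSCI remains
the fallback.

    #include <linux/arm-smccc.h>
    #include <kvm/arm_hypercalls.h>
    #include <kvm/arm_psci.h>

    /* Sketch only: an invented function ID for illustration. */
    #define PV_EXAMPLE_FEATURES     0xC5000000

    int kvm_hvc_call_handler_example(struct kvm_vcpu *vcpu)
    {
            u32 func_id = smccc_get_function(vcpu);
            u32 val = SMCCC_RET_NOT_SUPPORTED;

            switch (func_id) {
            case PV_EXAMPLE_FEATURES:
                    /* A new hypercall is just one more case here... */
                    val = SMCCC_RET_SUCCESS;
                    break;
            default:
                    /* ...while PSCI stays the fallback, untouched. */
                    return kvm_psci_call(vcpu);
            }

            smccc_set_retval(vcpu, val, 0, 0, 0);
            return 1;
    }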
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit cac0f1b7285eaaf9a186c618c3a7304d82ed5493
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
kvm_put_guest() is analogous to put_user() - it writes a single value to the guest physical address. The implementation is built upon put_user() and so it has the same single copy atomic properties.
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 include/linux/kvm_host.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e1efdb40e2a..1e1c7a6241d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -700,6 +700,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                                   unsigned long len);
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                               gpa_t gpa, unsigned long len);
+
+#define __kvm_put_guest(kvm, gfn, offset, value, type)                  \
+({                                                                      \
+        unsigned long __addr = gfn_to_hva(kvm, gfn);                    \
+        type __user *__uaddr = (type __user *)(__addr + offset);        \
+        int __ret = -EFAULT;                                            \
+                                                                        \
+        if (!kvm_is_error_hva(__addr))                                  \
+                __ret = put_user(value, __uaddr);                       \
+        if (!__ret)                                                     \
+                mark_page_dirty(kvm, gfn);                              \
+        __ret;                                                          \
+})
+
+#define kvm_put_guest(kvm, gpa, value, type)                            \
+({                                                                      \
+        gpa_t __gpa = gpa;                                              \
+        struct kvm *__kvm = kvm;                                        \
+        __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT,                     \
+                        offset_in_page(__gpa), (value), type);          \
+})
+
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
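As a usage sketch (illustrative only, not part of the patch: the helper name
and the idea of a guest-shared GPA are assumptions), publishing a 32-bit flag
into guest memory becomes a one-liner:

    #include <linux/kvm_host.h>

    /* Sketch only: write a 32-bit flag at a guest physical address the
     * guest shared with the host earlier. kvm_put_guest() is single-copy
     * atomic like put_user(), returns -EFAULT on an unmapped GPA, and
     * marks the touched page dirty on success. */
    static int pv_publish_flag(struct kvm *kvm, gpa_t flag_gpa, u32 flag)
    {
            return kvm_put_guest(kvm, flag_gpa, flag, u32);
    }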
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit 541625ac47ce9d0835efaee0fcbaa251b0000a37
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
SMCCC 1.1 calls may use either HVC or SMC depending on the PSCI conduit. Rather than coding this in every call site, provide a macro which uses the correct instruction. The macro also handles the case where no conduit is configured or available by returning a not-supported error in res, and it returns the conduit that was used for the call.
This allows us to remove some duplicated code and will be useful later when adding paravirtualized time hypervisor calls.
Signed-off-by: Steven Price <steven.price@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 include/linux/arm-smccc.h | 45 +++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 18863d56273c..78b5fc59b2eb 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -311,5 +311,50 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 #define SMCCC_RET_NOT_SUPPORTED                 -1
 #define SMCCC_RET_NOT_REQUIRED                  -2
 
+/*
+ * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
+ * Used when the SMCCC conduit is not defined. The empty asm statement
+ * avoids compiler warnings about unused variables.
+ */
+#define __fail_smccc_1_1(...)                                           \
+        do {                                                            \
+                __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
+                asm ("" __constraints(__count_args(__VA_ARGS__)));      \
+                if (___res)                                             \
+                        ___res->a0 = SMCCC_RET_NOT_SUPPORTED;           \
+        } while (0)
+
+/*
+ * arm_smccc_1_1_invoke() - make an SMCCC v1.1 compliant call
+ *
+ * This is a variadic macro taking one to eight source arguments, and
+ * an optional return structure.
+ *
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+ *
+ * This macro will make either an HVC call or an SMC call depending on the
+ * current SMCCC conduit. If no valid conduit is available then -1
+ * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied).
+ *
+ * The return value also provides the conduit that was used.
+ */
+#define arm_smccc_1_1_invoke(...) ({                                    \
+                int method = psci_ops.conduit;                          \
+                switch (method) {                                       \
+                case PSCI_CONDUIT_HVC:                                  \
+                        arm_smccc_1_1_hvc(__VA_ARGS__);                 \
+                        break;                                          \
+                case PSCI_CONDUIT_SMC:                                  \
+                        arm_smccc_1_1_smc(__VA_ARGS__);                 \
+                        break;                                          \
+                default:                                                \
+                        __fail_smccc_1_1(__VA_ARGS__);                  \
+                        method = PSCI_CONDUIT_NONE;                     \
+                        break;                                          \
+                }                                                       \
+                method;                                                 \
+        })
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
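A call site then collapses to a single invocation. The sketch below is
illustrative (the helper name is invented; the constants are the existing
SMCCC ones) and assumes SMCCC >= 1.1 availability was already verified:

    #include <linux/arm-smccc.h>
    #include <linux/psci.h>

    /* Sketch only: query ARM_SMCCC_ARCH_WORKAROUND_1 support without
     * caring whether the conduit is HVC or SMC. */
    static bool workaround_1_supported(void)
    {
            struct arm_smccc_res res;

            /* The macro returns the conduit it used; PSCI_CONDUIT_NONE
             * means no conduit, and res.a0 holds NOT_SUPPORTED. */
            if (arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                     ARM_SMCCC_ARCH_WORKAROUND_1,
                                     &res) == PSCI_CONDUIT_NONE)
                    return false;

            return (int)res.a0 >= 0;
    }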
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit ce4d5ca2b9dd5d85944eb93c1bbf9eb11b7a907d
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
Rather than directly choosing which function to use based on psci_ops.conduit, use the new arm_smccc_1_1_invoke() wrapper instead.
In some cases we still need to do some operations based on the conduit, but the code duplication is removed.
No functional change.
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 arch/arm/mm/proc-v7-bugs.c     | 13 +++---
 arch/arm64/kernel/cpu_errata.c | 82 ++++++++++++----------------
 2 files changed, 34 insertions(+), 61 deletions(-)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index a6554fdb56c5..ac63a31347c1 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -81,12 +81,13 @@ static void cpu_v7_spectre_init(void)
                 if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
                         break;
 
+                arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+                                     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+                if ((int)res.a0 != 0)
+                        return;
+
                 switch (psci_ops.conduit) {
                 case PSCI_CONDUIT_HVC:
-                        arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                          ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-                        if ((int)res.a0 != 0)
-                                break;
                         per_cpu(harden_branch_predictor_fn, cpu) =
                                 call_hvc_arch_workaround_1;
                         cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
@@ -94,10 +95,6 @@ static void cpu_v7_spectre_init(void)
                         break;
 
                 case PSCI_CONDUIT_SMC:
-                        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                          ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-                        if ((int)res.a0 != 0)
-                                break;
                         per_cpu(harden_branch_predictor_fn, cpu) =
                                 call_smc_arch_workaround_1;
                         cpu_do_switch_mm = cpu_v7_smc_switch_mm;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 706a77a121f1..3c556ff2f33e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -222,40 +222,31 @@ static int detect_harden_bp_fw(void)
         if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
                 return -1;
 
+        arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+                             ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+        switch ((int)res.a0) {
+        case 1:
+                /* Firmware says we're just fine */
+                return 0;
+        case 0:
+                break;
+        default:
+                return -1;
+        }
+
         switch (psci_ops.conduit) {
         case PSCI_CONDUIT_HVC:
-                arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-                switch ((int)res.a0) {
-                case 1:
-                        /* Firmware says we're just fine */
-                        return 0;
-                case 0:
-                        cb = call_hvc_arch_workaround_1;
-                        /* This is a guest, no need to patch KVM vectors */
-                        smccc_start = NULL;
-                        smccc_end = NULL;
-                        break;
-                default:
-                        return -1;
-                }
+                cb = call_hvc_arch_workaround_1;
+                /* This is a guest, no need to patch KVM vectors */
+                smccc_start = NULL;
+                smccc_end = NULL;
                 break;
 
         case PSCI_CONDUIT_SMC:
-                arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-                switch ((int)res.a0) {
-                case 1:
-                        /* Firmware says we're just fine */
-                        return 0;
-                case 0:
-                        cb = call_smc_arch_workaround_1;
-                        smccc_start = __smccc_workaround_1_smc_start;
-                        smccc_end = __smccc_workaround_1_smc_end;
-                        break;
-                default:
-                        return -1;
-                }
+                cb = call_smc_arch_workaround_1;
+                smccc_start = __smccc_workaround_1_smc_start;
+                smccc_end = __smccc_workaround_1_smc_end;
                 break;
 
         default:
@@ -345,6 +336,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
 
 void arm64_set_ssbd_mitigation(bool state)
 {
+        int conduit;
+
         if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
                 pr_info_once("SSBD disabled by kernel configuration\n");
                 return;
@@ -358,19 +351,9 @@ void arm64_set_ssbd_mitigation(bool state)
                 return;
         }
 
-        switch (psci_ops.conduit) {
-        case PSCI_CONDUIT_HVC:
-                arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-                break;
-
-        case PSCI_CONDUIT_SMC:
-                arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-                break;
-
-        default:
-                WARN_ON_ONCE(1);
-                break;
-        }
+        conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+                                       NULL);
+        WARN_ON_ONCE(conduit == PSCI_CONDUIT_NONE);
 }
 
 static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -380,6 +363,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
         bool required = true;
         s32 val;
         bool this_cpu_safe = false;
+        int conduit;
 
         WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
 
@@ -404,18 +388,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
                 return false;
         }
 
-        switch (psci_ops.conduit) {
-        case PSCI_CONDUIT_HVC:
-                arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-                break;
+        conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+                                       ARM_SMCCC_ARCH_WORKAROUND_2, &res);
 
-        case PSCI_CONDUIT_SMC:
-                arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-                                  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-                break;
-
-        default:
+        if (conduit == PSCI_CONDUIT_NONE) {
                 ssbd_state = ARM64_SSBD_UNKNOWN;
                 if (!this_cpu_safe)
                         __ssb_safe = false;
From: Waiman Long <longman@redhat.com>

mainline inclusion
from mainline-v5.8-rc5
commit f5bfdc8e3947a7ae489cf8ae9cfd6b3fb357b952
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
Arm64 has a more optimized spinning loop (atomic_cond_read_acquire) using wfe for spinlock that can boost performance of sibling threads by putting the current cpu to a wait state that is broken only when the monitored variable changes or an external event happens.
OSQ has a more complicated spinning loop. Besides the lock value, it also checks for need_resched() and vcpu_is_preempted(). The check for need_resched() is not a problem as it is only set by the tick interrupt handler. That will be detected by the spinning cpu right after iret.
The vcpu_is_preempted() check, however, is a problem as changes to the preempt state of the previous node will not affect the wait state. For ARM64, vcpu_is_preempted() is not currently defined and so is a no-op. Will has indicated that he is planning to para-virtualize wfe instead of defining vcpu_is_preempted for PV support. So just add a comment in arch/arm64/include/asm/spinlock.h to indicate that vcpu_is_preempted() should not be defined as suggested.
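For reference, the generic fallback of smp_cond_load_relaxed() is a plain
polling loop, sketched below (simplified from asm-generic/barrier.h of this
era). On arm64 the same macro is instead built on an LDXR/WFE monitor-wait,
where the condition is only re-evaluated when the monitored variable's
cacheline changes; that is why a condition that polls unrelated memory, such
as a real vcpu_is_preempted(), must not be folded into it.

    /* Simplified sketch of the generic fallback; arm64 replaces the
     * cpu_relax() poll with an LDXR/WFE wait on *ptr's cacheline. */
    #define smp_cond_load_relaxed(ptr, cond_expr) ({        \
            typeof(ptr) __PTR = (ptr);                      \
            typeof(*ptr) VAL;                               \
            for (;;) {                                      \
                    VAL = READ_ONCE(*__PTR);                \
                    if (cond_expr)                          \
                            break;                          \
                    cpu_relax();                            \
            }                                               \
            VAL;                                            \
    })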
On a 2-socket 56-core 224-thread ARM64 system, a kernel mutex locking microbenchmark was run for 10s with and without the patch. The performance numbers before patch were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 316/123,143/2,121,269
Threads = 224, Total Rate = 2,757 kop/s; Percpu Rate = 12 kop/s
After patch, the numbers were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 334/147,836/1,304,787
Threads = 224, Total Rate = 3,311 kop/s; Percpu Rate = 15 kop/s
So there was about 20% performance improvement.
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/20200113150735.21956-1-longman@redhat.com
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 arch/arm64/include/asm/spinlock.h |  9 +++++++++
 kernel/locking/osq_lock.c         | 23 ++++++++++-------------
 2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 38116008d18b..1210e34b4c08 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -22,4 +22,13 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()        smp_mb()
 
+/*
+ * Changing this will break osq_lock() thanks to the call inside
+ * smp_cond_load_relaxed().
+ *
+ * See:
+ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a...
+ */
+#define vcpu_is_preempted(cpu)  false
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 6ef600aa0f47..1f7734949ac8 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -134,20 +134,17 @@ bool osq_lock(struct optimistic_spin_queue *lock)
          * cmpxchg in an attempt to undo our queueing.
          */
 
-        while (!READ_ONCE(node->locked)) {
-                /*
-                 * If we need to reschedule bail... so we can block.
-                 * Use vcpu_is_preempted() to avoid waiting for a preempted
-                 * lock holder:
-                 */
-                if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
-                        goto unqueue;
-
-                cpu_relax();
-        }
-        return true;
+        /*
+         * Wait to acquire the lock or cancelation. Note that need_resched()
+         * will come with an IPI, which will wake smp_cond_load_relaxed() if it
+         * is implemented with a monitor-wait. vcpu_is_preempted() relies on
+         * polling, be careful.
+         */
+        if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
+                                  vcpu_is_preempted(node_cpu(node->prev))))
+                return true;
 
-unqueue:
+        /* unqueue */
         /*
          * Step - A -- stabilize @prev
          *
From: Qian Cai <cai@lca.pw>

mainline inclusion
from mainline-v5.8-rc5
commit 345d52c184dc7de98cff63f1bfa6f90e9db19809
category: bugfix
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
The commit f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64") introduced a warning from Clang because vcpu_is_preempted() is compiled away,
kernel/locking/osq_lock.c:25:19: warning: unused function 'node_cpu' [-Wunused-function]
static inline int node_cpu(struct optimistic_spin_node *node)
                  ^
1 warning generated.
Fix it by converting vcpu_is_preempted() to a static inline function.
Fixes: f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64")
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Waiman Long <longman@redhat.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 arch/arm64/include/asm/spinlock.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 1210e34b4c08..a9dec081beca 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -29,6 +29,10 @@
  * See:
  * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a...
  */
-#define vcpu_is_preempted(cpu)  false
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+        return false;
+}
 
 #endif /* __ASM_SPINLOCK_H */
From: Wanpeng Li <wanpengli@tencent.com>

mainline inclusion
from mainline-v5.8-rc5
commit d73eb57b80b98ae147e4e6a7d9877c2ba175f972
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
Inspired by commit 9cac38dd5d (KVM/s390: Set preempted flag during vcpu wakeup and interrupt delivery), we want to also boost not just lock holders but also vCPUs that are delivering interrupts. Most smp_call_function_many calls are synchronous, so the IPI target vCPUs are also good yield candidates. This patch introduces vcpu->ready to boost vCPUs during wakeup and interrupt delivery time; unlike s390 we do not reuse vcpu->preempted so that voluntarily preempted vCPUs are taken into account by kvm_vcpu_on_spin, but vmx_vcpu_pi_put is not affected (VT-d PI handles voluntary preemption separately, in pi_pre_block).
Testing on an 80-HT, 2-socket Xeon Skylake server with an 80-vCPU, 80GB RAM VM, running ebizzy -M:
      vanilla    boosting    improved
1VM    21443       23520          9%
2VM     2800        8000        180%
3VM     1800        3100         72%
Testing on my Haswell desktop 8 HT, with 8 vCPUs VM 8GB RAM, two VMs, one running ebizzy -M, the other running 'stress --cpu 2':
w/ boosting + w/o pv sched yield(vanilla)
   vanilla    boosting    improved
      1570        4000        155%
w/ boosting + w/ pv sched yield(vanilla)
   vanilla    boosting    improved
      1844        5157        179%
w/o boosting, perf top in VM:
 72.33%  [kernel]       [k] smp_call_function_many
  4.22%  [kernel]       [k] call_function_i
  3.71%  [kernel]       [k] async_page_fault
w/ boosting, perf top in VM:
 38.43%  [kernel]       [k] smp_call_function_many
  6.31%  [kernel]       [k] async_page_fault
  6.13%  libc-2.23.so   [.] __memcpy_avx_unaligned
  4.88%  [kernel]       [k] call_function_interrupt
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Marc Zyngier <maz@kernel.org>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 arch/s390/kvm/interrupt.c |  2 +-
 include/linux/kvm_host.h  |  1 +
 virt/kvm/kvm_main.c       | 12 ++++++++----
 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 05ea466b9e40..c567a20ecb78 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1144,7 +1144,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
          * The vcpu gave up the cpu voluntarily, mark it as a good
          * yield-candidate.
          */
-        vcpu->preempted = true;
+        vcpu->ready = true;
         swake_up_one(&vcpu->wq);
         vcpu->stat.halt_wakeup++;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1e1c7a6241d1..b9443fedf24e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -274,6 +274,7 @@ struct kvm_vcpu {
         } spin_loop;
 #endif
         bool preempted;
+        bool ready;
         struct kvm_vcpu_arch arch;
         struct dentry *debugfs_dentry;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 47b6d182fd8a..c1e63fd9b1ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -334,6 +334,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         kvm_vcpu_set_in_spin_loop(vcpu, false);
         kvm_vcpu_set_dy_eligible(vcpu, false);
         vcpu->preempted = false;
+        vcpu->ready = false;
 
         r = kvm_arch_vcpu_init(vcpu);
         if (r < 0)
@@ -2281,6 +2282,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
         wqp = kvm_arch_vcpu_wq(vcpu);
         if (swq_has_sleeper(wqp)) {
                 swake_up_one(wqp);
+                WRITE_ONCE(vcpu->ready, true);
                 ++vcpu->stat.halt_wakeup;
                 return true;
         }
@@ -2417,7 +2419,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                 continue;
                         } else if (pass && i > last_boosted_vcpu)
                                 break;
-                        if (!READ_ONCE(vcpu->preempted))
+                        if (!READ_ONCE(vcpu->ready))
                                 continue;
                         if (vcpu == me)
                                 continue;
@@ -4172,8 +4174,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
         struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-        if (vcpu->preempted)
-                vcpu->preempted = false;
+        vcpu->preempted = false;
+        WRITE_ONCE(vcpu->ready, false);
 
         kvm_arch_sched_in(vcpu, cpu);
 
@@ -4185,8 +4187,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
         struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-        if (current->state == TASK_RUNNING)
+        if (current->state == TASK_RUNNING) {
                 vcpu->preempted = true;
+                WRITE_ONCE(vcpu->ready, true);
+        }
         kvm_arch_vcpu_put(vcpu);
 }
From: Wanpeng Li <wanpengli@tencent.com>

mainline inclusion
from mainline-v5.8-rc5
commit 046ddeed0461b5d270470c253cbb321103d048b6
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
preempted_in_kernel is updated in the preempt notifier when involuntary preemption occurs, so it can be stale when voluntarily preempted vCPUs are taken into account by the kvm_vcpu_on_spin() loop. This patch lets the loop check preempted_in_kernel only for involuntary preemption.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 virt/kvm/kvm_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c1e63fd9b1ec..ff1d6ba8e95f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2425,7 +2425,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                 continue;
                         if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
                                 continue;
-                        if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
+                        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
+                            !kvm_arch_vcpu_in_kernel(vcpu))
                                 continue;
                         if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                 continue;
@@ -4174,7 +4175,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
         struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-        vcpu->preempted = false;
+        WRITE_ONCE(vcpu->preempted, false);
         WRITE_ONCE(vcpu->ready, false);
 
         kvm_arch_sched_in(vcpu, cpu);
@@ -4188,7 +4189,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
         struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
         if (current->state == TASK_RUNNING) {
-                vcpu->preempted = true;
+                WRITE_ONCE(vcpu->preempted, true);
                 WRITE_ONCE(vcpu->ready, true);
         }
         kvm_arch_vcpu_put(vcpu);
From: Zengruan Ye <yezengruan@huawei.com>

euleros inclusion
category: feature
bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=35
CVE: NA
--------------------------------
Introduce PV-sched, a paravirtualization interface for KVM/arm64.
A hypercall interface is provided for the guest to interrogate the hypervisor's support for this interface and the location of the shared memory structures.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Chaochao Xing <xingchaochao@huawei.com>
Reviewed-by: Zengruan Ye <yezengruan@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
---
 Documentation/virtual/kvm/arm/pvsched.txt | 56 +++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt
new file mode 100644
index 000000000000..1d5aefc290ec
--- /dev/null
+++ b/Documentation/virtual/kvm/arm/pvsched.txt
@@ -0,0 +1,56 @@
+Paravirtualized sched support for arm64
+=======================================
+
+KVM/arm64 provides some hypervisor service calls to support a paravirtualized
+sched.
+
+Some SMCCC compatible hypercalls are defined:
+
+* PV_SCHED_FEATURES:    0xC5000090
+* PV_SCHED_IPA_INIT:    0xC5000091
+* PV_SCHED_IPA_RELEASE: 0xC5000092
+
+The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1
+ARCH_FEATURES mechanism before calling it.
+
+PV_SCHED_FEATURES
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000090
+    PV_call_id:   (uint32) The function to query for support.
+    Return value: (int64)  NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
+                           PV-sched feature is supported by the hypervisor.
+    ============= ======== ==========
+
+PV_SCHED_IPA_INIT
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000091
+    Return value: (int64)  NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of
+                           this vCPU's PV data structure is shared to the
+                           hypervisor.
+    ============= ======== ==========
+
+PV_SCHED_IPA_RELEASE
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000092
+    Return value: (int64)  NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of
+                           this vCPU's PV data structure is released.
+    ============= ======== ==========
+
+PV sched state
+--------------
+
+The structure pointed to by the PV_SCHED_IPA hypercall is as follows:
+
++-----------+-------------+-------------+-----------------------------------+
+| Field     | Byte Length | Byte Offset | Description                       |
++===========+=============+=============+===================================+
+| preempted |      4      |      0      | Indicates that the vCPU that owns |
+|           |             |             | this struct is running or not.    |
+|           |             |             | Non-zero values mean the vCPU has |
+|           |             |             | been preempted. Zero means the    |
+|           |             |             | vCPU is not preempted.            |
++-----------+-------------+-------------+-----------------------------------+
+
+The preempted field will be updated to 0 by the hypervisor prior to scheduling
+a vCPU. When the vCPU is scheduled out, the preempted field will be updated
+to 1 by the hypervisor.
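As a guest-side sketch of this probing sequence (illustrative only: the macro
and function names are invented here, and only the function IDs come from the
document above):

    #include <linux/arm-smccc.h>
    #include <linux/psci.h>

    #define PV_SCHED_FEATURES       0xC5000090      /* from the document */
    #define PV_SCHED_IPA_INIT       0xC5000091      /* from the document */

    static bool pv_sched_ipa_init_supported(void)
    {
            struct arm_smccc_res res;

            /* Probe the PV_SCHED_FEATURES hypercall itself via the
             * SMCCC 1.1 ARCH_FEATURES mechanism, as required above. */
            arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                 PV_SCHED_FEATURES, &res);
            if ((int)res.a0 != SMCCC_RET_SUCCESS)
                    return false;

            /* Then ask whether the PV_SCHED_IPA_INIT feature is there. */
            arm_smccc_1_1_invoke(PV_SCHED_FEATURES, PV_SCHED_IPA_INIT, &res);
            return (int)res.a0 == SMCCC_RET_SUCCESS;
    }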