From: Zengruan Ye <yezengruan@huawei.com>
This patch set aims to support the vCPU preempted check and PV qspinlock under KVM/arm64.
Changes from v2:
 * Remove Wanpeng Li's patches from this patch set
 * Bunch of typo fixes

Changes from v1:
 * Fix pv_time_ops undeclared in drivers/xen/time.c
 * Fix native_queued_spin_lock_slowpath undeclared in
   arch/arm64/kernel/alternative.c
Christoffer Dall (1):
  KVM: arm/arm64: Factor out hypercall handling from PSCI code

Qian Cai (1):
  arm64/spinlock: fix a -Wunused-function warning

Steven Price (3):
  KVM: Implement kvm_put_guest()
  arm/arm64: Provide a wrapper for SMCCC 1.1 calls
  arm/arm64: Make use of the SMCCC 1.1 wrapper

Waiman Long (1):
  locking/osq: Use optimized spinning loop for arm64

Zengruan Ye (11):
  arm/paravirt: Use a single ops structure
  KVM: arm64: Document PV-sched interface
  KVM: arm64: Implement PV_SCHED_FEATURES call
  KVM: arm64: Support pvsched preempted via shared structure
  KVM: arm64: Add interface to support vCPU preempted check
  KVM: arm64: Support the vCPU preemption check
  KVM: arm64: Add SMCCC PV-sched to kick cpu
  KVM: arm64: Implement PV_SCHED_KICK_CPU call
  KVM: arm64: Add interface to support PV qspinlock
  KVM: arm64: Enable PV qspinlock
  KVM: arm64: Add tracepoints for PV qspinlock
 Documentation/virtual/kvm/arm/pvsched.txt   |  64 +++++++
 arch/arm/include/asm/kvm_host.h             |  25 +++
 arch/arm/include/asm/paravirt.h             |   9 +-
 arch/arm/kernel/paravirt.c                  |   4 +-
 arch/arm/kvm/Makefile                       |   2 +-
 arch/arm/kvm/handle_exit.c                  |   2 +-
 arch/arm/mm/proc-v7-bugs.c                  |  13 +-
 arch/arm64/Kconfig                          |  14 ++
 arch/arm64/include/asm/kvm_host.h           |  20 ++
 arch/arm64/include/asm/paravirt.h           |  63 ++++++-
 arch/arm64/include/asm/pvsched-abi.h        |  16 ++
 arch/arm64/include/asm/qspinlock.h          |  15 +-
 arch/arm64/include/asm/qspinlock_paravirt.h |  12 ++
 arch/arm64/include/asm/spinlock.h           |  26 +++
 arch/arm64/kernel/Makefile                  |   3 +-
 arch/arm64/kernel/alternative.c             |   5 +-
 arch/arm64/kernel/cpu_errata.c              |  82 +++------
 arch/arm64/kernel/paravirt-spinlocks.c      |  18 ++
 arch/arm64/kernel/paravirt.c                | 192 +++++++++++++++++++-
 arch/arm64/kernel/setup.c                   |   2 +
 arch/arm64/kernel/trace-paravirt.h          |  66 +++++++
 arch/arm64/kvm/Makefile                     |   2 +
 arch/arm64/kvm/handle_exit.c                |   5 +-
 drivers/xen/time.c                          |   4 +
 include/kvm/arm_hypercalls.h                |  43 +++++
 include/kvm/arm_psci.h                      |   2 +-
 include/linux/arm-smccc.h                   |  65 +++++++
 include/linux/cpuhotplug.h                  |   1 +
 include/linux/kvm_host.h                    |  22 +++
 include/linux/kvm_types.h                   |   2 +
 kernel/locking/osq_lock.c                   |  23 +--
 virt/kvm/arm/arm.c                          |  12 +-
 virt/kvm/arm/hypercalls.c                   |  68 +++++++
 virt/kvm/arm/psci.c                         |  76 +------
 virt/kvm/arm/pvsched.c                      |  73 ++++++++
 virt/kvm/arm/trace.h                        |  18 ++
 36 files changed, 899 insertions(+), 170 deletions(-)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
 create mode 100644 arch/arm64/include/asm/pvsched-abi.h
 create mode 100644 arch/arm64/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/arm64/kernel/paravirt-spinlocks.c
 create mode 100644 arch/arm64/kernel/trace-paravirt.h
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
 create mode 100644 virt/kvm/arm/pvsched.c
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Combine the paravirt ops structures into a single structure, keeping the original structure as a sub-structure.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 arch/arm/include/asm/paravirt.h   | 9 +++++++--
 arch/arm/kernel/paravirt.c        | 4 ++--
 arch/arm64/include/asm/paravirt.h | 9 +++++++--
 arch/arm64/kernel/paravirt.c      | 4 ++--
 drivers/xen/time.c                | 4 ++++
 5 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index d51e5cd31..cdbf02d9c 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
 struct pv_time_ops {
 	unsigned long long (*steal_clock)(int cpu);
 };
-extern struct pv_time_ops pv_time_ops;
+
+struct paravirt_patch_template {
+	struct pv_time_ops time;
+};
+
+extern struct paravirt_patch_template pv_ops;

 static inline u64 paravirt_steal_clock(int cpu)
 {
-	return pv_time_ops.steal_clock(cpu);
+	return pv_ops.time.steal_clock(cpu);
 }
 #endif

diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 53f371ed4..75c158b03 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -21,5 +21,5 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;

-struct pv_time_ops pv_time_ops;
-EXPORT_SYMBOL_GPL(pv_time_ops);
+struct paravirt_patch_template pv_ops;
+EXPORT_SYMBOL_GPL(pv_ops);
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index bb5dcea42..799d9dd6f 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -10,11 +10,16 @@ extern struct static_key paravirt_steal_rq_enabled;
 struct pv_time_ops {
 	unsigned long long (*steal_clock)(int cpu);
 };
-extern struct pv_time_ops pv_time_ops;
+
+struct paravirt_patch_template {
+	struct pv_time_ops time;
+};
+
+extern struct paravirt_patch_template pv_ops;

 static inline u64 paravirt_steal_clock(int cpu)
 {
-	return pv_time_ops.steal_clock(cpu);
+	return pv_ops.time.steal_clock(cpu);
 }
 #endif

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 53f371ed4..75c158b03 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -21,5 +21,5 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;

-struct pv_time_ops pv_time_ops;
-EXPORT_SYMBOL_GPL(pv_time_ops);
+struct paravirt_patch_template pv_ops;
+EXPORT_SYMBOL_GPL(pv_ops);
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 3e741cd14..2952ac253 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -175,7 +175,11 @@ void __init xen_time_setup_guest(void)
 	xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
 					VMASST_TYPE_runstate_update_flag);

+#ifdef CONFIG_ARM64
+	pv_ops.time.steal_clock = xen_steal_clock;
+#else
 	pv_time_ops.steal_clock = xen_steal_clock;
+#endif

 	static_key_slow_inc(&paravirt_steal_enabled);
 	if (xen_runstate_remote)
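As an aside, a minimal sketch (not part of the patch) of how a guest backend would plug into the combined structure after this change; example_steal_clock and example_guest_time_setup are hypothetical names:

  #include <asm/paravirt.h>

  /* Hypothetical backend: a real one would read per-cpu stolen time */
  static unsigned long long example_steal_clock(int cpu)
  {
  	return 0;
  }

  static void __init example_guest_time_setup(void)
  {
  	/* With the single ops structure, time ops live under pv_ops.time */
  	pv_ops.time.steal_clock = example_steal_clock;
  }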
From: Christoffer Dall <christoffer.dall@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit 55009c6ed2d24fc0f5521ab2482f145d269389ea
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
We currently intertwine the KVM PSCI implementation with the general dispatch of hypercall handling, which makes perfect sense because PSCI is the only category of hypercalls we support.
However, as we are about to support additional hypercalls, factor out this functionality into a separate hypercall handler file.
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
[steven.price@arm.com: rebased]
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/kvm/Makefile        |  2 +-
 arch/arm/kvm/handle_exit.c   |  2 +-
 arch/arm64/kvm/Makefile      |  1 +
 arch/arm64/kvm/handle_exit.c |  4 +-
 include/kvm/arm_hypercalls.h | 43 ++++++++++++++++++++
 include/kvm/arm_psci.h       |  2 +-
 virt/kvm/arm/hypercalls.c    | 51 ++++++++++++++++++++++++
 virt/kvm/arm/psci.c          | 76 +-----------------------------------
 8 files changed, 102 insertions(+), 79 deletions(-)
 create mode 100644 include/kvm/arm_hypercalls.h
 create mode 100644 virt/kvm/arm/hypercalls.c
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 48de846f2..5994f3b3d 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += handle_exit.o guest.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
 obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
 obj-y += $(KVM)/arm/aarch32.o

 obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 910bd8dab..404c67a06 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -21,7 +21,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 #include <trace/events/kvm.h>

 #include "trace.h"
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index fd930cdeb..d17040add 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o

 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 2349c2662..e893528cd 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -22,8 +22,6 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>

-#include <kvm/arm_psci.h>
-
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/kvm_asm.h>
@@ -33,6 +31,8 @@
 #include <asm/debug-monitors.h>
 #include <asm/traps.h>

+#include <kvm/arm_hypercalls.h>
+
 #define CREATE_TRACE_POINTS
 #include "trace.h"
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
new file mode 100644
index 000000000..0e2509d27
--- /dev/null
+++ b/include/kvm/arm_hypercalls.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __KVM_ARM_HYPERCALLS_H
+#define __KVM_ARM_HYPERCALLS_H
+
+#include <asm/kvm_emulate.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+
+static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
+{
+	return vcpu_get_reg(vcpu, 0);
+}
+
+static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
+{
+	return vcpu_get_reg(vcpu, 1);
+}
+
+static inline unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
+{
+	return vcpu_get_reg(vcpu, 2);
+}
+
+static inline unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
+{
+	return vcpu_get_reg(vcpu, 3);
+}
+
+static inline void smccc_set_retval(struct kvm_vcpu *vcpu,
+				    unsigned long a0,
+				    unsigned long a1,
+				    unsigned long a2,
+				    unsigned long a3)
+{
+	vcpu_set_reg(vcpu, 0, a0);
+	vcpu_set_reg(vcpu, 1, a1);
+	vcpu_set_reg(vcpu, 2, a2);
+	vcpu_set_reg(vcpu, 3, a3);
+}
+
+#endif
diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index 4b1548129..ac8cd19d2 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -51,7 +51,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
 }

-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);

 struct kvm_one_reg;
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
new file mode 100644
index 000000000..738df465b
--- /dev/null
+++ b/virt/kvm/arm/hypercalls.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+	u32 func_id = smccc_get_function(vcpu);
+	u32 val = SMCCC_RET_NOT_SUPPORTED;
+	u32 feature;
+
+	switch (func_id) {
+	case ARM_SMCCC_VERSION_FUNC_ID:
+		val = ARM_SMCCC_VERSION_1_1;
+		break;
+	case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+		feature = smccc_get_arg1(vcpu);
+		switch (feature) {
+		case ARM_SMCCC_ARCH_WORKAROUND_1:
+			if (kvm_arm_harden_branch_predictor())
+				val = SMCCC_RET_SUCCESS;
+			break;
+		case ARM_SMCCC_ARCH_WORKAROUND_2:
+			switch (kvm_arm_have_ssbd()) {
+			case KVM_SSBD_FORCE_DISABLE:
+			case KVM_SSBD_UNKNOWN:
+				break;
+			case KVM_SSBD_KERNEL:
+				val = SMCCC_RET_SUCCESS;
+				break;
+			case KVM_SSBD_FORCE_ENABLE:
+			case KVM_SSBD_MITIGATED:
+				val = SMCCC_RET_NOT_REQUIRED;
+				break;
+			}
+			break;
+		}
+		break;
+	default:
+		return kvm_psci_call(vcpu);
+	}
+
+	smccc_set_retval(vcpu, val, 0, 0, 0);
+	return 1;
+}
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 34d08ee63..155197d8f 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -26,6 +26,7 @@
 #include <asm/kvm_host.h>

 #include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>

 /*
  * This is an implementation of the Power State Coordination Interface
  *
@@ -34,38 +35,6 @@
 #define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)

-static u32 smccc_get_function(struct kvm_vcpu *vcpu)
-{
-	return vcpu_get_reg(vcpu, 0);
-}
-
-static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
-{
-	return vcpu_get_reg(vcpu, 1);
-}
-
-static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
-{
-	return vcpu_get_reg(vcpu, 2);
-}
-
-static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
-{
-	return vcpu_get_reg(vcpu, 3);
-}
-
-static void smccc_set_retval(struct kvm_vcpu *vcpu,
-			     unsigned long a0,
-			     unsigned long a1,
-			     unsigned long a2,
-			     unsigned long a3)
-{
-	vcpu_set_reg(vcpu, 0, a0);
-	vcpu_set_reg(vcpu, 1, a1);
-	vcpu_set_reg(vcpu, 2, a2);
-	vcpu_set_reg(vcpu, 3, a3);
-}
-
 static unsigned long psci_affinity_mask(unsigned long affinity_level)
 {
 	if (affinity_level <= 3)
@@ -384,7 +353,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
  * Errors:
  * -EINVAL: Unrecognized PSCI function
  */
-static int kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
 {
 	switch (kvm_psci_version(vcpu, vcpu->kvm)) {
 	case KVM_ARM_PSCI_1_0:
@@ -398,47 +367,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
 	};
 }

-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
-{
-	u32 func_id = smccc_get_function(vcpu);
-	u32 val = SMCCC_RET_NOT_SUPPORTED;
-	u32 feature;
-
-	switch (func_id) {
-	case ARM_SMCCC_VERSION_FUNC_ID:
-		val = ARM_SMCCC_VERSION_1_1;
-		break;
-	case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
-		feature = smccc_get_arg1(vcpu);
-		switch (feature) {
-		case ARM_SMCCC_ARCH_WORKAROUND_1:
-			if (kvm_arm_harden_branch_predictor())
-				val = SMCCC_RET_SUCCESS;
-			break;
-		case ARM_SMCCC_ARCH_WORKAROUND_2:
-			switch (kvm_arm_have_ssbd()) {
-			case KVM_SSBD_FORCE_DISABLE:
-			case KVM_SSBD_UNKNOWN:
-				break;
-			case KVM_SSBD_KERNEL:
-				val = SMCCC_RET_SUCCESS;
-				break;
-			case KVM_SSBD_FORCE_ENABLE:
-			case KVM_SSBD_MITIGATED:
-				val = SMCCC_RET_NOT_REQUIRED;
-				break;
-			}
-			break;
-		}
-		break;
-	default:
-		return kvm_psci_call(vcpu);
-	}
-
-	smccc_set_retval(vcpu, val, 0, 0, 0);
-	return 1;
-}
-
 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
 {
 	return 1;		/* PSCI version */
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit cac0f1b7285eaaf9a186c618c3a7304d82ed5493
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
kvm_put_guest() is analogous to put_user() - it writes a single value to the guest physical address. The implementation is built upon put_user() and so it has the same single copy atomic properties.
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/kvm_host.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a0de4c7dc..efd946663 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -727,6 +727,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			   unsigned long len);
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			      gpa_t gpa, unsigned long len);
+
+#define __kvm_put_guest(kvm, gfn, offset, value, type)			\
+({									\
+	unsigned long __addr = gfn_to_hva(kvm, gfn);			\
+	type __user *__uaddr = (type __user *)(__addr + offset);	\
+	int __ret = -EFAULT;						\
+									\
+	if (!kvm_is_error_hva(__addr))					\
+		__ret = put_user(value, __uaddr);			\
+	if (!__ret)							\
+		mark_page_dirty(kvm, gfn);				\
+	__ret;								\
+})
+
+#define kvm_put_guest(kvm, gpa, value, type)				\
+({									\
+	gpa_t __gpa = gpa;						\
+	struct kvm *__kvm = kvm;					\
+	__kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT,			\
+			offset_in_page(__gpa), (value), type);		\
+})
+
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
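As a usage illustration (not part of the patch), a hypothetical caller that publishes a little-endian u32 flag at a guest physical address; example_set_guest_flag is an invented name, and this mirrors the pvsched update later in the series:

  #include <linux/kvm_host.h>

  /*
   * Hypothetical helper: kvm_put_guest() returns 0 on success and -EFAULT
   * if the GPA does not map to a valid memslot. The srcu read lock covers
   * the memslot lookup done by gfn_to_hva() inside the macro.
   */
  static int example_set_guest_flag(struct kvm *kvm, gpa_t base, u32 flag)
  {
  	__le32 val = cpu_to_le32(flag);
  	int idx, ret;

  	idx = srcu_read_lock(&kvm->srcu);
  	ret = kvm_put_guest(kvm, base, val, u32);
  	srcu_read_unlock(&kvm->srcu, idx);

  	return ret;
  }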
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit 541625ac47ce9d0835efaee0fcbaa251b0000a37
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
SMCCC 1.1 calls may use either HVC or SMC depending on the PSCI conduit. Rather than coding this in every call site, provide a macro which uses the correct instruction. The macro also handles the case where no conduit is configured/available, returning a not-supported error in res, along with returning the conduit used for the call.

This allows us to remove some duplicated code and will be useful later when adding paravirtualized time hypervisor calls.
Signed-off-by: Steven Price <steven.price@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/arm-smccc.h | 45 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 18863d562..78b5fc59b 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -311,5 +311,50 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 #define SMCCC_RET_NOT_SUPPORTED			-1
 #define SMCCC_RET_NOT_REQUIRED			-2

+/*
+ * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
+ * Used when the SMCCC conduit is not defined. The empty asm statement
+ * avoids compiler warnings about unused variables.
+ */
+#define __fail_smccc_1_1(...)						\
+	do {								\
+		__declare_args(__count_args(__VA_ARGS__), __VA_ARGS__);	\
+		asm ("" __constraints(__count_args(__VA_ARGS__)));	\
+		if (___res)						\
+			___res->a0 = SMCCC_RET_NOT_SUPPORTED;		\
+	} while (0)
+
+/*
+ * arm_smccc_1_1_invoke() - make an SMCCC v1.1 compliant call
+ *
+ * This is a variadic macro taking one to eight source arguments, and
+ * an optional return structure.
+ *
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+ *
+ * This macro will make either an HVC call or an SMC call depending on the
+ * current SMCCC conduit. If no valid conduit is available then -1
+ * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied).
+ *
+ * The return value also provides the conduit that was used.
+ */
+#define arm_smccc_1_1_invoke(...) ({					\
+		int method = psci_ops.conduit;				\
+		switch (method) {					\
+		case PSCI_CONDUIT_HVC:					\
+			arm_smccc_1_1_hvc(__VA_ARGS__);			\
+			break;						\
+		case PSCI_CONDUIT_SMC:					\
+			arm_smccc_1_1_smc(__VA_ARGS__);			\
+			break;						\
+		default:						\
+			__fail_smccc_1_1(__VA_ARGS__);			\
+			method = PSCI_CONDUIT_NONE;			\
+			break;						\
+		}							\
+		method;							\
+	})
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
From: Steven Price <steven.price@arm.com>

mainline inclusion
from mainline-v5.8-rc5
commit ce4d5ca2b9dd5d85944eb93c1bbf9eb11b7a907d
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Rather than directly choosing which function to use based on psci_ops.conduit, use the new arm_smccc_1_1_invoke() wrapper instead.
In some cases we still need to do some operations based on the conduit, but the code duplication is removed.
No functional change.
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/mm/proc-v7-bugs.c     | 13 +++---
 arch/arm64/kernel/cpu_errata.c | 82 ++++++++++++----------------------
 2 files changed, 34 insertions(+), 61 deletions(-)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index a6554fdb5..ac63a3134 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -81,12 +81,13 @@ static void cpu_v7_spectre_init(void)
 		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
 			break;

+		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+		if ((int)res.a0 != 0)
+			return;
+
 		switch (psci_ops.conduit) {
 		case PSCI_CONDUIT_HVC:
-			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
@@ -94,10 +95,6 @@ static void cpu_v7_spectre_init(void)
 			break;

 		case PSCI_CONDUIT_SMC:
-			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 7522163c1..d7107ecf3 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -217,40 +217,31 @@ static int detect_harden_bp_fw(void)
 	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
 		return -1;

+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+	switch ((int)res.a0) {
+	case 1:
+		/* Firmware says we're just fine */
+		return 0;
+	case 0:
+		break;
+	default:
+		return -1;
+	}
+
 	switch (psci_ops.conduit) {
 	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_hvc_arch_workaround_1;
-			/* This is a guest, no need to patch KVM vectors */
-			smccc_start = NULL;
-			smccc_end = NULL;
-			break;
-		default:
-			return -1;
-		}
+		cb = call_hvc_arch_workaround_1;
+		/* This is a guest, no need to patch KVM vectors */
+		smccc_start = NULL;
+		smccc_end = NULL;
 		break;

 	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_smc_arch_workaround_1;
-			smccc_start = __smccc_workaround_1_smc_start;
-			smccc_end = __smccc_workaround_1_smc_end;
-			break;
-		default:
-			return -1;
-		}
+		cb = call_smc_arch_workaround_1;
+		smccc_start = __smccc_workaround_1_smc_start;
+		smccc_end = __smccc_workaround_1_smc_end;
 		break;

 	default:
@@ -340,6 +331,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,

 void arm64_set_ssbd_mitigation(bool state)
 {
+	int conduit;
+
 	if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
 		pr_info_once("SSBD disabled by kernel configuration\n");
 		return;
@@ -353,19 +346,9 @@ void arm64_set_ssbd_mitigation(bool state)
 		return;
 	}

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
-
-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
-
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+				       NULL);
+	WARN_ON_ONCE(conduit == PSCI_CONDUIT_NONE);
 }

 static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -375,6 +358,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 	bool required = true;
 	s32 val;
 	bool this_cpu_safe = false;
+	int conduit;

 	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());

@@ -399,18 +383,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 		return false;
 	}

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				       ARM_SMCCC_ARCH_WORKAROUND_2, &res);

-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
-
-	default:
+	if (conduit == PSCI_CONDUIT_NONE) {
 		ssbd_state = ARM64_SSBD_UNKNOWN;
 		if (!this_cpu_safe)
 			__ssb_safe = false;
From: Waiman Long <longman@redhat.com>

mainline inclusion
from mainline-v5.8-rc5
commit f5bfdc8e3947a7ae489cf8ae9cfd6b3fb357b952
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Arm64 has a more optimized spinning loop (atomic_cond_read_acquire) using wfe for spinlock that can boost performance of sibling threads by putting the current cpu to a wait state that is broken only when the monitored variable changes or an external event happens.
OSQ has a more complicated spinning loop. Besides the lock value, it also checks for need_resched() and vcpu_is_preempted(). The check for need_resched() is not a problem as it is only set by the tick interrupt handler. That will be detected by the spinning cpu right after iret.
The vcpu_is_preempted() check, however, is a problem as changes to the preempt state of the previous node will not affect the wait state. For arm64, vcpu_is_preempted() is not currently defined and so is a no-op. Will has indicated that he is planning to para-virtualize wfe instead of defining vcpu_is_preempted() for PV support. So just add a comment in arch/arm64/include/asm/spinlock.h to indicate that vcpu_is_preempted() should not be defined as suggested.
On a 2-socket 56-core 224-thread ARM64 system, a kernel mutex locking microbenchmark was run for 10s with and without the patch. The performance numbers before patch were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 316/123,143/2,121,269
Threads = 224, Total Rate = 2,757 kop/s; Percpu Rate = 12 kop/s
After patch, the numbers were:
Running locktest with mutex [runtime = 10s, load = 1]
Threads = 224, Min/Mean/Max = 334/147,836/1,304,787
Threads = 224, Total Rate = 3,311 kop/s; Percpu Rate = 15 kop/s
So there was about 20% performance improvement.
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/20200113150735.21956-1-longman@redhat.com
---
 arch/arm64/include/asm/spinlock.h |  9 +++++++++
 kernel/locking/osq_lock.c         | 23 ++++++++++-------------
 2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 38116008d..1210e34b4 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -22,4 +22,13 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()

+/*
+ * Changing this will break osq_lock() thanks to the call inside
+ * smp_cond_load_relaxed().
+ *
+ * See:
+ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a...
+ */
+#define vcpu_is_preempted(cpu)	false
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 6ef600aa0..1f7734949 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -134,20 +134,17 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	 * cmpxchg in an attempt to undo our queueing.
 	 */

-	while (!READ_ONCE(node->locked)) {
-		/*
-		 * If we need to reschedule bail... so we can block.
-		 * Use vcpu_is_preempted() to avoid waiting for a preempted
-		 * lock holder:
-		 */
-		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))
-			goto unqueue;
-
-		cpu_relax();
-	}
-	return true;
+	/*
+	 * Wait to acquire the lock or cancelation. Note that need_resched()
+	 * will come with an IPI, which will wake smp_cond_load_relaxed() if it
+	 * is implemented with a monitor-wait. vcpu_is_preempted() relies on
+	 * polling, be careful.
+	 */
+	if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
+				  vcpu_is_preempted(node_cpu(node->prev))))
+		return true;

-unqueue:
+	/* unqueue */
 	/*
 	 * Step - A  -- stabilize @prev
 	 *
From: Qian Cai <cai@lca.pw>

mainline inclusion
from mainline-v5.8-rc5
commit 345d52c184dc7de98cff63f1bfa6f90e9db19809
category: bugfix
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
The commit f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64") introduced a warning from Clang because vcpu_is_preempted() is compiled away,
kernel/locking/osq_lock.c:25:19: warning: unused function 'node_cpu' [-Wunused-function]
static inline int node_cpu(struct optimistic_spin_node *node)
                  ^
1 warning generated.
Fix it by converting vcpu_is_preempted() to a static inline function.
Fixes: f5bfdc8e3947 ("locking/osq: Use optimized spinning loop for arm64")
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Waiman Long <longman@redhat.com>
---
 arch/arm64/include/asm/spinlock.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 1210e34b4..a9dec081b 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -29,6 +29,10 @@
  * See:
  * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a...
  */
-#define vcpu_is_preempted(cpu)	false
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return false;
+}

 #endif /* __ASM_SPINLOCK_H */
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Introduce a paravirtualization interface, PV-sched, for KVM/arm64.
A hypercall interface is provided for the guest to interrogate the hypervisor's support for this interface and the location of the shared memory structures.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 Documentation/virtual/kvm/arm/pvsched.txt | 48 +++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Documentation/virtual/kvm/arm/pvsched.txt
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt
new file mode 100644
index 000000000..9091dea7e
--- /dev/null
+++ b/Documentation/virtual/kvm/arm/pvsched.txt
@@ -0,0 +1,48 @@
+Paravirtualized sched support for arm64
+=======================================
+
+KVM/arm64 provides some hypervisor service calls to support a paravirtualized
+sched.
+
+Two new SMCCC compatible hypercalls are defined:
+
+* PV_SCHED_FEATURES: 0xC5000040
+* PV_SCHED_IPA:      0xC5000041
+
+The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1
+ARCH_FEATURES mechanism before calling it.
+
+PV_SCHED_FEATURES
+    ============= ========    ==========
+    Function ID:  (uint32)    0xC5000040
+    PV_call_id:   (uint32)    The function to query for support.
+    Return value: (int64)     NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
+                              PV-sched feature is supported by the hypervisor.
+    ============= ========    ==========
+
+PV_SCHED_IPA
+    ============= ========    ==========
+    Function ID:  (uint32)    0xC5000041
+    Return value: (int64)     NOT_SUPPORTED (-1) or SUCCESS (0) if the IPA of
+                              this vCPU's PV data structure is shared to the
+                              hypervisor.
+    ============= ========    ==========
+
+PV sched state
+--------------
+
+The structure pointed to by the PV_SCHED_IPA hypercall is as follows:
+
++-----------+-------------+-------------+-----------------------------------+
+| Field     | Byte Length | Byte Offset | Description                       |
++===========+=============+=============+===================================+
+| preempted |      4      |      0      | Indicates whether the vCPU that   |
+|           |             |             | owns this struct is running.      |
+|           |             |             | Non-zero values mean the vCPU has |
+|           |             |             | been preempted. Zero means the    |
+|           |             |             | vCPU is not preempted.            |
++-----------+-------------+-------------+-----------------------------------+
+
+The preempted field will be updated to 0 by the hypervisor prior to scheduling
+a vCPU. When the vCPU is scheduled out, the preempted field will be updated
+to 1 by the hypervisor.
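A hypothetical guest-side probe following the sequence described above; it assumes the arm_smccc_1_1_invoke() wrapper from earlier in this series, and uses raw function IDs with comments because the macro names only arrive in a later patch:

  #include <linux/arm-smccc.h>

  static bool example_pv_sched_supported(void)
  {
  	struct arm_smccc_res res;

  	/* Step 1: does the hypervisor implement PV_SCHED_FEATURES at all? */
  	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
  			     0xC5000040 /* PV_SCHED_FEATURES */, &res);
  	if (res.a0 != SMCCC_RET_SUCCESS)
  		return false;

  	/* Step 2: query PV_SCHED_FEATURES for the PV_SCHED_IPA sub-feature */
  	arm_smccc_1_1_invoke(0xC5000040 /* PV_SCHED_FEATURES */,
  			     0xC5000041 /* PV_SCHED_IPA */, &res);

  	return res.a0 == SMCCC_RET_SUCCESS;
  }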
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
This provides a mechanism for querying which paravirtualized sched features are available in this hypervisor.
Add two new SMCCC compatible hypercalls for PV sched features:
  PV_SCHED_FEATURES: 0xC5000040
  PV_SCHED_IPA:      0xC5000041
Also add the header file which defines the ABI for the paravirtualized sched features we're about to add.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 arch/arm/include/asm/kvm_host.h      |  6 ++++++
 arch/arm64/include/asm/kvm_host.h    |  2 ++
 arch/arm64/include/asm/pvsched-abi.h | 16 ++++++++++++++++
 arch/arm64/kvm/Makefile              |  1 +
 include/linux/arm-smccc.h            | 14 ++++++++++++++
 virt/kvm/arm/hypercalls.c            |  6 ++++++
 virt/kvm/arm/pvsched.c               | 23 +++++++++++++++++++++++
 7 files changed, 68 insertions(+)
 create mode 100644 arch/arm64/include/asm/pvsched-abi.h
 create mode 100644 virt/kvm/arm/pvsched.c
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 661bd0344..8f9a966a7 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,7 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__

+#include <linux/arm-smccc.h>
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/cputype.h>
@@ -302,6 +303,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
+{
+	return SMCCC_RET_NOT_SUPPORTED;
+}
+
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);

 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 62b6d0447..00662362b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -409,6 +409,8 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
+
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);

 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm64/include/asm/pvsched-abi.h b/arch/arm64/include/asm/pvsched-abi.h
new file mode 100644
index 000000000..80e50e7a1
--- /dev/null
+++ b/arch/arm64/include/asm/pvsched-abi.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye <yezengruan@huawei.com>
+ */
+
+#ifndef __ASM_PVSCHED_ABI_H
+#define __ASM_PVSCHED_ABI_H
+
+struct pvsched_vcpu_state {
+	__le32 preempted;
+	/* Structure must be 64 byte aligned, pad to that size */
+	u8 padding[60];
+} __packed;
+
+#endif
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d17040add..258f9fd91 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,6 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvsched.o

 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 78b5fc59b..159189b3a 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -54,6 +54,7 @@
 #define ARM_SMCCC_OWNER_SIP		2
 #define ARM_SMCCC_OWNER_OEM		3
 #define ARM_SMCCC_OWNER_STANDARD	4
+#define ARM_SMCCC_OWNER_STANDARD_HYP	5
 #define ARM_SMCCC_OWNER_TRUSTED_APP	48
 #define ARM_SMCCC_OWNER_TRUSTED_APP_END	49
 #define ARM_SMCCC_OWNER_TRUSTED_OS	50
@@ -356,5 +357,18 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 		method;							\
 	})

+/* Paravirtualised sched calls */
+#define ARM_SMCCC_HV_PV_SCHED_FEATURES				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
+			   ARM_SMCCC_SMC_64,			\
+			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
+			   0x40)
+
+#define ARM_SMCCC_HV_PV_SCHED_IPA				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
+			   ARM_SMCCC_SMC_64,			\
+			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
+			   0x41)
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
index 738df465b..780240bde 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/virt/kvm/arm/hypercalls.c
@@ -40,8 +40,14 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 				break;
 			}
 			break;
+		case ARM_SMCCC_HV_PV_SCHED_FEATURES:
+			val = SMCCC_RET_SUCCESS;
+			break;
 		}
 		break;
+	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
+		val = kvm_hypercall_pvsched_features(vcpu);
+		break;
 	default:
 		return kvm_psci_call(vcpu);
 	}
diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c
new file mode 100644
index 000000000..40b56e01f
--- /dev/null
+++ b/virt/kvm/arm/pvsched.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye <yezengruan@huawei.com>
+ */
+
+#include <linux/arm-smccc.h>
+
+#include <kvm/arm_hypercalls.h>
+
+int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
+{
+	u32 feature = smccc_get_arg1(vcpu);
+	int val = SMCCC_RET_NOT_SUPPORTED;
+
+	switch (feature) {
+	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
+		val = SMCCC_RET_SUCCESS;
+		break;
+	}
+
+	return val;
+}
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Implement the service call for configuring a shared structure between a vCPU and the hypervisor, in which the hypervisor can tell whether the vCPU is running or not.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 arch/arm/include/asm/kvm_host.h   | 14 ++++++++++++++
 arch/arm64/include/asm/kvm_host.h | 16 ++++++++++++++++
 include/linux/kvm_types.h         |  2 ++
 virt/kvm/arm/arm.c                |  8 ++++++++
 virt/kvm/arm/hypercalls.c         |  8 ++++++++
 virt/kvm/arm/pvsched.c            | 22 ++++++++++++++++++++++
 6 files changed, 70 insertions(+)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8f9a966a7..ead428a93 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -303,6 +303,20 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+}
+
+static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return false;
+}
+
+static inline void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu,
+						u32 preempted)
+{
+}
+
 static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 {
 	return SMCCC_RET_NOT_SUPPORTED;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 00662362b..93e01ded7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -324,6 +324,11 @@ struct kvm_vcpu_arch {
 	/* True when deferrable sysregs are loaded on the physical CPU,
 	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
 	bool sysregs_loaded_on_cpu;
+
+	/* Guest PV sched state */
+	struct {
+		gpa_t base;
+	} pvsched;
 };

 /* vcpu_arch flags field values: */
@@ -409,6 +414,17 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+	vcpu_arch->pvsched.base = GPA_INVALID;
+}
+
+static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return (vcpu_arch->pvsched.base != GPA_INVALID);
+}
+
+void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted);
+
 int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);

 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a38729c82..458ec5b0e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -49,6 +49,8 @@ typedef unsigned long  gva_t;
 typedef u64            gpa_t;
 typedef u64            gfn_t;

+#define GPA_INVALID	(~(gpa_t)0)
+
 typedef unsigned long  hva_t;
 typedef u64            hpa_t;
 typedef u64            hfn_t;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index d1fef368c..19add1742 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -373,6 +373,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

 	kvm_arm_reset_debug_ptr(vcpu);

+	kvm_arm_pvsched_vcpu_init(&vcpu->arch);
+
 	return kvm_vgic_vcpu_init(vcpu);
 }

@@ -407,6 +409,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu_clear_wfe_traps(vcpu);
 	else
 		vcpu_set_wfe_traps(vcpu);
+
+	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
+		kvm_update_pvsched_preempted(vcpu, 0);
 }

 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -420,6 +425,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;

 	kvm_arm_set_running_vcpu(NULL);
+
+	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
+		kvm_update_pvsched_preempted(vcpu, 1);
 }

 static void vcpu_power_off(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
index 780240bde..3a18c14e7 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/virt/kvm/arm/hypercalls.c
@@ -14,6 +14,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 	u32 func_id = smccc_get_function(vcpu);
 	u32 val = SMCCC_RET_NOT_SUPPORTED;
 	u32 feature;
+	gpa_t gpa;

 	switch (func_id) {
 	case ARM_SMCCC_VERSION_FUNC_ID:
@@ -48,6 +49,13 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
 		val = kvm_hypercall_pvsched_features(vcpu);
 		break;
+	case ARM_SMCCC_HV_PV_SCHED_IPA:
+		gpa = smccc_get_arg1(vcpu);
+		if (gpa != GPA_INVALID) {
+			vcpu->arch.pvsched.base = gpa;
+			val = SMCCC_RET_SUCCESS;
+		}
+		break;
 	default:
 		return kvm_psci_call(vcpu);
 	}
diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c
index 40b56e01f..22c5f6e0b 100644
--- a/virt/kvm/arm/pvsched.c
+++ b/virt/kvm/arm/pvsched.c
@@ -5,9 +5,30 @@
  */

 #include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/pvsched-abi.h>

 #include <kvm/arm_hypercalls.h>

+void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
+{
+	__le32 preempted_le;
+	u64 offset;
+	int idx;
+	u64 base = vcpu->arch.pvsched.base;
+	struct kvm *kvm = vcpu->kvm;
+
+	if (base == GPA_INVALID)
+		return;
+
+	preempted_le = cpu_to_le32(preempted);
+
+	idx = srcu_read_lock(&kvm->srcu);
+	offset = offsetof(struct pvsched_vcpu_state, preempted);
+	kvm_put_guest(kvm, base + offset, preempted_le, u32);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 {
 	u32 feature = smccc_get_arg1(vcpu);
@@ -15,6 +36,7 @@ int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)

 	switch (feature) {
 	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
+	case ARM_SMCCC_HV_PV_SCHED_IPA:
 		val = SMCCC_RET_SUCCESS;
 		break;
 	}
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
This is to fix some lock holder preemption issues. Some other lock implementations do a spin loop before acquiring the lock itself. The kernel currently has an interface, bool vcpu_is_preempted(int cpu), which takes a CPU as its parameter and returns true if that CPU is preempted. The kernel can then break out of spin loops based on the return value of vcpu_is_preempted().

As the kernel already uses this interface, let's support it on arm64.
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 arch/arm64/include/asm/paravirt.h      | 12 ++++++++++++
 arch/arm64/include/asm/spinlock.h      | 10 ++++++++++
 arch/arm64/kernel/Makefile             |  2 +-
 arch/arm64/kernel/paravirt-spinlocks.c | 13 +++++++++++++
 arch/arm64/kernel/paravirt.c           |  4 +++-
 5 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kernel/paravirt-spinlocks.c
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 799d9dd6f..ff266c66c 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -11,8 +11,13 @@ struct pv_time_ops {
 	unsigned long long (*steal_clock)(int cpu);
 };

+struct pv_sched_ops {
+	bool (*vcpu_is_preempted)(int cpu);
+};
+
 struct paravirt_patch_template {
 	struct pv_time_ops time;
+	struct pv_sched_ops sched;
 };

 extern struct paravirt_patch_template pv_ops;
@@ -21,6 +26,13 @@ static inline u64 paravirt_steal_clock(int cpu)
 {
 	return pv_ops.time.steal_clock(cpu);
 }
+
+__visible bool __native_vcpu_is_preempted(int cpu);
+
+static inline bool pv_vcpu_is_preempted(int cpu)
+{
+	return pv_ops.sched.vcpu_is_preempted(cpu);
+}
+
 #endif

 #endif
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index a9dec081b..4a6689950 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -18,6 +18,7 @@

 #include <asm/qrwlock.h>
 #include <asm/qspinlock.h>
+#include <asm/paravirt.h>

 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
@@ -30,9 +31,18 @@
  * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-a...
  */
 #define vcpu_is_preempted vcpu_is_preempted
+#ifdef CONFIG_PARAVIRT
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return pv_vcpu_is_preempted(cpu);
+}
+
+#else
+
 static inline bool vcpu_is_preempted(int cpu)
 {
 	return false;
 }
+#endif /* CONFIG_PARAVIRT */

 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 54d0b1d38..73bc1db45 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -48,7 +48,7 @@ arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 arm64-obj-$(CONFIG_ARM64_ERR_RECOV)	+= ras.o
 arm64-obj-$(CONFIG_ACPI_NUMA)		+= acpi_numa.o
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
-arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt-spinlocks.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c
new file mode 100644
index 000000000..fd733eb02
--- /dev/null
+++ b/arch/arm64/kernel/paravirt-spinlocks.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye <yezengruan@huawei.com>
+ */
+
+#include <linux/spinlock.h>
+#include <asm/paravirt.h>
+
+__visible bool __native_vcpu_is_preempted(int cpu)
+{
+	return false;
+}
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75c158b03..3a410dbaa 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -21,5 +21,7 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;

-struct paravirt_patch_template pv_ops;
+struct paravirt_patch_template pv_ops = {
+	.sched.vcpu_is_preempted = __native_vcpu_is_preempted,
+};
 EXPORT_SYMBOL_GPL(pv_ops);
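A hypothetical spin-loop sketch (not part of the patch) showing how generic locking code is expected to consume the new hook; example_spin_wait and its lock argument are illustrative, mirroring what osq_lock() does with vcpu_is_preempted() earlier in this series:

  #include <linux/atomic.h>
  #include <linux/sched.h>
  #include <linux/spinlock.h>

  /*
   * Hypothetical spin loop: bail out instead of busy-waiting when the
   * lock holder's vCPU has been preempted by the host.
   */
  static bool example_spin_wait(atomic_t *locked, int holder_cpu)
  {
  	while (!atomic_read(locked)) {
  		if (need_resched() || vcpu_is_preempted(holder_cpu))
  			return false;	/* give up the CPU instead of spinning */
  		cpu_relax();
  	}
  	return true;
  }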
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Support the vcpu_is_preempted() functionality under KVM/arm64. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding.
unix benchmark result:
  host:  kernel 4.19.87, HiSilicon Kunpeng920, 8 CPUs
  guest: kernel 4.19.87, 16 vCPUs

 test-case                               | after-patch       | before-patch
-----------------------------------------+-------------------+------------------
 Dhrystone 2 using register variables    | 338955728.5 lps   | 339266319.5 lps
 Double-Precision Whetstone              |   30634.9 MWIPS   |   30884.4 MWIPS
 Execl Throughput                        |    6753.2 lps     |    3580.1 lps
 File Copy 1024 bufsize 2000 maxblocks   |  490048.0 KBps    |  313282.3 KBps
 File Copy 256 bufsize 500 maxblocks     |  129662.5 KBps    |   83550.7 KBps
 File Copy 4096 bufsize 8000 maxblocks   | 1552551.5 KBps    |  814327.0 KBps
 Pipe Throughput                         | 8976422.5 lps     | 9048628.4 lps
 Pipe-based Context Switching            |  258641.7 lps     |  252925.9 lps
 Process Creation                        |    5312.2 lps     |    4507.9 lps
 Shell Scripts (1 concurrent)            |    8704.2 lpm     |    6720.9 lpm
 Shell Scripts (8 concurrent)            |    1708.8 lpm     |     607.2 lpm
 System Call Overhead                    | 3714444.7 lps     | 3746386.8 lps
-----------------------------------------+-------------------+------------------
 System Benchmarks Index Score           |    2270.6         |    1679.2
Signed-off-by: Zengruan Ye <yezengruan@huawei.com>
---
 arch/arm64/include/asm/paravirt.h |   8 ++-
 arch/arm64/kernel/paravirt.c      | 116 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c         |   2 +
 include/linux/cpuhotplug.h        |   1 +
 4 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index ff266c66c..62e9ba70f 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -27,12 +27,18 @@ static inline u64 paravirt_steal_clock(int cpu)
 	return pv_ops.time.steal_clock(cpu);
 }

+int __init pv_sched_init(void);
+
 __visible bool __native_vcpu_is_preempted(int cpu);

 static inline bool pv_vcpu_is_preempted(int cpu)
 {
 	return pv_ops.sched.vcpu_is_preempted(cpu);
 }

-#endif
+#else
+
+#define pv_sched_init() do {} while (0)
+
+#endif /* CONFIG_PARAVIRT */

 #endif
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 3a410dbaa..834819c80 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -13,10 +13,18 @@
  * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  */

+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
 #include <linux/export.h>
+#include <linux/io.h>
 #include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
 #include <linux/types.h>
+
 #include <asm/paravirt.h>
+#include <asm/pvsched-abi.h>

 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
@@ -25,3 +33,111 @@ struct paravirt_patch_template pv_ops = {
 	.sched.vcpu_is_preempted = __native_vcpu_is_preempted,
 };
 EXPORT_SYMBOL_GPL(pv_ops);
+
+DEFINE_PER_CPU(struct pvsched_vcpu_state, pvsched_vcpu_region) __aligned(64);
+EXPORT_PER_CPU_SYMBOL(pvsched_vcpu_region);
+
+static bool kvm_vcpu_is_preempted(int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+	u32 preempted;
+
+	reg = &per_cpu(pvsched_vcpu_region, cpu);
+	if (!reg) {
+		pr_warn_once("PV sched enabled but not configured for cpu %d\n",
+			     cpu);
+		return false;
+	}
+
+	preempted = le32_to_cpu(READ_ONCE(reg->preempted));
+
+	return !!preempted;
+}
+
+static int pvsched_vcpu_state_dying_cpu(unsigned int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+
+	reg = this_cpu_ptr(&pvsched_vcpu_region);
+	if (!reg)
+		return -EFAULT;
+
+	memset(reg, 0, sizeof(*reg));
+
+	return 0;
+}
+
+static int init_pvsched_vcpu_state(unsigned int cpu)
+{
+	struct pvsched_vcpu_state *reg;
+	struct arm_smccc_res res;
+
+	reg = this_cpu_ptr(&pvsched_vcpu_region);
+	if (!reg)
+		return -EFAULT;
+
+	/* Pass the memory address to host via hypercall */
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_IPA,
+			     virt_to_phys(reg), &res);
+
+	return 0;
+}
+
+static int kvm_arm_init_pvsched(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
+				"hypervisor/arm/pvsched:starting",
+				init_pvsched_vcpu_state,
+				pvsched_vcpu_state_dying_cpu);
+	if (ret < 0) {
+		pr_warn("PV sched init failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static bool has_kvm_pvsched(void)
+{
+	struct arm_smccc_res res;
+
+	/* To detect the presence of PV sched support we require SMCCC 1.1+ */
+	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_HV_PV_SCHED_FEATURES, &res);
+	if (res.a0 != SMCCC_RET_SUCCESS)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_FEATURES,
+			     ARM_SMCCC_HV_PV_SCHED_IPA, &res);
+
+	return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_sched_init(void)
+{
+	int ret;
+
+	if (is_hyp_mode_available())
+		return 0;
+
+	if (!has_kvm_pvsched()) {
+		pr_warn("PV sched is not available\n");
+		return 0;
+	}
+
+	ret = kvm_arm_init_pvsched();
+	if (ret)
+		return ret;
+
+	pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted;
+	pr_info("using PV sched preempted\n");
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 155b8a61f..b3569d16a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -360,6 +360,8 @@ void __init setup_arch(char **cmdline_p)
 	smp_init_cpus();
 	smp_build_mpidr_hash();

+	pv_sched_init();
+
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 	/*
 	 * Make sure init_thread_info.ttbr0 always generates translation
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d67c00351..0244b684d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -134,6 +134,7 @@ enum cpuhp_state {
 	/* Must be the last timer callback */
 	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
+	CPUHP_AP_ARM_KVM_PVSCHED_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_STARTING,
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
-----Original Message-----
From: yezengruan
Sent: Tuesday, August 4, 2020 3:42 PM
To: Xiexiuqi <xiexiuqi@huawei.com>; Guohanjun (Hanjun Guo) <guohanjun@huawei.com>
Cc: Wanghaibin (D) <wanghaibin.wang@huawei.com>; Fanhenglong <fanhenglong@huawei.com>; yezengruan <yezengruan@huawei.com>; Zhanghailiang <zhang.zhanghailiang@huawei.com>; kernel.openeuler <kernel.openeuler@huawei.com>; Chenzhendong (alex) <alex.chen@huawei.com>; virt@openeuler.org; Xiexiangyou <xiexiangyou@huawei.com>; yuzenghui <yuzenghui@huawei.com>
Subject: [PATCH hulk-4.19-next v3 12/17] KVM: arm64: Support the vCPU preemption check
> euleros inclusion
> category: feature
> bugzilla: NA
> DTS: NA
> CVE: NA
>
> --------------------------------
>
> Support the vcpu_is_preempted() functionality under KVM/arm64.

[...]

> +static bool has_kvm_pvsched(void)
> +{
> +	struct arm_smccc_res res;
> +
> +	/* To detect the presence of PV sched support we require SMCCC 1.1+ */
> +	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
> +		return false;
> +
> +	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
> +			     ARM_SMCCC_HV_PV_SCHED_FEATURES, &res);
> +	if (res.a0 != SMCCC_RET_SUCCESS)
> +		return false;
> +
> +	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_FEATURES,
> +			     ARM_SMCCC_HV_PV_SCHED_IPA, &res);

Why does this need to probe both function IDs? In theory, checking
ARM_SMCCC_HV_PV_SCHED_FEATURES alone should be enough, right? Or is it
because the feature only works when both of these calls are supported?

+
+	return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_sched_init(void)
+{

The return value is never checked by the caller; should this be changed to
void?

+	int ret;
+
+	if (is_hyp_mode_available())
+		return 0;
+
+	if (!has_kvm_pvsched()) {
+		pr_warn("PV sched is not available\n");
+		return 0;
+	}
+
+	ret = kvm_arm_init_pvsched();
+	if (ret)
+		return ret;
+
+	pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted;
+	pr_info("using PV sched preempted\n");
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 155b8a61f..b3569d16a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -360,6 +360,8 @@ void __init setup_arch(char **cmdline_p)
 	smp_init_cpus();
 	smp_build_mpidr_hash();
 
+	pv_sched_init();
+
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 	/*
 	 * Make sure init_thread_info.ttbr0 always generates translation
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d67c00351..0244b684d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -134,6 +134,7 @@ enum cpuhp_state {
 	/* Must be the last timer callback */
 	CPUHP_AP_DUMMY_TIMER_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
+	CPUHP_AP_ARM_KVM_PVSCHED_STARTING,

Doesn't inserting a value here change the kABI? Or should the new definition
be placed at the end of the enum instead?

 	CPUHP_AP_ARM_CORESIGHT_STARTING,
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
--
2.19.1
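For context, the host-side half of this contract is
kvm_update_pvsched_preempted(), added by an earlier patch in this series: the
host flips the shared preempted field whenever the vCPU is scheduled in or
out (presumably from kvm_arch_vcpu_load()/kvm_arch_vcpu_put() in
virt/kvm/arm/arm.c). A minimal sketch of where such a hook sits;
vcpu_sched_transition_sketch() is a purely illustrative name:

  /*
   * Hedged sketch, host side: publish the preempted state to the guest's
   * shared per-vCPU region on every scheduling transition.
   */
  static void vcpu_sched_transition_sketch(struct kvm_vcpu *vcpu, bool scheduled_out)
  {
          if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
                  kvm_update_pvsched_preempted(vcpu, scheduled_out ? 1 : 0);
  }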
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
A new hypercall interface function is provided for the guest to kick a vCPU
out of WFI state.
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 Documentation/virtual/kvm/arm/pvsched.txt | 18 +++++++++++++++++-
 include/linux/arm-smccc.h                 |  6 ++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt
index 9091dea7e..f8b746a7f 100644
--- a/Documentation/virtual/kvm/arm/pvsched.txt
+++ b/Documentation/virtual/kvm/arm/pvsched.txt
@@ -4,10 +4,11 @@ Paravirtualized sched support for arm64
 KVM/arm64 provides some hypervisor service calls to support a paravirtualized
 sched.
 
-Two new SMCCC compatible hypercalls are defined:
+Three new SMCCC compatible hypercalls are defined:
 
 * PV_SCHED_FEATURES: 0xC5000040
 * PV_SCHED_IPA:      0xC5000041
+* PV_SCHED_KICK_CPU: 0xC5000042
 
 The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1
 ARCH_FEATURES mechanism before calling it.
@@ -28,6 +29,13 @@ PV_SCHED_IPA
                            hypervisor.
     ============= ======== ==========
 
+PV_SCHED_KICK_CPU
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000042
+    Return value: (int64)  NOT_SUPPORTED (-1) or SUCCESS (0) if the vCPU is
+                           kicked by the hypervisor.
+    ============= ======== ==========
+
 PV sched state
 --------------
 
@@ -46,3 +54,11 @@ The structure pointed to by the PV_SCHED_IPA hypercall is as follows:
 The preempted field will be updated to 0 by the hypervisor prior to scheduling
 a vCPU. When the vCPU is scheduled out, the preempted field will be updated
 to 1 by the hypervisor.
+
+A vCPU of a paravirtualized guest that is busywaiting in guest kernel mode for
+an event to occur (ex: a spinlock to become available) can execute WFI
+instruction once it has busy-waited for more than a threshold time-interval.
+Execution of WFI instruction would cause the hypervisor to put the vCPU to
+sleep until occurrence of an appropriate event. Another vCPU of the same guest
+can wakeup the sleeping vCPU by issuing PV_SCHED_KICK_CPU hypercall, specifying
+CPU id (reg1) of the vCPU to be woken up.
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 159189b3a..950d29492 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -370,5 +370,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
 			   0x41)
 
+#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
+			   ARM_SMCCC_SMC_64,			\
+			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
+			   0x42)
+
 #endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
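To make the calling convention above concrete, a guest-side invocation of
PV_SCHED_KICK_CPU looks roughly like the sketch below; pv_kick_vcpu_sketch()
is an illustrative name (the series itself adds the real helper,
kvm_kick_cpu(), in a later patch):

  /*
   * Hedged sketch, guest side: arg1 of the SMCCC fast call carries the
   * logical CPU id of the vCPU to wake, as documented above.
   */
  static void pv_kick_vcpu_sketch(int cpu)
  {
          struct arm_smccc_res res;

          arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res);
          if (res.a0 != SMCCC_RET_SUCCESS)
                  pr_warn_once("PV_SCHED_KICK_CPU failed for cpu %d\n", cpu);
  }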
-----Original Message-----
From: yezengruan
Sent: Tuesday, August 4, 2020 3:42 PM
To: Xiexiuqi xiexiuqi@huawei.com; Guohanjun (Hanjun Guo) guohanjun@huawei.com
Cc: Wanghaibin (D) wanghaibin.wang@huawei.com; Fanhenglong fanhenglong@huawei.com; yezengruan yezengruan@huawei.com; Zhanghailiang zhang.zhanghailiang@huawei.com; kernel.openeuler kernel.openeuler@huawei.com; Chenzhendong (alex) alex.chen@huawei.com; virt@openeuler.org; Xiexiangyou xiexiangyou@huawei.com; yuzenghui yuzenghui@huawei.com
Subject: [PATCH hulk-4.19-next v3 13/17] KVM: arm64: Add SMCCC PV-sched to kick cpu
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA

--------------------------------
A new hypercall interface function is provided for the guest to kick a vCPU
out of WFI state.
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 Documentation/virtual/kvm/arm/pvsched.txt | 18 +++++++++++++++++-
 include/linux/arm-smccc.h                 |  6 ++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/Documentation/virtual/kvm/arm/pvsched.txt b/Documentation/virtual/kvm/arm/pvsched.txt
index 9091dea7e..f8b746a7f 100644
--- a/Documentation/virtual/kvm/arm/pvsched.txt
+++ b/Documentation/virtual/kvm/arm/pvsched.txt
@@ -4,10 +4,11 @@ Paravirtualized sched support for arm64
 KVM/arm64 provides some hypervisor service calls to support a paravirtualized
 sched.
 
-Two new SMCCC compatible hypercalls are defined:
+Three new SMCCC compatible hypercalls are defined:
 
 * PV_SCHED_FEATURES: 0xC5000040
 * PV_SCHED_IPA:      0xC5000041
+* PV_SCHED_KICK_CPU: 0xC5000042
 
 The existence of the PV_SCHED hypercall should be probed using the SMCCC 1.1
 ARCH_FEATURES mechanism before calling it.
@@ -28,6 +29,13 @@ PV_SCHED_IPA
                            hypervisor.
     ============= ======== ==========
 
+PV_SCHED_KICK_CPU
+    ============= ======== ==========
+    Function ID:  (uint32) 0xC5000042
+    Return value: (int64)  NOT_SUPPORTED (-1) or SUCCESS (0) if the vCPU is
+                           kicked by the hypervisor.
+    ============= ======== ==========
+
 PV sched state
 --------------
 
@@ -46,3 +54,11 @@ The structure pointed to by the PV_SCHED_IPA hypercall is as follows:
 The preempted field will be updated to 0 by the hypervisor prior to scheduling
 a vCPU. When the vCPU is scheduled out, the preempted field will be updated
 to 1 by the hypervisor.
+
+A vCPU of a paravirtualized guest that is busywaiting in guest kernel mode for
+an event to occur (ex: a spinlock to become available) can execute WFI
+instruction once it has busy-waited for more than a threshold time-interval.
+Execution of WFI instruction would cause the hypervisor to put the vCPU to
+sleep until occurrence of an appropriate event. Another vCPU of the same guest
+can wakeup the sleeping vCPU by issuing PV_SCHED_KICK_CPU hypercall, specifying
+CPU id (reg1) of the vCPU to be woken up.
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 159189b3a..950d29492 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -370,5 +370,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
 			   0x41)
 
+#define ARM_SMCCC_HV_PV_SCHED_KICK_CPU				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
+			   ARM_SMCCC_SMC_64,			\
+			   ARM_SMCCC_OWNER_STANDARD_HYP,	\
+			   0x42)
+

Suggestion: pick a value from further down the range. If these patches never
land upstream and a later mainline kernel allocates this value, it will cause
a compatibility problem!

#endif /*__ASSEMBLY__*/
#endif /*__LINUX_ARM_SMCCC_H*/
--
2.19.1
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Implement the service call for waking up a vCPU that is in WFI state.
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 arch/arm/include/asm/kvm_host.h   |  5 +++++
 arch/arm64/include/asm/kvm_host.h |  2 ++
 arch/arm64/kvm/handle_exit.c      |  1 +
 virt/kvm/arm/arm.c                |  4 +++-
 virt/kvm/arm/hypercalls.c         |  3 +++
 virt/kvm/arm/pvsched.c            | 25 +++++++++++++++++++++++++
 6 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ead428a93..597f9532d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -322,6 +322,11 @@ static inline int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 	return SMCCC_RET_NOT_SUPPORTED;
 }
 
+static inline int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+	return SMCCC_RET_NOT_SUPPORTED;
+}
+
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 93e01ded7..ec2d86cc7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -327,6 +327,7 @@ struct kvm_vcpu_arch {
 
 	/* Guest PV sched state */
 	struct {
+		bool pv_unhalted;
 		gpa_t base;
 	} pvsched;
 };
@@ -426,6 +427,7 @@ static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
 
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted);
 int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
+int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu);
 
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
 
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e893528cd..3d1a08544 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -107,6 +107,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
 		vcpu->stat.wfi_exit_stat++;
+		vcpu->arch.pvsched.pv_unhalted = false;
 		kvm_vcpu_block(vcpu);
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 	}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 19add1742..dda81cea2 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -477,7 +477,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
-	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
+	bool pv_unhalted = v->arch.pvsched.pv_unhalted;
+
+	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v) || pv_unhalted)
 		&& !v->arch.power_off && !v->arch.pause);
 }
 
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
index 3a18c14e7..3f7b2c25e 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/virt/kvm/arm/hypercalls.c
@@ -56,6 +56,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 			val = SMCCC_RET_SUCCESS;
 		}
 		break;
+	case ARM_SMCCC_HV_PV_SCHED_KICK_CPU:
+		val = kvm_pvsched_kick_vcpu(vcpu);
+		break;
 	default:
 		return kvm_psci_call(vcpu);
 	}
diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c
index 22c5f6e0b..ce6fbb20c 100644
--- a/virt/kvm/arm/pvsched.c
+++ b/virt/kvm/arm/pvsched.c
@@ -29,6 +29,30 @@ void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
+int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+	unsigned int vcpu_idx;
+	int val = SMCCC_RET_NOT_SUPPORTED;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *target = NULL;
+
+	vcpu_idx = smccc_get_arg1(vcpu);
+	target = kvm_get_vcpu(kvm, vcpu_idx);
+	if (!target)
+		goto out;
+
+	target->arch.pvsched.pv_unhalted = true;
+	kvm_make_request(KVM_REQ_IRQ_PENDING, target);
+	kvm_vcpu_kick(target);
+	if (READ_ONCE(target->ready))
+		kvm_vcpu_yield_to(target);
+
+	val = SMCCC_RET_SUCCESS;
+
+out:
+	return val;
+}
+
 int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 {
 	u32 feature = smccc_get_arg1(vcpu);
@@ -37,6 +61,7 @@ int kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu)
 	switch (feature) {
 	case ARM_SMCCC_HV_PV_SCHED_FEATURES:
 	case ARM_SMCCC_HV_PV_SCHED_IPA:
+	case ARM_SMCCC_HV_PV_SCHED_KICK_CPU:
 		val = SMCCC_RET_SUCCESS;
 		break;
 	}
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
The generic kernel code already makes use of this interface, so let's
support it on arm64.
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 arch/arm64/Kconfig                          | 14 ++++++++++
 arch/arm64/include/asm/paravirt.h           | 29 +++++++++++++++++++++
 arch/arm64/include/asm/qspinlock.h          | 15 ++++++++---
 arch/arm64/include/asm/qspinlock_paravirt.h | 12 +++++++++
 arch/arm64/include/asm/spinlock.h           |  3 +++
 arch/arm64/kernel/Makefile                  |  1 +
 arch/arm64/kernel/alternative.c             |  5 ++--
 arch/arm64/kernel/paravirt-spinlocks.c      |  5 ++++
 arch/arm64/kernel/paravirt.c                |  4 +++
 9 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/include/asm/qspinlock_paravirt.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b818273ef..0c5118b13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -818,6 +818,7 @@ config NODES_SHIFT
 config NUMA_AWARE_SPINLOCKS
 	bool "Numa-aware spinlocks"
 	depends on NUMA && QUEUED_SPINLOCKS
+	depends on PARAVIRT_SPINLOCKS
 	default n
 	help
 	  Introduce NUMA (Non Uniform Memory Access) awareness into
@@ -902,6 +903,19 @@ config PARAVIRT
 	  under a hypervisor, potentially improving performance significantly
 	  over full virtualization.
 
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtualization layer for spinlocks"
+	depends on PARAVIRT && SMP
+	help
+	  Paravirtualized spinlocks allow a pvops backend to replace the
+	  spinlock implementation with something virtualization-friendly
+	  (for example, block the virtual CPU rather than spinning).
+
+	  It has a minimal impact on native kernels and gives a nice performance
+	  benefit on paravirtualized KVM kernels.
+
+	  If you are unsure how to answer this question, answer Y.
+
 config PARAVIRT_TIME_ACCOUNTING
 	bool "Paravirtual steal time accounting"
 	select PARAVIRT
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 62e9ba70f..256e3f9df 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -12,6 +12,12 @@ struct pv_time_ops {
 };
 
 struct pv_sched_ops {
+	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+	void (*queued_spin_unlock)(struct qspinlock *lock);
+
+	void (*wait)(u8 *ptr, u8 val);
+	void (*kick)(int cpu);
+
 	bool (*vcpu_is_preempted)(int cpu);
 };
 
@@ -35,6 +41,29 @@ static inline bool pv_vcpu_is_preempted(int cpu)
 	return pv_ops.sched.vcpu_is_preempted(cpu);
 }
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+bool pv_is_native_spin_unlock(void);
+static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	return pv_ops.sched.queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	return pv_ops.sched.queued_spin_unlock(lock);
+}
+
+static inline void pv_wait(u8 *ptr, u8 val)
+{
+	return pv_ops.sched.wait(ptr, val);
+}
+
+static inline void pv_kick(int cpu)
+{
+	return pv_ops.sched.kick(cpu);
+}
+#endif /* SMP && PARAVIRT_SPINLOCKS */
+
 #else
 
 #define pv_sched_init() do {} while (0)
diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h
index fbe176fd4..0022d446a 100644
--- a/arch/arm64/include/asm/qspinlock.h
+++ b/arch/arm64/include/asm/qspinlock.h
@@ -2,12 +2,19 @@
 #ifndef _ASM_ARM64_QSPINLOCK_H
 #define _ASM_ARM64_QSPINLOCK_H
 
-#ifdef CONFIG_NUMA_AWARE_SPINLOCKS
 #include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define _Q_PENDING_LOOPS	(1 << 9)
 
+#ifdef CONFIG_NUMA_AWARE_SPINLOCKS
 extern void __cna_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#endif
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void (*cna_queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 
 #define queued_spin_unlock queued_spin_unlock
 /**
@@ -23,12 +30,12 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock)
 
 static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
-	cna_queued_spin_lock_slowpath(lock, val);
+	pv_queued_spin_lock_slowpath(lock, val);
 }
 
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-	native_queued_spin_unlock(lock);
+	pv_queued_spin_unlock(lock);
 }
 #endif
 
diff --git a/arch/arm64/include/asm/qspinlock_paravirt.h b/arch/arm64/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000..eba4be28f
--- /dev/null
+++ b/arch/arm64/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye yezengruan@huawei.com
+ */
+
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+#endif
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 4a6689950..177787940 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -20,6 +20,9 @@
 #include <asm/qspinlock.h>
 #include <asm/paravirt.h>
 
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD	(1 << 15)
+
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 73bc1db45..ac110f1b6 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -49,6 +49,7 @@ arm64-obj-$(CONFIG_ARM64_ERR_RECOV)	+= ras.o
 arm64-obj-$(CONFIG_ACPI_NUMA)		+= acpi_numa.o
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt-spinlocks.o
+arm64-obj-$(CONFIG_PARAVIRT_SPINLOCKS)	+= paravirt.o paravirt-spinlocks.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index f336aa39d..faf737196 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -26,6 +26,7 @@
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
 #include <asm/sections.h>
+#include <asm/paravirt.h>
 #include <linux/stop_machine.h>
 
 #define __ALT_PTR(a,f)		((void *)&(a)->f + (a)->f)
@@ -303,9 +304,9 @@ void __init apply_boot_alternatives(void)
 	 */
 	if ((numa_spinlock_flag == 1) ||
 	    (numa_spinlock_flag == 0 && nr_node_ids > 1 &&
-	    cna_queued_spin_lock_slowpath ==
+	    pv_ops.sched.queued_spin_lock_slowpath ==
 			native_queued_spin_lock_slowpath)) {
-		cna_queued_spin_lock_slowpath =
+		pv_ops.sched.queued_spin_lock_slowpath =
 			__cna_queued_spin_lock_slowpath;
 	}
 #endif
diff --git a/arch/arm64/kernel/paravirt-spinlocks.c b/arch/arm64/kernel/paravirt-spinlocks.c
index fd733eb02..3cb43f9e6 100644
--- a/arch/arm64/kernel/paravirt-spinlocks.c
+++ b/arch/arm64/kernel/paravirt-spinlocks.c
@@ -11,3 +11,8 @@ __visible bool __native_vcpu_is_preempted(int cpu)
 {
 	return false;
 }
+
+bool pv_is_native_spin_unlock(void)
+{
+	return false;
+}
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 834819c80..78a6e200e 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -30,6 +30,10 @@ struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
 struct paravirt_patch_template pv_ops = {
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	.sched.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+	.sched.queued_spin_unlock = native_queued_spin_unlock,
+#endif
 	.sched.vcpu_is_preempted = __native_vcpu_is_preempted,
 };
 EXPORT_SYMBOL_GPL(pv_ops);
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Linux kernel builds were run in a KVM guest on a HiSilicon Kunpeng920 system.
VM guests were set up with 32, 48 and 60 vCPUs on the 32 physical CPUs. The
kernel build (make -j<n>) was done in a VM with unpinned vCPUs three times,
with the best time selected; <n> is the number of vCPUs available. The build
times of the original Linux 4.19.87 and of pvqspinlock with various numbers
of vCPUs are as follows:
Kernel         32 vCPUs    48 vCPUs    60 vCPUs
-----------    --------    --------    --------
4.19.87        342.336s    602.048s    950.340s
pvqspinlock    341.366s    376.135s    437.037s
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 arch/arm64/include/asm/paravirt.h |  5 +++
 arch/arm64/kernel/paravirt.c      | 60 +++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 256e3f9df..10ec0610a 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -42,6 +42,7 @@ static inline bool pv_vcpu_is_preempted(int cpu)
 }
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+void __init pv_qspinlock_init(void);
 bool pv_is_native_spin_unlock(void);
 static inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
@@ -62,6 +63,10 @@ static inline void pv_kick(int cpu)
 {
 	return pv_ops.sched.kick(cpu);
 }
+#else
+
+#define pv_qspinlock_init() do {} while (0)
+
 #endif /* SMP && PARAVIRT_SPINLOCKS */
 
 #else
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 78a6e200e..c46cf02d1 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <asm/paravirt.h>
 #include <asm/pvsched-abi.h>
+#include <asm/qspinlock_paravirt.h>
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
@@ -124,6 +125,63 @@ static bool has_kvm_pvsched(void)
 	return (res.a0 == SMCCC_RET_SUCCESS);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+static bool arm_pvspin = false;
+
+/* Kick a cpu by its cpuid. Used to wake up a halted vcpu */
+static void kvm_kick_cpu(int cpu)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res);
+}
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+
+	if (READ_ONCE(*ptr) != val)
+		goto out;
+
+	dsb(sy);
+	wfi();
+
+out:
+	local_irq_restore(flags);
+}
+
+void __init pv_qspinlock_init(void)
+{
+	/* Don't use the PV qspinlock code if there is only 1 vCPU. */
+	if (num_possible_cpus() == 1)
+		arm_pvspin = false;
+
+	if (!arm_pvspin) {
+		pr_info("PV qspinlocks disabled\n");
+		return;
+	}
+	pr_info("PV qspinlocks enabled\n");
+
+	__pv_init_lock_hash();
+	pv_ops.sched.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+	pv_ops.sched.queued_spin_unlock = __pv_queued_spin_unlock;
+	pv_ops.sched.wait = kvm_wait;
+	pv_ops.sched.kick = kvm_kick_cpu;
+}
+
+static __init int arm_parse_pvspin(char *arg)
+{
+	arm_pvspin = true;
+	return 0;
+}
+early_param("arm_pvspin", arm_parse_pvspin);
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
 int __init pv_sched_init(void)
 {
 	int ret;
@@ -143,5 +201,7 @@ int __init pv_sched_init(void)
 	pv_ops.sched.vcpu_is_preempted = kvm_vcpu_is_preempted;
 	pr_info("using PV sched preempted\n");
 
+	pv_qspinlock_init();
+
 	return 0;
 }
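Note that the PV qspinlock path is strictly opt-in: kvm_wait() and
kvm_kick_cpu() are only installed when the guest kernel is booted with the
new early param, e.g. a guest command line such as

  console=ttyAMA0 root=/dev/vda rw arm_pvspin

where only arm_pvspin is meaningful to this series; the other arguments are
illustrative. The num_possible_cpus() == 1 check above additionally keeps
uniprocessor guests on the native code even if the param is set.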
euleros inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------------------------------
Add tracepoints for PV qspinlock
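Once a guest is booted with arm_pvspin, the new events should be visible
under the paravirt trace system; assuming the conventional debugfs/tracefs
mount point (which may differ by distro), something like

  echo 1 > /sys/kernel/debug/tracing/events/paravirt/kvm_kick_cpu/enable
  echo 1 > /sys/kernel/debug/tracing/events/paravirt/kvm_wait/enable
  cat /sys/kernel/debug/tracing/trace_pipe

should show the wait/kick pairs on an overcommitted host.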
Signed-off-by: Zengruan Ye yezengruan@huawei.com
---
 arch/arm64/kernel/paravirt.c       |  6 +++
 arch/arm64/kernel/trace-paravirt.h | 66 ++++++++++++++++++++++++++++++
 virt/kvm/arm/pvsched.c             |  3 ++
 virt/kvm/arm/trace.h               | 18 ++++++++
 4 files changed, 93 insertions(+)
 create mode 100644 arch/arm64/kernel/trace-paravirt.h
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c46cf02d1..852347fa5 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -27,6 +27,9 @@
 #include <asm/pvsched-abi.h>
 #include <asm/qspinlock_paravirt.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace-paravirt.h"
+
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
@@ -134,6 +137,8 @@ static void kvm_kick_cpu(int cpu)
 	struct arm_smccc_res res;
 
 	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_SCHED_KICK_CPU, cpu, &res);
+
+	trace_kvm_kick_cpu("kvm kick cpu", smp_processor_id(), cpu);
 }
 
 static void kvm_wait(u8 *ptr, u8 val)
@@ -150,6 +155,7 @@ static void kvm_wait(u8 *ptr, u8 val)
 
 	dsb(sy);
 	wfi();
+	trace_kvm_wait("kvm wait wfi", smp_processor_id());
 
 out:
 	local_irq_restore(flags);
diff --git a/arch/arm64/kernel/trace-paravirt.h b/arch/arm64/kernel/trace-paravirt.h
new file mode 100644
index 000000000..2d76272f3
--- /dev/null
+++ b/arch/arm64/kernel/trace-paravirt.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2019 Huawei Technologies Co., Ltd
+ * Author: Zengruan Ye yezengruan@huawei.com
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM paravirt
+
+#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PARAVIRT_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(kvm_kick_cpu,
+	TP_PROTO(const char *name, int cpu, int target),
+	TP_ARGS(name, cpu, target),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(int, cpu)
+		__field(int, target)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->cpu = cpu;
+		__entry->target = target;
+	),
+
+	TP_printk("PV qspinlock: %s, cpu %d kick target cpu %d",
+		__get_str(name),
+		__entry->cpu,
+		__entry->target
+	)
+);
+
+TRACE_EVENT(kvm_wait,
+	TP_PROTO(const char *name, int cpu),
+	TP_ARGS(name, cpu),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(int, cpu)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->cpu = cpu;
+	),
+
+	TP_printk("PV qspinlock: %s, cpu %d wait kvm access wfi",
+		__get_str(name),
+		__entry->cpu
+	)
+);
+
+#endif /* _TRACE_PARAVIRT_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../../arch/arm64/kernel/
+#define TRACE_INCLUDE_FILE trace-paravirt
+
+#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/pvsched.c b/virt/kvm/arm/pvsched.c
index ce6fbb20c..2c587a698 100644
--- a/virt/kvm/arm/pvsched.c
+++ b/virt/kvm/arm/pvsched.c
@@ -11,6 +11,8 @@
 
 #include <kvm/arm_hypercalls.h>
 
+#include "trace.h"
+
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
 {
 	__le32 preempted_le;
@@ -48,6 +50,7 @@ int kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
 		kvm_vcpu_yield_to(target);
 
 	val = SMCCC_RET_SUCCESS;
+	trace_kvm_pvsched_kick_vcpu(vcpu->vcpu_id, target->vcpu_id);
 
 out:
 	return val;
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 674fda1eb..4aea34825 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -263,6 +263,24 @@ TRACE_EVENT(kvm_timer_update_irq,
 		  __entry->vcpu_id, __entry->irq, __entry->level)
 );
 
+TRACE_EVENT(kvm_pvsched_kick_vcpu,
+	TP_PROTO(int vcpu_id, int target_vcpu_id),
+	TP_ARGS(vcpu_id, target_vcpu_id),
+
+	TP_STRUCT__entry(
+		__field(int, vcpu_id)
+		__field(int, target_vcpu_id)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu_id;
+		__entry->target_vcpu_id = target_vcpu_id;
+	),
+
+	TP_printk("PV qspinlock: vcpu %d kick target vcpu %d",
+		__entry->vcpu_id, __entry->target_vcpu_id)
+);
+
 #endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH