From: Jingxian He hejingxian@huawei.com
Add cvm feature patches: 1. add cvm host feature 2. enable pmu phys irq inject for cvm 3. add bounce buffer feature for cvm guest 4. add lpi support for cvm guest
arch/arm64/configs/defconfig | 2 + arch/arm64/configs/openeuler_defconfig | 2 + arch/arm64/include/asm/cvm_guest.h | 21 + arch/arm64/include/asm/kvm_emulate.h | 13 + arch/arm64/include/asm/kvm_host.h | 12 + arch/arm64/include/asm/kvm_tmi.h | 377 ++++++++++++ arch/arm64/include/asm/kvm_tmm.h | 72 +++ arch/arm64/kvm/Kconfig | 16 + arch/arm64/kvm/Makefile | 5 + arch/arm64/kvm/arch_timer.c | 104 +++- arch/arm64/kvm/arm.c | 157 ++++- arch/arm64/kvm/cvm.c | 805 +++++++++++++++++++++++++ arch/arm64/kvm/cvm_exit.c | 223 +++++++ arch/arm64/kvm/cvm_guest.c | 91 +++ arch/arm64/kvm/guest.c | 8 + arch/arm64/kvm/hyp/vgic-v3-sr.c | 19 + arch/arm64/kvm/mmio.c | 17 +- arch/arm64/kvm/mmu.c | 7 + arch/arm64/kvm/pmu-emul.c | 10 + arch/arm64/kvm/psci.c | 12 +- arch/arm64/kvm/reset.c | 10 + arch/arm64/kvm/tmi.c | 168 ++++++ arch/arm64/kvm/vgic/vgic-v3.c | 17 +- arch/arm64/kvm/vgic/vgic.c | 55 +- arch/arm64/mm/mmu.c | 11 + arch/arm64/mm/pageattr.c | 9 +- drivers/irqchip/irq-gic-v3-its.c | 228 ++++++- drivers/perf/arm_pmu.c | 17 + include/kvm/arm_arch_timer.h | 4 + include/linux/kvm_host.h | 21 + include/linux/perf/arm_pmu.h | 3 + include/linux/swiotlb.h | 13 + include/uapi/linux/kvm.h | 29 + kernel/dma/direct.c | 39 ++ kernel/dma/swiotlb.c | 86 ++- virt/kvm/kvm_main.c | 7 +- 36 files changed, 2646 insertions(+), 44 deletions(-) create mode 100644 arch/arm64/include/asm/cvm_guest.h create mode 100644 arch/arm64/include/asm/kvm_tmi.h create mode 100644 arch/arm64/include/asm/kvm_tmm.h create mode 100644 arch/arm64/kvm/cvm.c create mode 100644 arch/arm64/kvm/cvm_exit.c create mode 100644 arch/arm64/kvm/cvm_guest.c create mode 100644 arch/arm64/kvm/tmi.c
From: Jingxian He hejingxian@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CC0X CVE: N/A
Reference: N/A
--------------------------------
Add host support for Confidential VMs: 1. Add new kvm_type for cvm. 2. Init cvm related data while user create vm with cvm type. 3. Add cvm hypervisor which run in sel2 which named tmm. 4. Kvm call tmm interface to create cvm stage2 pagetable and run cvm.
Signed-off-by: Jingxian He hejingxian@huawei.com --- arch/arm64/configs/defconfig | 1 + arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/include/asm/kvm_emulate.h | 13 + arch/arm64/include/asm/kvm_host.h | 12 + arch/arm64/include/asm/kvm_tmi.h | 374 ++++++++++++ arch/arm64/include/asm/kvm_tmm.h | 72 +++ arch/arm64/kvm/Kconfig | 8 + arch/arm64/kvm/Makefile | 4 + arch/arm64/kvm/arch_timer.c | 104 +++- arch/arm64/kvm/arm.c | 133 +++- arch/arm64/kvm/cvm.c | 805 +++++++++++++++++++++++++ arch/arm64/kvm/cvm_exit.c | 223 +++++++ arch/arm64/kvm/guest.c | 8 + arch/arm64/kvm/hyp/vgic-v3-sr.c | 19 + arch/arm64/kvm/mmio.c | 17 +- arch/arm64/kvm/mmu.c | 7 + arch/arm64/kvm/psci.c | 12 +- arch/arm64/kvm/reset.c | 10 + arch/arm64/kvm/tmi.c | 168 ++++++ arch/arm64/kvm/vgic/vgic-v3.c | 17 +- arch/arm64/kvm/vgic/vgic.c | 55 +- include/kvm/arm_arch_timer.h | 4 + include/linux/kvm_host.h | 21 + include/uapi/linux/kvm.h | 29 + virt/kvm/kvm_main.c | 7 +- 25 files changed, 2103 insertions(+), 21 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_tmi.h create mode 100644 arch/arm64/include/asm/kvm_tmm.h create mode 100644 arch/arm64/kvm/cvm.c create mode 100644 arch/arm64/kvm/cvm_exit.c create mode 100644 arch/arm64/kvm/tmi.c
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d025bafcc..ace2bf4ad 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -110,6 +110,7 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_CVM_HOST=y CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 5ad5e4378..e298ca7e5 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -762,6 +762,7 @@ CONFIG_ACPI_PPTT=y CONFIG_IRQ_BYPASS_MANAGER=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_CVM_HOST=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQFD=y CONFIG_HAVE_KVM_IRQ_ROUTING=y diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fb3e3f613..8c7832f1e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -556,4 +556,17 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) return test_bit(feature, vcpu->arch.features); }
+#ifdef CONFIG_CVM_HOST +static inline bool kvm_is_cvm(struct kvm *kvm) +{ + if (static_branch_unlikely(&kvm_cvm_is_available)) + return kvm->arch.is_cvm; + return false; +} + +static inline enum cvm_state kvm_cvm_state(struct kvm *kvm) +{ + return READ_ONCE(kvm->arch.cvm.state); +} +#endif #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8bb67dfb9..01b8f9331 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,6 +26,9 @@ #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/thread_info.h> +#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmm.h> +#endif
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -127,6 +130,11 @@ struct kvm_arch { cpumask_t *dvm_cpumask; /* Union of all vcpu's cpus_ptr */ u64 lsudvmbm_el2; #endif + +#ifdef CONFIG_CVM_HOST + struct cvm cvm; + bool is_cvm; +#endif };
struct kvm_vcpu_fault_info { @@ -405,6 +413,10 @@ struct kvm_vcpu_arch { cpumask_t *cpus_ptr; cpumask_t *pre_cpus_ptr; #endif + +#ifdef CONFIG_CVM_HOST + struct cvm_tec tec; +#endif };
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ diff --git a/arch/arm64/include/asm/kvm_tmi.h b/arch/arm64/include/asm/kvm_tmi.h new file mode 100644 index 000000000..8534ccf3f --- /dev/null +++ b/arch/arm64/include/asm/kvm_tmi.h @@ -0,0 +1,374 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#ifndef __TMM_TMI_H +#define __TMM_TMI_H +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_pgtable.h> +#include <linux/virtio_ring.h> + +#define GRANULE_SIZE 4096 + +#define NO_NUMA -1 + +#define TMM_TTT_LEVEL_3 3 + +#ifdef CONFIG_CVM_HOST_FVP_PLAT +#define CVM_MEM_BASE ULL(0x8800000000) /* choose FVP platform to run cVM */ +#define VQ_NUM 3 +#else +#define CVM_MEM_BASE ULL(0x800000000) /* choose qemu platform to run cVM */ +#define VQ_NUM 3 +#endif + +#define MEM_SEG_NUMS 2 + +/* define in QEMU hw/arm/virt.c */ +#define VIRT_PCIE_MMIO 0x10000000 /* 256MB */ +#define VIRT_PCIE_MMIO_SIZE 0x1000000 /* 16MB */ +#define VIRT_HIGH_PCIE_ECAM 0x8000000000 /* 512GB */ +#define VIRT_HIGH_PCIE_ECAM_SIZE 0x12000000 /* 288MB */ + +/* TMI error codes. */ +#define TMI_SUCCESS 0 +#define TMI_ERROR_INPUT 1 +#define TMI_ERROR_MEMORY 2 +#define TMI_ERROR_ALIAS 3 +#define TMI_ERROR_IN_USE 4 +#define TMI_ERROR_CVM_STATE 5 +#define TMI_ERROR_OWNER 6 +#define TMI_ERROR_TEC 7 +#define TMI_ERROR_TTT_WALK 8 +#define TMI_ERROR_TTT_ENTRY 9 +#define TMI_ERROR_NOT_SUPPORTED 10 +#define TMI_ERROR_INTERNAL 11 +#define TMI_ERROR_CVM_POWEROFF 12 + +#define TMI_RETURN_STATUS(ret) ((ret) & 0xFF) +#define TMI_RETURN_INDEX(ret) (((ret) >> 8) & 0xFF) + +#define TMI_FEATURE_REGISTER_0_S2SZ GENMASK(7, 0) +#define TMI_FEATURE_REGISTER_0_LPA2 BIT(8) +#define TMI_FEATURE_REGISTER_0_SVE_EN BIT(9) +#define TMI_FEATURE_REGISTER_0_SVE_VL GENMASK(13, 10) +#define TMI_FEATURE_REGISTER_0_NUM_BPS GENMASK(17, 14) +#define TMI_FEATURE_REGISTER_0_NUM_WPS GENMASK(21, 18) +#define TMI_FEATURE_REGISTER_0_PMU_EN BIT(22) +#define TMI_FEATURE_REGISTER_0_PMU_NUM_CTRS GENMASK(27, 23) +#define TMI_FEATURE_REGISTER_0_HASH_SHA_256 BIT(28) +#define TMI_FEATURE_REGISTER_0_HASH_SHA_512 BIT(29) + +#define TMI_CVM_PARAM_FLAG_LPA2 BIT(0) +#define TMI_CVM_PARAM_FLAG_SVE BIT(1) +#define TMI_CVM_PARAM_FLAG_PMU BIT(2) + +/* + * Many of these fields are smaller than u64 but all fields have u64 + * alignment, so use u64 to ensure correct alignment. + */ +struct tmi_cvm_params { + u64 flags; + u64 s2sz; + u64 sve_vl; + u64 num_bps; + u64 num_wps; + u64 pmu_num_cnts; + u64 measurement_algo; + u64 vmid; + u64 ns_vtcr; + u64 vttbr_el2; + u64 ttt_base; + s64 ttt_level_start; + u64 ttt_num_start; + u8 rpv[64]; /* Bits 512 */ +}; + +#define TMI_NOT_RUNNABLE 0 +#define TMI_RUNNABLE 1 + +/* + * The number of GPRs (starting from X0) that are + * configured by the host when a TEC is created. + */ +#define TEC_CREATE_NR_GPRS (8U) + +struct tmi_tec_params { + uint64_t gprs[TEC_CREATE_NR_GPRS]; + uint64_t pc; + uint64_t flags; + uint64_t ram_size; +}; + +#define TEC_ENTRY_FLAG_EMUL_MMIO (1UL << 0U) +#define TEC_ENTRY_FLAG_INJECT_SEA (1UL << 1U) +#define TEC_ENTRY_FLAG_TRAP_WFI (1UL << 2U) +#define TEC_ENTRY_FLAG_TRAP_WFE (1UL << 3U) + +#define TMI_EXIT_SYNC 0 +#define TMI_EXIT_IRQ 1 +#define TMI_EXIT_FIQ 2 +#define TMI_EXIT_PSCI 3 +#define TMI_EXIT_HOST_CALL 5 +#define TMI_EXIT_SERROR 6 + +/* + * The number of GPRs (starting from X0) per voluntary exit context. + * Per SMCCC. + */ + #define TEC_EXIT_NR_GPRS (31U) + +/* Maximum number of Interrupt Controller List Registers. */ +#define TEC_GIC_NUM_LRS (16U) + +struct tmi_tec_entry { + uint64_t flags; + uint64_t gprs[TEC_EXIT_NR_GPRS]; + uint64_t gicv3_lrs[TEC_GIC_NUM_LRS]; + uint64_t gicv3_hcr; +}; + +struct tmi_tec_exit { + uint64_t exit_reason; + uint64_t esr; + uint64_t far; + uint64_t hpfar; + uint64_t gprs[TEC_EXIT_NR_GPRS]; + uint64_t gicv3_hcr; + uint64_t gicv3_lrs[TEC_GIC_NUM_LRS]; + uint64_t gicv3_misr; + uint64_t gicv3_vmcr; + uint64_t cntv_ctl; + uint64_t cntv_cval; + uint64_t cntp_ctl; + uint64_t cntp_cval; + uint64_t imm; +}; + +struct tmi_tec_run { + struct tmi_tec_entry tec_entry; + struct tmi_tec_exit tec_exit; +}; + +#define TMI_FNUM_MIN_VALUE U(0x150) +#define TMI_FNUM_MAX_VALUE U(0x18F) + +/****************************************************************************** + * Bit definitions inside the function id as per the SMC calling convention + ******************************************************************************/ +#define FUNCID_TYPE_SHIFT 31 +#define FUNCID_CC_SHIFT 30 +#define FUNCID_OEN_SHIFT 24 +#define FUNCID_NUM_SHIFT 0 + +#define FUNCID_TYPE_MASK 0x1 +#define FUNCID_CC_MASK 0x1 +#define FUNCID_OEN_MASK 0x3f +#define FUNCID_NUM_MASK 0xffff + +#define FUNCID_TYPE_WIDTH 1 +#define FUNCID_CC_WIDTH 1 +#define FUNCID_OEN_WIDTH 6 +#define FUNCID_NUM_WIDTH 16 + +#define SMC_64 1 +#define SMC_32 0 +#define SMC_TYPE_FAST 1 +#define SMC_TYPE_STD 0 + +/***************************************************************************** + * Owning entity number definitions inside the function id as per the SMC + * calling convention + *****************************************************************************/ +#define OEN_ARM_START 0 +#define OEN_ARM_END 0 +#define OEN_CPU_START 1 +#define OEN_CPU_END 1 +#define OEN_SIP_START 2 +#define OEN_SIP_END 2 +#define OEN_OEM_START 3 +#define OEN_OEM_END 3 +#define OEN_STD_START 4 /* Standard Calls */ +#define OEN_STD_END 4 +#define OEN_TAP_START 48 /* Trusted Applications */ +#define OEN_TAP_END 49 +#define OEN_TOS_START 50 /* Trusted OS */ +#define OEN_TOS_END 63 +#define OEN_LIMIT 64 + +/* Get TMI fastcall std FID from function number */ +#define TMI_FID(smc_cc, func_num) \ + ((SMC_TYPE_FAST << FUNCID_TYPE_SHIFT) | \ + ((smc_cc) << FUNCID_CC_SHIFT) | \ + (OEN_STD_START << FUNCID_OEN_SHIFT) | \ + ((func_num) << FUNCID_NUM_SHIFT)) + +#define U(_x) (_x##U) + +/* + * SMC_TMM_INIT_COMPLETE is the only function in the TMI that originates from + * the CVM world and is handled by the SPMD. The remaining functions are + * always invoked by the Normal world, forward by SPMD and handled by the + * TMM. + */ +#define TMI_FNUM_VERSION U(0x260) +#define TMI_FNUM_MEM_ALLOC U(0x261) +#define TMI_FNUM_MEM_FREE U(0x262) +#define TMI_FNUM_DATA_CREATE U(0x263) +#define TMI_FNUM_DATA_DESTROY U(0x265) +#define TMI_FNUM_CVM_ACTIVATE U(0x267) +#define TMI_FNUM_CVM_CREATE U(0x268) +#define TMI_FNUM_CVM_DESTROY U(0x269) +#define TMI_FNUM_TEC_CREATE U(0x27A) +#define TMI_FNUM_TEC_DESTROY U(0x27B) +#define TMI_FNUM_TEC_ENTER U(0x27C) +#define TMI_FNUM_TTT_CREATE U(0x27D) +#define TMI_FNUM_TTT_DESTROY U(0x27E) +#define TMI_FNUM_TTT_MAP_UNPROTECTED U(0x27F) +#define TMI_FNUM_TTT_MAP_PROTECTED U(0x280) +#define TMI_FNUM_TTT_UNMAP_UNPROTECTED U(0x282) +#define TMI_FNUM_TTT_UNMAP_PROTECTED U(0x283) +#define TMI_FNUM_PSCI_COMPLETE U(0x284) +#define TMI_FNUM_FEATURES U(0x285) +#define TMI_FNUM_TTT_MAP_RANGE U(0x286) +#define TMI_FNUM_TTT_UNMAP_RANGE U(0x287) + +/* TMI SMC64 PIDs handled by the SPMD */ +#define TMI_TMM_VESION TMI_FID(SMC_64, TMI_FNUM_VERSION) +#define TMI_TMM_DATA_CREATE TMI_FID(SMC_64, TMI_FNUM_DATA_CREATE) +#define TMI_TMM_DATA_DESTROY TMI_FID(SMC_64, TMI_FNUM_DATA_DESTROY) +#define TMI_TMM_CVM_ACTIVATE TMI_FID(SMC_64, TMI_FNUM_CVM_ACTIVATE) +#define TMI_TMM_CVM_CREATE TMI_FID(SMC_64, TMI_FNUM_CVM_CREATE) +#define TMI_TMM_CVM_DESTROY TMI_FID(SMC_64, TMI_FNUM_CVM_DESTROY) +#define TMI_TMM_TEC_CREATE TMI_FID(SMC_64, TMI_FNUM_TEC_CREATE) +#define TMI_TMM_TEC_DESTROY TMI_FID(SMC_64, TMI_FNUM_TEC_DESTROY) +#define TMI_TMM_TEC_ENTER TMI_FID(SMC_64, TMI_FNUM_TEC_ENTER) +#define TMI_TMM_TTT_CREATE TMI_FID(SMC_64, TMI_FNUM_TTT_CREATE) +#define TMI_TMM_TTT_DESTROY TMI_FID(SMC_64, TMI_FNUM_TTT_DESTROY) +#define TMI_TMM_TTT_MAP_UNPROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_UNPROTECTED) +#define TMI_TMM_TTT_MAP_PROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_PROTECTED) +#define TMI_TMM_TTT_UNMAP_UNPROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_UNPROTECTED) +#define TMI_TMM_TTT_UNMAP_PROTECTED TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_PROTECTED) +#define TMI_TMM_PSCI_COMPLETE TMI_FID(SMC_64, TMI_FNUM_PSCI_COMPLETE) +#define TMI_TMM_FEATURES TMI_FID(SMC_64, TMI_FNUM_FEATURES) +#define TMI_TMM_MEM_ALLOC TMI_FID(SMC_64, TMI_FNUM_MEM_ALLOC) +#define TMI_TMM_MEM_FREE TMI_FID(SMC_64, TMI_FNUM_MEM_FREE) +#define TMI_TMM_TTT_MAP_RANGE TMI_FID(SMC_64, TMI_FNUM_TTT_MAP_RANGE) +#define TMI_TMM_TTT_UNMAP_RANGE TMI_FID(SMC_64, TMI_FNUM_TTT_UNMAP_RANGE) + +#define TMI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16) +#define TMI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF) + +#define TMI_ABI_VERSION_MAJOR U(0x0) + +/* KVM_CAP_ARM_TMM on VM fd */ +#define KVM_CAP_ARM_TMM_CONFIG_CVM_HOST 0 +#define KVM_CAP_ARM_TMM_CREATE_CVM 1 +#define KVM_CAP_ARM_TMM_INIT_IPA_CVM 2 +#define KVM_CAP_ARM_TMM_POPULATE_CVM 3 +#define KVM_CAP_ARM_TMM_ACTIVATE_CVM 4 + +#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA256 0 +#define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA512 1 + +#define KVM_CAP_ARM_TMM_RPV_SIZE 64 + +/* List of configuration items accepted for KVM_CAP_ARM_TMM_CONFIG_CVM_HOST */ +#define KVM_CAP_ARM_TMM_CFG_RPV 0 +#define KVM_CAP_ARM_TMM_CFG_HASH_ALGO 1 +#define KVM_CAP_ARM_TMM_CFG_SVE 2 +#define KVM_CAP_ARM_TMM_CFG_DBG 3 +#define KVM_CAP_ARM_TMM_CFG_PMU 4 + +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_available); +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_enable); + +struct kvm_cap_arm_tmm_config_item { + __u32 cfg; + union { + /* cfg == KVM_CAP_ARM_TMM_CFG_RPV */ + struct { + __u8 rpv[KVM_CAP_ARM_TMM_RPV_SIZE]; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_HASH_ALGO */ + struct { + __u32 hash_algo; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_SVE */ + struct { + __u32 sve_vq; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_DBG */ + struct { + __u32 num_brps; + __u32 num_wrps; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_PMU */ + struct { + __u32 num_pmu_cntrs; + }; + /* Fix the size of the union */ + __u8 reserved[256]; + }; +}; + +enum tmi_tmm_mem_type { + TMM_MEM_TYPE_RD, + TMM_MEM_TYPE_TEC, + TMM_MEM_TYPE_TTT, + TMM_MEM_TYPE_CVM_PA, +}; + +enum tmi_tmm_map_size { + TMM_MEM_MAP_SIZE_4K, + TMM_MEM_MAP_SIZE_2M, + TMM_MEM_MAP_SIZE_1G, + TMM_MEM_MAP_SIZE_MAX, +}; + +static inline bool tmm_is_addr_ttt_level_aligned(uint64_t addr, int level) +{ + uint64_t mask = (1 << (12 + 9 * (3 - level))) - 1; + + return (addr & mask) == 0; +} + +u64 phys_to_cvm_phys(u64 phys); + +u64 tmi_version(void); +u64 tmi_data_create(u64 data, u64 rd, u64 map_addr, u64 src, u64 level); +u64 tmi_data_destroy(u64 rd, u64 map_addr, u64 level); +u64 tmi_cvm_activate(u64 rd); +u64 tmi_cvm_create(u64 rd, u64 params_ptr); +u64 tmi_cvm_destroy(u64 rd); +u64 tmi_tec_create(u64 tec, u64 rd, u64 mpidr, u64 params_ptr); +u64 tmi_tec_destroy(u64 tec); +u64 tmi_tec_enter(u64 tec, u64 run_ptr); +u64 tmi_ttt_create(u64 ttt, u64 rd, u64 map_addr, u64 level); +u64 tmi_ttt_destroy(u64 ttt, u64 rd, u64 map_addr, u64 level); +u64 tmi_ttt_map_unprotected(u64 rd, u64 map_addr, u64 level, u64 ttte); +u64 tmi_ttt_unmap_unprotected(u64 rd, u64 map_addr, u64 level, u64 ns); +u64 tmi_ttt_unmap_protected(u64 rd, u64 map_addr, u64 level); +u64 tmi_psci_complete(u64 calling_tec, u64 target_tec); +u64 tmi_features(u64 index); +u64 tmi_ttt_map_range(u64 rd, u64 map_addr, u64 size, u64 cur_node, u64 target_node); +u64 tmi_ttt_unmap_range(u64 rd, u64 map_addr, u64 size, u64 node_id); + +u64 tmi_mem_alloc(u64 rd, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size); +u64 tmi_mem_free(u64 pa, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size); + +void kvm_cvm_vcpu_put(struct kvm_vcpu *vcpu); +int kvm_load_user_data(struct kvm *kvm, unsigned long arg); +unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu, + unsigned long target_affinity, unsigned long lowest_affinity_level); +int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, + bool serror_pending, bool ext_dabt_pending); + +#endif diff --git a/arch/arm64/include/asm/kvm_tmm.h b/arch/arm64/include/asm/kvm_tmm.h new file mode 100644 index 000000000..dc211facf --- /dev/null +++ b/arch/arm64/include/asm/kvm_tmm.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#ifndef __ASM_KVM_TMM_H +#define __ASM_KVM_TMM_H + +#include <uapi/linux/kvm.h> + +enum cvm_state { + CVM_STATE_NONE, + CVM_STATE_NEW, + CVM_STATE_ACTIVE, + CVM_STATE_DYING +}; + +struct cvm { + enum cvm_state state; + u32 cvm_vmid; + u64 rd; + u64 loader_start; + u64 initrd_start; + u64 initrd_size; + u64 ram_size; + struct kvm_numa_info numa_info; + struct tmi_cvm_params *params; +}; + +/* + * struct cvm_tec - Additional per VCPU data for a CVM + */ +struct cvm_tec { + u64 tec; + bool tec_created; + void *tec_run; +}; + +int kvm_init_tmm(void); +int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); +int kvm_init_cvm_vm(struct kvm *kvm); +void kvm_destroy_cvm(struct kvm *kvm); +int kvm_create_tec(struct kvm_vcpu *vcpu); +void kvm_destroy_tec(struct kvm_vcpu *vcpu); +int kvm_tec_enter(struct kvm_vcpu *vcpu); +int handle_cvm_exit(struct kvm_vcpu *vcpu, int rec_run_status); +int kvm_arm_create_cvm(struct kvm *kvm); +void kvm_free_rd(struct kvm *kvm); +int cvm_create_rd(struct kvm *kvm); +int kvm_arm_cvm_first_run(struct kvm_vcpu *vcpu); +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target); +int kvm_arch_tec_init(struct kvm_vcpu *vcpu); + +void kvm_cvm_unmap_destroy_range(struct kvm *kvm); + +#define CVM_TTT_BLOCK_LEVEL 2 +#define CVM_TTT_MAX_LEVEL 3 + +#define CVM_PAGE_SHIFT 12 +#define CVM_PAGE_SIZE BIT(CVM_PAGE_SHIFT) +#define CVM_TTT_LEVEL_SHIFT(l) \ + ((CVM_PAGE_SHIFT - 3) * (4 - (l)) + 3) +#define CVM_L2_BLOCK_SIZE BIT(CVM_TTT_LEVEL_SHIFT(2)) + +static inline unsigned long cvm_ttt_level_mapsize(int level) +{ + if (WARN_ON(level > CVM_TTT_BLOCK_LEVEL)) + return CVM_PAGE_SIZE; + + return (1UL << CVM_TTT_LEVEL_SHIFT(level)); +} + +#endif diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d984a6041..7c24a4d33 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -46,6 +46,14 @@ menuconfig KVM
If unsure, say N.
+config CVM_HOST + bool "CVM_Enabled" + depends on KVM && ARM64 + help + Support CVM based on S-EL2 + + If unsure, say N. + if KVM
source "virt/kvm/Kconfig" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 395d65165..5fd1b8be1 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,5 +24,9 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/shadow_dev.o vgic/vgic-debug.o
+kvm-$(CONFIG_CVM_HOST) += tmi.o +kvm-$(CONFIG_CVM_HOST) += cvm.o +kvm-$(CONFIG_CVM_HOST) += cvm_exit.o + kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index f9d6a5cd4..960f45b4e 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,10 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif + #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h>
@@ -138,10 +142,80 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } }
+#ifdef CONFIG_CVM_HOST +static bool cvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +{ + return timer_ctx && + ((timer_get_ctl(timer_ctx) & + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); +} + +void kvm_cvm_timers_update(struct kvm_vcpu *vcpu) +{ + int i; + u64 cval, now; + bool status, level; + struct arch_timer_context *timer; + struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + timer = &arch_timer->timers[i]; + + if (!timer->loaded) { + if (!cvm_timer_irq_can_fire(timer)) + continue; + cval = timer_get_cval(timer); + now = kvm_phys_timer_read() - timer_get_offset(timer); + level = (cval <= now); + kvm_timer_update_irq(vcpu, level, timer); + } else { + status = timer_get_ctl(timer) & ARCH_TIMER_CTRL_IT_STAT; + level = cvm_timer_irq_can_fire(timer) && status; + if (level != timer->irq.level) + kvm_timer_update_irq(vcpu, level, timer); + } + } +} + +static void set_cvm_timers_loaded(struct kvm_vcpu *vcpu, bool loaded) +{ + int i; + struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *timer = &arch_timer->timers[i]; + + timer->loaded = loaded; + } +} + +static void kvm_timer_blocking(struct kvm_vcpu *vcpu); +static void kvm_timer_unblocking(struct kvm_vcpu *vcpu); + +static inline void cvm_vcpu_load_timer_callback(struct kvm_vcpu *vcpu) +{ + kvm_cvm_timers_update(vcpu); + kvm_timer_unblocking(vcpu); + set_cvm_timers_loaded(vcpu, true); +} + +static inline void cvm_vcpu_put_timer_callback(struct kvm_vcpu *vcpu) +{ + set_cvm_timers_loaded(vcpu, false); + if (rcuwait_active(kvm_arch_vcpu_get_wait(vcpu))) + kvm_timer_blocking(vcpu); +} +#endif + static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) { struct kvm_vcpu *vcpu = ctxt->vcpu;
+#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) + return; +#endif + switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; @@ -667,6 +741,13 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct timer_map map;
+#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + cvm_vcpu_load_timer_callback(vcpu); + return; + } +#endif + if (unlikely(!timer->enabled)) return;
@@ -752,6 +833,13 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) struct timer_map map; struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + cvm_vcpu_put_timer_callback(vcpu); + return; + } +#endif + if (unlikely(!timer->enabled)) return;
@@ -898,7 +986,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->vcpu = vcpu;
/* Synchronize cntvoff across all vtimers of a VM. */ - update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) + update_vtimer_cntvoff(vcpu, 0); + else +#endif + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); timer_set_offset(ptimer, 0);
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); @@ -1356,6 +1449,15 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; }
+#ifdef CONFIG_CVM_HOST + /* + * We don't use mapped IRQs for CVM because the TMI doesn't allow + * us setting the LR.HW bit in the VGIC. + */ + if (vcpu_is_tec(vcpu)) + return 0; +#endif + get_timer_map(vcpu, &map);
if (vtimer_is_irqbypass()) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 718f6060b..3c529d1a2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -38,6 +38,9 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/sections.h> +#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif
#include <kvm/arm_hypercalls.h> #include <kvm/arm_pmu.h> @@ -108,6 +111,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; kvm->arch.return_nisv_io_abort_to_user = true; break; +#ifdef CONFIG_CVM_HOST + case KVM_CAP_ARM_TMM: + if (static_branch_unlikely(&kvm_cvm_is_available)) + r = kvm_cvm_enable_cap(kvm, cap); + break; +#endif default: r = -EINVAL; break; @@ -149,13 +158,29 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return ret; #endif
+#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) { + ret = cvm_create_rd(kvm); + if (ret) + return ret; + } +#endif + ret = kvm_arm_setup_stage2(kvm, type); if (ret) +#ifdef CONFIG_CVM_HOST + goto out_free_rd; +#else return ret; +#endif
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); if (ret) +#ifdef CONFIG_CVM_HOST + goto out_free_rd; +#else return ret; +#endif
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) @@ -167,10 +192,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
set_default_csv2(kvm); +#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) { + ret = kvm_init_cvm_vm(kvm); + if (ret) + goto out_free_stage2_pgd; + } +#endif
return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); +#ifdef CONFIG_CVM_HOST +out_free_rd: + kvm_free_rd(kvm); +#endif return ret; }
@@ -203,6 +239,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } atomic_set(&kvm->online_vcpus, 0); +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(kvm)) + kvm_destroy_cvm(kvm); +#endif }
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -258,11 +298,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_STEAL_TIME: - r = kvm_arm_pvtime_supported(); +#ifdef CONFIG_CVM_HOST + if (kvm && kvm_is_cvm(kvm)) + r = 0; + else +#endif + r = kvm_arm_pvtime_supported(); break; case KVM_CAP_ARM_VIRT_MSI_BYPASS: r = sdev_enable; break; +#ifdef CONFIG_CVM_HOST + case KVM_CAP_ARM_TMM: + r = static_key_enabled(&kvm_cvm_is_available); + break; +#endif default: r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; @@ -358,6 +408,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return err; #endif
+#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) { + err = kvm_arch_tec_init(vcpu); + if (err) + return err; + } +#endif return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); }
@@ -444,8 +501,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->cpu = cpu;
+#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + if (single_task_running()) + vcpu_clear_wfx_traps(vcpu); + else + vcpu_set_wfx_traps(vcpu); + } +#endif kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + return; + } +#endif if (has_vhe()) kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -472,6 +544,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + kvm_cvm_vcpu_put(vcpu); + return; + } +#endif kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) kvm_vcpu_put_sysregs_vhe(vcpu); @@ -662,6 +740,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Tell the rest of the code that there are userspace irqchip * VMs in the wild. */ +#ifdef CONFIG_CVM_HOST + if (!kvm_is_cvm(kvm)) +#endif static_branch_inc(&userspace_irqchip_in_use); }
@@ -830,7 +911,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = kvm_vcpu_first_run_init(vcpu); if (ret) return ret; - +#ifdef CONFIG_CVM_HOST + if (kvm_is_cvm(vcpu->kvm)) { + ret = kvm_arm_cvm_first_run(vcpu); + if (ret) + return ret; + } +#endif if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu); if (ret) @@ -905,8 +992,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ trace_kvm_entry(vcpu->vcpu_id, *vcpu_pc(vcpu)); guest_enter_irqoff(); - - ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + ret = kvm_tec_enter(vcpu); + else +#endif + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -961,11 +1052,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * guest time. */ guest_exit(); - trace_kvm_exit(vcpu->vcpu_id, ret, *vcpu_pc(vcpu)); - - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); +#ifdef CONFIG_CVM_HOST + if (!vcpu_is_tec(vcpu)) { +#endif + trace_kvm_exit(vcpu->vcpu_id, ret, *vcpu_pc(vcpu));
+ /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, ret); +#ifdef CONFIG_CVM_HOST + } +#endif preempt_enable();
/* @@ -986,8 +1082,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->arch.target = -1; ret = ARM_EXCEPTION_IL; } - - ret = handle_exit(vcpu, ret); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + ret = handle_cvm_exit(vcpu, ret); + else +#endif + ret = handle_exit(vcpu, ret); update_vcpu_stat_time(&vcpu->stat); }
@@ -1419,6 +1519,11 @@ long kvm_arch_vm_ioctl(struct file *filp, void __user *argp = (void __user *)arg;
switch (ioctl) { +#ifdef CONFIG_CVM_HOST + case KVM_LOAD_USER_DATA: { + return kvm_load_user_data(kvm, arg); + } +#endif case KVM_CREATE_IRQCHIP: { int ret; if (!vgic_present) @@ -1950,7 +2055,13 @@ int kvm_arch_init(void *opaque) kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); return -ENODEV; } - +#ifdef CONFIG_CVM_HOST + if (static_branch_unlikely(&kvm_cvm_is_enable) && in_hyp_mode) { + err = kvm_init_tmm(); + if (err) + return err; + } +#endif if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ diff --git a/arch/arm64/kvm/cvm.c b/arch/arm64/kvm/cvm.c new file mode 100644 index 000000000..da9a61aae --- /dev/null +++ b/arch/arm64/kvm/cvm.c @@ -0,0 +1,805 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <asm/kvm_tmi.h> +#include <asm/kvm_pgtable.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> +#include <asm/stage2_pgtable.h> +#include <linux/arm-smccc.h> +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +/* Protects access to cvm_vmid_bitmap */ +static DEFINE_SPINLOCK(cvm_vmid_lock); +static unsigned long *cvm_vmid_bitmap; +DEFINE_STATIC_KEY_FALSE(kvm_cvm_is_available); +DEFINE_STATIC_KEY_FALSE(kvm_cvm_is_enable); + +static int __init setup_cvm_host(char *str) +{ + int ret; + unsigned int val; + + if (!str) + return 0; + + ret = kstrtouint(str, 10, &val); + if (ret) { + pr_warn("Unable to parse cvm_guest.\n"); + } else { + if (val) + static_branch_enable(&kvm_cvm_is_enable); + } + return ret; +} +early_param("cvm_host", setup_cvm_host); + +u64 cvm_phys_to_phys(u64 phys) +{ + return phys; +} + +u64 phys_to_cvm_phys(u64 phys) +{ + return phys; +} + +static int cvm_vmid_init(void) +{ + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + cvm_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL); + if (!cvm_vmid_bitmap) { + kvm_err("%s: Couldn't allocate cvm vmid bitmap\n", __func__); + return -ENOMEM; + } + return 0; +} + +static unsigned long tmm_feat_reg0; + +static bool tmm_supports(unsigned long feature) +{ + return !!u64_get_bits(tmm_feat_reg0, feature); +} + +bool kvm_cvm_supports_sve(void) +{ + return tmm_supports(TMI_FEATURE_REGISTER_0_SVE_EN); +} + +bool kvm_cvm_supports_pmu(void) +{ + return tmm_supports(TMI_FEATURE_REGISTER_0_PMU_EN); +} + +u32 kvm_cvm_ipa_limit(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_S2SZ); +} + +u32 kvm_cvm_get_num_brps(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_NUM_BPS); +} + +u32 kvm_cvm_get_num_wrps(void) +{ + return u64_get_bits(tmm_feat_reg0, TMI_FEATURE_REGISTER_0_NUM_WPS); +} + +static int cvm_vmid_reserve(void) +{ + int ret; + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + spin_lock(&cvm_vmid_lock); + ret = bitmap_find_free_region(cvm_vmid_bitmap, vmid_count, 0); + spin_unlock(&cvm_vmid_lock); + + return ret; +} + +static void cvm_vmid_release(unsigned int vmid) +{ + spin_lock(&cvm_vmid_lock); + bitmap_release_region(cvm_vmid_bitmap, vmid, 0); + spin_unlock(&cvm_vmid_lock); +} + +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +{ + u64 shift = ARM64_HW_PGTABLE_LEVEL_SHIFT(pgt->start_level - 1); + u64 mask = BIT(pgt->ia_bits) - 1; + + return (addr & mask) >> shift; +} + +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) +{ + struct kvm_pgtable pgt = { + .ia_bits = ia_bits, + .start_level = start_level, + }; + return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; +} + +int kvm_arm_create_cvm(struct kvm *kvm) +{ + int ret; + struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; + unsigned int pgd_sz; + + if (!kvm_is_cvm(kvm) || kvm_cvm_state(kvm) != CVM_STATE_NONE) + return 0; + + ret = cvm_vmid_reserve(); + if (ret < 0) + return ret; + + kvm->arch.cvm.cvm_vmid = ret; + + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level); + + kvm->arch.cvm.params->ttt_base = phys_to_cvm_phys(kvm->arch.mmu.pgd_phys); + kvm->arch.cvm.params->measurement_algo = 0; + kvm->arch.cvm.params->ttt_level_start = kvm->arch.mmu.pgt->start_level; + kvm->arch.cvm.params->ttt_num_start = pgd_sz; + kvm->arch.cvm.params->s2sz = VTCR_EL2_IPA(kvm->arch.vtcr); + kvm->arch.cvm.params->vmid = kvm->arch.cvm.cvm_vmid; + kvm->arch.cvm.params->ns_vtcr = kvm->arch.vtcr; + kvm->arch.cvm.params->vttbr_el2 = kvm->arch.mmu.pgd_phys; + ret = tmi_cvm_create(kvm->arch.cvm.rd, __pa(kvm->arch.cvm.params)); + if (!ret) + kvm_info("KVM creates cVM: %d\n", kvm->arch.cvm.cvm_vmid); + + WRITE_ONCE(kvm->arch.cvm.state, CVM_STATE_NEW); + kfree(kvm->arch.cvm.params); + kvm->arch.cvm.params = NULL; + return ret; +} + +int cvm_create_rd(struct kvm *kvm) +{ + if (!static_key_enabled(&kvm_cvm_is_available)) + return -EFAULT; + + kvm->arch.cvm.rd = tmi_mem_alloc(kvm->arch.cvm.rd, NO_NUMA, + TMM_MEM_TYPE_RD, TMM_MEM_MAP_SIZE_MAX); + if (!kvm->arch.cvm.rd) { + kvm_err("tmi_mem_alloc for cvm rd failed: %d\n", kvm->arch.cvm.cvm_vmid); + return -ENOMEM; + } + kvm->arch.is_cvm = true; + return 0; +} + +void kvm_free_rd(struct kvm *kvm) +{ + int ret; + + if (!kvm->arch.cvm.rd) + return; + + ret = tmi_mem_free(kvm->arch.cvm.rd, NO_NUMA, TMM_MEM_TYPE_RD, TMM_MEM_MAP_SIZE_MAX); + if (ret) + kvm_err("tmi_mem_free for cvm rd failed: %d\n", kvm->arch.cvm.cvm_vmid); + else + kvm->arch.cvm.rd = 0; +} + +void kvm_destroy_cvm(struct kvm *kvm) +{ + uint32_t cvm_vmid = kvm->arch.cvm.cvm_vmid; + + kfree(kvm->arch.cvm.params); + kvm->arch.cvm.params = NULL; + + if (kvm_cvm_state(kvm) == CVM_STATE_NONE) + return; + + cvm_vmid_release(cvm_vmid); + + WRITE_ONCE(kvm->arch.cvm.state, CVM_STATE_DYING); + + if (!tmi_cvm_destroy(kvm->arch.cvm.rd)) + kvm_info("KVM has destroyed cVM: %d\n", kvm->arch.cvm.cvm_vmid); + + kvm_free_rd(kvm); +} + +static int kvm_get_host_numa_node_by_ipa(uint64_t ipa, struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_numa_info *numa_info = &vcpu->kvm->arch.cvm.numa_info; + for (i = 0; i < numa_info->numa_cnt && i < MAX_NUMA_NODE; i++) { + struct kvm_numa_node *numa_node = &numa_info->numa_nodes[i]; + + if (ipa >= numa_node->ipa_start && + ipa < (numa_node->ipa_start + numa_node->ipa_size)) + return numa_node->host_numa_node; + } + return NO_NUMA; +} + +static int kvm_cvm_ttt_create(struct cvm *cvm, + unsigned long addr, + int level, + phys_addr_t phys) +{ + addr = ALIGN_DOWN(addr, cvm_ttt_level_mapsize(level - 1)); + return tmi_ttt_create(phys, cvm->rd, addr, level); +} + +int kvm_cvm_create_ttt_levels(struct kvm *kvm, struct cvm *cvm, + unsigned long ipa, + int level, + int max_level, + struct kvm_mmu_memory_cache *mc) +{ + if (WARN_ON(level == max_level)) + return 0; + + while (level++ < max_level) { + phys_addr_t ttt; + ttt = tmi_mem_alloc(cvm->rd, NO_NUMA, + TMM_MEM_TYPE_TTT, TMM_MEM_MAP_SIZE_MAX); + if (ttt == 0) + return -ENOMEM; + + if (kvm_cvm_ttt_create(cvm, ipa, level, ttt)) { + (void)tmi_mem_free(ttt, NO_NUMA, TMM_MEM_TYPE_TTT, TMM_MEM_MAP_SIZE_MAX); + return -ENXIO; + } + } + + return 0; +} + +static int kvm_cvm_create_protected_data_page(struct kvm *kvm, struct cvm *cvm, + unsigned long ipa, int level, + struct page *src_page, phys_addr_t dst_phys) +{ + phys_addr_t src_phys; + int ret; + + src_phys = page_to_phys(src_page); + ret = tmi_data_create(dst_phys, cvm->rd, ipa, src_phys, level); + if (TMI_RETURN_STATUS(ret) == TMI_ERROR_TTT_WALK) { + /* Create missing RTTs and retry */ + int level_fault = TMI_RETURN_INDEX(ret); + + ret = kvm_cvm_create_ttt_levels(kvm, cvm, ipa, level_fault, + level, NULL); + if (ret) + goto err; + ret = tmi_data_create(dst_phys, cvm->rd, ipa, src_phys, level); + } + WARN_ON(ret); + + if (ret) + goto err; + + return 0; + +err: + return -ENXIO; +} + +static u64 cvm_granule_size(u32 level) +{ + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); +} + +int kvm_cvm_populate_par_region(struct kvm *kvm, + phys_addr_t ipa_base, + phys_addr_t ipa_end, + phys_addr_t dst_phys) +{ + struct cvm *cvm = &kvm->arch.cvm; + struct kvm_memory_slot *memslot; + gfn_t base_gfn, end_gfn; + int idx; + phys_addr_t ipa; + int ret = 0; + int level = TMM_TTT_LEVEL_3; + unsigned long map_size = cvm_granule_size(level); + + base_gfn = gpa_to_gfn(ipa_base); + end_gfn = gpa_to_gfn(ipa_end); + + idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, base_gfn); + if (!memslot) { + ret = -EFAULT; + goto out; + } + + /* We require the region to be contained within a single memslot */ + if (memslot->base_gfn + memslot->npages < end_gfn) { + ret = -EINVAL; + goto out; + } + + mmap_read_lock(current->mm); + + ipa = ipa_base; + while (ipa < ipa_end) { + struct page *page; + kvm_pfn_t pfn; + + /* + * FIXME: This causes over mapping, but there's no good + * solution here with the ABI as it stands + */ + ipa = ALIGN_DOWN(ipa, map_size); + + pfn = gfn_to_pfn_memslot(memslot, gpa_to_gfn(ipa)); + + if (is_error_pfn(pfn)) { + ret = -EFAULT; + break; + } + + page = pfn_to_page(pfn); + + ret = kvm_cvm_create_protected_data_page(kvm, cvm, ipa, level, page, dst_phys); + if (ret) + goto err_release_pfn; + + ipa += map_size; + dst_phys += map_size; + kvm_release_pfn_dirty(pfn); +err_release_pfn: + if (ret) { + kvm_release_pfn_clean(pfn); + break; + } + } + + mmap_read_unlock(current->mm); +out: + srcu_read_unlock(&kvm->srcu, idx); + return ret; +} + +static int kvm_sel2_map_protected_ipa(struct kvm_vcpu *vcpu) +{ + int ret = 0; + gpa_t gpa, gpa_data_end, gpa_end, data_size; + u64 map_size, dst_phys; + u64 l2_granule = cvm_granule_size(2); /* 2MB */ + u64 numa_id; + int cur_numa_id; + + /* 2MB alignment below addresses*/ + gpa = vcpu->kvm->arch.cvm.loader_start; + gpa_end = vcpu->kvm->arch.cvm.loader_start + vcpu->kvm->arch.cvm.ram_size; + data_size = vcpu->kvm->arch.cvm.initrd_start - vcpu->kvm->arch.cvm.loader_start + + vcpu->kvm->arch.cvm.initrd_size; + data_size = round_up(data_size, l2_granule); + gpa_data_end = vcpu->kvm->arch.cvm.loader_start + data_size + l2_granule; + gpa = round_down(gpa, l2_granule); + gpa_end = round_up(gpa_end, l2_granule); + gpa_data_end = round_up(gpa_data_end, l2_granule); + + /* get numa_id */ + numa_id = kvm_get_host_numa_node_by_ipa(gpa, vcpu); + map_size = l2_granule; + do { + dst_phys = tmi_mem_alloc(vcpu->kvm->arch.cvm.rd, numa_id, + TMM_MEM_TYPE_CVM_PA, map_size); + if (!dst_phys) { + ret = -ENOMEM; + kvm_err("[%s] call tmi_mem_alloc failed.\n", __func__); + goto out; + } + + /* Try to call tmi_data_create to copy kernel data, and call tmi_data_create + *to map all necessary gpa for system boot, only copy the data_size is not enough + *to boot kernel, here, we copy and map another 2MB. */ + ret = kvm_cvm_populate_par_region(vcpu->kvm, gpa, gpa + map_size, dst_phys); + if (ret) { + ret = -EFAULT; + kvm_err("kvm_cvm_populate_par_region fail:%d.\n", ret); + goto out; + } + gpa += map_size; + dst_phys += map_size; + } while (gpa < gpa_data_end); + + cur_numa_id = numa_node_id(); + if (cur_numa_id < 0) { + ret = -EFAULT; + kvm_err("get current numa node fail\n"); + goto out; + } + + /* Map gpa range to secure mem without copy data from host. + * The cvm gpa map pages will free by destroy cvm. */ + ret = tmi_ttt_map_range(vcpu->kvm->arch.cvm.rd, gpa_data_end, + gpa_end - gpa_data_end, cur_numa_id, numa_id); + if (ret) + kvm_err("tmi_ttt_map_range fail:%d.\n", ret); +out: + return ret; +} + +int kvm_create_tec(struct kvm_vcpu *vcpu) +{ + int ret; + int i; + struct tmi_tec_params *params_ptr; + struct user_pt_regs *vcpu_regs = vcpu_gp_regs(vcpu); + uint64_t mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + + params_ptr = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!params_ptr) + return -ENOMEM; + + for (i = 0; i < TEC_CREATE_NR_GPRS; ++i) + params_ptr->gprs[i] = vcpu_regs->regs[i]; + + params_ptr->pc = vcpu_regs->pc; + + if (vcpu->vcpu_id == 0) + params_ptr->flags = TMI_RUNNABLE; + else + params_ptr->flags = TMI_NOT_RUNNABLE; + params_ptr->ram_size = vcpu->kvm->arch.cvm.ram_size; + ret = tmi_tec_create(vcpu->arch.tec.tec, vcpu->kvm->arch.cvm.rd, mpidr, __pa(params_ptr)); + + kfree(params_ptr); + + return ret; +} + +static int kvm_create_all_tecs(struct kvm *kvm) +{ + int ret = 0; + struct kvm_vcpu *vcpu; + unsigned long i; + + if (READ_ONCE(kvm->arch.cvm.state) == CVM_STATE_ACTIVE) + return -1; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!vcpu->arch.tec.tec_created) { + ret = kvm_create_tec(vcpu); + if (ret) { + mutex_unlock(&kvm->lock); + return ret; + } + vcpu->arch.tec.tec_created = true; + } + } + mutex_unlock(&kvm->lock); + return ret; +} + +static int config_cvm_sve(struct kvm *kvm, struct kvm_cap_arm_tmm_config_item *cfg) +{ + struct tmi_cvm_params *params = kvm->arch.cvm.params; + + int max_sve_vq = u64_get_bits(tmm_feat_reg0, + TMI_FEATURE_REGISTER_0_SVE_VL); + + if (!kvm_cvm_supports_sve()) + return -EINVAL; + + if (cfg->sve_vq > max_sve_vq) + return -EINVAL; + + params->sve_vl = cfg->sve_vq; + params->flags |= TMI_CVM_PARAM_FLAG_SVE; + + return 0; +} + +static int config_cvm_pmu(struct kvm *kvm, struct kvm_cap_arm_tmm_config_item *cfg) +{ + struct tmi_cvm_params *params = kvm->arch.cvm.params; + + int max_pmu_num_ctrs = u64_get_bits(tmm_feat_reg0, + TMI_FEATURE_REGISTER_0_PMU_NUM_CTRS); + + if (!kvm_cvm_supports_pmu()) + return -EINVAL; + + if (cfg->num_pmu_cntrs > max_pmu_num_ctrs) + return -EINVAL; + + params->pmu_num_cnts = cfg->num_pmu_cntrs; + params->flags |= TMI_CVM_PARAM_FLAG_PMU; + + return 0; +} + +static int kvm_tmm_config_cvm(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + struct kvm_cap_arm_tmm_config_item cfg; + int r = 0; + + if (kvm_cvm_state(kvm) != CVM_STATE_NONE) + return -EBUSY; + + if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg))) + return -EFAULT; + + switch (cfg.cfg) { + case KVM_CAP_ARM_TMM_CFG_SVE: + r = config_cvm_sve(kvm, &cfg); + break; + case KVM_CAP_ARM_TMM_CFG_PMU: + r = config_cvm_pmu(kvm, &cfg); + break; + default: + r = -EINVAL; + } + + return r; +} + +int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r = 0; + + mutex_lock(&kvm->lock); + switch (cap->args[0]) { + case KVM_CAP_ARM_TMM_CONFIG_CVM_HOST: + r = kvm_tmm_config_cvm(kvm, cap); + break; + case KVM_CAP_ARM_TMM_CREATE_CVM: + r = kvm_arm_create_cvm(kvm); + break; + default: + r = -EINVAL; + break; + } + mutex_unlock(&kvm->lock); + + return r; +} + +void kvm_destroy_tec(struct kvm_vcpu *vcpu) +{ + int ret = 0; + if (!vcpu_is_tec(vcpu)) + return; + + if (tmi_tec_destroy(vcpu->arch.tec.tec) != 0) + kvm_err("%s vcpu id : %d failed!\n", __func__, vcpu->vcpu_id); + + ret = tmi_mem_free(vcpu->arch.tec.tec, NO_NUMA, TMM_MEM_TYPE_TEC, TMM_MEM_MAP_SIZE_MAX); + if (ret != 0) + kvm_err("tmi_mem_free for cvm tec failed\n"); + vcpu->arch.tec.tec = 0; + kfree(vcpu->arch.tec.tec_run); +} + +static int tmi_check_version(void) +{ + uint64_t res; + int version_major; + int version_minor; + + res = tmi_version(); + if (res == SMCCC_RET_NOT_SUPPORTED) + return -ENXIO; + + version_major = TMI_ABI_VERSION_GET_MAJOR(res); + version_minor = TMI_ABI_VERSION_GET_MINOR(res); + + if (version_major != TMI_ABI_VERSION_MAJOR) { + kvm_err("Unsupported TMI_ABI (version %d %d)\n", version_major, + version_minor); + return -ENXIO; + } + + kvm_info("TMI ABI version %d,%d\n", version_major, version_minor); + return 0; +} + +static int kvm_kick_boot_vcpu(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + + if (READ_ONCE(kvm->arch.cvm.state) == CVM_STATE_ACTIVE) + return 0; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (i == 0) + kvm_vcpu_kick(vcpu); + } + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_arm_cvm_first_run(struct kvm_vcpu *vcpu) +{ + int ret = 0; + + if (READ_ONCE(vcpu->kvm->arch.cvm.state) == CVM_STATE_ACTIVE) + return ret; + + if (vcpu->vcpu_id == 0) { + ret = kvm_create_all_tecs(vcpu->kvm); + if (ret != 0) + return ret; + } else { + kvm_kick_boot_vcpu(vcpu->kvm); + } + + mutex_lock(&vcpu->kvm->lock); + + if (vcpu->vcpu_id == 0) { + ret = kvm_sel2_map_protected_ipa(vcpu); + if (ret) { + kvm_err("Map protected ipa failed!\n"); + goto unlock_exit; + } + ret = tmi_cvm_activate(vcpu->kvm->arch.cvm.rd); + if (ret) { + kvm_err("tmi_cvm_activate failed!\n"); + goto unlock_exit; + } + + WRITE_ONCE(vcpu->kvm->arch.cvm.state, CVM_STATE_ACTIVE); + kvm_info("cVM%d is activated!\n", vcpu->kvm->arch.cvm.cvm_vmid); + } +unlock_exit: + mutex_unlock(&vcpu->kvm->lock); + + return ret; +} + +int kvm_tec_enter(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + if (READ_ONCE(vcpu->kvm->arch.cvm.state) != CVM_STATE_ACTIVE) + return -EINVAL; + + /* set/clear TWI TWE flags */ + if (vcpu->arch.hcr_el2 & HCR_TWI) + run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFI; + else + run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFI; + + if (vcpu->arch.hcr_el2 & HCR_TWE) + run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFE; + else + run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; + + return tmi_tec_enter(vcpu->arch.tec.tec, __pa(run)); +} + +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) +{ + int ret; + + ret = tmi_psci_complete(calling->arch.tec.tec, target->arch.tec.tec); + if (ret) + return -EINVAL; + return 0; +} + +#define SIMD_PAGE_SIZE 3*PAGE_SIZE +int kvm_arch_tec_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.tec.tec_run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.tec.tec_run) + return -ENOMEM; + + vcpu->arch.tec.tec = tmi_mem_alloc(vcpu->kvm->arch.cvm.rd, NO_NUMA, TMM_MEM_TYPE_TEC, TMM_MEM_MAP_SIZE_MAX); + if (vcpu->arch.tec.tec == 0) { + kvm_info("KVM tmi_mem_alloc failed:%d\n", vcpu->vcpu_id); + return -ENOMEM; + } + kvm_info("KVM inits cVM VCPU:%d\n", vcpu->vcpu_id); + + return 0; +} + +int kvm_init_tmm(void) +{ + int ret; + + if (PAGE_SIZE != SZ_4K) + return 0; + + if (tmi_check_version()) + return 0; + + ret = cvm_vmid_init(); + if (ret) + return ret; + + tmm_feat_reg0 = tmi_features(0); + kvm_info("TMM feature0: 0x%lx\n", tmm_feat_reg0); + + static_branch_enable(&kvm_cvm_is_available); + + return 0; +} + +int kvm_init_cvm_vm(struct kvm *kvm) +{ + struct tmi_cvm_params *params; + + params = (struct tmi_cvm_params *)kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!params) + return -ENOMEM; + + kvm->arch.cvm.params = params; + + return 0; +} + +int kvm_load_user_data(struct kvm *kvm, unsigned long arg) +{ + struct kvm_user_data user_data; + void __user *argp = (void __user *)arg; + + if (!kvm_is_cvm(kvm)) + return -EFAULT; + + if (copy_from_user(&user_data, argp, sizeof(user_data))) + return -EFAULT; + + kvm->arch.cvm.loader_start = user_data.loader_start; + kvm->arch.cvm.initrd_start = user_data.initrd_start; + kvm->arch.cvm.initrd_size = user_data.initrd_size; + kvm->arch.cvm.ram_size = user_data.ram_size; + memcpy(&kvm->arch.cvm.numa_info, &user_data.numa_info, sizeof(struct kvm_numa_info)); + + return 0; +} + +void kvm_cvm_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvm_timer_vcpu_put(vcpu); + kvm_vgic_put(vcpu); + vcpu->cpu = -1; +} +unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu, + unsigned long target_affinity, unsigned long lowest_affinity_level) +{ + struct kvm_vcpu *target_vcpu; + + if (lowest_affinity_level != 0) + return PSCI_RET_INVALID_PARAMS; + + target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, target_affinity); + if (!target_vcpu) + return PSCI_RET_INVALID_PARAMS; + + cvm_psci_complete(vcpu, target_vcpu); + return PSCI_RET_SUCCESS; +} + +int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, + bool serror_pending, bool ext_dabt_pending) +{ + if (serror_pending) + return -EINVAL; + + if (ext_dabt_pending) { + if (!(((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags & + TEC_ENTRY_FLAG_EMUL_MMIO)) + return -EINVAL; + + ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags + &= ~TEC_ENTRY_FLAG_EMUL_MMIO; + ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags + |= TEC_ENTRY_FLAG_INJECT_SEA; + } + return 0; +} diff --git a/arch/arm64/kvm/cvm_exit.c b/arch/arm64/kvm/cvm_exit.c new file mode 100644 index 000000000..6149b57c5 --- /dev/null +++ b/arch/arm64/kvm/cvm_exit.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#include <linux/kvm_host.h> +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +#include <asm/kvm_tmi.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_coproc.h> + +typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); + +static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl) +{ + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->tec_exit.cntv_ctl; + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->tec_exit.cntv_cval; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->tec_exit.cntp_ctl; + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->tec_exit.cntp_cval; + + /* Because the timer mask is tainted by TMM, we don't know the + * true intent of the guest. Here, we assume mask is always + * cleared during WFI. + */ + if (unmask_ctl) { + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; + } + + kvm_cvm_timers_update(vcpu); +} + +static int tec_exit_reason_notimpl(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + pr_err("[vcpu %d] Unhandled exit reason from cvm (ESR: %#llx)\n", + vcpu->vcpu_id, run->tec_exit.esr); + return -ENXIO; +} + +/* The process is the same as kvm_handle_wfx, + * except the tracing and updating operation for pc, + * we copy kvm_handle_wfx process here + * to avoid changing kvm_handle_wfx function. + */ +static int tec_exit_wfx(struct kvm_vcpu *vcpu) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + + if (esr & ESR_ELx_WFx_ISS_WFE) + vcpu->stat.wfe_exit_stat++; + else + vcpu->stat.wfi_exit_stat++; + + if (esr & ESR_ELx_WFx_ISS_WFxT) { + if (esr & ESR_ELx_WFx_ISS_RV) { + u64 val, now; + + now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + if (now >= val) + goto out; + } else { + /* Treat WFxT as WFx if RN is invalid */ + esr &= ~ESR_ELx_WFx_ISS_WFxT; + } + } + + if (esr & ESR_ELx_WFx_ISS_WFE) { + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); + } else { + vcpu->arch.pvsched.pv_unhalted = false; + if (esr & ESR_ELx_WFx_ISS_WFxT) + vcpu->arch.flags |= KVM_ARM64_WFIT; + kvm_vcpu_block(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_WFIT; + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + } + +out: + return 1; +} + +static int tec_exit_sys_reg(struct kvm_vcpu *vcpu) +{ + int ret; + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + + if (is_write) + vcpu_set_reg(vcpu, rt, run->tec_exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) + run->tec_entry.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; +} + +static int tec_exit_sync_dabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) { + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), + run->tec_exit.gprs[0]); + } + return kvm_handle_guest_abort(vcpu); +} + +static int tec_exit_sync_iabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", + vcpu->vcpu_id, run->tec_exit.esr); + + return -ENXIO; +} + +static exit_handler_fn tec_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = tec_exit_reason_notimpl, + [ESR_ELx_EC_WFx] = tec_exit_wfx, + [ESR_ELx_EC_SYS64] = tec_exit_sys_reg, + [ESR_ELx_EC_DABT_LOW] = tec_exit_sync_dabt, + [ESR_ELx_EC_IABT_LOW] = tec_exit_sync_iabt +}; + +static int tec_exit_psci(struct kvm_vcpu *vcpu) +{ + int i; + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); + + return kvm_psci_call(vcpu); +} + +static int tec_exit_host_call(struct kvm_vcpu *vcpu) +{ + int ret, i; + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + vcpu->stat.hvc_exit_stat++; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); + + ret = kvm_hvc_call_handler(vcpu); + + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + ret = 1; + } + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + run->tec_entry.gprs[i] = vcpu_get_reg(vcpu, i); + + return ret; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0(and set exit_reason) on + * proper exit to userspace + */ + +int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) +{ + unsigned long status; + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + u8 esr_ec = ESR_ELx_EC(run->tec_exit.esr); + bool is_wfx; + + status = TMI_RETURN_STATUS(tec_run_ret); + + if (status == TMI_ERROR_CVM_POWEROFF) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SHUTDOWN; + return 0; + } + + if (status == TMI_ERROR_CVM_STATE) { + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; + } + + if (tec_run_ret) + return -ENXIO; + + vcpu->arch.fault.esr_el2 = run->tec_exit.esr; + vcpu->arch.fault.far_el2 = run->tec_exit.far; + vcpu->arch.fault.hpfar_el2 = run->tec_exit.hpfar; + + is_wfx = (run->tec_exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); + update_arch_timer_irq_lines(vcpu, is_wfx); + + run->tec_entry.flags = 0; + + switch (run->tec_exit.exit_reason) { + case TMI_EXIT_FIQ: + case TMI_EXIT_IRQ: + return 1; + case TMI_EXIT_PSCI: + return tec_exit_psci(vcpu); + case TMI_EXIT_SYNC: + return tec_exit_handlers[esr_ec](vcpu); + case TMI_EXIT_HOST_CALL: + return tec_exit_host_call(vcpu); + } + + kvm_pr_unimpl("Unsupported exit reason : 0x%llx\n", + run->tec_exit.exit_reason); + return 0; +} diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index f9c3dbc99..ecdd35527 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -27,6 +27,10 @@ #include <asm/kvm_coproc.h> #include <asm/sigcontext.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif + #include "trace.h"
struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -818,6 +822,10 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending;
+#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return kvm_cvm_vcpu_set_events(vcpu, serror_pending, ext_dabt_pending); +#endif if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd..a45e68ef2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -229,6 +229,25 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } }
+#ifdef CONFIG_CVM_HOST +void __vgic_v3_restore_tec_state(struct vgic_v3_cpu_if *cpu_if, + u64 *entry_hcr, + u64 *entry_lrs) +{ + u64 used_lrs = cpu_if->used_lrs; + int i; + + *entry_hcr = cpu_if->vgic_hcr; + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + if (i < used_lrs) { + entry_lrs[i] = cpu_if->vgic_lr[i]; + } else { + entry_lrs[i] = 0; + } + } +} +#endif + void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1b..6fc49784b 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -8,6 +8,10 @@ #include <asm/kvm_emulate.h> #include <trace/events/kvm.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif + #include "trace.h"
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data) @@ -109,6 +113,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)-> + tec_entry.gprs[0] = data; + } +#endif }
/* @@ -177,7 +187,12 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) run->mmio.phys_addr = fault_ipa; run->mmio.len = len; vcpu->mmio_needed = 1; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags |= + TEC_ENTRY_FLAG_EMUL_MMIO; + } +#endif if (!ret) { /* We handled the access successfully in the kernel. */ if (!is_write) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6fa92a143..30426f6ad 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -474,6 +474,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
spin_lock(&kvm->mmu_lock); pgt = mmu->pgt; + if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; @@ -790,6 +791,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + write_fault = true; + prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; + } +#endif exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); vcpu->stat.mabt_exit_stat++; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 32bb26be8..0160ee8d6 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -16,6 +16,9 @@ #include <kvm/arm_psci.h> #include <kvm/arm_hypercalls.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif /* * This is an implementation of the Power State Coordination Interface * as described in ARM document number ARM DEN 0022A. @@ -78,6 +81,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + cvm_psci_complete(source_vcpu, vcpu); +#endif if (!vcpu->arch.power_off) { if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; @@ -133,7 +140,10 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
/* Ignore other bits of target affinity */ target_affinity &= target_affinity_mask; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return cvm_psci_vcpu_affinity_info(vcpu, target_affinity, lowest_affinity_level); +#endif /* * If one or more VCPU matching target affinity are running * then ON else OFF diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 51f4c5e85..bb177d58c 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -30,6 +30,9 @@ #include <asm/kvm_mmu.h> #include <asm/virt.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif /* Maximum phys_shift supported for any VM on this host */ static u32 kvm_ipa_limit;
@@ -199,6 +202,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { kfree(vcpu->arch.sve_state); +#ifdef CONFIG_CVM_HOST + kvm_destroy_tec(vcpu); +#endif }
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) @@ -433,7 +439,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) u32 parange, phys_shift; u8 lvls, pbha = 0xf;
+#ifdef CONFIG_CVM_HOST + if ((type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) && (!kvm_is_cvm(kvm))) +#else if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) +#endif return -EINVAL;
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); diff --git a/arch/arm64/kvm/tmi.c b/arch/arm64/kvm/tmi.c new file mode 100644 index 000000000..83adfc9f0 --- /dev/null +++ b/arch/arm64/kvm/tmi.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#include <linux/arm-smccc.h> +#include <asm/kvm_tmi.h> + +u64 tmi_version(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_VESION, &res); + return res.a1; +} + +u64 tmi_data_create(u64 data, u64 rd, u64 map_addr, u64 src, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_DATA_CREATE, data, rd, map_addr, src, level, &res); + return res.a1; +} + +u64 tmi_data_destroy(u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_DATA_DESTROY, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_cvm_activate(u64 rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_ACTIVATE, rd, &res); + return res.a1; +} + +u64 tmi_cvm_create(u64 rd, u64 params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_CREATE, rd, params_ptr, &res); + return res.a1; +} + +u64 tmi_cvm_destroy(u64 rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_CVM_DESTROY, rd, &res); + return res.a1; +} + +u64 tmi_tec_create(u64 tec, u64 rd, u64 mpidr, u64 params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_CREATE, tec, rd, mpidr, params_ptr, &res); + return res.a1; +} + +u64 tmi_tec_destroy(u64 tec) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_DESTROY, tec, &res); + return res.a1; +} + +u64 tmi_tec_enter(u64 tec, u64 run_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TEC_ENTER, tec, run_ptr, &res); + return res.a1; +} + +u64 tmi_ttt_create(u64 ttt, u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_CREATE, ttt, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_ttt_destroy(u64 ttt, u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_DESTROY, ttt, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_ttt_map_unprotected(u64 rd, u64 map_addr, u64 level, u64 ttte) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_MAP_UNPROTECTED, rd, map_addr, level, ttte, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_unprotected(u64 rd, u64 map_addr, u64 level, u64 ns) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_UNPROTECTED, rd, map_addr, level, ns, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_protected(u64 rd, u64 map_addr, u64 level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_PROTECTED, rd, map_addr, level, &res); + return res.a1; +} + +u64 tmi_psci_complete(u64 calling_tec, u64 target_tec) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_PSCI_COMPLETE, calling_tec, target_tec, &res); + return res.a1; +} + +u64 tmi_features(u64 index) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_FEATURES, index, &res); + return res.a1; +} + +u64 tmi_mem_alloc(u64 rd, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_MEM_ALLOC, rd, numa_id, tmm_mem_type, tmm_map_size, &res); + return res.a1; +} + +u64 tmi_mem_free(u64 pa, u64 numa_id, enum tmi_tmm_mem_type tmm_mem_type, + enum tmi_tmm_map_size tmm_map_size) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_MEM_FREE, pa, numa_id, tmm_mem_type, tmm_map_size, &res); + return res.a1; +} + +u64 tmi_ttt_map_range(u64 rd, u64 map_addr, u64 size, u64 cur_node, u64 target_node) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_MAP_RANGE, rd, map_addr, size, cur_node, target_node, &res); + return res.a1; +} + +u64 tmi_ttt_unmap_range(u64 rd, u64 map_addr, u64 size, u64 node_id) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(TMI_TMM_TTT_UNMAP_RANGE, rd, map_addr, size, node_id, &res); + return res.a1; +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 213afce81..2505dc544 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -10,6 +10,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_asm.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif + #include "vgic.h"
static bool group0_trap; @@ -674,7 +678,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; - kvm_vgic_global_state.can_emulate_gicv2 = true; +#ifdef CONFIG_CVM_HOST + if (!static_branch_unlikely(&kvm_cvm_is_available)) +#endif + kvm_vgic_global_state.can_emulate_gicv2 = true; ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device.\n"); @@ -735,7 +742,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + cpu_if->vgic_vmcr = + ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_exit.gicv3_vmcr; + return; + } +#endif if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 116aa91d5..7a52d7eb8 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -12,6 +12,10 @@
#include <asm/kvm_hyp.h>
+#ifdef CONFIG_CVM_HOST +#include <asm/kvm_tmi.h> +#endif + #include "vgic.h"
#define CREATE_TRACE_POINTS @@ -872,12 +876,44 @@ static inline bool can_access_vgic_from_kernel(void) return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); }
+#ifdef CONFIG_CVM_HOST +static inline void vgic_tmm_save_state(struct kvm_vcpu *vcpu) +{ + int i; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + cpu_if->vgic_lr[i] = tec_run->tec_exit.gicv3_lrs[i]; + tec_run->tec_entry.gicv3_lrs[i] = 0; + } +} + +static inline void vgic_tmm_restore_state(struct kvm_vcpu *vcpu) +{ + int i; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { + tec_run->tec_entry.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + tec_run->tec_exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } +} +#endif + static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); else - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + vgic_tmm_save_state(vcpu); + else +#endif + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + }
/* Sync back the hardware VGIC state into our emulation after a guest's run. */ @@ -907,7 +943,12 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); else - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + vgic_tmm_restore_state(vcpu); + else +#endif + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); }
/* Flush our emulation state into the GIC hardware before entering the guest. */ @@ -948,7 +989,10 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return; +#endif if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_load(vcpu); else @@ -959,7 +1003,10 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; - +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + return; +#endif if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_put(vcpu); else diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 413d6f9bc..18ccd16fc 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -117,4 +117,8 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, u32 timer_get_ctl(struct arch_timer_context *ctxt); u64 timer_get_cval(struct arch_timer_context *ctxt);
+#ifdef CONFIG_CVM_HOST +/* Needed for S-EL2 */ +void kvm_cvm_timers_update(struct kvm_vcpu *vcpu); +#endif #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 595c9da4f..7c04fbeeb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,6 +345,27 @@ struct kvm_vcpu { struct kvm_vcpu_arch arch; };
+#ifdef CONFIG_CVM_HOST +#define KVM_TYPE_CVM_BIT 8 +#define CVM_MAX_HALT_POLL_NS 100000 + +DECLARE_STATIC_KEY_FALSE(kvm_cvm_is_available); + +static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_cvm_is_available)) + return vcpu->arch.tec.tec_run; + + return false; +} + +static inline bool kvm_arm_cvm_type(unsigned long type) +{ + return type & (1UL << KVM_TYPE_CVM_BIT); +} + +#endif + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a1d8b1184..3332ee9ed 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1373,6 +1373,35 @@ struct kvm_master_dev_info { #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#ifdef CONFIG_CVM_HOST +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) + +#define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ +#define MAX_NUMA_NODE 8 +#define MAX_CPU_BIT_MAP 0 + +struct kvm_numa_node { + __u64 numa_id; + __u64 ipa_start; + __u64 ipa_size; + int64_t host_numa_node; + __u64 cpu_id[MAX_CPU_BIT_MAP]; +}; + +struct kvm_numa_info { + __u64 numa_cnt; + struct kvm_numa_node numa_nodes[MAX_NUMA_NODE]; +}; + +struct kvm_user_data { + __u64 loader_start; + __u64 initrd_start; + __u64 initrd_size; + __u64 ram_size; + struct kvm_numa_info numa_info; +}; +#endif + /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { __u64 user_addr; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9166ef044..ef9f6d9df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1089,7 +1089,12 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_arch_destroy_vm; }
- kvm->max_halt_poll_ns = halt_poll_ns; +#ifdef CONFIG_CVM_HOST + if (kvm_arm_cvm_type(type)) + kvm->max_halt_poll_ns = CVM_MAX_HALT_POLL_NS; + else +#endif + kvm->max_halt_poll_ns = halt_poll_ns;
r = kvm_arch_init_vm(kvm, type); if (r)
From: Jingxian He hejingxian@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CC0X CVE: N/A
Reference: N/A
--------------------------------
Enable pmu phys irq inject to Confidential VMs.
Signed-off-by: Jingxian He hejingxian@huawei.com --- arch/arm64/include/asm/kvm_tmi.h | 3 +++ arch/arm64/kvm/arm.c | 24 ++++++++++++++++++++++++ arch/arm64/kvm/pmu-emul.c | 10 ++++++++++ drivers/perf/arm_pmu.c | 17 +++++++++++++++++ include/linux/perf/arm_pmu.h | 3 +++ 5 files changed, 57 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_tmi.h b/arch/arm64/include/asm/kvm_tmi.h index 8534ccf3f..1bba7c7cd 100644 --- a/arch/arm64/include/asm/kvm_tmi.h +++ b/arch/arm64/include/asm/kvm_tmi.h @@ -4,6 +4,7 @@ */ #ifndef __TMM_TMI_H #define __TMM_TMI_H +#ifdef CONFIG_CVM_HOST #include <linux/kvm_host.h> #include <asm/kvm_asm.h> #include <asm/kvm_pgtable.h> @@ -144,6 +145,7 @@ struct tmi_tec_exit { uint64_t cntp_ctl; uint64_t cntp_cval; uint64_t imm; + uint64_t pmu_ovf_status; };
struct tmi_tec_run { @@ -372,3 +374,4 @@ int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, bool serror_pending, bool ext_dabt_pending);
#endif +#endif diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3c529d1a2..5372a53a6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -40,6 +40,7 @@ #include <asm/sections.h> #ifdef CONFIG_CVM_HOST #include <asm/kvm_tmi.h> +#include <linux/perf/arm_pmu.h> #endif
#include <kvm/arm_hypercalls.h> @@ -890,6 +891,18 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) xfer_to_guest_mode_work_pending(); }
+#ifdef CONFIG_CVM_HOST +static inline void update_pmu_phys_irq(struct kvm_vcpu *vcpu, bool *pmu_stopped) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + if (pmu->irq_level) { + *pmu_stopped = true; + arm_pmu_set_phys_irq(false); + } +} +#endif + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -934,6 +947,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { +#ifdef CONFIG_CVM_HOST + bool pmu_stopped = false; +#endif /* * Check conditions before entering the guest */ @@ -953,6 +969,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_disable();
kvm_pmu_flush_hwstate(vcpu); +#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) + update_pmu_phys_irq(vcpu, &pmu_stopped); +#endif
local_irq_disable();
@@ -1063,6 +1083,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } #endif preempt_enable(); +#ifdef CONFIG_CVM_HOST + if (pmu_stopped) + arm_pmu_set_phys_irq(true); +#endif
/* * The ARMv8 architecture doesn't give the hypervisor diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 9fdc76c6d..f5efc006f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -13,6 +13,7 @@ #include <asm/kvm_emulate.h> #include <kvm/arm_pmu.h> #include <kvm/arm_vgic.h> +#include <asm/kvm_tmi.h>
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); @@ -370,6 +371,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0;
+#ifdef CONFIG_CVM_HOST + if (vcpu_is_tec(vcpu)) { + struct tmi_tec_run *run = vcpu->arch.tec.tec_run; + + reg = run->tec_exit.pmu_ovf_status; + return reg; + } +#endif + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4ef8aee84..743f52d94 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -797,6 +797,23 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; }
+#ifdef CONFIG_CVM_HOST +void arm_pmu_set_phys_irq(bool enable) +{ + int cpu = get_cpu(); + struct arm_pmu *pmu = per_cpu(cpu_armpmu, cpu); + int irq; + + irq = armpmu_get_cpu_irq(pmu, cpu); + if (irq && !enable) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); + else if (irq && enable) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); + + put_cpu(); +} +#endif + #ifdef CONFIG_CPU_PM static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 6fd58c8f9..c7a35d321 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -189,6 +189,9 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif
+#ifdef CONFIG_CVM_HOST +void arm_pmu_set_phys_irq(bool enable); +#endif /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void);
From: Jingxian He hejingxian@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CC0X CVE: N/A
Reference: N/A
--------------------------------
Add bounce buffer feature for cvm guest os: 1) Cvm guest mapped memory is secure memory. 2) Qemu/kvm cannot access the secure memory. 3) Use bounce buffer as memory shared by cvm guest and qemu/kvm.
Signed-off-by: Jingxian He hejingxian@huawei.com --- arch/arm64/configs/defconfig | 1 + arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/include/asm/cvm_guest.h | 21 ++++++ arch/arm64/kvm/Kconfig | 8 +++ arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/cvm_guest.c | 91 ++++++++++++++++++++++++++ arch/arm64/mm/mmu.c | 11 ++++ arch/arm64/mm/pageattr.c | 9 ++- include/linux/swiotlb.h | 13 ++++ kernel/dma/direct.c | 39 +++++++++++ kernel/dma/swiotlb.c | 86 +++++++++++++++++++++++- 11 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/cvm_guest.h create mode 100644 arch/arm64/kvm/cvm_guest.c
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ace2bf4ad..0ba4538d9 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -111,6 +111,7 @@ CONFIG_ACPI_APEI_EINJ=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y CONFIG_CVM_HOST=y +CONFIG_CVM_GUEST=y CONFIG_ARM64_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index e298ca7e5..25a5fa5c7 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -763,6 +763,7 @@ CONFIG_IRQ_BYPASS_MANAGER=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y CONFIG_CVM_HOST=y +CONFIG_CVM_GUEST=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQFD=y CONFIG_HAVE_KVM_IRQ_ROUTING=y diff --git a/arch/arm64/include/asm/cvm_guest.h b/arch/arm64/include/asm/cvm_guest.h new file mode 100644 index 000000000..00c39f7bf --- /dev/null +++ b/arch/arm64/include/asm/cvm_guest.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#ifndef __ASM_CVM_GUEST_H +#define __ASM_CVM_GUEST_H + +#ifdef CONFIG_CVM_GUEST +static inline bool cvm_mem_encrypt_active(void) +{ + return false; +} + +int set_cvm_memory_encrypted(unsigned long addr, int numpages); + +int set_cvm_memory_decrypted(unsigned long addr, int numpages); + +bool is_cvm_world(void); + +#endif +#endif diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 7c24a4d33..d21e27f74 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -54,6 +54,14 @@ config CVM_HOST
If unsure, say N.
+config CVM_GUEST + bool "CVM guest enable" + depends on KVM && SWIOTLB && ARM64 + help + Support CVM guest based on S-EL2 + + If unsure, say N. + if KVM
source "virt/kvm/Kconfig" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5fd1b8be1..8bb4d4ef1 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -30,3 +30,4 @@ kvm-$(CONFIG_CVM_HOST) += cvm_exit.o
kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ +obj-$(CONFIG_CVM_GUEST) += cvm_guest.o diff --git a/arch/arm64/kvm/cvm_guest.c b/arch/arm64/kvm/cvm_guest.c new file mode 100644 index 000000000..0f6c7e0ec --- /dev/null +++ b/arch/arm64/kvm/cvm_guest.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, The Linux Foundation. All rights reserved. + */ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> + +#define CVM_PTE_NS_MASK 0x20 +static bool cvm_guest_enable __read_mostly; + +static int __init setup_cvm_guest(char *str) +{ + int ret; + unsigned int val; + + if (!str) + return 0; + + cvm_guest_enable = false; + ret = kstrtouint(str, 10, &val); + if (ret) { + pr_warn("Unable to parse cvm_guest.\n"); + } else { + if (val) + cvm_guest_enable = true; + } + return ret; +} +early_param("cvm_guest", setup_cvm_guest); + +bool is_cvm_world(void) +{ + return cvm_guest_enable; +} + +static int change_page_range_cvm(pte_t *ptep, unsigned long addr, void *data) +{ + bool encrypt = (bool)data; + pte_t pte = READ_ONCE(*ptep); + + if (encrypt) { + if (!(pte.pte & CVM_PTE_NS_MASK)) + return 0; + pte.pte = pte.pte & (~CVM_PTE_NS_MASK); + } else { + if (pte.pte & CVM_PTE_NS_MASK) + return 0; + /* Set NS BIT */ + pte.pte = pte.pte | CVM_PTE_NS_MASK; + } + set_pte(ptep, pte); + + return 0; +} + +static int __change_memory_common_cvm(unsigned long start, unsigned long size, bool encrypt) +{ + int ret; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range_cvm, (void *)encrypt); + flush_tlb_kernel_range(start, start + size); + return ret; +} + +static int __set_memory_encrypted(unsigned long addr, + int numpages, + bool encrypt) +{ + if (!is_cvm_world()) + return 0; + + WARN_ON(!__is_lm_address(addr)); + return __change_memory_common_cvm(addr, PAGE_SIZE * numpages, encrypt); +} + +int set_cvm_memory_encrypted(unsigned long addr, int numpages) +{ + return __set_memory_encrypted(addr, numpages, true); +} + +int set_cvm_memory_decrypted(unsigned long addr, int numpages) +{ + return __set_memory_encrypted(addr, numpages, false); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 804d5197c..b6eb82f6d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -38,6 +38,9 @@ #include <asm/ptdump.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> +#ifdef CONFIG_CVM_GUEST +#include <asm/cvm_guest.h> +#endif
#define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -494,7 +497,11 @@ static void __init map_mem(pgd_t *pgdp) int flags = 0, eflags = 0; u64 i;
+#ifdef CONFIG_CVM_GUEST + if (rodata_full || debug_pagealloc_enabled() || is_cvm_world()) +#else if (rodata_full || debug_pagealloc_enabled()) +#endif flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
#ifdef CONFIG_KFENCE @@ -1514,7 +1521,11 @@ int arch_add_memory(int nid, u64 start, u64 size, }
+#ifdef CONFIG_CVM_GUEST + if (rodata_full || debug_pagealloc_enabled() || is_cvm_world()) +#else if (rodata_full || debug_pagealloc_enabled()) +#endif flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0bc12dbf2..fe0650386 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -11,6 +11,9 @@ #include <asm/cacheflush.h> #include <asm/set_memory.h> #include <asm/tlbflush.h> +#ifdef CONFIG_CVM_GUEST +#include <asm/cvm_guest.h> +#endif
struct page_change_data { pgprot_t set_mask; @@ -188,7 +191,11 @@ int set_direct_map_default_noflush(struct page *page)
void __kernel_map_pages(struct page *page, int numpages, int enable) { - if (!debug_pagealloc_enabled() && !rodata_full) +#ifdef CONFIG_CVM_GUEST + if ((!debug_pagealloc_enabled() && !rodata_full) || is_cvm_world()) +#else + if ((!debug_pagealloc_enabled() && !rodata_full)) +#endif return;
set_memory_valid((unsigned long)page_address(page), numpages, enable); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5d2dbe7e0..e5403f5df 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -6,6 +6,9 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/limits.h> +#ifdef CONFIG_CVM_GUEST +#include <asm/cvm_guest.h> +#endif
struct device; struct page; @@ -75,6 +78,16 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr) return paddr >= io_tlb_start && paddr < io_tlb_end; }
+#ifdef CONFIG_CVM_GUEST +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return is_cvm_world(); +} + +struct page *swiotlb_alloc(struct device *dev, size_t size); +bool swiotlb_free(struct device *dev, struct page *page, size_t size); +#endif + void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 2922250f9..075e85cfb 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,6 +75,20 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); }
+#ifdef CONFIG_CVM_GUEST +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size) +{ + struct page *page = swiotlb_alloc(dev, size); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + swiotlb_free(dev, page, size); + return NULL; + } + + return page; +} +#endif + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -84,6 +98,11 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
WARN_ON_ONCE(!PAGE_ALIGNED(size));
+#ifdef CONFIG_CVM_GUEST + if (is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_swiotlb(dev, size); +#endif + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); @@ -237,6 +256,11 @@ void *dma_direct_alloc(struct device *dev, size_t size, return NULL; } out_free_pages: +#ifdef CONFIG_CVM_GUEST + if (is_swiotlb_for_alloc(dev) && + swiotlb_free(dev, page, size)) + return NULL; +#endif dma_free_contiguous(dev, page, size); return NULL; } @@ -271,6 +295,11 @@ void dma_direct_free(struct device *dev, size_t size, else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size);
+#ifdef CONFIG_CVM_GUEST + if (is_swiotlb_for_alloc(dev) && + swiotlb_free(dev, dma_direct_to_page(dev, dma_addr), size)) + return; +#endif dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); }
@@ -307,6 +336,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; out_free_pages: +#ifdef CONFIG_CVM_GUEST + if (is_swiotlb_for_alloc(dev) && + swiotlb_free(dev, page, size)) + return NULL; +#endif dma_free_contiguous(dev, page, size); return NULL; } @@ -325,6 +359,11 @@ void dma_direct_free_pages(struct device *dev, size_t size, if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)vaddr, PFN_UP(size));
+#ifdef CONFIG_CVM_GUEST + if (is_swiotlb_for_alloc(dev) && + swiotlb_free(dev, page, size)) + return; +#endif dma_free_contiguous(dev, page, size); }
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d897d1613..579d3cb50 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -47,6 +47,10 @@ #include <linux/memblock.h> #include <linux/iommu-helper.h>
+#ifdef CONFIG_CVM_GUEST +#include <asm/cvm_guest.h> +#endif + #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h>
@@ -194,12 +198,20 @@ void __init swiotlb_update_mem_attributes(void) void *vaddr; unsigned long bytes;
+#ifdef CONFIG_CVM_GUEST + if (!is_cvm_world() && (no_iotlb_memory || late_alloc)) +#else if (no_iotlb_memory || late_alloc) +#endif return;
vaddr = phys_to_virt(io_tlb_start); bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + set_cvm_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); +#endif memset(vaddr, 0, bytes); }
@@ -265,8 +277,13 @@ swiotlb_init(int verbose)
/* Get IO TLB memory from the low pages */ vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) { +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + swiotlb_update_mem_attributes(); +#endif return; + }
if (io_tlb_start) { memblock_free_early(io_tlb_start, @@ -772,3 +789,70 @@ static int __init swiotlb_create_debugfs(void) late_initcall(swiotlb_create_debugfs);
#endif + +#ifdef CONFIG_CVM_GUEST +struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + phys_addr_t tlb_addr; + int index; + + index = find_slots(dev, 0, size); + if (index == -1) + return NULL; + + tlb_addr = slot_addr(io_tlb_start, index); + return pfn_to_page(PFN_DOWN(tlb_addr)); +} + +static void swiotlb_release_slots(struct device *hwdev, phys_addr_t tlb_addr, + size_t alloc_size) +{ + unsigned long flags; + unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); + int i, count, nslots = nr_slots(alloc_size + offset); + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = io_tlb_list[index + nslots]; + else + count = 0; + + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + io_tlb_used -= nslots; + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +bool swiotlb_free(struct device *dev, struct page *page, size_t size) +{ + phys_addr_t tlb_addr = page_to_phys(page); + + if (!is_swiotlb_buffer(tlb_addr)) + return false; + + swiotlb_release_slots(dev, tlb_addr, size); + return true; +} +#endif
From: Jingxian He hejingxian@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CC0X CVE: N/A
Reference: N/A
--------------------------------
Add lpi support for cvm guest os: The gic-its driver mapped memory must shared with qemu/kvm. The cvm guest gic-its driver alloc memory from bounce buffer to share with qemu/kvm.
Signed-off-by: Jingxian He hejingxian@huawei.com Signed-off-by: wuweinan wuweinan@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 229 ++++++++++++++++++++++++++++--- include/uapi/linux/kvm.h | 4 +- 2 files changed, 209 insertions(+), 24 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 632444f86..806359f33 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -29,6 +29,10 @@ #include <linux/percpu.h> #include <linux/slab.h> #include <linux/syscore_ops.h> +#ifdef CONFIG_CVM_GUEST +#include <linux/swiotlb.h> +#include <asm/cvm_guest.h> +#endif
#include <linux/irqchip.h> #include <linux/irqchip/arm-gic-v3.h> @@ -313,6 +317,91 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
static DEFINE_IDA(its_vpeid_ida);
+#ifdef CONFIG_CVM_GUEST +static struct device cvm_alloc_device; +static LIST_HEAD(cvm_its_nodes); +static raw_spinlock_t cvm_its_lock; + +struct its_device_order { + struct its_device *dev; + struct list_head entry; + int itt_order; +}; + +static inline struct page *its_alloc_shared_pages_node(int node, gfp_t gfp, + unsigned int order) +{ + return swiotlb_alloc(&cvm_alloc_device, (1 << order) * PAGE_SIZE); +} + +static inline struct page *its_alloc_shared_pages(gfp_t gfp, unsigned int order) +{ + return its_alloc_shared_pages_node(NUMA_NO_NODE, gfp, order); +} + +static void its_free_shared_pages(void *addr, int order) +{ + if (order < 0) + return; + + swiotlb_free(&cvm_alloc_device, (struct page *)addr, (1 << order) * PAGE_SIZE); +} + +static int add_its_device_order(struct its_device *dev, int itt_order) +{ + struct its_device_order *new; + unsigned long flags; + + new = kmalloc(sizeof(struct its_device_order), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->dev = dev; + new->itt_order = itt_order; + raw_spin_lock_irqsave(&cvm_its_lock, flags); + list_add_tail(&new->entry, &cvm_its_nodes); + raw_spin_unlock_irqrestore(&cvm_its_lock, flags); + return 0; +} + +/* get its device order and then free its device order */ +static int get_its_device_order(struct its_device *dev) +{ + struct its_device_order *pos, *tmp; + unsigned long flags; + int itt_order = -1; + + raw_spin_lock_irqsave(&cvm_its_lock, flags); + list_for_each_entry_safe(pos, tmp, &cvm_its_nodes, entry) { + if (pos->dev == dev) { + itt_order = pos->itt_order; + list_del(&pos->entry); + kfree(pos); + goto found; + } + } +found: + raw_spin_unlock_irqrestore(&cvm_its_lock, flags); + return itt_order; +} + +static void *its_alloc_shared_page_address(struct its_device *dev, + struct its_node *its, int sz) +{ + struct page *page; + int itt_order; + + itt_order = get_order(sz); + if (add_its_device_order(dev, itt_order)) + return NULL; + + page = its_alloc_shared_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + itt_order); + if (!page) + return NULL; + return (void *)page_address(page); +} +#endif + static void free_devid_to_rsv_pools(struct its_device *its_dev) { struct rsv_devid_pool *pool = its_dev->devid_pool; @@ -2447,7 +2536,13 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) { struct page *prop_page;
- prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + prop_page = its_alloc_shared_pages(gfp_flags, + get_order(LPI_PROPBASE_SZ)); + else +#endif + prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); if (!prop_page) return NULL;
@@ -2458,8 +2553,14 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
static void its_free_prop_table(struct page *prop_page) { - free_pages((unsigned long)page_address(prop_page), - get_order(LPI_PROPBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); + else +#endif + free_pages((unsigned long)page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); }
static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) @@ -2581,7 +2682,13 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, order = get_order(GITS_BASER_PAGES_MAX * psz); }
- page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, order); + else +#endif + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); if (!page) return -ENOMEM;
@@ -2594,7 +2701,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, /* 52bit PA is supported only when PageSize=64K */ if (psz != SZ_64K) { pr_err("ITS: no 52bit PA support when psz=%d\n", psz); - free_pages((unsigned long)base, order); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(base, order); + else +#endif + free_pages((unsigned long)base, order); return -ENXIO; }
@@ -2648,7 +2760,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n", &its->phys_base, its_base_type_string[type], val, tmp); - free_pages((unsigned long)base, order); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(base, order); + else +#endif + free_pages((unsigned long)base, order); return -ENXIO; }
@@ -2787,8 +2904,14 @@ static void its_free_tables(struct its_node *its)
for (i = 0; i < GITS_BASER_NR_REGS; i++) { if (its->tables[i].base) { - free_pages((unsigned long)its->tables[i].base, - its->tables[i].order); +#ifdef CONFIG_CVM_GUEST + if (!is_cvm_world()) + its_free_shared_pages(its->tables[i].base, + its->tables[i].order); + else +#endif + free_pages((unsigned long)its->tables[i].base, + its->tables[i].order); its->tables[i].base = NULL; } } @@ -3051,7 +3174,13 @@ static bool allocate_vpe_l2_table(int cpu, u32 id)
/* Allocate memory for 2nd level table */ if (!table[idx]) { - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages(GFP_KERNEL | __GFP_ZERO, + get_order(psz)); + else +#endif + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); if (!page) return false;
@@ -3170,7 +3299,13 @@ static int allocate_vpe_l1_table(void)
pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n", np, npg, psz, epp, esz); - page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages(GFP_ATOMIC | __GFP_ZERO, + get_order(np * PAGE_SIZE)); + else +#endif + page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE)); if (!page) return -ENOMEM;
@@ -3218,8 +3353,14 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags) { struct page *pend_page;
- pend_page = alloc_pages(gfp_flags | __GFP_ZERO, - get_order(LPI_PENDBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + pend_page = its_alloc_shared_pages(gfp_flags | __GFP_ZERO, + get_order(LPI_PENDBASE_SZ)); + else +#endif + pend_page = alloc_pages(gfp_flags | __GFP_ZERO, + get_order(LPI_PENDBASE_SZ)); if (!pend_page) return NULL;
@@ -3231,7 +3372,13 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
static void its_free_pending_table(struct page *pt) { - free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(page_address(pt), + get_order(LPI_PENDBASE_SZ)); + else +#endif + free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ)); }
/* @@ -3768,8 +3915,15 @@ static bool its_alloc_table_entry(struct its_node *its,
/* Allocate memory for 2nd level table */ if (!table[idx]) { - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - get_order(baser->psz)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, + get_order(baser->psz)); + else +#endif + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(baser->psz)); if (!page) return false;
@@ -3872,7 +4026,12 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; - itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + itt = its_alloc_shared_page_address(dev, its, sz); + else +#endif + itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); if (alloc_lpis) { lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) @@ -3886,7 +4045,12 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { kfree(dev); - kfree(itt); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(itt, get_order(sz)); + else +#endif + kfree(itt); kfree(lpi_map); kfree(col_map); return NULL; @@ -3923,7 +4087,12 @@ static void its_free_device(struct its_device *its_dev) list_del(&its_dev->entry); raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); kfree(its_dev->event_map.col_map); - kfree(its_dev->itt); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(its_dev->itt, get_its_device_order(its_dev)); + else +#endif + kfree(its_dev->itt);
if (its_dev->is_vdev) { WARN_ON(!rsv_devid_pool_cap); @@ -5594,8 +5763,15 @@ static int __init its_probe_one(struct resource *res,
its->numa_node = numa_node;
- page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - get_order(ITS_CMD_QUEUE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + page = its_alloc_shared_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); + else +#endif + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); if (!page) { err = -ENOMEM; goto out_unmap_sgir; @@ -5661,7 +5837,12 @@ static int __init its_probe_one(struct resource *res, out_free_tables: its_free_tables(its); out_free_cmd: - free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); +#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) + its_free_shared_pages(its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); + else +#endif + free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); out_unmap_sgir: if (its->sgir_base) iounmap(its->sgir_base); @@ -5957,6 +6138,12 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, bool has_vtimer_irqbypass = false; int err;
+#ifdef CONFIG_CVM_GUEST + if (is_cvm_world()) { + device_initialize(&cvm_alloc_device); + raw_spin_lock_init(&cvm_its_lock); + } +#endif gic_rdists = rdists;
its_parent = parent_domain; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3332ee9ed..08bfcdb28 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1373,12 +1373,11 @@ struct kvm_master_dev_info { #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
-#ifdef CONFIG_CVM_HOST #define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data)
#define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ #define MAX_NUMA_NODE 8 -#define MAX_CPU_BIT_MAP 0 +#define MAX_CPU_BIT_MAP 4
struct kvm_numa_node { __u64 numa_id; @@ -1400,7 +1399,6 @@ struct kvm_user_data { __u64 ram_size; struct kvm_numa_info numa_info; }; -#endif
/* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping {
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/5851 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/O...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/5851 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/O...