
From: Sebastian Ott <sebott@redhat.com>

mainline inclusion
from mainline-v6.15-rc1
commit 3adaee78306148da5df5d3d655e9a90bf18e9513
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IBN3WI

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3adaee78306148da5df5d3d655e9a90bf18e9513

----------------------------------------------------------------------

KVM's treatment of the ID registers that describe the implementation
(MIDR, REVIDR, and AIDR) is interesting, to say the least. On the
userspace-facing end of it, KVM presents the values of the boot CPU on
all vCPUs and treats them as invariant. On the guest side of things KVM
presents the hardware values of the local CPU, which can change during
CPU migration in a big-little system.

While one may call this fragile, there is at least some degree of
predictability around it. For example, if a VMM wanted to present
big-little to a guest, it could affine vCPUs accordingly to the correct
clusters.

All of this makes a giant mess out of adding support for making these
implementation ID registers writable. Avoid breaking the rather subtle
ABI around the old way of doing things by requiring opt-in from
userspace to make the registers writable.

When the cap is enabled, allow userspace to set MIDR, REVIDR, and AIDR
to any non-reserved value and present those values consistently across
all vCPUs.

Signed-off-by: Sebastian Ott <sebott@redhat.com> [oliver: changelog, capability] Link: https://lore.kernel.org/r/20250225005401.679536-5-oliver.upton@linux.dev Signed-off-by: Oliver Upton <oliver.upton@linux.dev> --- Documentation/virt/kvm/api.rst | 18 +++++++++ arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/kvm/arm.c | 9 +++++ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 23 ++++++++++- arch/arm64/kvm/sys_regs.c | 47 +++++++++++++++++++--- include/uapi/linux/kvm.h | 3 ++ 6 files changed, 96 insertions(+), 7 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a6c73443c88d..470b7da2ae41 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7890,6 +7890,24 @@ This capability is aimed to mitigate the threat that malicious VMs can cause CPU stuck (due to event windows don't open up) and make the CPU unavailable to host or other VMs. +7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS +------------------------------------- + +:Architectures: arm64 +:Target: VM +:Parameters: None +:Returns: 0 on success, -EBUSY if vCPUs have been created before enabling this + capability. + +This capability changes the behavior of the registers that identify a PE +implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1. +By default, these registers are visible to userspace but treated as invariant. + +When this capability is enabled, KVM allows userspace to change the +aforementioned registers before the first KVM_RUN. These registers are VM +scoped, meaning that the same set of values are presented on all vCPUs in a +given VM. 
+ 7.38 KVM_CAP_ARM_RME -------------------- diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2f6cb0dff6d0..15443533427f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -249,6 +249,9 @@ struct kvm_arch { #define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 /* Initial ID reg values loaded */ #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8 + +/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */ +#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10 unsigned long flags; /* VM-wide vCPU feature set diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3b168b1a28ce..8c6e7eac084f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -248,6 +248,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); break; + case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS: + mutex_lock(&kvm->lock); + if (!kvm->created_vcpus) { + r = 0; + set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags); + } + mutex_unlock(&kvm->lock); + break; case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: new_cap = cap->args[0]; @@ -436,6 +444,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_INJECT_EXT_DABT: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS: case KVM_CAP_ARM_SYSTEM_SUSPEND: case KVM_CAP_IRQFD_RESAMPLE: r = 1; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 627527e03410..bca5900c2f8f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -17,9 +17,30 @@ #include <asm/kvm_mmu.h> #include <asm/mpam.h> +static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return vcpu; +} + +static inline bool ctxt_is_guest(struct 
kvm_cpu_context *ctxt) +{ + return host_data_ptr(host_ctxt) != ctxt; +} + static inline u64 ctxt_midr_el1(struct kvm_cpu_context *ctxt) { - return read_cpuid_id(); + struct kvm *kvm = kern_hyp_va(ctxt_to_vcpu(ctxt)->kvm); + + if (!(ctxt_is_guest(ctxt) && + test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags))) + return read_cpuid_id(); + + return kvm_read_vm_id_reg(kvm, SYS_MIDR_EL1); } static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6ec26fd32594..b7881ed8ade0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2179,6 +2179,17 @@ static bool access_imp_id_reg(struct kvm_vcpu *vcpu, if (p->is_write) return write_to_read_only(vcpu, p, r); + /* + * Return the VM-scoped implementation ID register values if userspace + * has made them writable. + */ + if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &vcpu->kvm->arch.flags)) + return access_id_reg(vcpu, p, r); + + /* + * Otherwise, fall back to the old behavior of returning the value of + * the current CPU. + */ switch (reg_to_encoding(r)) { case SYS_REVIDR_EL1: p->regval = read_sysreg(revidr_el1); @@ -2222,19 +2233,43 @@ static u64 reset_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 val) { + struct kvm *kvm = vcpu->kvm; u64 expected; + guard(mutex)(&kvm->arch.config_lock); + expected = read_id_reg(vcpu, r); + if (expected == val) + return 0; + + if (!test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags)) + return -EINVAL; - return (expected == val) ? 0 : -EINVAL; + /* + * Once the VM has started the ID registers are immutable. Reject the + * write if userspace tries to change it. + */ + if (kvm_vm_has_ran_once(kvm)) + return -EBUSY; + + /* + * Any value is allowed for the implementation ID registers so long as + * it is within the writable mask. 
+ */ + if ((val & r->val) != val) + return -EINVAL; + + kvm_set_vm_id_reg(kvm, reg_to_encoding(r), val); + return 0; } -#define IMPLEMENTATION_ID(reg) { \ +#define IMPLEMENTATION_ID(reg, mask) { \ SYS_DESC(SYS_##reg), \ .access = access_imp_id_reg, \ .get_user = get_id_reg, \ .set_user = set_imp_id_reg, \ .reset = reset_imp_id_reg, \ + .val = mask, \ } /* @@ -2345,15 +2380,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi }, { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 }, - - IMPLEMENTATION_ID(MIDR_EL1), + IMPLEMENTATION_ID(MIDR_EL1, GENMASK_ULL(31, 0)), #ifdef CONFIG_ARM64_HISI_IPIV { SYS_DESC(SYS_MPIDR_EL1), .reset = reset_mpidr, .reg = MPIDR_EL1, .set_user = set_mpidr}, #else { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, #endif - IMPLEMENTATION_ID(REVIDR_EL1), + IMPLEMENTATION_ID(REVIDR_EL1, GENMASK_ULL(63, 0)), + /* * ID regs: all ID_SANITISED() entries here must have corresponding * entries in arm64_ftr_regs[]. @@ -2606,7 +2641,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, - IMPLEMENTATION_ID(AIDR_EL1), + IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)), { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, ID_WRITABLE(CTR_EL0, CTR_EL0_DIC_MASK | CTR_EL0_IDC_MASK | diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 72007f466c1c..3178fbd16892 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1218,6 +1218,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 + #define KVM_CAP_ARM_RME 300 #define KVM_CAP_SEV_ES_GHCB 500 -- 2.33.0