From: James Morse james.morse@arm.com
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8T2RT
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/morse/linux.git/log/?h=mpam/...
---------------------------
Currently KVM only allows writeable ID registers to be downgraded in the 'safe' direction, as determined by the cpufeature 'lower safe' flags.
commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits in ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to guests, but didn't add trap handling. A previous patch supplied the missing trap handling.
Existing VMs that have the MPAM field of AA64PFR0_EL1 need to be migratable, but there is little point enabling the MPAM CPU interface on new VMs until there is something a guest can do with it.
Clear the MPAM field from the guest's AA64PFR0_EL1 by default, but allow user-space to set it again if the host supports MPAM. Add a helper to return the maximum permitted value for an ID register. For most this is the reset value. To allow the MPAM field to be written as supported, check if the host sanitised value is '1' and upgrade the reset value.
Finally, change the trap handling to inject an undef if MPAM was not advertised to the guest.
Full support will depend on an psuedo-device being created that describes the virt->phys PARTID mapping the VMM expects. Migration would be expected to fail if this psuedo-device can't be created on the remote end. This ID bit isn't needed to block migration.
Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Zeng Heng zengheng4@huawei.com --- arch/arm64/kvm/sys_regs.c | 74 +++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0a6e5386107e..9424fa7351bf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -411,21 +411,29 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return true; }
-static bool workaround_bad_mpam_abi(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool trap_mpam(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) { + u64 aa64pfr0_el1 = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1); + /* - * The ID register can't be removed without breaking migration, - * but MPAMIDR_EL1 can advertise all-zeroes, indicating there are zero - * PARTID/PMG supported by the CPU, allowing the other two trapped - * registers (MPAM1_EL1 and MPAM0_EL1) to be treated as RAZ/WI. + * What did we expose to the guest? + * Earlier guests may have seen the ID bits, which can't be removed + * without breaking migration, but MPAMIDR_EL1 can advertise all-zeroes, + * indicating there are zero PARTID/PMG supported by the CPU, allowing + * the other two trapped registers (MPAM1_EL1 and MPAM0_EL1) to be + * treated as RAZ/WI. * Emulating MPAM1_EL1 as RAZ/WI means the guest sees the MPAMEN bit * as clear, and realises MPAM isn't usable on this CPU. */ - p->regval = 0; + if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, aa64pfr0_el1)) { + p->regval = 0; + return true; + }
- return true; + kvm_inject_undefined(vcpu); + return false; }
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, @@ -1245,6 +1253,36 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, return arm64_ftr_safe_value(&kvm_ftr, new, cur); }
+static u64 kvm_arm64_ftr_max(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + u64 pfr0, val = rd->reset(vcpu, rd); + u32 field, id = reg_to_encoding(rd); + + /* + * Some values may reset to a lower value than can be supported, + * get the maximum feature value. + */ + switch (id) { + case SYS_ID_AA64PFR0_EL1: + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + /* + * MPAM resets to 0, but migration of MPAM=1 guests is needed. + * See trap_mpam() for more. + */ + field = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT); + if (field == ID_AA64PFR0_EL1_MPAM_1) { + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, ID_AA64PFR0_EL1_MPAM_1); + } + + break; + } + + return val; +} + /** * arm64_check_features() - Check if a feature register value constitutes * a subset of features indicated by the idreg's KVM sanitised limit. @@ -1265,8 +1303,7 @@ static int arm64_check_features(struct kvm_vcpu *vcpu, const struct arm64_ftr_bits *ftrp = NULL; u32 id = reg_to_encoding(rd); u64 writable_mask = rd->val; - u64 limit = rd->reset(vcpu, rd); - u64 mask = 0; + u64 limit, mask = 0;
/* * Hidden and unallocated ID registers may not have a corresponding @@ -1280,6 +1317,7 @@ static int arm64_check_features(struct kvm_vcpu *vcpu, if (!ftr_reg) return -EINVAL;
+ limit = kvm_arm64_ftr_max(vcpu, rd); ftrp = ftr_reg->ftr_bits;
for (; ftrp && ftrp->width; ftrp++) { @@ -1483,6 +1521,14 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
val &= ~ID_AA64PFR0_EL1_AMU_MASK;
+ /* + * MPAM is disabled by default as KVM also needs a set of PARTID to + * program the MPAMVPMx_EL2 PARTID remapping registers with. But some + * older kernels let the guest see the ID bit. Turning it on causes + * the registers to be emulated as RAZ/WI. See trap_mpam() for more. + */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + return val; }
@@ -2147,11 +2193,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_LOREA_EL1), trap_loregion }, { SYS_DESC(SYS_LORN_EL1), trap_loregion }, { SYS_DESC(SYS_LORC_EL1), trap_loregion }, - { SYS_DESC(SYS_MPAMIDR_EL1), workaround_bad_mpam_abi }, + { SYS_DESC(SYS_MPAMIDR_EL1), trap_mpam }, { SYS_DESC(SYS_LORID_EL1), trap_loregion },
- { SYS_DESC(SYS_MPAM1_EL1), workaround_bad_mpam_abi }, - { SYS_DESC(SYS_MPAM0_EL1), workaround_bad_mpam_abi }, + { SYS_DESC(SYS_MPAM1_EL1), trap_mpam }, + { SYS_DESC(SYS_MPAM0_EL1), trap_mpam }, { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },