From: Mark Brown broonie@kernel.org
mainline inclusion from mainline-v5.19-rc1 commit b42990d3bf77cc29d7c33e21518c1f806dae6b21 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5ITJT CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------------
The vector lengths used for SME are controlled through a similar set of registers to those for SVE and enumerated using a similar algorithm with some slight differences due to the fact that unlike SVE there are no restrictions on which combinations of vector lengths can be supported nor any mandatory vector lengths which must be implemented. Add a new vector type and implement support for enumerating it.
One slightly awkward feature is that we need to read the current vector length using a different instruction (or enter streaming mode which would have the same issue and be higher cost). Rather than add an ops structure we add special cases directly in the otherwise generic vec_probe_vqs() function, this is a bit inelegant but it's the only place where this is an issue.
Signed-off-by: Mark Brown broonie@kernel.org Reviewed-by: Catalin Marinas catalin.marinas@arm.com Link: https://lore.kernel.org/r/20220419112247.711548-10-broonie@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com --- arch/arm64/include/asm/cpu.h | 3 + arch/arm64/include/asm/cpufeature.h | 7 ++ arch/arm64/include/asm/fpsimd.h | 26 ++++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/cpufeature.c | 47 +++++++++++ arch/arm64/kernel/cpuinfo.c | 4 + arch/arm64/kernel/entry-fpsimd.S | 9 ++ arch/arm64/kernel/fpsimd.c | 123 +++++++++++++++++++++++++++- 8 files changed, 218 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 3479fbaeeb73..d05306f3f5e9 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -59,6 +59,9 @@ struct cpuinfo_arm64 {
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ u64 reg_zcr; + + /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */ + u64 reg_smcr; };
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 774f2fb13b07..600fc40b313f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -599,6 +599,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) return val > 0; }
+static inline bool id_aa64pfr1_sme(u64 pfr1) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT); + + return val > 0; +} + void __init setup_cpu_features(void); void check_local_cpu_capabilities(void);
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index dbf232891b4d..6cb4b10a2c93 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -79,6 +79,7 @@ extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void); +extern u64 read_smcr_features(void);
/* * Helpers to translate bit indices in sve_vq_map to VQ values (and @@ -175,6 +176,12 @@ static inline void write_vl(enum vec_type type, u64 val) tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; write_sysreg_s(tmp | val, SYS_ZCR_EL1); break; +#endif +#ifdef CONFIG_ARM64_SME + case ARM64_VEC_SME: + tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_SMCR_EL1); + break; #endif default: WARN_ON_ONCE(1); @@ -266,12 +273,31 @@ static inline void sme_smstop(void) asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr")); }
+extern void __init sme_setup(void); + +static inline int sme_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SME); +} + +static inline int sme_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SME); +} + +extern unsigned int sme_get_vl(void); + #else
static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { }
+static inline void sme_setup(void) { } +static inline unsigned int sme_get_vl(void) { return 0; } +static inline int sme_max_vl(void) { return 0; } +static inline int sme_max_virtualisable_vl(void) { return 0; } + #endif /* ! CONFIG_ARM64_SME */
/* For use by EFI runtime services calls only */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 39fe01aaf76d..a121b4976d10 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -115,6 +115,7 @@ struct debug_info {
enum vec_type { ARM64_VEC_SVE = 0, + ARM64_VEC_SME, ARM64_VEC_MAX, };
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5bd971fea36f..b443de8bcf70 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -545,6 +545,12 @@ static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_END, };
+static const struct arm64_ftr_bits ftr_smcr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_SIZE, 0), /* LEN */ + ARM64_FTR_END, +}; + /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of @@ -634,6 +640,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
/* Op1 = 3, CRn = 0, CRm = 0 */ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, @@ -898,6 +905,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) vec_init_vq_map(ARM64_VEC_SVE); }
+ if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr); + if (IS_ENABLED(CONFIG_ARM64_SME)) + vec_init_vq_map(ARM64_VEC_SME); + } + /* * Initialize the indirect array of CPU hwcaps capabilities pointers * before we handle the boot CPU below. @@ -1113,6 +1126,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, + info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -1123,6 +1139,16 @@ void update_cpu_features(int cpu, vec_update_vq_map(ARM64_VEC_SVE); }
+ if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { + taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu, + info->reg_smcr, boot->reg_smcr); + + /* Probe vector lengths, unless we already gave up on SME */ + if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) && + !system_capabilities_finalized()) + vec_update_vq_map(ARM64_VEC_SME); + } + /* * This relies on a sanitised view of the AArch64 ID registers * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. @@ -2764,6 +2790,23 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ }
+static void verify_sme_features(void) +{ + u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + u64 smcr = read_smcr_features(); + + unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK; + unsigned int len = smcr & SMCR_ELx_LEN_MASK; + + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) { + pr_crit("CPU%d: SME: vector length support mismatch\n", + smp_processor_id()); + cpu_die_early(); + } + + /* Add checks on other SMCR bits here if necessary */ +} + static void verify_hyp_capabilities(void) { u64 safe_mmfr1, mmfr0, mmfr1; @@ -2820,6 +2863,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features();
+ if (system_supports_sme()) + verify_sme_features(); + if (is_hyp_mode_available()) verify_hyp_capabilities(); } @@ -2936,6 +2982,7 @@ void __init setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
sve_setup(); + sme_setup(); minsigstksz_setup();
/* Advertise that we have computed the system capabilities */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 972315bf8302..3899083ff555 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -414,6 +414,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) id_aa64pfr0_sve(info->reg_id_aa64pfr0)) info->reg_zcr = read_zcr_features();
+ if (IS_ENABLED(CONFIG_ARM64_SME) && + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) + info->reg_smcr = read_smcr_features(); + cpuinfo_detect_icache_policy(info); }
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index a5f9475ce6eb..048dfcfadaed 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -72,3 +72,12 @@ SYM_FUNC_START(sve_flush_live) SYM_FUNC_END(sve_flush_live)
#endif /* CONFIG_ARM64_SVE */ + +#ifdef CONFIG_ARM64_SME + +SYM_FUNC_START(sme_get_vl) + _sme_rdsvl 0, 1 + ret +SYM_FUNC_END(sme_get_vl) + +#endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 411356fea1a9..6cc51288531f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -132,6 +132,12 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { .max_virtualisable_vl = SVE_VL_MIN, }, #endif +#ifdef CONFIG_ARM64_SME + [ARM64_VEC_SME] = { + .type = ARM64_VEC_SME, + .name = "SME", + }, +#endif };
static unsigned int vec_vl_inherit_flag(enum vec_type type) @@ -182,6 +188,20 @@ extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME + +static int get_sme_default_vl(void) +{ + return get_default_vl(ARM64_VEC_SME); +} + +static void set_sme_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SME, val); +} + +#endif + DEFINE_PER_CPU(bool, fpsimd_context_busy); EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
@@ -397,6 +417,8 @@ static unsigned int find_supported_vector_length(enum vec_type type,
if (vl > max_vl) vl = max_vl; + if (vl < info->min_vl) + vl = info->min_vl;
bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); @@ -758,7 +780,23 @@ static void vec_probe_vqs(struct vl_info *info,
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { write_vl(info->type, vq - 1); /* self-syncing */ - vl = sve_get_vl(); + + switch (info->type) { + case ARM64_VEC_SVE: + vl = sve_get_vl(); + break; + case ARM64_VEC_SME: + vl = sme_get_vl(); + break; + default: + vl = 0; + break; + } + + /* Minimum VL identified? */ + if (sve_vq_from_vl(vl) > vq) + break; + vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); } @@ -1005,7 +1043,88 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) SYS_SMCR_EL1); }
-#endif /* CONFIG_ARM64_SVE */ +/* + * Read the pseudo-SMCR used by cpufeatures to identify the supported + * vector length. + * + * Use only if SME is present. + * This function clobbers the SME vector length. + */ +u64 read_smcr_features(void) +{ + u64 smcr; + unsigned int vq_max; + + sme_kernel_enable(NULL); + sme_smstart_sm(); + + /* + * Set the maximum possible VL. + */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, + SYS_SMCR_EL1); + + smcr = read_sysreg_s(SYS_SMCR_EL1); + smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + smcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + sme_smstop_sm(); + + return smcr; +} + +void __init sme_setup(void) +{ + struct vl_info *info = &vl_info[ARM64_VEC_SME]; + u64 smcr; + int min_bit; + + if (!system_supports_sme()) + return; + + /* + * SME doesn't require any particular vector length be + * supported but it does require at least one. We should have + * disabled the feature entirely while bringing up CPUs but + * let's double check here. + */ + WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX)); + + min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX); + info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit)); + + smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1); + info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1); + + /* + * Sanity-check that the max VL we determined through CPU features + * corresponds properly to sme_vq_map. If not, do our best: + */ + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SME, + info->max_vl); + + WARN_ON(info->min_vl > info->max_vl); + + /* + * For the default VL, pick the maximum supported value <= 32 + * (256 bits) if there is one since this is guaranteed not to + * grow the signal frame when in streaming mode, otherwise the + * minimum available VL will be used. + */ + set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32)); + + pr_info("SME: minimum available vector length %u bytes per vector\n", + info->min_vl); + pr_info("SME: maximum available vector length %u bytes per vector\n", + info->max_vl); + pr_info("SME: default vector length %u bytes per vector\n", + get_sme_default_vl()); +} + +#endif /* CONFIG_ARM64_SME */
/* * Trapped SVE access