Random performance drops appear in Hackbench cases that test pipe or socket communication among multiple threads on the HiSilicon HIP08 SoC. Cache sharing caused by a change in the data layout and the cache readunique prefetch mechanism both contribute to this problem.
The readunique mechanism, which may be triggered by a store operation, invalidates cachelines on other cores during the data fetching stage; this can cause cacheline invalidation to happen frequently when data is accessed in a shared manner.
Disabling cache readunique prefetch can tackle this problem. Test cases are like: for i in 20;do echo "--------pipe thread num=$i----------" for j in $(seq 1 10);do ./hackbench -pipe $i thread 1000 done done
We disable readunique prefetch only at EL2, because disabling it at EL1 may cause a panic due to the lack of the related priority, which is often set in the BIOS.
Introduce CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH and disable RU prefetch using boot cmdline 'readunique_prefetch=off'.
Kai Shen (1): arm64: errata: add option to disable cache readunique prefetch on HIP08
Xie XiuQi (1): arm64: errata: enable HISILICON_ERRATUM_HIP08_RU_PREFETCH
arch/arm64/Kconfig | 18 +++++++++ arch/arm64/configs/openeuler_defconfig | 2 + arch/arm64/kernel/cpu_errata.c | 56 ++++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 77 insertions(+)
From: Kai Shen shenkai8@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZFV2 CVE: NA
-----------------------------------------------------------
Random performance drops appear in Hackbench cases that test pipe or socket communication among multiple threads on the HiSilicon HIP08 SoC. Cache sharing caused by a change in the data layout and the cache readunique prefetch mechanism both contribute to this problem.
The readunique mechanism, which may be triggered by a store operation, invalidates cachelines on other cores during the data fetching stage; this can cause cacheline invalidation to happen frequently when data is accessed in a shared manner.
Disabling cache readunique prefetch can tackle this problem. Test cases are like: for i in 20;do echo "--------pipe thread num=$i----------" for j in $(seq 1 10);do ./hackbench -pipe $i thread 1000 done done
We disable readunique prefetch only at EL2, because disabling it at EL1 may cause a panic due to the lack of the related priority, which is often set in the BIOS.
Introduce CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH and disable RU prefetch using boot cmdline 'readunique_prefetch=off'.
Signed-off-by: Kai Shen shenkai8@huawei.com Signed-off-by: Hanjun Guo guohanjun@huawei.com [XQ: adjusted context] Signed-off-by: Xie XiuQi xiexiuqi@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/Kconfig | 18 +++++++++++ arch/arm64/kernel/cpu_errata.c | 56 ++++++++++++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 3 files changed, 75 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 78f20e632712..bbe85ca456c9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1229,6 +1229,24 @@ config SOCIONEXT_SYNQUACER_PREITS
If unsure, say Y.
+config HISILICON_ERRATUM_HIP08_RU_PREFETCH + bool "HIP08 RU: HiSilicon HIP08 cache readunique might cause performance drop" + default y + help + The HiSilicon HIP08 cache readunique might compromise performance, + use cmdline "readunique_prefetch=off" to disable RU prefetch. + + If unsure, say Y. + +config HISILICON_HIP08_RU_PREFETCH_DEFAULT_OFF + bool "HIP08 RU: disable HiSilicon HIP08 cache readunique by default" + depends on HISILICON_ERRATUM_HIP08_RU_PREFETCH + default n + help + Disable HiSilicon HIP08 cache readunique by default. + + If unsure, say N. + endmenu # "ARM errata workarounds via the alternatives framework"
choice diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5706e74c5578..6a2ae67e85f7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -13,6 +13,11 @@ #include <asm/cpufeature.h> #include <asm/kvm_asm.h> #include <asm/smp_plat.h> +#ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH +#include <asm/ptrace.h> +#include <asm/sysreg.h> +#include <linux/smp.h> +#endif
static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) @@ -121,6 +126,48 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); }
+#ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH +# ifdef CONFIG_HISILICON_HIP08_RU_PREFETCH_DEFAULT_OFF +static bool readunique_prefetch_enabled; +# else +static bool readunique_prefetch_enabled = true; +# endif +static int __init readunique_prefetch_switch(char *data) +{ + if (!data) + return -EINVAL; + + if (strcmp(data, "off") == 0) + readunique_prefetch_enabled = false; + else if (strcmp(data, "on") == 0) + readunique_prefetch_enabled = true; + else + return -EINVAL; + + return 0; +} +early_param("readunique_prefetch", readunique_prefetch_switch); + +static bool +should_disable_hisi_hip08_ru_prefetch(const struct arm64_cpu_capabilities *entry, int unused) +{ + u64 el; + + if (readunique_prefetch_enabled) + return false; + + el = read_sysreg(CurrentEL); + return el == CurrentEL_EL2; +} + +#define CTLR_HISI_HIP08_RU_PREFETCH (1L << 40) +static void __maybe_unused +hisi_hip08_ru_prefetch_disable(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(S3_1_c15_c6_4, 0, CTLR_HISI_HIP08_RU_PREFETCH); +} +#endif + static DEFINE_RAW_SPINLOCK(reg_user_mask_modification); static void __maybe_unused cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused) @@ -744,6 +791,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38, ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1), }, +#endif +#ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH + { + .desc = "HiSilicon HIP08 Cache Readunique Prefetch Disable", + .capability = ARM64_WORKAROUND_HISI_HIP08_RU_PREFETCH, + ERRATA_MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + .matches = should_disable_hisi_hip08_ru_prefetch, + .cpu_enable = hisi_hip08_ru_prefetch_disable, + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index dea3dc89234b..bbb97036074b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -100,3 +100,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI 
WORKAROUND_SPECULATIVE_AT +WORKAROUND_HISI_HIP08_RU_PREFETCH
From: Xie XiuQi xiexiuqi@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZFV2 CVE: NA
------------------------------------------------------------
Enable CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH to add a cmdline option for disabling readunique prefetch.
Signed-off-by: Xie XiuQi xiexiuqi@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8f1a4db8d49b..bd8a43177855 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -402,6 +402,8 @@ CONFIG_QCOM_FALKOR_ERRATUM_E1041=y CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y CONFIG_ROCKCHIP_ERRATUM_3588001=y CONFIG_SOCIONEXT_SYNQUACER_PREITS=y +CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH=y +# CONFIG_HISILICON_HIP08_RU_PREFETCH_DEFAULT_OFF is not set # end of ARM errata workarounds via the alternatives framework
CONFIG_ARM64_4K_PAGES=y
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3159 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3159 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...