From 85f6f7c3bd13b3d6fd064899e2818ee70bdf29e2 Mon Sep 17 00:00:00 2001
euleros inclusion
commit 85f6f7c3bd13b3d6fd064899e2818ee70bdf29e2
category: bugfix
Random performance drops appear in Hackbench cases that test pipe or
socket communication among multiple threads on the Hisi 1620 SoC.
Both cache sharing, caused by a change in the data layout, and the
cache readunique mechanism contribute to this problem.
The readunique mechanism, which may be triggered by a store operation,
invalidates cachelines on other cores during the data fetching stage.
This causes frequent cacheline invalidation when shared data is being
accessed.
Disabling cache readunique can tackle this problem.
Test cases are like:
./hackbench -pipe 20 thread 1000
On a 128-core 1620 machine, the time cost of the above test case is
basically 0.3s when RU is off, while it can exceed 1s when RU is on.
What's more, we disable readunique only in EL2, because disabling
readunique in EL1 may cause a panic due to the lack of the related
priority, which is often set in BIOS.
Signed-off-by: Kai Shen <shenkai8(a)huawei.com>
Reviewed-by: Wenliang He <hewenliang4(a)huawei.com>
Reviewed-by: Jinxian He <hejingxian(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Reviewed-by: Wei Li <liwei391(a)huawei.com>
---
arch/arm64/Kconfig | 9 +++++++
arch/arm64/configs/euleros_defconfig | 1 +
arch/arm64/include/asm/cpucaps.h | 3 ++-
arch/arm64/kernel/cpu_errata.c | 37 ++++++++++++++++++++++++++++
4 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b818273ef..33e185af5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -642,6 +642,15 @@ config QCOM_FALKOR_ERRATUM_E1041
If unsure, say Y.
+config HISILICON_ERRATUM_1620_RU
+ bool "Hisi 1620 cache readunique might compromise performance"
+ default y
+ help
+ Cache readunique (RU) on HiSilicon 1620 may hurt performance, so it is
+ disabled at EL2 boot; boot with "readunique_enable" to keep RU on.
+
+ If unsure, say Y.
+
endmenu
diff --git a/arch/arm64/configs/euleros_defconfig
b/arch/arm64/configs/euleros_defconfig
index 360e69291..29403bfe4 100644
--- a/arch/arm64/configs/euleros_defconfig
+++ b/arch/arm64/configs/euleros_defconfig
@@ -5650,3 +5650,4 @@ CONFIG_IO_STRICT_DEVMEM=y
# CONFIG_DEBUG_EFI is not set
# CONFIG_ARM64_RELOC_TEST is not set
# CONFIG_CORESIGHT is not set
+CONFIG_HISILICON_ERRATUM_1620_RU=y
diff --git a/arch/arm64/include/asm/cpucaps.h
b/arch/arm64/include/asm/cpucaps.h
index d56d815b9..112c866c1 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -58,7 +58,8 @@
#define ARM64_SSBS 37
#define ARM64_CLEARPAGE_STNP 38
#define ARM64_WORKAROUND_1542419 39
+#define ARM64_HISI_1620_RU 40
-#define ARM64_NCAPS 40
+#define ARM64_NCAPS 41
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 7522163c1..fa751abed 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -25,6 +25,11 @@
#include <asm/cpufeature.h>
#include <asm/smp_plat.h>
+#ifdef CONFIG_HISILICON_ERRATUM_1620_RU
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#endif
+
static bool __maybe_unused
is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int
scope)
{
@@ -491,6 +496,30 @@ cpu_enable_cache_maint_trap(const struct
arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
}
+#ifdef CONFIG_HISILICON_ERRATUM_1620_RU
+static bool readunique_enabled; /* true once "readunique_enable" is on the cmdline */
+
+static int __init enable_readunique(char *data)
+{
+	readunique_enabled = true;
+	return 1; /* non-zero: option consumed, do not hand it on to init */
+}
+__setup("readunique_enable", enable_readunique);
+
+#define CTLR_HISI_1620_RU (1L << 40)
+static void __maybe_unused
+hisi_1620_ru_disable(const struct arm64_cpu_capabilities *__unused)
+{
+	/* Leave the register untouched when RU was requested on the cmdline. */
+	if (readunique_enabled)
+		return;
+	/* Only write the control bit when the kernel is running at EL2. */
+	if (read_sysreg(CurrentEL) != CurrentEL_EL2)
+		return;
+	sysreg_clear_set(S3_1_c15_c6_4, 0, CTLR_HISI_1620_RU);
+}
+#endif
+
/* known invulnerable cores */
static const struct midr_range arm64_ssb_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
@@ -884,6 +913,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.matches = has_neoverse_n1_erratum_1542419,
.cpu_enable = cpu_enable_trap_ctr_access,
},
+#endif
+#ifdef CONFIG_HISILICON_ERRATUM_1620_RU
+ {
+ .desc = "Hisi 1620 Cache Readunique Disable",
+ .capability = ARM64_HISI_1620_RU,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ .cpu_enable = hisi_1620_ru_disable,
+ },
#endif
{
}
--
2.19.1