Fix the SMMU pgtable prefetch problem, and add some RAS features which are used in Ascend scenarios.
v3: - Fix code check warnings
v2: - issue new ISSUES for olk6.6
Zhang Jian (1): mm: export collect_procs()
Zhang Zekun (3): iommu/arm-smmu-v3: Add a SYNC command to avoid broken page table prefetch mm: memory-failure: Directly return the task for specific use ACPI: APEI: Don't call notifier again in ts scenario
arch/arm64/Kconfig | 13 +++++++++++++ arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++ arch/arm64/tools/cpucaps | 1 + drivers/acpi/apei/ghes.c | 3 +++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 20 ++++++++++++++++++++ include/linux/mm.h | 8 ++++++++ mm/Kconfig | 11 +++++++++++ mm/memory-failure.c | 13 +++++++++++++ 9 files changed, 84 insertions(+)
hulk inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8QSLV CVE: NA
-----------------------------------------------
On Hisilicon LINXICORE9100 cores, the SMMU pagetable prefetch feature may prefetch and use an invalid PTE even though the PTE is valid at that time. This will cause the device to trigger fake page faults. If the SMMU works in terminate mode, transactions which hit fake page faults will be aborted, which could result in unexpected errors.
To fix this problem, we need to add a SYNC command after the SMMU has mapped an IOVA, so that the SMMU will always try to get the newest PTE.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- arch/arm64/Kconfig | 13 +++++++++++++ arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++ arch/arm64/tools/cpucaps | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 20 ++++++++++++++++++++ 5 files changed, 49 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2aca373a7038..02f6dff029ed 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1180,6 +1180,19 @@ config HISILICON_ERRATUM_1980005
If unsure, say N.
+config HISILICON_ERRATUM_162100602 + bool "Hisilicon erratum 162100602" + depends on ARM_SMMU_V3 + default y + help + On Hisilicon LINXICORE9100 cores, SMMU pagetable prefetch features may + prefetch and use a invalid PTE even the PTE is valid at that time. This + will cause the device trigger fake pagefaults. If the SMMU works in + terminate mode, transactions which occur fake pagefaults will be aborted, + and could result in unexpected errors. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 33ba39711884..98fa4d2f3b13 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -399,6 +399,7 @@ CONFIG_CAVIUM_TX2_ERRATUM_219=y CONFIG_FUJITSU_ERRATUM_010001=y CONFIG_HISILICON_ERRATUM_161600802=y CONFIG_HISILICON_ERRATUM_162100125=y +CONFIG_HISILICON_ERRATUM_162100602=y CONFIG_QCOM_FALKOR_ERRATUM_1003=y CONFIG_QCOM_FALKOR_ERRATUM_1009=y CONFIG_QCOM_QDF2400_ERRATUM_0065=y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a686a96d966a..c569e6c0ac07 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -360,6 +360,13 @@ static const struct midr_range hisilicon_erratum_162100125_cpus[] = { }; #endif
+#ifdef CONFIG_HISILICON_ERRATUM_162100602 +static const struct midr_range hisilicon_erratum_162100602_cpus[] = { + MIDR_REV(MIDR_HISI_LINXICORE9100, 0, 0), + {}, +}; +#endif + #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 static const struct arm64_cpu_capabilities qcom_erratum_1003_list[] = { { @@ -591,6 +598,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(hisilicon_erratum_162100125_cpus), }, #endif +#ifdef CONFIG_HISILICON_ERRATUM_162100602 + { + .desc = "Hisilicon erratum 162100602", + .capability = ARM64_WORKAROUND_HISILICON_ERRATUM_162100602, + ERRATA_MIDR_RANGE_LIST(hisilicon_erratum_162100602_cpus), + }, +#endif #ifdef CONFIG_HISILICON_ERRATUM_1980005 { .desc = "Hisilicon erratum 1980005 (IDC)", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 0b62edb91876..569ecec76c16 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -103,3 +103,4 @@ WORKAROUND_SPECULATIVE_AT WORKAROUND_HISILICON_ERRATUM_162100125 WORKAROUND_HISI_HIP08_RU_PREFETCH WORKAROUND_HISILICON_1980005 +WORKAROUND_HISILICON_ERRATUM_162100602 diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5055a66644af..d0422bb13724 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2528,6 +2528,23 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, gather->pgsize, true, smmu_domain); }
+#ifdef CONFIG_HISILICON_ERRATUM_162100602 +static void arm_smmu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + size_t granule_size; + + if (!cpus_have_const_cap(ARM64_WORKAROUND_HISILICON_ERRATUM_162100602)) + return; + + granule_size = 1 << __ffs(smmu_domain->domain.pgsize_bitmap); + + /* Add a SYNC command to sync io-pgtale to avoid errors in pgtable prefetch*/ + arm_smmu_tlb_inv_range_domain(iova, granule_size, granule_size, true, smmu_domain); +} +#endif + static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -2883,6 +2900,9 @@ static struct iommu_ops arm_smmu_ops = { .unmap_pages = arm_smmu_unmap_pages, .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, +#ifdef CONFIG_HISILICON_ERRATUM_162100602 + .iotlb_sync_map = arm_smmu_iotlb_sync_map, +#endif .iova_to_phys = arm_smmu_iova_to_phys, .enable_nesting = arm_smmu_enable_nesting, .free = arm_smmu_domain_free,
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8Q58S CVE: NA
---------------------------------
In some Ascend scenarios, we need to get the task regardless of whether it has signal handling functions.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- include/linux/mm.h | 4 ++++ mm/Kconfig | 11 +++++++++++ mm/memory-failure.c | 5 +++++ 3 files changed, 20 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 80bacc4da324..4e482ff01d38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -395,6 +395,10 @@ extern unsigned int kobjsize(const void *objp); # define VM_SHARE_POOL VM_NONE #endif
+#if defined(CONFIG_ASCEND_RAS_FEATURES) +#define ASCEND_HWPOISON_MAGIC_NUM 0xABAB +#endif + #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif diff --git a/mm/Kconfig b/mm/Kconfig index ff0c36f42ca8..fe1750f2fd8a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1323,6 +1323,17 @@ config ASCEND_OOM 0: disable oom killer 1: enable oom killer (default,compatible with mainline)
+config ASCEND_RAS_FEATURES + bool "ACPI RAS features for ascend scenarios" + default n + depends on ACPI && ARM64 + help + ACPI RAS features developed to support some of special cases in ascend + scenarios. If you don't know the actual usage of these features don't + open this config. + + If not sure, say no. + source "mm/damon/Kconfig"
endmenu diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4d6e43c88489..228700bb5cb4 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -582,6 +582,11 @@ struct task_struct *task_early_kill(struct task_struct *tsk, int force_early) { if (!tsk->mm) return NULL; + +#ifdef CONFIG_ASCEND_RAS_FEATURES + if (force_early == ASCEND_HWPOISON_MAGIC_NUM) + return tsk; +#endif /* * Comparing ->mm here because current task might represent * a subthread, while tsk always points to the main thread.
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8Q58S CVE: NA
--------------------------------------------
Don't call ghes_defer_non_standard_event() after the memory failure has been handled, because the notifier will be called in atomic_notifier_call_chain() when handling CPER_SEC_PLATFORM_MEM errors in ghes_do_proc().
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/acpi/apei/ghes.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index bf1b9252a8da..af6e32d56ece 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -684,6 +684,9 @@ static bool ghes_do_proc(struct ghes *ghes,
arch_apei_report_mem_error(sev, mem_err); queued = ghes_handle_memory_failure(gdata, sev); +#ifdef CONFIG_ASCEND_RAS_FEATURES + continue; +#endif } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata);
From: Zhang Jian zhangjian210@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8R318 CVE: NA
-------------------------------------------------
Collect the processes that have the page mapped via collect_procs().
@page: if the page is part of a hugepage/compound page, we must use compound_head() to find its head page to prevent a kernel panic, and the page must be locked.
@to_kill: the function will return a linked list; once we have finished using this list, we must kfree the list.
@force_early: if we want to find all processes, it must be true; if it is false, the function will only return the processes that have the PF_MCE_PROCESS or PF_MCE_EARLY flag.
Limits: if force_early is true, sysctl_memory_failure_early_kill has no effect. If it is false, no process has the PF_MCE_PROCESS and PF_MCE_EARLY flags, and sysctl_memory_failure_early_kill is enabled, the function will return all tasks regardless of whether they have the PF_MCE_PROCESS and PF_MCE_EARLY flags.
Signed-off-by: Zhang Jian zhangjian210@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- include/linux/mm.h | 4 ++++ mm/memory-failure.c | 8 ++++++++ 2 files changed, 12 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 4e482ff01d38..4246015259ff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3882,6 +3882,10 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); +#ifdef CONFIG_ASCEND_RAS_FEATURES +extern void collect_procs(struct page *page, struct list_head *tokill, + int force_early); +#endif #else static inline void memory_failure_queue(unsigned long pfn, int flags) { diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 228700bb5cb4..b90e7dbf7430 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -709,8 +709,13 @@ static void collect_procs_fsdax(struct page *page, /* * Collect the processes who have the corrupted page mapped to kill. */ +#ifdef CONFIG_ASCEND_RAS_FEATURES +void collect_procs(struct page *page, struct list_head *tokill, + int force_early) +#else static void collect_procs(struct page *page, struct list_head *tokill, int force_early) +#endif { if (!page->mapping) return; @@ -721,6 +726,9 @@ static void collect_procs(struct page *page, struct list_head *tokill, else collect_procs_file(page, tokill, force_early); } +#ifdef CONFIG_ASCEND_RAS_FEATURES +EXPORT_SYMBOL_GPL(collect_procs); +#endif
struct hwpoison_walk { struct to_kill tk;
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3560 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3560 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...