From: Wang Wensheng <wangwensheng4@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4N3XU
CVE: NA
-------------------------------------------------
The cdm nodes are more likely to raise an ECC error, and the kernel may crash if the essential management structures placed there go wrong. So move the management structures for hbm nodes to the ddr nodes of the same partition to reduce the probability of kernel crashes.
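For illustration only (not part of the patch): below is a minimal userspace sketch of the
nid translation that cdm_node_to_ddr_node() performs, assuming the example topology from
its comment block (DDR nodes numbered first, 2 DDR nodes, 2 HBM nodes per partition); the
helper name ddr_node_of() is made up for this sketch.

#include <stdio.h>

/* Mirrors the arithmetic of cdm_node_to_ddr_node() for the example topology. */
static int ddr_node_of(int nid, int nr_ddr, int hbm_per_part)
{
	/* DDR nodes and nids outside the assumed model map to themselves. */
	if (nid < nr_ddr || nid >= (hbm_per_part + 1) * nr_ddr)
		return nid;

	/* HBM nodes collapse onto the DDR node of their own partition. */
	return (nid - nr_ddr) / hbm_per_part;
}

int main(void)
{
	int nid;

	/* nodes 0-1: DDR, nodes 2-3: HBM of P0, nodes 4-5: HBM of P1 */
	for (nid = 0; nid < 6; nid++)
		printf("nid %d -> ddr nid %d\n", nid, ddr_node_of(nid, 2, 2));

	return 0;
}

With those assumptions the HBM nodes 2 and 3 map to DDR node 0, the HBM nodes 4 and 5 map
to DDR node 1, and the DDR nodes map to themselves.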
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Signed-off-by: Lijun Fang <fanglijun3@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 arch/arm64/Kconfig       |  9 ++++++
 arch/arm64/mm/numa.c     | 67 +++++++++++++++++++++++++++++++++++++++-
 include/linux/nodemask.h |  7 +++++
 mm/sparse.c              |  7 +++--
 4 files changed, 86 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2df4b310eb23..44f1bf1a5b08 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2000,6 +2000,15 @@ config ASCEND_DVPP_MMAP
 	  special memory for DvPP processor, the new flag is only valid for Ascend
 	  platform.
 
+config ASCEND_CLEAN_CDM
+	bool "move the management structure for HBM to DDR"
+	def_bool n
+	depends on COHERENT_DEVICE
+	help
+	  The cdm nodes are more likely to raise an ECC error, and that may
+	  crash the kernel if the essential structures go wrong. So move the
+	  management structures for hbm nodes to the ddr nodes of the same
+	  partition to reduce the probability of kernel crashes.
 endif
 
 endmenu
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 9a2e29a3a597..a167b74272b2 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -33,6 +33,70 @@ inline int arch_check_node_cdm(int nid)
 	return node_isset(nid, cdmmask);
 }
 
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+/**
+ * cdm_node_to_ddr_node - Convert the cdm node to the ddr node of the
+ *			  same partition.
+ * @nid: input node ID
+ *
+ * Here is a typical memory topology in usage.
+ * There are some DDR and HBM in each partition. The DDR nodes come first,
+ * then all HBM nodes of the first partition, then those of the second, etc.
+ *
+ * |-----------------------|
+ * |    P0     |    P1     |
+ * |-----------|-----------|
+ * | node0 DDR | node1 DDR |
+ * |-----------|-----------|
+ * | node2 HBM | node4 HBM |
+ * |-----------|-----------|
+ * | node3 HBM | node5 HBM |
+ * |-----------|-----------|
+ * |    ...    |    ...    |
+ * |-----------|-----------|
+ *
+ * Return:
+ * This function returns a ddr node which is in the same partition as the
+ * input node if the input node is an HBM node.
+ * The input nid is returned if it is a DDR node or if the memory topology of
+ * the system doesn't apply to the above model.
+ */
+int __init cdm_node_to_ddr_node(int nid)
+{
+	nodemask_t ddr_mask;
+	int nr_ddr, cdm_per_part, fake_nid;
+	int nr_cdm = nodes_weight(cdmmask);
+	/*
+	 * Specify the count of hbm nodes whose management structures would
+	 * be moved. The number 2 here is a magic value and should be made
+	 * configurable for future extension.
+	 */
+	int hbm_per_part = 2;
+
+	if (!nr_cdm || nodes_empty(numa_nodes_parsed))
+		return nid;
+
+	if (!node_isset(nid, cdmmask))
+		return nid;
+
+	nodes_xor(ddr_mask, cdmmask, numa_nodes_parsed);
+	nr_ddr = nodes_weight(ddr_mask);
+	cdm_per_part = nr_cdm / nr_ddr;
+
+	if (cdm_per_part == 0 || nid < nr_ddr ||
+	    nid >= (hbm_per_part + 1) * nr_ddr)
+		/* Our assumption is broken, just return the original nid. */
+		return nid;
+
+	fake_nid = (nid - nr_ddr) / hbm_per_part;
+	fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
+
+	pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
+
+	return fake_nid;
+}
+#endif
+
 static int __init cdm_nodes_setup(char *s)
 {
 	int nid;
@@ -252,11 +316,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	u64 nd_pa;
 	void *nd;
 	int tnid;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
 	if (start_pfn >= end_pfn)
 		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
 
-	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, fake_nid);
 	if (!nd_pa)
 		panic("Cannot allocate %zu bytes for node %d data\n",
 		      nd_size, nid);
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 90ea204cc059..2636f08d685c 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -509,6 +509,12 @@ static inline int node_random(const nodemask_t *mask)
 #ifdef CONFIG_COHERENT_DEVICE
 extern int arch_check_node_cdm(int nid);
 
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+extern int cdm_node_to_ddr_node(int nid);
+#else
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
+#endif
+
 static inline nodemask_t system_mem_nodemask(void)
 {
 	nodemask_t system_mem;
@@ -552,6 +558,7 @@ static inline void node_clear_state_cdm(int node)
 #else
 
 static inline int arch_check_node_cdm(int nid) { return 0; }
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
 
 static inline nodemask_t system_mem_nodemask(void)
 {
diff --git a/mm/sparse.c b/mm/sparse.c
index 0e645ff9cf0f..5a48ea3e9968 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -528,14 +528,15 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 	struct mem_section_usage *usage;
 	unsigned long pnum;
 	struct page *map;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
-	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(fake_nid),
 			mem_section_usage_size() * map_count);
 	if (!usage) {
 		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
 		goto failed;
 	}
-	sparse_buffer_init(map_count * section_map_size(), nid);
+	sparse_buffer_init(map_count * section_map_size(), fake_nid);
 	for_each_present_section_nr(pnum_begin, pnum) {
 		unsigned long pfn = section_nr_to_pfn(pnum);
 
@@ -543,7 +544,7 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 			break;
 
 		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
-				nid, NULL);
+				fake_nid, NULL);
 		if (!map) {
 			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
 			       __func__, nid);