From: Wang Wensheng <wangwensheng4@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------------------------------------
The cdm nodes are more likely to raise an ECC error, and the kernel may crash if the essential management structures are corrupted. So move the management structures for the HBM nodes to the DDR nodes of the same partition to reduce the probability of kernel crashes.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/Kconfig       | 10 ++++++++
 arch/arm64/mm/numa.c     | 54 +++++++++++++++++++++++++++++++++++++++-
 include/linux/nodemask.h |  7 ++++++
 mm/sparse.c              |  8 +++---
 4 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9d49b9524e1d4..2f34aef79179e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1470,6 +1470,16 @@ config ASCEND_SHARE_POOL
 	help
 	  This feature allows multiple processes to share virtual memory both
 	  in kernel and user level, which is only enabled for ascend platform.
+
+config ASCEND_CLEAN_CDM
+	bool "move the management structure for HBM to DDR"
+	def_bool n
+	depends on COHERENT_DEVICE
+	help
+	  The cdm nodes are more likely to raise an ECC error and it may
+	  cause a kernel crash if the essential structures are corrupted.
+	  So move the management structures for hbm nodes to the ddr nodes
+	  of the same partition to reduce the probability of kernel crashes.
 endif
 
 endmenu
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index a9d3ad5ee0cc3..a194bad6fdfcf 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -45,6 +45,57 @@ inline int arch_check_node_cdm(int nid)
 	return node_isset(nid, cdmmask);
 }
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+/**
+ * cdm_node_to_ddr_node - Convert the cdm node to the ddr node of the
+ * same partition.
+ * @nid: input node ID
+ *
+ * Here is a typical memory topology in usage.
+ * There are some DDR and HBM in each partition, and the DDRs present first,
+ * then come all the HBMs of the first partition, then the HBMs of the
+ * second partition, etc.
+ *
+ * -------------------------
+ * |    P0     |    P1     |
+ * ----------- | -----------
+ * |node0 DDR| | |node1 DDR|
+ * |---------- | ----------|
+ * |node2 HBM| | |node4 HBM|
+ * |---------- | ----------|
+ * |node3 HBM| | |node5 HBM|
+ * ----------- | -----------
+ *
+ * Return:
+ * This function returns a ddr node which is in the same partition as the
+ * input node if the input node is a HBM node.
+ * The input nid is returned if it is a DDR node or if the memory topology of
+ * the system doesn't apply to the above model.
+ */
+int __init cdm_node_to_ddr_node(int nid)
+{
+	nodemask_t ddr_mask;
+	int nr_ddr, cdm_per_part, fake_nid;
+	int nr_cdm = nodes_weight(cdmmask);
+
+	if (!nr_cdm || nodes_empty(numa_nodes_parsed))
+		return nid;
+
+	if (!node_isset(nid, cdmmask))
+		return nid;
+
+	nodes_xor(ddr_mask, cdmmask, numa_nodes_parsed);
+	nr_ddr = nodes_weight(ddr_mask);
+	cdm_per_part = nr_cdm / nr_ddr ? : 1;
+
+	fake_nid = (nid - nr_ddr) / cdm_per_part;
+	fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
+
+	pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
+
+	return fake_nid;
+}
+#endif
+
 static int __init cdm_nodes_setup(char *s)
 {
 	int nid;
@@ -264,11 +315,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	u64 nd_pa;
 	void *nd;
 	int tnid;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
 	if (start_pfn >= end_pfn)
 		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
 
-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, fake_nid);
 	nd = __va(nd_pa);
 
 	/* report and initialize */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 41fb047bdba80..7c0571b95ce4d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -508,6 +508,12 @@ static inline int node_random(const nodemask_t *mask)
 #ifdef CONFIG_COHERENT_DEVICE
 extern int arch_check_node_cdm(int nid);
 
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+extern int cdm_node_to_ddr_node(int nid);
+#else
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
+#endif
+
 static inline nodemask_t system_mem_nodemask(void)
 {
 	nodemask_t system_mem;
@@ -551,6 +557,7 @@ static inline void node_clear_state_cdm(int node)
 
 #else
 
 static inline int arch_check_node_cdm(int nid) { return 0; }
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
 
 static inline nodemask_t system_mem_nodemask(void)
 {
diff --git a/mm/sparse.c b/mm/sparse.c
index 62ae3880a9add..f19d2ca250cee 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -458,21 +458,23 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 {
 	unsigned long pnum, usemap_longs, *usemap;
 	struct page *map;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
 	usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
-	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(fake_nid),
 							  usemap_size() * map_count);
 	if (!usemap) {
 		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
 		goto failed;
 	}
-	sparse_buffer_init(map_count * section_map_size(), nid);
+
+	sparse_buffer_init(map_count * section_map_size(), fake_nid);
 	for_each_present_section_nr(pnum_begin, pnum) {
 		if (pnum >= pnum_end)
 			break;
 
-		map = sparse_mem_map_populate(pnum, nid, NULL);
+		map = sparse_mem_map_populate(pnum, fake_nid, NULL);
 		if (!map) {
 			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
 			       __func__, nid);
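
To make the remapping above concrete, here is a small stand-alone sketch (an illustration only, not part of the patch) of the nid-to-fake_nid arithmetic in cdm_node_to_ddr_node(). It assumes the topology from the comment block: DDR nodes occupy ids 0..nr_ddr-1 and all cdm nodes are HBM; nodemask operations are reduced to plain counts and the final node_isset() sanity check is dropped.

#include <stdio.h>

/* User-space model of cdm_node_to_ddr_node(); counts replace nodemasks. */
static int model_cdm_to_ddr(int nid, int nr_ddr, int nr_cdm)
{
	int cdm_per_part;

	if (!nr_cdm || nid < nr_ddr)	/* no cdm nodes, or already a DDR node */
		return nid;

	cdm_per_part = nr_cdm / nr_ddr;	/* the patch writes this as "nr_cdm / nr_ddr ? : 1" */
	if (!cdm_per_part)
		cdm_per_part = 1;

	/* HBM node -> the DDR node of its partition */
	return (nid - nr_ddr) / cdm_per_part;
}

int main(void)
{
	/* nodes 0,1: DDR; nodes 2,3: HBM of P0; nodes 4,5: HBM of P1 */
	for (int nid = 0; nid < 6; nid++)
		printf("nid %d -> fake_nid %d\n", nid, model_cdm_to_ddr(nid, 2, 4));
	return 0;
}

With the six-node topology from the comment this prints 2,3 -> 0 and 4,5 -> 1, i.e. the management structures of each partition's HBM nodes land on that partition's DDR node.
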
From: Wang Wensheng <wangwensheng4@huawei.com>
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------
Check whether the topological structure of the DDR/HBM nodes breaks our assumption. If the assumption is broken, just return the input nid; otherwise an invalid nid could be returned, which may break the kernel.
Fixes: aabbfd385ab2 ("numa: Move the management structures for cdm nodes to ddr")
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/mm/numa.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index a194bad6fdfcf..82d53927554d8 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -85,7 +85,11 @@ int __init cdm_node_to_ddr_node(int nid)
 
 	nodes_xor(ddr_mask, cdmmask, numa_nodes_parsed);
 	nr_ddr = nodes_weight(ddr_mask);
-	cdm_per_part = nr_cdm / nr_ddr ? : 1;
+	cdm_per_part = nr_cdm / nr_ddr;
+
+	if (cdm_per_part == 0 || nid < nr_ddr)
+		/* our assumption is broken, just return the original nid. */
+		return nid;
 
 	fake_nid = (nid - nr_ddr) / cdm_per_part;
 	fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
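
To see why the extra check matters, consider a hypothetical topology that violates the model, e.g. DDR nodes {0,1,4,5} and cdm nodes {2,3}, so nr_ddr = 4 and nr_cdm = 2. With the old "?: 1" fallback, nid 2 would compute (2 - 4) / 1 = -2, an invalid node id. A minimal sketch of the fixed logic (counts replace nodemasks, as before):

#include <assert.h>

/* Model of the guarded mapping from this patch. */
static int model_cdm_to_ddr(int nid, int nr_ddr, int nr_cdm)
{
	int cdm_per_part = nr_cdm / nr_ddr;

	if (cdm_per_part == 0 || nid < nr_ddr)
		return nid;		/* assumption broken: keep the original nid */
	return (nid - nr_ddr) / cdm_per_part;
}

int main(void)
{
	assert(model_cdm_to_ddr(2, 4, 2) == 2);	/* guarded: no -2 any more */
	return 0;
}
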
From: Wang Wensheng <wangwensheng4@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------------------------------------
Not all cdm nodes are HBM and we don't need to operate on the other nodes, so specify the HBM count per partition.
Here we assume that the HBM nodes come first among the cdm nodes of a partition. Otherwise the management structures of the HBM nodes cannot be moved, which is no worse than disabling this feature.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/mm/numa.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 82d53927554d8..c65a71de8d5fb 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -63,7 +63,9 @@ inline int arch_check_node_cdm(int nid)
  * |node2 HBM| | |node4 HBM|
  * |---------- | ----------|
  * |node3 HBM| | |node5 HBM|
- * ----------- | -----------
+ * |---------- | ----------|
+ * |   ...   | | |   ...   |
+ * |---------- | ----------|
  *
  * Return:
  * This function returns a ddr node which is in the same partition as the
@@ -76,6 +78,12 @@ int __init cdm_node_to_ddr_node(int nid)
 	nodemask_t ddr_mask;
 	int nr_ddr, cdm_per_part, fake_nid;
 	int nr_cdm = nodes_weight(cdmmask);
+	/*
+	 * Specify the count of hbm nodes whose management structures would
+	 * be moved. Here the number 2 is a magic number and we should make
+	 * it configurable for extending.
+	 */
+	int hbm_per_part = 2;
 
 	if (!nr_cdm || nodes_empty(numa_nodes_parsed))
 		return nid;
@@ -87,11 +95,12 @@ int __init cdm_node_to_ddr_node(int nid)
 	nr_ddr = nodes_weight(ddr_mask);
 	cdm_per_part = nr_cdm / nr_ddr;
 
-	if (cdm_per_part == 0 || nid < nr_ddr)
+	if (cdm_per_part == 0 || nid < nr_ddr ||
+	    nid >= (hbm_per_part + 1) * nr_ddr)
 		/* our assumption is broken, just return the original nid. */
 		return nid;
 
-	fake_nid = (nid - nr_ddr) / cdm_per_part;
+	fake_nid = (nid - nr_ddr) / hbm_per_part;
 	fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
 
 	pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
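
A worked example of the new bounds (a hypothetical layout): with nr_ddr = 2 and hbm_per_part = 2, nodes 0,1 are DDR, nodes 2,3 and 4,5 are the HBM of partitions 0 and 1, and any cdm node from id 6 on is a non-HBM device that must stay untouched.

#include <stdio.h>

#define HBM_PER_PART	2	/* the hardcoded value from the patch */

static int model_hbm_to_ddr(int nid, int nr_ddr)
{
	if (nid < nr_ddr || nid >= (HBM_PER_PART + 1) * nr_ddr)
		return nid;			/* DDR node, or a non-HBM cdm node */
	return (nid - nr_ddr) / HBM_PER_PART;	/* HBM node -> its partition's DDR */
}

int main(void)
{
	for (int nid = 0; nid < 8; nid++)
		printf("nid %d -> fake_nid %d\n", nid, model_hbm_to_ddr(nid, 2));
	return 0;	/* 2,3 -> 0; 4,5 -> 1; 0,1,6,7 stay unchanged */
}
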
From: Wang Wensheng <wangwensheng4@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
----------------------------------------------------------
Use a bootarg to precisely specify the target node to which we want to move the kernel structures for a cdm node.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/mm/numa.c | 82 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index c65a71de8d5fb..60bdaaf95b901 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -46,6 +46,85 @@ inline int arch_check_node_cdm(int nid)
 }
 
 #ifdef CONFIG_ASCEND_CLEAN_CDM
+
+#define MAX_PARTITION_COUNT		8
+#define MAX_CDM_PER_PARTITION		8
+
+/*
+ * Here we provide a way to precisely specify the target node to which we
+ * want to move the kernel structure for a cdm node, instead of guessing the
+ * hardware topologies. Even if the node isn't a cdm node, the movement could
+ * be reasonable. Suppose a node is designed to be used only by certain
+ * processes and devices; the kernel structure of that node could be
+ * overwritten by a broken process.
+ *
+ * A possible configuration in bootargs:
+ *	cdm_move_map=0,2,3,6;1,4,5,7
+ * That means to move the kernel structures for nodes 2,3,6 to node 0 and
+ * the kernel structures for nodes 4,5,7 to node 1.
+ */
+static bool cdm_to_ddr_hardcode = true;
+static int cdm_to_ddr_map[MAX_PARTITION_COUNT][MAX_CDM_PER_PARTITION + 1];
+
+static int __init cdm_to_ddr_parse_param(char *str)
+{
+	int i, j;
+	char *p, *n;
+
+	cdm_to_ddr_hardcode = false;
+	for (i = 0; i < MAX_PARTITION_COUNT; i++)
+		for (j = 0; j < MAX_CDM_PER_PARTITION + 1; j++)
+			cdm_to_ddr_map[i][j] = -1;
+
+	for (p = n = str, i = 0; strsep(&p, ";"); i++, n = p) {
+		char *s = n;
+
+		for (j = 0; strsep(&n, ","); j++, s = n) {
+			int err;
+			unsigned long long nid;
+
+			if (j >= MAX_CDM_PER_PARTITION + 1) {
+				pr_warn("the cdm nodes in this partition are more than supported\n");
+				break;
+			}
+
+			err = kstrtoull(s, 0, &nid);
+			if (err) {
+				pr_err("bootargs for cdm_move_map invalid, %d\n",
+				       err);
+				return err;
+			}
+
+			cdm_to_ddr_map[i][j] = (int)nid;
+			if (j > 0)
+				pr_info("node %d moved to node %d\n",
+					cdm_to_ddr_map[i][j],
+					cdm_to_ddr_map[i][0]);
+		}
+	}
+
+	return 0;
+}
+early_param("cdm_move_map", cdm_to_ddr_parse_param);
+
+static int __init cdm_node_to_ddr_node_mapped(int nid)
+{
+	int i, j;
+
+	for (i = 0; i < MAX_PARTITION_COUNT; i++) {
+		if (cdm_to_ddr_map[i][0] == -1)
+			break;
+		for (j = 1; j < MAX_CDM_PER_PARTITION + 1; j++) {
+			if (cdm_to_ddr_map[i][j] == -1)
+				break;
+			else if (cdm_to_ddr_map[i][j] == nid)
+				return cdm_to_ddr_map[i][0];
+		}
+	}
+
+	return nid;
+}
+
 /**
  * cdm_node_to_ddr_node - Convert the cdm node to the ddr node of the
  * same partition.
@@ -85,6 +164,9 @@ int __init cdm_node_to_ddr_node(int nid)
 	 */
 	int hbm_per_part = 2;
 
+	if (!cdm_to_ddr_hardcode)
+		return cdm_node_to_ddr_node_mapped(nid);
+
 	if (!nr_cdm || nodes_empty(numa_nodes_parsed))
 		return nid;
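
The bootarg format can be checked with a user-space model of the parser. The sketch below is hypothetical, not kernel code: it keeps the same two-level strsep() walk but substitutes atoi() for kstrtoull() and, unlike the patch, also bounds the partition index i.

#define _DEFAULT_SOURCE		/* for strsep() in glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_PART	8
#define MAX_CDM		8

static int map[MAX_PART][MAX_CDM + 1];

static void parse(char *str)
{
	char *p, *n;
	int i, j;

	memset(map, -1, sizeof(map));	/* -1 marks unused slots */
	for (p = n = str, i = 0; i < MAX_PART && strsep(&p, ";"); i++, n = p) {
		char *s = n;	/* s tracks the token just cut off by strsep() */

		for (j = 0; j < MAX_CDM + 1 && strsep(&n, ","); j++, s = n)
			map[i][j] = atoi(s);
	}
}

int main(void)
{
	char arg[] = "0,2,3,6;1,4,5,7";	/* the example from the patch comment */

	parse(arg);
	printf("partition 0: cdm %d,%d,%d -> node %d\n",
	       map[0][1], map[0][2], map[0][3], map[0][0]);
	printf("partition 1: cdm %d,%d,%d -> node %d\n",
	       map[1][1], map[1][2], map[1][3], map[1][0]);
	return 0;
}

For "cdm_move_map=0,2,3,6;1,4,5,7" this reports that nodes 2,3,6 map to node 0 and nodes 4,5,7 map to node 1, matching the comment in the patch.
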
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
----------------------------------------------------------
Enable CONFIG_ASCEND_CLEAN_CDM by default.
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/configs/hulk_defconfig | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig
index 50cc7d95c9297..e0b220f16f222 100644
--- a/arch/arm64/configs/hulk_defconfig
+++ b/arch/arm64/configs/hulk_defconfig
@@ -486,6 +486,7 @@ CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES=y
 CONFIG_ASCEND_WATCHDOG_SYSFS_CONFIGURE=y
 CONFIG_ASCEND_AUTO_TUNING_HUGEPAGE=y
 CONFIG_ASCEND_SHARE_POOL=y
+CONFIG_ASCEND_CLEAN_CDM=y
 
 #
 # Boot options