From: Wang Wensheng <wangwensheng4(a)huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
The cdm nodes are more prone to ECC errors, and such an error may crash
the kernel if it corrupts an essential structure. So move the
management structures for hbm nodes to the ddr nodes of the same
partition to reduce the probability of kernel crashes.
Signed-off-by: Wang Wensheng <wangwensheng4(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
arch/arm64/Kconfig | 10 ++++++++
arch/arm64/mm/numa.c | 54 +++++++++++++++++++++++++++++++++++++++-
include/linux/nodemask.h | 7 ++++++
mm/sparse.c | 8 +++---
4 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b7caf370a14b7..3848c062ea2c5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1583,6 +1583,16 @@ config ASCEND_BOOT_CRASH_KERNEL
Usage:
1. add a node name:kexecmailbox to dts config.
2. after kexec run, set sysctl -w kernel.kexec_bios_start=1.
+
+config ASCEND_CLEAN_CDM
+ bool "move the management structure for HBM to DDR"
+ def_bool n
+ depends on COHERENT_DEVICE
+ help
+ The cdm nodes are sometimes more prone to ECC errors, and such an error
+ may crash the kernel if it corrupts an essential structure. So move
+ the management structures for hbm nodes to the ddr nodes of the same
+ partition to reduce the probability of kernel crashes.
endif
endmenu
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index a9d3ad5ee0cc3..a194bad6fdfcf 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -45,6 +45,57 @@ inline int arch_check_node_cdm(int nid)
return node_isset(nid, cdmmask);
}
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+/**
+ * cdm_node_to_ddr_node - Convert a cdm node to the ddr node of the same
+ * partition.
+ * @nid: input node ID
+ *
+ * Here is a typical memory topology in usage.
+ * Each partition has some DDR and HBM nodes. The DDRs are numbered first, then
+ * come all the HBMs of the first partition, then HBMs of the second, etc.
+ *
+ *      -------------------------
+ *      |    P0     |    P1     |
+ *      ----------- | -----------
+ *      |node0 DDR| | |node1 DDR|
+ *      |---------- | ----------|
+ *      |node2 HBM| | |node4 HBM|
+ *      |---------- | ----------|
+ *      |node3 HBM| | |node5 HBM|
+ *      ----------- | -----------
+ *
+ * Return:
+ * The ddr node of the same partition if @nid is a cdm (HBM) node and the
+ * computed node is a parsed, non-cdm node.
+ * The input nid is returned if it is not a cdm node, if there is no ddr node,
+ * or if the memory topology of the system doesn't match the above model.
+ */
+int __init cdm_node_to_ddr_node(int nid)
+{
+	nodemask_t ddr_mask;
+	int nr_ddr, cdm_per_part, fake_nid;
+	int nr_cdm = nodes_weight(cdmmask);
+
+	if (!nr_cdm || !node_isset(nid, cdmmask))
+		return nid;
+
+	/* DDR nodes are the parsed nodes that are not CDM nodes. */
+	nodes_andnot(ddr_mask, numa_nodes_parsed, cdmmask);
+	nr_ddr = nodes_weight(ddr_mask);
+	if (!nr_ddr || nid < nr_ddr)	/* model does not apply */
+		return nid;
+
+	cdm_per_part = nr_cdm / nr_ddr ? : 1;
+	fake_nid = (nid - nr_ddr) / cdm_per_part;
+	if (!node_isset(fake_nid, ddr_mask))
+		fake_nid = nid;
+
+	pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
+	return fake_nid;
+}
+#endif
+
static int __init cdm_nodes_setup(char *s)
{
int nid;
@@ -264,11 +315,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
u64 nd_pa;
void *nd;
int tnid;
+ int fake_nid = cdm_node_to_ddr_node(nid);
if (start_pfn >= end_pfn)
pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
- nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, fake_nid);
nd = __va(nd_pa);
/* report and initialize */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 41fb047bdba80..7c0571b95ce4d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -508,6 +508,12 @@ static inline int node_random(const nodemask_t *mask)
#ifdef CONFIG_COHERENT_DEVICE
extern int arch_check_node_cdm(int nid);
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+extern int cdm_node_to_ddr_node(int nid);
+#else
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
+#endif
+
static inline nodemask_t system_mem_nodemask(void)
{
nodemask_t system_mem;
@@ -551,6 +557,7 @@ static inline void node_clear_state_cdm(int node)
#else
static inline int arch_check_node_cdm(int nid) { return 0; }
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
static inline nodemask_t system_mem_nodemask(void)
{
diff --git a/mm/sparse.c b/mm/sparse.c
index 9854aff6b4193..581982a376bdd 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -458,21 +458,23 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
{
unsigned long pnum, usemap_longs, *usemap;
struct page *map;
+ int fake_nid = cdm_node_to_ddr_node(nid);
usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
- usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+ usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(fake_nid),
usemap_size() *
map_count);
if (!usemap) {
pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
goto failed;
}
- sparse_buffer_init(map_count * section_map_size(), nid);
+
+ sparse_buffer_init(map_count * section_map_size(), fake_nid);
for_each_present_section_nr(pnum_begin, pnum) {
if (pnum >= pnum_end)
break;
- map = sparse_mem_map_populate(pnum, nid, NULL);
+ map = sparse_mem_map_populate(pnum, fake_nid, NULL);
if (!map) {
pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
__func__, nid);
--
2.25.1