On 2022/1/8 11:06, Kefeng Wang wrote:
整体上,
补丁至少拆成2个,1个pmem.c共享,包括kconfig设计
cmdline支持
热升级arm64的pmem和x86上e820并不是一个东西,这个是单独的功能。
参考是nd_e820.c文件实现的功能,利用nvdimm资源发现机制注册pmem device,
为了保持一致沿用了x86中的叫法(后续推入社区会考虑修改名字,防止与x86中e820中的pmem混淆),所以这个pmem是不能共享的。
(目前的是根据上游linux社区建议,及上次在openEuler合入时寒军建议修改后的。)
On 2022/1/7 19:03, Zhuling wrote:
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4O31I?from=project-issue CVE: NA
Register pmem in arm64: Use memmap (memmap=nn[KMG]!ss[KMG]) to reserve memory and the e820 (drivers/nvdimm/e820.c) function to register persistent memory on arm64. When the kernel restarts or is updated, the data in PMEM will not be lost and can be loaded faster. This is a general feature.
drivers/nvdimm/e820.c: This file scans "iomem_resource" and takes advantage of the nvdimm resource discovery mechanism by registering a resource named "Persistent Memory (legacy)"; this function does not depend on the architecture.
We will push the feature to the Linux kernel community and discuss changing the file name, because people have the mistaken notion that e820.c depends on x86.
If you want to use this feature, do as follows: 1. Reserve memory: add memmap to grub.cfg to reserve memory, memmap=nn[KMG]!ss[KMG], e.g. memmap=100K!0x1a0000000. 2. Load nd_e820.ko: modprobe nd_e820. 3. Check for the pmem device in /dev, e.g. /dev/pmem0
Signed-off-by: Zhuling <zhuling8@huawei.com>
arch/arm64/Kconfig | 22 ++++++++++++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/pmem.c | 36 ++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 11 ++++++++ arch/arm64/mm/init.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/Kconfig | 6 +++++ drivers/nvdimm/Makefile | 1 + 7 files changed, 140 insertions(+) create mode 100644 arch/arm64/kernel/pmem.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 405e5ce..2231eac 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1322,6 +1322,28 @@ config RODATA_FULL_DEFAULT_ENABLED This requires the linear region to be mapped down to pages, which may adversely affect performance in some cases. +config ARM64_PMEM_RESERVE + bool "Reserve memory for persistent storage" + default n + help + Use memmap=nn[KMG]!ss[KMG](memmap=100K!0x1a0000000) reserve + memory for persistent storage.
+ Say y here to enable this feature.
+config ARM64_PMEM_LEGACY_DEVICE + bool "Create persistent storage" + depends on BLK_DEV + depends on LIBNVDIMM + select ARM64_PMEM_RESERVE + help + Use reserved memory for persistent storage when the kernel + restart or update. the data in PMEM will not be lost and + can be loaded faster.
+ Say y if unsure.
config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" help diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 169d90f..f615325 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-$(CONFIG_MPAM) += mpam/ +obj-$(CONFIG_ARM64_PMEM_LEGACY_DEVICE) += pmem.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/pmem.c b/arch/arm64/kernel/pmem.c new file mode 100644 index 0000000..d1efdc8 --- /dev/null +++ b/arch/arm64/kernel/pmem.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/*
- Copyright(c) 2021 Huawei Technologies Co., Ltd
- Derived from x86 and arm64 implement PMEM.
- */
+#include <linux/platform_device.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/module.h>
+static int found(struct resource *res, void *data) +{ + return 1; +}
+static int __init register_e820_pmem(void) +{ + struct platform_device *pdev; + int rc;
+ rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY, + IORESOURCE_MEM, 0, -1, NULL, found); + if (rc <= 0) + return 0;
+ /* + * See drivers/nvdimm/e820.c for the implementation, this is + * simply here to trigger the module to load on demand. + */ + pdev = platform_device_alloc("e820_pmem", -1);
+ return platform_device_add(pdev); +} +device_initcall(register_e820_pmem);
- 这个跟x86的 pmem.c 有什么区别,如果没有区别,应该先抽取一个补丁放到公共目录,而不是简单的copy
这个作为单独的补丁
热升级Pmem是独立的功能,如上解释
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 92d75e3..1a8a4d2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -70,6 +70,11 @@ static int __init arm64_enable_cpu0_hotplug(char *str) __setup("arm64_cpu0_hotplug", arm64_enable_cpu0_hotplug); #endif +#ifdef CONFIG_ARM64_PMEM_RESERVE +extern struct resource pmem_res; +#endif
这个只支持一个pmem,从实际情况,可以有很多pmem,cmdline也是支持的;
这块代码放到单独文件
这个功能是通用功能,不过目前主要是给热升级用,目前没有多个pmem的需求,且pmem
大小是可以控制。(参考x86中e820中也是仅一个)
phys_addr_t __fdt_pointer __initdata; /* @@ -305,6 +310,12 @@ static void __init request_standard_resources(void) } } +#ifdef CONFIG_ARM64_PMEM_RESERVE + if (pmem_res.end && pmem_res.start) + request_resource(&iomem_resource, &pmem_res); +#endif
static int __init reserve_memblock_reserved_regions(void) { u64 i, j; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6ebfabd..3b6907f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,6 +56,8 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); +phys_addr_t start_at, mem_size;
#ifdef CONFIG_PIN_MEMORY struct resource pin_memory_resource = { .name = "Pin memory", @@ -111,6 +113,18 @@ static void __init reserve_pin_memory_res(void) */ phys_addr_t arm64_dma_phys_limit __ro_after_init;
上面pin_memory的代码也是一样的问题;
下面一起解释了
+static unsigned long long pmem_size, pmem_start;
+#ifdef CONFIG_ARM64_PMEM_RESERVE +struct resource pmem_res = { + .name = "Persistent Memory (legacy)", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM, + .desc = IORES_DESC_PERSISTENT_MEMORY_LEGACY +}; +#endif
放到独立文件吧
我看arm64下其他,暂没有放单独文件的,x86下也是可以放在本文件里(x86和arm64实现有些不同,相关参数是在kernel/setup.c),
如果参数较多,功能比较独立,可放在一个独立文件下。如果要放单独文件,是不是后续将热升级这部分相关
的统一修改会更好。
#ifndef CONFIG_KEXEC_CORE static void __init reserve_crashkernel(void) { @@ -403,6 +417,26 @@ static int __init reserve_park_mem(void) return -EINVAL; } #endif +static bool __init is_mem_valid(unsigned long long mem_size, unsigned long long mem_start) +{ + if (!memblock_is_region_memory(mem_start, mem_size)) { + pr_warn("cannot reserve mem: region is not memory!\n"); + return false; + }
+ if (memblock_is_region_reserved(mem_start, mem_size)) { + pr_warn("cannot reserve mem: region overlaps reserved memory!\n"); + return false; + }
+ if (!IS_ALIGNED(mem_start, SZ_2M)) { + pr_warn("cannot reserve mem: base address is not 2MB aligned!\n"); + return false; + }
+ return true; +}
同上
static int need_remove_real_memblock __initdata; @@ -442,7 +476,17 @@ static int __init parse_memmap_one(char *p) start_at = memparse(p + 1, &p); memblock_reserve(start_at, mem_size); memblock_mark_memmap(start_at, mem_size); +<<<<<<< HEAD + } else if (*p == '!') { + start_at = memparse(p+1, &p);
+ pmem_start = start_at; + pmem_size = mem_size;
有冲突; cmdline可以有多次,这样写有问题;后面会覆盖前面
这部分,我是基于刘鹏的提交加入的,理论上我们是相互不影响的,我再自测下。
+ }else +======= } else +>>>>>>> 374db2be8805428b4f54b5ef793b0d3f5069c5f9 pr_info("Unrecognized memmap option, please check the parameter.\n"); return *p == '\0' ? 0 : -EINVAL; @@ -464,6 +508,20 @@ static int __init parse_memmap_opt(char *str) } early_param("memmap", parse_memmap_opt); +#ifdef CONFIG_ARM64_PMEM_RESERVE +static void __init reserve_pmem(void) +{ + if (!is_mem_valid(mem_size, start_at)) + return;
+ memblock_remove(pmem_start, pmem_size); + pr_info("pmem reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + pmem_start, pmem_start + pmem_size, pmem_size >> 20); + pmem_res.start = pmem_start; + pmem_res.end = pmem_start + pmem_size - 1; +} +#endif
放到独立文件
同上解释
void __init arm64_memblock_init(void) { const s64 linear_region_size = BIT(vabits_actual - 1); @@ -638,6 +696,11 @@ void __init bootmem_init(void) reserve_quick_kexec(); #endif +#ifdef CONFIG_ARM64_PMEM_RESERVE + reserve_pmem(); +#endif
reserve_pin_memory_res(); memblock_dump_all(); diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index b7d1eb3..9567cca 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -132,3 +132,9 @@ config NVDIMM_TEST_BUILD infrastructure. endif
+config PMEM_LEGACY + tristate "Pmem_legacy"
+ select X86_PMEM_LEGACY if X86 + select ARM64_PMEM_LEGACY_DEVICE if ARM64
这个不对等,而且不合理;X86_PMEM_LEGACY是 tristate的,由用户控制,
所以这个PMEM_LEGACY应该是替代X86_PMEM_LEGACY的
这部分是初次合入openEuler时根据寒军的意见,统一使用方式,两个pmem是不同功能,故无法替代。
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 0407753..6f8dc92 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_OF_PMEM) += of_pmem.o obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
.