[PATCH v3 0/3] arm64/riscv: Add support for crashkernel CMA reservation
Add support for crashkernel CMA reservation for arm64 and riscv. By the way, exclude crash kernel memory in crash core to avoid duplication. Changes in v3: - Exclude crash kernel memory in crash core as Mike suggested. - Add acked-by. Jinjie Ruan (3): crash: Exclude crash kernel memory in crash core arm64: kexec: Add support for crashkernel CMA reservation riscv: kexec: Add support for crashkernel CMA reservation .../admin-guide/kernel-parameters.txt | 16 ++++---- arch/arm64/kernel/machine_kexec_file.c | 16 +++----- arch/arm64/mm/init.c | 5 ++- arch/loongarch/kernel/machine_kexec_file.c | 12 ------ arch/powerpc/kexec/ranges.c | 16 ++------ arch/riscv/kernel/machine_kexec_file.c | 15 ++++--- arch/riscv/mm/init.c | 5 ++- arch/x86/kernel/crash.c | 39 +------------------ kernel/crash_core.c | 28 +++++++++++++ 9 files changed, 61 insertions(+), 91 deletions(-) -- 2.34.1
The exclude of crashk_res, crashk_low_res and crashk_cma memory are almost identical across different architectures, so handling them in the crash core would eliminate a lot of duplication, so do them in the common code. Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com> --- arch/arm64/kernel/machine_kexec_file.c | 12 ------- arch/loongarch/kernel/machine_kexec_file.c | 12 ------- arch/powerpc/kexec/ranges.c | 16 ++------- arch/riscv/kernel/machine_kexec_file.c | 5 +-- arch/x86/kernel/crash.c | 39 ++-------------------- kernel/crash_core.c | 28 ++++++++++++++++ 6 files changed, 34 insertions(+), 78 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 410060ebd86d..ed2c45007158 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -64,20 +64,8 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); -out: kfree(cmem); return ret; } diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index fb57026f5f25..26f867e53955 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -80,20 +80,8 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret < 0) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret < 0) - goto out; - } - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); -out: 
kfree(cmem); return ret; } diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 867135560e5c..3f76dd266b1f 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -553,9 +553,7 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend) +static int crash_realloc_mem_range_guarded(struct crash_mem **mem_ranges) { struct crash_mem *tmem = *mem_ranges; @@ -566,7 +564,7 @@ static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, return -ENOMEM; } - return crash_exclude_mem_range(tmem, mstart, mend); + return 0; } /** @@ -604,18 +602,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) sort_memory_ranges(*mem_ranges, true); } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + ret = crash_realloc_mem_range_guarded(mem_ranges); if (ret) goto out; - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - goto out; - } - /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index dd9d92a96517..fec3622a13c9 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -74,10 +74,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) if (ret) goto out; - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); out: kfree(cmem); diff --git 
a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 335fd2ee9766..d8341a48f6b3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -186,41 +186,6 @@ static struct crash_mem *fill_up_crash_elf_data(void) return cmem; } -/* - * Look for any unwanted ranges between mstart, mend and remove them. This - * might lead to split and split ranges are put in cmem->ranges[] array - */ -static int elf_header_exclude_ranges(struct crash_mem *cmem) -{ - int ret = 0; - int i; - - /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); - if (ret) - return ret; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - return ret; - - if (crashk_low_res.end) - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - if (ret) - return ret; - - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - return ret; - } - - return 0; -} - static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; @@ -247,8 +212,8 @@ static int prepare_elf_headers(void **addr, unsigned long *sz, if (ret) goto out; - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(cmem); + /* Exclude the low 1M because it is always reserved */ + ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); if (ret) goto out; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 99dac1aa972a..5c0de111ddc3 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -18,6 +18,7 @@ #include <linux/memblock.h> #include <linux/kmemleak.h> #include <linux/crash_core.h> +#include <linux/crash_reserve.h> #include <linux/reboot.h> #include <linux/btf.h> #include <linux/objtool.h> @@ -161,8 +162,30 @@ static inline resource_size_t crash_resource_size(const struct resource *res) return !res->end ? 
0 : resource_size(res); } +static int crash_exclude_mem_ranges(struct crash_mem *cmem) +{ + int ret, i; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + return ret; +} int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) @@ -174,6 +197,11 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, unsigned int cpu, i; unsigned long long notes_addr; unsigned long mstart, mend; + int ret; + + ret = crash_exclude_mem_ranges(mem); + if (ret) + return ret; /* extra phdr for vmcoreinfo ELF note */ nr_phdr = nr_cpus + 1; -- 2.34.1
Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. Crash kernel memory reservation wastes production resources if too large, risks kdump failure if too small, and faces allocation difficulties on fragmented systems due to contiguous block constraints. The new CMA-based crashkernel reservation scheme splits the "large fixed reservation" into a "small fixed region + large CMA dynamic region": the CMA memory is available to userspace during normal operation to avoid waste, and is reclaimed for kdump upon crash—saving memory while improving reliability. So extend crashkernel CMA reservation support to arm64. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use. - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore. Update kernel-parameters.txt to document CMA support for crashkernel on arm64 architecture. Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com> --- v3: - Add Acked-by. v2: - Free cmem in prepare_elf_headers() - Add the motivation. --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm64/kernel/machine_kexec_file.c | 8 +++++++- arch/arm64/mm/init.c | 5 +++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1058f2a6d6a8..36bb642a7edd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,7 +1119,7 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. 
crashkernel=size[KMG],cma - [KNL, X86, ppc] Reserve additional crash kernel memory from + [KNL, X86, ARM64, ppc] Reserve additional crash kernel memory from CMA. This reservation is usable by the first system's userspace memory and kernel movable allocations (memory balloon, zswap). Pages allocated from this memory range diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index ed2c45007158..cab4aaf8cb22 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -48,7 +48,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) u64 i; phys_addr_t start, end; - nr_ranges = 2; /* for exclusion of crashkernel region */ + nr_ranges = 2 + crashk_cma_cnt; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; @@ -64,6 +64,12 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->nr_ranges++; } + for (i = 0; i < crashk_cma_cnt; i++) { + cmem->ranges[cmem->nr_ranges].start = crashk_cma_ranges[i].start; + cmem->ranges[cmem->nr_ranges].end = crashk_cma_ranges[i].end; + cmem->nr_ranges++; + } + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); kfree(cmem); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 524d34a0e921..28165d94af08 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -96,8 +96,8 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; static void __init arch_reserve_crashkernel(void) { + unsigned long long crash_base, crash_size, cma_size = 0; unsigned long long low_size = 0; - unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -106,11 +106,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); 
} static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) -- 2.34.1
Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. This allows the kernel to dynamically allocate contiguous memory for crash dumping when needed, rather than permanently reserving a fixed region at boot time. So extend crashkernel CMA reservation support to riscv. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use. - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore. Update kernel-parameters.txt to document CMA support for crashkernel on riscv architecture. Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com> --- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++-------- arch/riscv/kernel/machine_kexec_file.c | 10 ++++++++-- arch/riscv/mm/init.c | 5 +++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 36bb642a7edd..3b92324d3a03 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,14 +1119,14 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ARM64, ppc] Reserve additional crash kernel memory from - CMA. This reservation is usable by the first system's - userspace memory and kernel movable allocations (memory - balloon, zswap). 
Pages allocated from this memory range - will not be included in the vmcore so this should not - be used if dumping of userspace memory is intended and - it has to be expected that some movable kernel pages - may be missing from the dump. + [KNL, X86, ARM64, RISCV, ppc] Reserve additional crash + kernel memory from CMA. This reservation is usable by + the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated + from this memory range will not be included in the vmcore + so this should not be used if dumping of userspace memory + is intended and it has to be expected that some movable + kernel pages may be missing from the dump. A standard crashkernel reservation, as described above, is still needed to hold the crash kernel and initrd. diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index fec3622a13c9..0e4ac70d5a9a 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -59,9 +59,9 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) { struct crash_mem *cmem; unsigned int nr_ranges; - int ret; + int ret, i; - nr_ranges = 1; /* For exclusion of crashkernel region */ + nr_ranges = 1 + crashk_cma_cnt; /* For exclusion of crashkernel region */ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); @@ -74,6 +74,12 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) if (ret) goto out; + for (i = 0; i < crashk_cma_cnt; i++) { + cmem->ranges[cmem->nr_ranges].start = crashk_cma_ranges[i].start; + cmem->ranges[cmem->nr_ranges].end = crashk_cma_ranges[i].end; + cmem->nr_ranges++; + } + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); out: diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index addb8a9305be..074d2d5f79ee 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1404,7 +1404,7 @@ static inline void 
setup_vm_final(void) */ static void __init arch_reserve_crashkernel(void) { - unsigned long long low_size = 0; + unsigned long long low_size = 0, cma_size = 0; unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -1414,11 +1414,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } void __init paging_init(void) -- 2.34.1
participants (1)
-
Jinjie Ruan