From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
To be consistent with the style of other ARCHs such as x86, the kexec-tools commit b5a34a20984c ("arm64: support more than one crash kernel regions") requires all crash regions to be named "Crash kernel". Update the name of crashk_low_res accordingly, so that we can directly use the latest kexec tool without having to maintain a private version.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/setup.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2dd3ea837d35..ddca8d27fca6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -274,16 +274,9 @@ static void __init request_standard_resources(void) request_memmap_resources(res);
#ifdef CONFIG_KEXEC_CORE - /* - * Userspace will find "Crash kernel" or "Crash kernel (low)" - * region in /proc/iomem. - * In order to distinct from the high region and make no effect - * to the use of existing kexec-tools, rename the low region as - * "Crash kernel (low)". - */ + /* Userspace will find "Crash kernel" region in /proc/iomem. */ if (crashk_low_res.end && crashk_low_res.start >= res->start && crashk_low_res.end <= res->end) { - crashk_low_res.name = "Crash kernel (low)"; request_resource(res, &crashk_low_res); } if (crashk_res.end && crashk_res.start >= res->start &&
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
If the crashkernel reservation is deferred, its boundaries are not known when the linear mapping is created. But its upper limit is fixed and cannot be above 4G. Therefore, unless otherwise required, block mapping should be used for memory above 4G to improve performance.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ad5f8f38a487..1f1d9d87286f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -490,10 +490,10 @@ static void __init map_mem(pgd_t *pgdp) phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; - int flags = 0; + int flags = 0, eflags = 0; u64 i;
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
#ifdef CONFIG_KFENCE @@ -514,17 +514,40 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) + eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) break; + +#ifdef CONFIG_KEXEC_CORE + if (eflags && (end >= SZ_4G)) { + /* + * The memory block cross the 4G boundary. + * Forcibly use page-level mappings for memory under 4G. + */ + if (start < SZ_4G) { + __map_memblock(pgdp, start, SZ_4G - 1, + pgprot_tagged(PAGE_KERNEL), flags | eflags); + start = SZ_4G; + } + + /* Page-level mappings is not mandatory for memory above 4G */ + eflags = 0; + } +#endif + /* * The linear map must allow allocation tags reading/writing * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), - flags); + flags | eflags); }
/*
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
If the crashkernel has both high memory above 4G and low memory under 4G, kexec always loads content such as the Image and dtb into the high memory instead of the low memory. This means that only the high memory requires write protection based on page-level mapping. The allocation of high memory does not depend on the DMA boundary, so we can reserve the high memory first even if the crashkernel reservation is deferred.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/kexec.h | 1 + arch/arm64/mm/init.c | 2 + arch/arm64/mm/mmu.c | 15 ++++++ kernel/crash_core.c | 92 ++++++++++++++++++++++++++++++++-- 4 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index e19c0af3b53d..4024431ee001 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -99,6 +99,7 @@ static inline void crash_post_resume(void) {}
#ifdef CONFIG_KEXEC_CORE extern void __init reserve_crashkernel(void); +extern void __init reserve_crashkernel_high(void); #endif void machine_kexec_mask_interrupts(void);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2f3910beb4cf..0c43739bc6c5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -441,6 +441,8 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ reserve_crashkernel_high(); + reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1f1d9d87286f..a455a3128e9d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -517,6 +517,10 @@ static void __init map_mem(pgd_t *pgdp) #ifdef CONFIG_KEXEC_CORE if (crash_mem_map) eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); #endif
/* map all the memory banks */ @@ -564,6 +568,17 @@ static void __init map_mem(pgd_t *pgdp) PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + #ifdef CONFIG_KFENCE /* * Map the __kfence_pool at page granularity now. diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 88d93da963e8..5bd5fb6f7291 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -321,6 +321,9 @@ int __init parse_crashkernel_low(char *cmdline, */
#ifdef CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL +static bool crash_high_mem_reserved __initdata; +static struct resource crashk_res_high; + static int __init reserve_crashkernel_low(void) { #ifdef CONFIG_64BIT @@ -374,6 +377,66 @@ static int __init reserve_crashkernel_low(void) return 0; }
+void __init reserve_crashkernel_high(void) +{ + unsigned long long crash_base, crash_size; + char *cmdline = boot_command_line; + int ret; + + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* crashkernel=X[@offset] */ + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + &crash_size, &crash_base); + if (ret || !crash_size) { + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); + if (ret || !crash_size) + return; + } + + crash_size = PAGE_ALIGN(crash_size); + + /* + * For the case crashkernel=X, may fall back to reserve memory above + * 4G, make reservations here in advance. It will be released later if + * the region is successfully reserved under 4G. + */ + if (!crash_base) { + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + crash_base, CRASH_ADDR_HIGH_MAX); + if (!crash_base) + return; + + crash_high_mem_reserved = true; + } + + /* Mark the memory range that requires page-level mappings */ + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init hand_over_reserved_high_mem(void) +{ + crashk_res_high.start = crashk_res.start; + crashk_res_high.end = crashk_res.end; + + crashk_res.start = 0; + crashk_res.end = 0; +} + +static void __init take_reserved_high_mem(unsigned long long *crash_base, + unsigned long long *crash_size) +{ + *crash_base = crashk_res_high.start; + *crash_size = resource_size(&crashk_res_high); +} + +static void __init free_reserved_high_mem(void) +{ + memblock_free(crashk_res_high.start, resource_size(&crashk_res_high)); +} + /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -389,6 +452,8 @@ void __init reserve_crashkernel(void)
total_mem = memblock_phys_mem_size();
+ hand_over_reserved_high_mem(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { @@ -398,6 +463,11 @@ void __init reserve_crashkernel(void) if (ret != 0 || crash_size <= 0) return; high = true; + + if (crash_high_mem_reserved) { + take_reserved_high_mem(&crash_base, &crash_size); + goto reserve_low; + } }
/* 0 means: find the address automatically */ @@ -411,10 +481,15 @@ void __init reserve_crashkernel(void) * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. */ - if (!high) + if (!high) { crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, crash_size, CRASH_ALIGN); + if (!crash_base && crash_high_mem_reserved) { + take_reserved_high_mem(&crash_base, &crash_size); + goto reserve_low; + } + } if (!crash_base) crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_HIGH_MAX, crash_size, @@ -447,9 +522,18 @@ void __init reserve_crashkernel(void) return; }
- if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); - return; + if ((crash_base >= CRASH_ADDR_LOW_MAX) || high) { +reserve_low: + if (reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + } else if (crash_high_mem_reserved) { + /* + * The crash memory is successfully allocated under 4G, and the + * previously reserved high memory is no longer required. + */ + free_reserved_high_mem(); }
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
For the cases crashkernel=X@offset and crashkernel=X,high, we have explicitly used 'crashk_res' to mark the range that requires page-level mapping, so NO_BLOCK_MAPPINGS should not be required for other areas. Otherwise, system performance will be affected. In fact, only the case crashkernel=X requires page-level mapping for all low memory under 4G, because it first tries to reserve low memory and falls back to high memory only if that fails, so we cannot predict its final location.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/kexec.h | 2 ++ arch/arm64/mm/mmu.c | 17 +---------------- kernel/crash_core.c | 3 +++ 3 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 4024431ee001..d797dbab3aad 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -97,6 +97,8 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif
+extern bool crash_low_mem_page_map; + #ifdef CONFIG_KEXEC_CORE extern void __init reserve_crashkernel(void); extern void __init reserve_crashkernel_high(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a455a3128e9d..c21d911aa755 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -470,21 +470,6 @@ void __init mark_linear_text_alias_ro(void) PAGE_KERNEL_RO); }
-static bool crash_mem_map __initdata; - -static int __init enable_crash_mem_map(char *arg) -{ - /* - * Proper parameter parsing is done by reserve_crashkernel(). We only - * need to know if the linear map has to avoid block mappings so that - * the crashkernel reservations can be unmapped later. - */ - crash_mem_map = true; - - return 0; -} -early_param("crashkernel", enable_crash_mem_map); - static void __init map_mem(pgd_t *pgdp) { phys_addr_t kernel_start = __pa_symbol(_text); @@ -515,7 +500,7 @@ static void __init map_mem(pgd_t *pgdp) memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE - if (crash_mem_map) + if (crash_low_mem_page_map) eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
if (crashk_res.end) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 5bd5fb6f7291..0865f816b57a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -321,6 +321,7 @@ int __init parse_crashkernel_low(char *cmdline, */
#ifdef CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL +bool crash_low_mem_page_map __initdata; static bool crash_high_mem_reserved __initdata; static struct resource crashk_res_high;
@@ -393,6 +394,8 @@ void __init reserve_crashkernel_high(void) ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); if (ret || !crash_size) return; + } else if (!crash_base) { + crash_low_mem_page_map = true; }
crash_size = PAGE_ALIGN(crash_size);
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
For "crashkernel=X,high", there must be two crash regions: high=crashk_res and low=crashk_low_res. But currently the kexec_file_load() syscall only adds crashk_res to "linux,usable-memory-range", which leaves the second kernel with no available DMA memory. Fix it the same way the kexec tool does for option -c.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/of/kexec.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index f335d941a716..d8231c34e873 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -396,6 +396,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, crashk_res.end - crashk_res.start + 1); if (ret) goto out; + + if (crashk_low_res.end) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + FDT_PROP_MEM_RANGE, + crashk_low_res.start, + crashk_low_res.end - crashk_low_res.start + 1); + if (ret) + goto out; + } }
/* add bootargs */