From: Sang Yan <sangyan@huawei.com>
hulk inclusion
category: feature
bugzilla: 48159
CVE: N/A
In normal kexec, relocating the kernel may take 5 ~ 10 seconds, because all segments have to be copied from vmalloced memory to kernel boot memory with the MMU disabled.

We introduce quick kexec to save this copying time, in the same way kdump (kexec on crash) does, by using a reserved memory region "Quick Kexec".

The quick kimage is constructed the same way as the crash kernel image, and all of its segments are then simply copied into the reserved memory.

We also add support for this in the kexec_load syscall, via the new KEXEC_QUICK flag.
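For illustration, a minimal userspace sketch of passing the new flag to the kexec_load syscall. This is not part of the patch: the entry point and segment values below are placeholders that a real loader such as kexec-tools would compute from the kernel image, and KEXEC_QUICK is defined locally in case the installed uapi headers predate this series.

#include <linux/kexec.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef KEXEC_QUICK
#define KEXEC_QUICK 0x00000004  /* added by this series */
#endif

int main(void)
{
        /* Placeholder segment: a real loader fills these from the image. */
        struct kexec_segment seg = {
                .buf   = NULL,  /* kernel image data in user memory */
                .bufsz = 0,
                .mem   = NULL,  /* destination address in the reserved region */
                .memsz = 0,
        };
        unsigned long entry = 0;        /* new kernel entry point */

        if (syscall(SYS_kexec_load, entry, 1UL, &seg,
                    KEXEC_ARCH_DEFAULT | KEXEC_QUICK) == -1) {
                perror("kexec_load");
                return 1;
        }
        return 0;
}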
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/Kconfig               | 10 ++++++++++
 include/linux/ioport.h     |  1 +
 include/linux/kexec.h      | 11 ++++++++++-
 include/uapi/linux/kexec.h |  1 +
 kernel/kexec.c             | 10 ++++++++++
 kernel/kexec_core.c        | 42 +++++++++++++++++++++++++++++++++---------
 6 files changed, 65 insertions(+), 10 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 2592b4b..7811eee 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -18,6 +18,16 @@ config KEXEC_CORE
         select CRASH_CORE
         bool
 
+config QUICK_KEXEC
+        bool "Support for quick kexec"
+        depends on KEXEC_CORE
+        help
+          Use pre-reserved memory to accelerate kexec: just like crash
+          kexec, load the new kernel and initrd into reserved memory
+          and boot the new kernel from that memory, saving the time
+          otherwise spent relocating the kernel.
+
 config KEXEC_ELF
         bool
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5135d4b..84a716f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -139,6 +139,7 @@ enum {
         IORES_DESC_DEVICE_PRIVATE_MEMORY        = 6,
         IORES_DESC_RESERVED                     = 7,
         IORES_DESC_SOFT_RESERVED                = 8,
+        IORES_DESC_QUICK_KEXEC                  = 9,
 };
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index f301f2f..7fff410 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -269,9 +269,10 @@ struct kimage {
         unsigned long control_page;
 
         /* Flags to indicate special processing */
-        unsigned int type : 1;
+        unsigned int type : 2;
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
+#define KEXEC_TYPE_QUICK   2
         unsigned int preserve_context : 1;
         /* If set, we are using file mode kexec syscall */
         unsigned int file_mode:1;
@@ -331,6 +332,11 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif
 
+#ifdef CONFIG_QUICK_KEXEC
+#undef KEXEC_FLAGS
+#define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_QUICK)
+#endif
+
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS       (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
                                 KEXEC_FILE_NO_INITRAMFS)
@@ -338,6 +344,9 @@ extern int kexec_load_disabled;
 /* Location of a reserved region to hold the crash kernel. */
 extern note_buf_t __percpu *crash_notes;
 
+#ifdef CONFIG_QUICK_KEXEC
+extern struct resource quick_kexec_res;
+#endif
 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;

diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 05669c8..d891d80 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -12,6 +12,7 @@
 /* kexec flags for different usage scenarios */
 #define KEXEC_ON_CRASH         0x00000001
 #define KEXEC_PRESERVE_CONTEXT 0x00000002
+#define KEXEC_QUICK            0x00000004
 #define KEXEC_ARCH_MASK        0xffff0000
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c0..4acc909 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -44,6 +44,9 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
         int ret;
         struct kimage *image;
         bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+#ifdef CONFIG_QUICK_KEXEC
+        bool kexec_on_quick = flags & KEXEC_QUICK;
+#endif
 
         if (kexec_on_panic) {
                 /* Verify we have a valid entry point */
@@ -69,6 +72,13 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                 image->type = KEXEC_TYPE_CRASH;
         }
 
+#ifdef CONFIG_QUICK_KEXEC
+        if (kexec_on_quick) {
+                image->control_page = quick_kexec_res.start;
+                image->type = KEXEC_TYPE_QUICK;
+        }
+#endif
+
         ret = sanity_check_segment_list(image);
         if (ret)
                 goto out_free_image;

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 2ca8875..c7e2aa2 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -53,6 +53,17 @@ note_buf_t __percpu *crash_notes;
 /* Flag to indicate we are going to kexec a new kernel */
 bool kexec_in_progress = false;
 
+/* Resource for quick kexec */
+#ifdef CONFIG_QUICK_KEXEC
+struct resource quick_kexec_res = {
+        .name  = "Quick kexec",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+        .desc  = IORES_DESC_QUICK_KEXEC
+};
+#endif
+
 int kexec_should_crash(struct task_struct *p)
 {
         /*
@@ -396,8 +407,9 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
         return pages;
 }
 
-static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
-                                                     unsigned int order)
+static struct page *kimage_alloc_special_control_pages(struct kimage *image,
+                                                       unsigned int order,
+                                                       unsigned long end)
 {
         /* Control pages are special, they are the intermediaries
          * that are needed while we copy the rest of the pages
@@ -427,7 +439,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
         size = (1 << order) << PAGE_SHIFT;
         hole_start = (image->control_page + (size - 1)) & ~(size - 1);
         hole_end = hole_start + size - 1;
-        while (hole_end <= crashk_res.end) {
+        while (hole_end <= end) {
                 unsigned long i;
 
                 cond_resched();
@@ -462,7 +474,6 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
         return pages;
 }
 
-
 struct page *kimage_alloc_control_pages(struct kimage *image,
                                         unsigned int order)
 {
@@ -473,8 +484,15 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
                 pages = kimage_alloc_normal_control_pages(image, order);
                 break;
         case KEXEC_TYPE_CRASH:
-                pages = kimage_alloc_crash_control_pages(image, order);
+                pages = kimage_alloc_special_control_pages(image, order,
+                                                           crashk_res.end);
+                break;
+#ifdef CONFIG_QUICK_KEXEC
+        case KEXEC_TYPE_QUICK:
+                pages = kimage_alloc_special_control_pages(image, order,
+                                                           quick_kexec_res.end);
                 break;
+#endif
         }
 
         return pages;
@@ -830,11 +848,12 @@ static int kimage_load_normal_segment(struct kimage *image,
         return result;
 }
 
-static int kimage_load_crash_segment(struct kimage *image,
+static int kimage_load_special_segment(struct kimage *image,
                                      struct kexec_segment *segment)
 {
-        /* For crash dumps kernels we simply copy the data from
-         * user space to it's destination.
+        /*
+         * For crash dump kernels and quick kexec kernels
+         * we simply copy the data from user space to its destination.
          * We do things a page at a time for the sake of kmap.
          */
         unsigned long maddr;
@@ -908,8 +927,13 @@ int kimage_load_segment(struct kimage *image,
                 result = kimage_load_normal_segment(image, segment);
                 break;
         case KEXEC_TYPE_CRASH:
-                result = kimage_load_crash_segment(image, segment);
+                result = kimage_load_special_segment(image, segment);
                 break;
+#ifdef CONFIG_QUICK_KEXEC
+        case KEXEC_TYPE_QUICK:
+                result = kimage_load_special_segment(image, segment);
+                break;
+#endif
         }
 
         return result;
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
category: feature
bugzilla: 48159
CVE: N/A
Reserve memory for quick kexec on arm64, configured via the "quickkexec=" kernel command line parameter.
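For example, booting with "quickkexec=512M" reserves a 512 MB region for the quick kexec kernel. The size is parsed with memparse(), so the usual K/M/G suffixes are accepted, and the region is allocated below arm64_dma32_phys_limit with the 2 MB alignment the arm64 boot protocol requires.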
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/kernel/setup.c |  6 ++++++
 arch/arm64/mm/init.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 6aff30d..de5e554 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -254,6 +254,12 @@ static void __init request_standard_resources(void)
                     crashk_res.end <= res->end)
                         request_resource(res, &crashk_res);
 #endif
+#ifdef CONFIG_QUICK_KEXEC
+                if (quick_kexec_res.end &&
+                    quick_kexec_res.start >= res->start &&
+                    quick_kexec_res.end <= res->end)
+                        request_resource(res, &quick_kexec_res);
+#endif
         }
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 794f992..b4d124d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -129,6 +129,45 @@ static void __init reserve_elfcorehdr(void)
 }
 #endif /* CONFIG_CRASH_DUMP */
 
+#ifdef CONFIG_QUICK_KEXEC
+static int __init parse_quick_kexec(char *p)
+{
+        if (!p)
+                return 0;
+
+        quick_kexec_res.end = PAGE_ALIGN(memparse(p, NULL));
+
+        return 0;
+}
+early_param("quickkexec", parse_quick_kexec);
+
+static void __init reserve_quick_kexec(void)
+{
+        unsigned long long mem_start, mem_len;
+
+        mem_len = quick_kexec_res.end;
+        if (mem_len == 0)
+                return;
+
+        /* Current arm64 boot protocol requires 2MB alignment */
+        mem_start = memblock_find_in_range(0, arm64_dma32_phys_limit,
+                                           mem_len, SZ_2M);
+        if (mem_start == 0) {
+                pr_warn("cannot allocate quick kexec mem (size:0x%llx)\n",
+                        mem_len);
+                quick_kexec_res.end = 0;
+                return;
+        }
+
+        memblock_reserve(mem_start, mem_len);
+        pr_info("quick kexec mem reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+                mem_start, mem_start + mem_len, mem_len >> 20);
+
+        quick_kexec_res.start = mem_start;
+        quick_kexec_res.end = mem_start + mem_len - 1;
+}
+#endif
+
 /*
  * Return the maximum physical address for a zone with a given address size
  * limit. It currently assumes that for memory starting above 4G, 32-bit
@@ -357,6 +396,10 @@ void __init arm64_memblock_init(void)
 
         reserve_crashkernel();
 
+#ifdef CONFIG_QUICK_KEXEC
+        reserve_quick_kexec();
+#endif
+
         reserve_elfcorehdr();
 
         high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
Reviewed-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
category: feature
bugzilla: 48159
CVE: N/A
Enable quick kexec on openEuler by default.
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 1743f53..c0e3d15 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -704,6 +704,7 @@ CONFIG_CRYPTO_AES_ARM64_BS=m
 #
 CONFIG_CRASH_CORE=y
 CONFIG_KEXEC_CORE=y
+CONFIG_QUICK_KEXEC=y
 CONFIG_SET_FS=y
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y