From: Sang Yan <sangyan@huawei.com>
hulk inclusion
from openEuler-21.03 commit 6d3c56fa93e3 ("arm64: smp: Add support for cpu park")
category: feature
bugzilla: 48159
CVE: N/A
------------------------------
Introduce a CPU PARK feature to save the time spent taking cpus down and bringing them back up during kexec; going down may cost about 250ms per cpu and coming back up about 30ms.

As a result, on 128 cores it costs more than 30 seconds to take the cpus down and up during kexec (128 * 250ms is already 32 seconds for the down path alone). Think about 256 cores and more.
CPU PARK is a state in which a cpu stays powered on, spinning in a loop and polling for a chance to exit, such as an exit address being written.

A block of memory is reserved and filled with each cpu's park text section, exit address and park-magic flag. In this implementation, one page is reserved per cpu core.

Cpus go into the park state instead of going down in machine_shutdown(), and come out of the park state in smp_init() instead of being brought up.
One of the cpu park sections in the pre-reserved memory block:

    +--------------+
    + exit address +
    +--------------+
    +  park magic  +
    +--------------+
    +  park codes  +
    +      .       +
    +      .       +
    +      .       +
    +--------------+
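The park region is passed on the kernel command line via the "cpuparkmem=" early parameter added below. A minimal usage sketch (the address is illustrative only; it must point at system RAM with room for PARK_SECTION_SIZE * NR_CPUS bytes):

    cpuparkmem=0x200000000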
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/Kconfig                |  12 +++
 arch/arm64/include/asm/kexec.h    |   7 ++
 arch/arm64/include/asm/smp.h      |  16 +++
 arch/arm64/kernel/Makefile        |   1 +
 arch/arm64/kernel/cpu-park.S      |  59 ++++++++++
 arch/arm64/kernel/machine_kexec.c |   2 +-
 arch/arm64/kernel/process.c       |   4 +
 arch/arm64/kernel/smp.c           | 222 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/init.c              |  64 +++++++++++
 9 files changed, 386 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/cpu-park.S
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 37c5064..dec367a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -964,6 +964,18 @@ config CRASH_DUMP

           For more details see Documentation/kdump/kdump.txt

+config ARM64_CPU_PARK
+        bool "Support CPU PARK on kexec"
+        depends on SMP
+        depends on KEXEC_CORE
+        help
+          This enables support for the CPU PARK feature, which saves
+          the time of taking cpus down and back up across kexec.
+          Instead of dying before the jump to the new kernel, cpus
+          spin in a park loop through kexec and jump out of the loop
+          to the new kernel entry in smp_init.
+
 config XEN_DOM0
         def_bool y
         depends on XEN

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 87e75d0..30ab1fa 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -31,6 +31,11 @@
 /* 2M alignment for crash kernel regions */
 #define CRASH_ALIGN     SZ_2M
+#ifdef CONFIG_ARM64_CPU_PARK
+/* CPU park state flag: "park" */
+#define PARK_MAGIC      0x7061726b
+#endif
+
 #ifndef __ASSEMBLY__

 /**
@@ -96,6 +101,8 @@ static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif

+void machine_kexec_mask_interrupts(void);
+
 #endif /* __ASSEMBLY__ */

 #endif

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 403c22f..f0898dc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -153,6 +153,22 @@ extern bool smp_crash_stop_failed(void);
void ipi_set_nmi_prio(void __iomem *base, u8 prio);
+#ifdef CONFIG_ARM64_CPU_PARK
+#define PARK_SECTION_SIZE       1024
+struct cpu_park_info {
+        /* Physical address of reserved park memory. */
+        unsigned long start;
+        /* Reserved park memory length; should be PARK_SECTION_SIZE * NR_CPUS. */
+        unsigned long len;
+        /* Virtual address of reserved park memory. */
+        unsigned long start_v;
+};
+extern struct cpu_park_info park_info;
+extern void enter_cpu_park(unsigned long text, unsigned long exit);
+extern void do_cpu_park(unsigned long exit);
+extern int kexec_smp_send_park(void);
+#endif
+
 #endif /* ifndef __ASSEMBLY__ */

 #endif /* ifndef __ASM_SMP_H */

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6dd565a..d2861d7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -56,6 +56,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE)      += kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)         += hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC)               += machine_kexec.o relocate_kernel.o    \
                                            cpu-reset.o
+arm64-obj-$(CONFIG_ARM64_CPU_PARK)      += cpu-park.o
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)    += arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)          += crash_dump.o

diff --git a/arch/arm64/kernel/cpu-park.S b/arch/arm64/kernel/cpu-park.S
new file mode 100644
index 0000000..fc8f61e
--- /dev/null
+++ b/arch/arm64/kernel/cpu-park.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CPU park routines
+ *
+ * Copyright (C) 2020 Huawei Technologies Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection    .idmap.text, "awx"
+
+/* cpu park helper in idmap section */
+ENTRY(enter_cpu_park)
+        /* Clear sctlr_el1 flags. */
+        mrs     x12, sctlr_el1
+        mov_q   x13, SCTLR_ELx_FLAGS
+        bic     x12, x12, x13
+        pre_disable_mmu_workaround
+        msr     sctlr_el1, x12          /* disable mmu */
+        isb
+
+        mov     x18, x0
+        mov     x0, x1                  /* secondary_entry addr */
+        br      x18                     /* call do_cpu_park of each cpu */
+ENDPROC(enter_cpu_park)
+
+.popsection
+
+ENTRY(do_cpu_park)
+        ldr     x18, =PARK_MAGIC        /* magic number "park" */
+        add     x1, x0, #8
+        str     x18, [x1]               /* set on-park flag */
+        dc      civac, x1               /* flush cache of "park" */
+        dsb     nsh
+        isb
+
+.Lloop:
+        wfe
+        isb
+        ldr     x19, [x0]
+        cmp     x19, #0                 /* test secondary_entry */
+        b.eq    .Lloop
+
+        ic      iallu                   /* invalidate the local I-cache */
+        dsb     nsh
+        isb
+
+        br      x19                     /* jump to secondary_entry */
+ENDPROC(do_cpu_park)
+

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 7aef7ac..b41d6d4 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -221,7 +221,7 @@ void machine_kexec(struct kimage *kimage)
         BUG(); /* Should never get here. */
 }
-static void machine_kexec_mask_interrupts(void)
+void machine_kexec_mask_interrupts(void)
 {
         unsigned int i;
         struct irq_desc *desc;

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8342459..ddb1911 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -151,6 +151,10 @@ void arch_cpu_idle_dead(void)
  */
 void machine_shutdown(void)
 {
+#ifdef CONFIG_ARM64_CPU_PARK
+        if (kexec_smp_send_park() == 0)
+                return;
+#endif
         disable_nonboot_cpus();
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 5235b9a..090e760 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -98,6 +98,167 @@ static inline int op_cpu_kill(unsigned int cpu)
 }
 #endif
+#ifdef CONFIG_ARM64_CPU_PARK
+struct cpu_park_section {
+        unsigned long exit;     /* exit address of park loop */
+        unsigned long magic;    /* magic representing park state */
+        char text[0];           /* text section of park */
+};
+
+static int mmap_cpu_park_mem(void)
+{
+        if (!park_info.start)
+                return -ENOMEM;
+
+        if (park_info.start_v)
+                return 0;
+
+        park_info.start_v = (unsigned long)__ioremap(park_info.start,
+                                                     park_info.len,
+                                                     PAGE_KERNEL_EXEC);
+        if (!park_info.start_v) {
+                pr_warn("map park memory failed.");
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
+static inline unsigned long cpu_park_section_v(unsigned int cpu)
+{
+        return park_info.start_v + PARK_SECTION_SIZE * (cpu - 1);
+}
+
+static inline unsigned long cpu_park_section_p(unsigned int cpu)
+{
+        return park_info.start + PARK_SECTION_SIZE * (cpu - 1);
+}
+
+/*
+ * Write the secondary_entry to the exit section of the park state.
+ * The secondary cpu will then jump straight into the kernel
+ * through the secondary_entry.
+ */
+static int write_park_exit(unsigned int cpu)
+{
+        struct cpu_park_section *park_section;
+        unsigned long *park_exit;
+        unsigned long *park_text;
+
+        if (mmap_cpu_park_mem() != 0)
+                return -EPERM;
+
+        park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+        park_exit = &park_section->exit;
+        park_text = (unsigned long *)park_section->text;
+        pr_debug("park_text 0x%lx : 0x%lx, do_cpu_park text 0x%lx : 0x%lx",
+                 (unsigned long)park_text, *park_text,
+                 (unsigned long)do_cpu_park,
+                 *(unsigned long *)do_cpu_park);
+
+        /*
+         * Test the first 8 bytes to determine
+         * whether the cpu park exit needs to be written.
+         */
+        if (*park_text == *(unsigned long *)do_cpu_park) {
+                writeq_relaxed(__pa_symbol(secondary_entry), park_exit);
+                __flush_dcache_area((__force void *)park_exit,
+                                    sizeof(unsigned long));
+                flush_icache_range((unsigned long)park_exit,
+                                   (unsigned long)(park_exit + 1));
+                sev();
+                dsb(sy);
+                isb();
+
+                pr_debug("Write cpu %u secondary entry 0x%lx to 0x%lx.",
+                         cpu, *park_exit, (unsigned long)park_exit);
+                pr_info("Boot cpu %u from PARK state.", cpu);
+                return 0;
+        }
+
+        return -EPERM;
+}
+
+/* Install cpu park sections for the specific cpu. */
+static int install_cpu_park(unsigned int cpu)
+{
+        struct cpu_park_section *park_section;
+        unsigned long *park_exit;
+        unsigned long *park_magic;
+        unsigned long park_text_len;
+
+        park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+        pr_debug("Install cpu park on cpu %u park exit 0x%lx park text 0x%lx",
+                 cpu, (unsigned long)park_section,
+                 (unsigned long)(park_section->text));
+
+        park_exit = &park_section->exit;
+        park_magic = &park_section->magic;
+        park_text_len = PARK_SECTION_SIZE - sizeof(struct cpu_park_section);
+
+        *park_exit = 0UL;
+        *park_magic = 0UL;
+        memcpy((void *)park_section->text, do_cpu_park, park_text_len);
+        __flush_dcache_area((void *)park_section, PARK_SECTION_SIZE);
+
+        return 0;
+}
+
+static int uninstall_cpu_park(unsigned int cpu)
+{
+        unsigned long park_section;
+
+        if (mmap_cpu_park_mem() != 0)
+                return -EPERM;
+
+        park_section = cpu_park_section_v(cpu);
+        memset((void *)park_section, 0, PARK_SECTION_SIZE);
+        __flush_dcache_area((void *)park_section, PARK_SECTION_SIZE);
+
+        return 0;
+}
+
+static int cpu_wait_park(unsigned int cpu)
+{
+        long timeout;
+        struct cpu_park_section *park_section;
+
+        volatile unsigned long *park_magic;
+
+        park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+        park_magic = &park_section->magic;
+
+        timeout = USEC_PER_SEC;
+        while (*park_magic != PARK_MAGIC && timeout--)
+                udelay(1);
+
+        if (timeout > 0)
+                pr_debug("cpu %u park done.", cpu);
+        else
+                pr_err("cpu %u park failed.", cpu);
+
+        return *park_magic == PARK_MAGIC;
+}
+
+static void cpu_park(unsigned int cpu)
+{
+        unsigned long park_section_p;
+        unsigned long park_exit_phy;
+        unsigned long do_park;
+        typeof(enter_cpu_park) *park;
+
+        park_section_p = cpu_park_section_p(cpu);
+        park_exit_phy = park_section_p;
+        pr_debug("Go to park cpu %u exit address 0x%lx", cpu, park_exit_phy);
+
+        do_park = park_section_p + sizeof(struct cpu_park_section);
+        park = (void *)__pa_symbol(enter_cpu_park);
+
+        cpu_install_idmap();
+        park(do_park, park_exit_phy);
+        unreachable();
+}
+#endif
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
@@ -105,6 +266,10 @@ static inline int op_cpu_kill(unsigned int cpu)
  */
 static int boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
+#ifdef CONFIG_ARM64_CPU_PARK
+        if (write_park_exit(cpu) == 0)
+                return 0;
+#endif
         if (cpu_ops[cpu]->cpu_boot)
                 return cpu_ops[cpu]->cpu_boot(cpu);
@@ -153,6 +318,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
                 return ret;
         }

+#ifdef CONFIG_ARM64_CPU_PARK
+        uninstall_cpu_park(cpu);
+#endif
+
         secondary_data.task = NULL;
         secondary_data.stack = NULL;
         status = READ_ONCE(secondary_data.status);

@@ -863,6 +1032,20 @@ static void ipi_cpu_stop(unsigned int cpu)
         local_daif_mask();
         sdei_mask_local_cpu();

+#ifdef CONFIG_ARM64_CPU_PARK
+        /*
+         * Go to cpu park state.
+         * Otherwise go to cpu die.
+         */
+        if (kexec_in_progress && park_info.start_v) {
+                machine_kexec_mask_interrupts();
+                cpu_park(cpu);
+
+                if (cpu_ops[cpu]->cpu_die)
+                        cpu_ops[cpu]->cpu_die(cpu);
+        }
+#endif
+
         while (1)
                 cpu_relax();
 }

@@ -1037,6 +1220,45 @@ void smp_send_stop(void)
         sdei_mask_local_cpu();
 }

+#ifdef CONFIG_ARM64_CPU_PARK
+int kexec_smp_send_park(void)
+{
+        unsigned long cpu;
+
+        if (WARN_ON(!kexec_in_progress)) {
+                pr_crit("%s called not in kexec progress.", __func__);
+                return -EPERM;
+        }
+
+        if (mmap_cpu_park_mem() != 0) {
+                pr_info("no cpuparkmem, goto normal way.");
+                return -EPERM;
+        }
+
+        local_irq_disable();
+
+        if (num_online_cpus() > 1) {
+                cpumask_t mask;
+
+                cpumask_copy(&mask, cpu_online_mask);
+                cpumask_clear_cpu(smp_processor_id(), &mask);
+
+                for_each_cpu(cpu, &mask)
+                        install_cpu_park(cpu);
+                smp_cross_call(&mask, IPI_CPU_STOP);
+
+                /* Wait for other CPUs to park */
+                for_each_cpu(cpu, &mask)
+                        cpu_wait_park(cpu);
+                pr_info("smp park other cpus done\n");
+        }
+
+        sdei_mask_local_cpu();
+
+        return 0;
+}
+#endif
+
 #ifdef CONFIG_KEXEC_CORE
 void crash_smp_send_stop(void)
 {

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 0773205..3a914ff 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -466,6 +466,57 @@ static int __init parse_memmap_opt(char *str)
 }
 early_param("memmap", parse_memmap_opt);
+#ifdef CONFIG_ARM64_CPU_PARK
+struct cpu_park_info park_info = {
+        .start = 0,
+        .len = PARK_SECTION_SIZE * NR_CPUS,
+        .start_v = 0,
+};
+
+static int __init parse_park_mem(char *p)
+{
+        if (!p)
+                return 0;
+
+        park_info.start = PAGE_ALIGN(memparse(p, NULL));
+        if (park_info.start == 0)
+                pr_info("cpu park mem params[%s]", p);
+
+        return 0;
+}
+early_param("cpuparkmem", parse_park_mem);
+
+static int __init reserve_park_mem(void)
+{
+        if (park_info.start == 0 || park_info.len == 0)
+                return 0;
+
+        park_info.start = PAGE_ALIGN(park_info.start);
+        park_info.len = PAGE_ALIGN(park_info.len);
+
+        if (!memblock_is_region_memory(park_info.start, park_info.len)) {
+                pr_warn("cannot reserve park mem: region is not memory!");
+                goto out;
+        }
+
+        if (memblock_is_region_reserved(park_info.start, park_info.len)) {
+                pr_warn("cannot reserve park mem: region overlaps reserved memory!");
+                goto out;
+        }
+
+        memblock_remove(park_info.start, park_info.len);
+        pr_info("cpu park mem reserved: 0x%016lx - 0x%016lx (%ld MB)",
+                park_info.start, park_info.start + park_info.len,
+                park_info.len >> 20);
+
+        return 0;
+out:
+        park_info.start = 0;
+        park_info.len = 0;
+        return -EINVAL;
+}
+#endif
+
 void __init arm64_memblock_init(void)
 {
         const s64 linear_region_size = -(s64)PAGE_OFFSET;

@@ -582,6 +633,19 @@ void __init arm64_memblock_init(void)
         else
                 arm64_dma_phys_limit = PHYS_MASK + 1;

+        /*
+         * Reserve park memory before crashkernel and quick kexec.
+         * Park memory must be specified by address, while crashkernel
+         * and quickkexec may be specified by length only, in which
+         * case a suitable region is found at reservation time.
+         *
+         * So it is better to reserve park memory first, although this
+         * may cause the crashkernel or quickkexec reservation to fail.
+         */
+#ifdef CONFIG_ARM64_CPU_PARK
+        reserve_park_mem();
+#endif
+
         reserve_crashkernel();
reserve_elfcorehdr();
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
from openEuler-21.03 commit 43b33a48c03d ("arm64: Enable cpu park on openEuler")
category: feature
bugzilla: 48159
CVE: N/A
------------------------------
Enable cpu park on openEuler by default.
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 89dec8e..1b61f52 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -446,6 +446,7 @@ CONFIG_PARAVIRT=y
 CONFIG_PARAVIRT_TIME_ACCOUNTING=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_ARM64_CPU_PARK=y
 # CONFIG_XEN is not set
 CONFIG_FORCE_MAX_ZONEORDER=14
 CONFIG_UNMAP_KERNEL_AT_EL0=y
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
from openEuler-21.03 commit e789f5dff6c2 ("kexec: Add quick kexec support for kernel")
category: feature
bugzilla: 48159
CVE: N/A
------------------------------
In normal kexec, relocating the kernel may cost 5 to 10 seconds, since all segments are copied from vmalloced memory to kernel boot memory with the MMU disabled.

We introduce quick kexec to save this copying time. Like kdump (kexec on crash), it uses a reserved memory region, "Quick Kexec".

The quick kimage is constructed in the same way as the crash kernel image; all segments of the kimage are then simply copied to the reserved memory.

We also add support for this in the kexec_load syscall, using the KEXEC_QUICK flag.
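A minimal userspace sketch of loading an image this way (illustrative only; assumes the updated uapi header below is installed and that the kexec_segment array has already been prepared):

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/kexec.h>

    /* Ask the kernel to place the image in the "Quick kexec" region. */
    static long quick_kexec_load(unsigned long entry,
                                 unsigned long nr_segments,
                                 struct kexec_segment *segments)
    {
            /* KEXEC_QUICK selects KEXEC_TYPE_QUICK in kimage_alloc_init(). */
            return syscall(__NR_kexec_load, entry, nr_segments,
                           segments, KEXEC_ARCH_DEFAULT | KEXEC_QUICK);
    }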
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/Kconfig               |  9 +++++++++
 include/linux/ioport.h     |  1 +
 include/linux/kexec.h      | 11 ++++++++++-
 include/uapi/linux/kexec.h |  1 +
 kernel/kexec.c             | 10 ++++++++++
 kernel/kexec_core.c        | 41 ++++++++++++++++++++++++++++---------
 6 files changed, 63 insertions(+), 10 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 0dd478f..e906cbb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -18,6 +18,15 @@ config KEXEC_CORE
         select CRASH_CORE
         bool

+config QUICK_KEXEC
+        bool "Support for quick kexec"
+        depends on KEXEC_CORE
+        help
+          Use pre-reserved memory to accelerate kexec: just like
+          crash kexec, the new kernel and initrd are loaded into
+          the reserved memory and the new kernel boots from there,
+          which saves the time of relocating the kernel.
+
 config HAVE_IMA_KEXEC
         bool
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5330288..1a438ec 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -139,6 +139,7 @@ enum {
         IORES_DESC_PERSISTENT_MEMORY_LEGACY     = 5,
         IORES_DESC_DEVICE_PRIVATE_MEMORY        = 6,
         IORES_DESC_DEVICE_PUBLIC_MEMORY         = 7,
+        IORES_DESC_QUICK_KEXEC                  = 8,
 };

 /* helpers to define resources */

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d6b8d0a..98cf0fb 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -233,9 +233,10 @@ struct kimage {
         unsigned long control_page;

         /* Flags to indicate special processing */
-        unsigned int type : 1;
+        unsigned int type : 2;
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
+#define KEXEC_TYPE_QUICK   2
         unsigned int preserve_context : 1;
         /* If set, we are using file mode kexec syscall */
         unsigned int file_mode:1;

@@ -296,6 +297,11 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif

+#ifdef CONFIG_QUICK_KEXEC
+#undef KEXEC_FLAGS
+#define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_QUICK)
+#endif
+
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS        (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
                                  KEXEC_FILE_NO_INITRAMFS)

@@ -305,6 +311,9 @@ extern int kexec_load_disabled;
 extern struct resource crashk_res;
 extern struct resource crashk_low_res;
 extern note_buf_t __percpu *crash_notes;
+#ifdef CONFIG_QUICK_KEXEC
+extern struct resource quick_kexec_res;
+#endif

 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;

diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6d11286..ca3cebe 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -12,6 +12,7 @@
 /* kexec flags for different usage scenarios */
 #define KEXEC_ON_CRASH          0x00000001
 #define KEXEC_PRESERVE_CONTEXT  0x00000002
+#define KEXEC_QUICK             0x00000004
 #define KEXEC_ARCH_MASK         0xffff0000
 /*

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6855980..47dfad7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -46,6 +46,9 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
         int ret;
         struct kimage *image;
         bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+#ifdef CONFIG_QUICK_KEXEC
+        bool kexec_on_quick = flags & KEXEC_QUICK;
+#endif

         if (kexec_on_panic) {
                 /* Verify we have a valid entry point */

@@ -71,6 +74,13 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                 image->type = KEXEC_TYPE_CRASH;
         }

+#ifdef CONFIG_QUICK_KEXEC
+        if (kexec_on_quick) {
+                image->control_page = quick_kexec_res.start;
+                image->type = KEXEC_TYPE_QUICK;
+        }
+#endif
+
         ret = sanity_check_segment_list(image);
         if (ret)
                 goto out_free_image;

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index b36c9c4..595a757 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -74,6 +74,16 @@ struct resource crashk_low_res = {
         .desc = IORES_DESC_CRASH_KERNEL
 };

+#ifdef CONFIG_QUICK_KEXEC
+struct resource quick_kexec_res = {
+        .name = "Quick kexec",
+        .start = 0,
+        .end = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+        .desc = IORES_DESC_QUICK_KEXEC
+};
+#endif
+
 int kexec_should_crash(struct task_struct *p)
 {
         /*

@@ -470,8 +480,10 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
         return pages;
 }
-static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
-                                                     unsigned int order)
+
+static struct page *kimage_alloc_special_control_pages(struct kimage *image,
+                                                       unsigned int order,
+                                                       unsigned long end)
 {
         /* Control pages are special, they are the intermediaries
          * that are needed while we copy the rest of the pages

@@ -501,7 +513,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
                 size = (1 << order) << PAGE_SHIFT;
                 hole_start = (image->control_page + (size - 1)) & ~(size - 1);
                 hole_end   = hole_start + size - 1;
-                while (hole_end <= crashk_res.end) {
+                while (hole_end <= end) {
                         unsigned long i;

                         cond_resched();

@@ -536,7 +548,6 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
         return pages;
 }

-
 struct page *kimage_alloc_control_pages(struct kimage *image,
                                         unsigned int order)
 {

@@ -547,8 +558,15 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
                 pages = kimage_alloc_normal_control_pages(image, order);
                 break;
         case KEXEC_TYPE_CRASH:
-                pages = kimage_alloc_crash_control_pages(image, order);
+                pages = kimage_alloc_special_control_pages(image, order,
+                                                           crashk_res.end);
+                break;
+#ifdef CONFIG_QUICK_KEXEC
+        case KEXEC_TYPE_QUICK:
+                pages = kimage_alloc_special_control_pages(image, order,
+                                                           quick_kexec_res.end);
                 break;
+#endif
         }
         return pages;

@@ -898,11 +916,11 @@ static int kimage_load_normal_segment(struct kimage *image,
         return result;
 }

-static int kimage_load_crash_segment(struct kimage *image,
+static int kimage_load_special_segment(struct kimage *image,
                                         struct kexec_segment *segment)
 {
-        /* For crash dumps kernels we simply copy the data from
-         * user space to it's destination.
+        /* For crash dump kernels and quick kexec kernels
+         * we simply copy the data from user space to its destination.
          * We do things a page at a time for the sake of kmap.
          */
         unsigned long maddr;

@@ -976,8 +994,13 @@ int kimage_load_segment(struct kimage *image,
                 result = kimage_load_normal_segment(image, segment);
                 break;
         case KEXEC_TYPE_CRASH:
-                result = kimage_load_crash_segment(image, segment);
+                result = kimage_load_special_segment(image, segment);
+                break;
+#ifdef CONFIG_QUICK_KEXEC
+        case KEXEC_TYPE_QUICK:
+                result = kimage_load_special_segment(image, segment);
                 break;
+#endif
         }
return result;
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
from openEuler-21.03 commit cbaf3d9ca458 ("arm64: Reserve memory for quick kexec")
category: feature
bugzilla: 48159
CVE: N/A
------------------------------
Reserve memory for quick kexec on arm64 with cmdline "quickkexec=".
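For example, to set aside 256M (size illustrative; the value is parsed with memparse() and the region is allocated below ARCH_LOW_ADDRESS_LIMIT with 2MB alignment):

    quickkexec=256M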
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/kernel/setup.c |  7 +++++++
 arch/arm64/mm/init.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index ad3759b..b49aa5c 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -274,6 +274,13 @@ static void __init request_standard_resources(void)
                         request_resource(res, &crashk_res);
 #endif
+#ifdef CONFIG_QUICK_KEXEC
+                if (quick_kexec_res.end &&
+                    quick_kexec_res.start >= res->start &&
+                    quick_kexec_res.end <= res->end)
+                        request_resource(res, &quick_kexec_res);
+#endif
+
                 for (j = 0; j < res_mem_count; j++) {
                         if (res_resources[j].start >= res->start &&
                             res_resources[j].end <= res->end)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3a914ff..041a085 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -194,6 +194,45 @@ static void __init kexec_reserve_crashkres_pages(void)
 }
 #endif /* CONFIG_KEXEC_CORE */
+#ifdef CONFIG_QUICK_KEXEC
+static int __init parse_quick_kexec(char *p)
+{
+        if (!p)
+                return 0;
+
+        quick_kexec_res.end = PAGE_ALIGN(memparse(p, NULL));
+
+        return 0;
+}
+early_param("quickkexec", parse_quick_kexec);
+
+static void __init reserve_quick_kexec(void)
+{
+        unsigned long long mem_start, mem_len;
+
+        mem_len = quick_kexec_res.end;
+        if (mem_len == 0)
+                return;
+
+        /* Current arm64 boot protocol requires 2MB alignment */
+        mem_start = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+                                           mem_len, CRASH_ALIGN);
+        if (mem_start == 0) {
+                pr_warn("cannot allocate quick kexec mem (size:0x%llx)\n",
+                        mem_len);
+                quick_kexec_res.end = 0;
+                return;
+        }
+
+        memblock_reserve(mem_start, mem_len);
+        pr_info("quick kexec mem reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+                mem_start, mem_start + mem_len, mem_len >> 20);
+
+        quick_kexec_res.start = mem_start;
+        quick_kexec_res.end = mem_start + mem_len - 1;
+}
+#endif
+
 #ifdef CONFIG_CRASH_DUMP
 static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
                 const char *uname, int depth, void *data)

@@ -648,6 +687,10 @@ void __init arm64_memblock_init(void)
reserve_crashkernel();
+#ifdef CONFIG_QUICK_KEXEC
+        reserve_quick_kexec();
+#endif
+
         reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
From: Sang Yan <sangyan@huawei.com>
hulk inclusion
from openEuler-21.03 commit ee2cd8e7db2d ("kexec: Enable quick kexec on openEuler")
category: feature
bugzilla: 48159
CVE: N/A
------------------------------
Enable quick kexec on openEuler by default.
Signed-off-by: Sang Yan <sangyan@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 1b61f52..d8a4e8d 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -692,6 +692,7 @@ CONFIG_CRYPTO_AES_ARM64_NEON_BLK=m
 #
 CONFIG_CRASH_CORE=y
 CONFIG_KEXEC_CORE=y
+CONFIG_QUICK_KEXEC=y
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STATIC_KEYS_SELFTEST=y