To avoid these changes, we intend to remove the kexec_file_load() system call and instead invoke its functionality indirectly through the kexec_load() system call. Our method is to add a flag and, at the beginning of kexec_load(), check whether that flag is set.
Signed-off-by: xuhuijie <xuhujie@huawei.com> --- include/linux/kexec.h | 4 ++ include/uapi/asm-generic/unistd.h | 4 +- include/uapi/linux/kexec.h | 1 + kernel/kexec.c | 10 ++++ kernel/kexec_file.c | 90 +++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 3 deletions(-)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4fb900498..542a15c91 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -149,6 +149,10 @@ int kexec_image_probe_default(struct kimage *image, void *buf, unsigned long buf_len); int kexec_image_post_load_cleanup_default(struct kimage *image);
+int wrap_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char __user *cmdline_ptr, + unsigned long flags); + /* * If kexec_buf.mem is set to this value, kexec_locate_mem_hole() * will try to allocate free memory. Arch may overwrite it. diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 446a99933..b538ed1be 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -740,11 +740,9 @@ __SYSCALL(__NR_statx, sys_statx) __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) #define __NR_rseq 293 __SYSCALL(__NR_rseq, sys_rseq) -#define __NR_kexec_file_load 294 -__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
#undef __NR_syscalls -#define __NR_syscalls 295 +#define __NR_syscalls 294
/* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index ca3cebebd..9385ecba2 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -25,6 +25,7 @@ #define KEXEC_FILE_UNLOAD 0x00000001 #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 +#define KEXEC_FILE_LOAD_WRAP 0x100000000
/* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. diff --git a/kernel/kexec.c b/kernel/kexec.c index 47dfad722..d44a55202 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -238,6 +238,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, { int result;
+ if (flags & KEXEC_FILE_LOAD_WRAP) { + int kernel_fd, initrd_fd; + + kernel_fd = entry >> 32; + initrd_fd = entry & 0x00000000ffffffff; + flags &= ~KEXEC_FILE_LOAD_WRAP; + return wrap_kexec_file_load(kernel_fd, initrd_fd, nr_segments, + (char __user *)segments, flags); + } + result = kexec_load_check(nr_segments, flags); if (result) return result; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 49db1111b..2a25db8dd 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -408,6 +408,96 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, return ret; }
+int wrap_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, const char __user *cmdline_ptr, + unsigned long flags) +{ + int ret = 0, i; + struct kimage **dest_image, *image; + + /* We only trust the superuser with rebooting the system. */ + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return -EPERM; + + /* Make sure we have a legal set of flags */ + if (flags != (flags & KEXEC_FILE_FLAGS)) + return -EINVAL; + + image = NULL; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + + dest_image = &kexec_image; + if (flags & KEXEC_FILE_ON_CRASH) { + dest_image = &kexec_crash_image; + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } + + if (flags & KEXEC_FILE_UNLOAD) + goto exchange; + + /* + * In case of crash, new kernel gets loaded in reserved region. It is + * same memory where old crash kernel might be loaded. Free any + * current crash dump kernel before we corrupt it. + */ + if (flags & KEXEC_FILE_ON_CRASH) + kimage_free(xchg(&kexec_crash_image, NULL)); + + ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, + cmdline_len, flags); + if (ret) + goto out; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + /* + * Some architecture(like S390) may touch the crash memory before + * machine_kexec_prepare(), we must copy vmcoreinfo data after it. 
+ */ + ret = kimage_crash_copy_vmcoreinfo(image); + if (ret) + goto out; + + ret = kexec_calculate_store_digests(image); + if (ret) + goto out; + + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", + i, ksegment->buf, ksegment->bufsz, ksegment->mem, + ksegment->memsz); + + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* + * Free up any temporary buffers allocated which are not needed + * after image has been loaded + */ + kimage_file_post_load_cleanup(image); +exchange: + image = xchg(dest_image, image); +out: + if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) + arch_kexec_protect_crashkres(); + + mutex_unlock(&kexec_mutex); + kimage_free(image); + return ret; +} + static int locate_mem_hole_top_down(unsigned long start, unsigned long end, struct kexec_buf *kbuf) {