From: Jingxian He hejingxian@huawei.com
Add notifier calling method for checkpoint and restore during kernel module upgrading.
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: Xiaoguang Li lixiaoguang2@huawei.com Signed-off-by: He Jingxian hejingxian@huawei.com Signed-off-by: fu.lin fu.lin10@huawei.com --- criu/config.c | 1 + criu/cr-dump.c | 33 ++++++++++ criu/cr-restore.c | 22 ++++++- criu/crtools.c | 3 + criu/include/cr_options.h | 1 + criu/include/restorer.h | 1 + criu/include/util.h | 42 ++++++++++++ criu/pie/restorer.c | 135 ++++++++++++++++++++++++++++++++++---- criu/pie/util.c | 91 +++++++++++++++++++++++++ include/common/lock.h | 4 ++ 10 files changed, 319 insertions(+), 14 deletions(-)
diff --git a/criu/config.c b/criu/config.c index a5bcf10..e1de191 100644 --- a/criu/config.c +++ b/criu/config.c @@ -544,6 +544,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity), BOOL_OPT("pin-memory", &opts.pin_memory), BOOL_OPT("use-fork-pid", &opts.use_fork_pid), + BOOL_OPT("with-notifier", &opts.with_notifier_kup), { }, };
diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 8575516..96c0cd3 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1699,6 +1699,8 @@ static int cr_lazy_mem_dump(void) return ret; }
+static enum notifier_state notifier_state = NOTHING_COMPLETE; + static int cr_dump_finish(int ret) { int post_dump_ret = 0; @@ -1783,6 +1785,20 @@ static int cr_dump_finish(int ret) restore_task_special_pages(0); }
+ if (ret != 0 && opts.with_notifier_kup) { + pr_info("call notifier rollback\n"); + switch (notifier_state) { + case PRE_FREEZE_COMPLETE: + notifier_kup(PRE_FREEZE, ROLLBACK, true); + break; + case FREEZE_TO_KILL_COMPLETE: + notifier_kup(FREEZE_TO_KILL, ROLLBACK, true); + break; + default: + break; + } + } + if (ret) { pr_err("Dumping FAILED.\n"); } else { @@ -1816,6 +1832,14 @@ int cr_dump_tasks(pid_t pid) goto err; root_item->pid->real = pid;
+ if (notifier_kup(PRE_FREEZE, PREPARE, opts.with_notifier_kup)) { + /* disable rollback function because we has already rollbacked. */ + opts.with_notifier_kup = false; + pr_err("call notifier: %d err\n", PRE_FREEZE); + goto err; + } else + notifier_state = PRE_FREEZE_COMPLETE; + pre_dump_ret = run_scripts(ACT_PRE_DUMP); if (pre_dump_ret != 0) { pr_err("Pre dump script failed with %d!\n", pre_dump_ret); @@ -1971,6 +1995,15 @@ int cr_dump_tasks(pid_t pid) ret = write_img_inventory(&he); if (ret) goto err; + + ret = notifier_kup(FREEZE_TO_KILL, PREPARE, opts.with_notifier_kup); + if (ret) { + opts.with_notifier_kup = false; + pr_err("call notifier:%d err\n", FREEZE_TO_KILL); + goto err; + } else + notifier_state = FREEZE_TO_KILL_COMPLETE; + err: if (parent_ie) inventory_entry__free_unpacked(parent_ie, NULL); diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e050b88..1e2ed9a 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1977,8 +1977,10 @@ static int restore_task_with_children(void *_arg) return 0;
err: - if (current->parent == NULL) + if (current->parent == NULL) { + do_notifier_rollback(opts.with_notifier_kup, POST_UPDATE_KERNEL_COMPLETE); futex_abort_and_wake(&task_entries->nr_in_progress); + } exit(1); }
@@ -2421,8 +2423,10 @@ skip_ns_bouncing: */ attach_to_tasks(root_seized);
- if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) + if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) { + pr_err("Can't switch to CR_STATE_RESTORE_CREDS stage\n"); goto out_kill_network_unlocked; + }
timing_stop(TIME_RESTORE);
@@ -2599,6 +2603,15 @@ int cr_restore_tasks(void) goto err;
ret = restore_root_task(root_item); + if (ret) + goto err; + /* The root task is restored: run the POST_RUN notifier; a negative result is returned to the caller. */ + ret = notifier_kup(POST_RUN, PREPARE, opts.with_notifier_kup); + if (ret < 0) { + opts.with_notifier_kup = false; + pr_err("calling POST_RUN notifier list return err\n"); + } + err: cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret); return ret; @@ -3861,6 +3874,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns */ task_args->lsm_type = kdat.lsm;
+ task_args->with_notifier_kup = opts.with_notifier_kup; + /* * Make root and cwd restore _that_ late not to break any * attempts to open files by paths above (e.g. /proc). @@ -3907,6 +3922,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns err: free_mappings(&self_vmas); err_nv: + if (current->parent == NULL && opts.with_notifier_kup) + do_notifier_rollback(true, POST_UPDATE_KERNEL_COMPLETE); + /* Just to be sure */ exit(1); return -1; diff --git a/criu/crtools.c b/criu/crtools.c index 9b3ef33..d53be3d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -445,6 +445,9 @@ usage: " same cpu quantity.\n" " --pin-memory Use pin memory method for checkpoint and restore.\n" " --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n" +" --with-notifier Allow to checkout/restore kup notifier chain. This\n" +" feature needs the kernel's assistance.\n" +" Only for the host with these feature.\n" "\n" "Check options:\n" " Without options, "criu check" checks availability of absolutely required\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 7fad678..1acb5ef 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -178,6 +178,7 @@ struct cr_options { int with_cpu_affinity; int pin_memory; int use_fork_pid; + int with_notifier_kup; };
extern struct cr_options opts; diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 8fd47e2..7152b34 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -227,6 +227,7 @@ struct task_restore_args { bool has_clone3_set_tid; bool pin_memory; bool use_fork_pid; + bool with_notifier_kup; } __aligned(64);
/* diff --git a/criu/include/util.h b/criu/include/util.h index c2baf27..d226d2c 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -13,6 +13,8 @@ #include <sys/sysmacros.h> #include <dirent.h> #include <poll.h> +#include <sys/stat.h> +#include <fcntl.h>
#include "int.h" #include "common/compiler.h" @@ -380,4 +382,44 @@ static inline void print_stack_trace(pid_t pid) {}
extern int mount_detached_fs(const char *fsname);
+#define NOTIFY_PROC_PATH "/sys/kernel/modrestore/nvwa_notifier" /* opened write-only by notifier_kup() */ +/* Take the hook/command enums from the kernel header when it exists; otherwise declare them locally. */ +#if __has_include("linux/modrestore.h") +#define CONFIG_EULEROS_MODRESTORE_NOTIFY +# include <linux/modrestore.h> +#else +enum KUP_HOOK_POINT { + PRE_FREEZE, + FREEZE_TO_KILL, + PRE_UPDATE_KERNEL, + POST_UPDATE_KERNEL, + UNFREEZE_TO_RUN, + POST_RUN, + + KUP_HOOK_MAX, +}; + +enum nvwa_cmd { + PREPARE = 0, + ROLLBACK, + + NVWA_CMD_MAX, +}; +#endif +/* Progress marker naming the last hook point that completed; do_notifier_rollback() selects the hook to roll back from it. */ +enum notifier_state { + NOTHING_COMPLETE, + PRE_FREEZE_COMPLETE, + FREEZE_TO_KILL_COMPLETE, + PRE_UPDATE_KERNEL_COMPLETE, + POST_UPDATE_KERNEL_COMPLETE, + UNFREEZE_TO_RUN_COMPLETE, + POST_RUN_COMPLETE, + + NOTIFIER_ROLLBACK_DONE = 0xfc17173b, /* has done rollback */ +}; +/* Both helpers do nothing when their bool argument is false; notifier_kup() then returns 0. */ +int notifier_kup(enum KUP_HOOK_POINT, enum nvwa_cmd, bool); +void do_notifier_rollback(bool, enum notifier_state); + #endif /* __CR_UTIL_H__ */ diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 4ab8a45..a6245e4 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -77,6 +77,7 @@
static struct task_entries *task_entries_local; static futex_t thread_inprogress; +static futex_t thread_start; static pid_t *helpers; static int n_helpers; static pid_t *zombies; @@ -119,10 +120,28 @@ void parasite_cleanup(void) extern void cr_restore_rt (void) asm ("__cr_restore_rt") __attribute__ ((visibility ("hidden")));
+static int args_with_notifier_kup; +static enum notifier_state notifier_state = POST_UPDATE_KERNEL_COMPLETE; +static futex_t notifier_done; + static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) { char *r; int i; + rt_sigaction_t act; + + if (signal == SIGSEGV || signal == SIGBUS || signal == SIGILL) { + /* Make sure we exit with the right signal at the end. So for instance + * the core will be dumped if enabled. */ + pr_info("recv signal: %d\n", signal); + do_notifier_rollback(args_with_notifier_kup, notifier_state); + ksigemptyset (&act.rt_sa_mask); + act.rt_sa_flags = SA_SIGINFO | SA_RESTART; + act.rt_sa_handler = (rt_sighandler_t)SIG_DFL; + sys_sigaction(signal, &act, NULL, sizeof(k_rtsigset_t)); + sys_kill(sys_getpid(),signal); + return; + }
/* We can ignore helpers that die, we expect them to after * CR_STATE_RESTORE is finished. */ @@ -149,10 +168,14 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
pr_info("Task %d %s %d\n", siginfo->si_pid, r, siginfo->si_status);
+ pr_info("%s: trace do_notifier_rollback\n", __func__); + do_notifier_rollback(args_with_notifier_kup, notifier_state); futex_abort_and_wake(&task_entries_local->nr_in_progress); /* sa_restorer may be unmaped, so we can't go back to userspace*/ sys_kill(sys_getpid(), SIGSTOP); sys_exit_group(1); + + /* for notifier, do nothing when receiving SIGCHLD signal */ }
static int lsm_set_label(char *label, char *type, int procfd) @@ -604,6 +627,27 @@ static void noinline rst_sigreturn(unsigned long new_sp, ARCH_RT_SIGRETURN(new_sp, sigframe); }
+/* Run the UNFREEZE_TO_RUN notifier if *notify is set. On failure *notify is cleared and notifier_state stays NOTIFIER_ROLLBACK_DONE, so no further rollback is issued. Only one task calls this, so no locking is needed. */ +static int do_notifier(bool *notify) +{ + int retval = 0; + + if (!*notify) + return 0; + + pr_info("unfreeze_to_run restore notifier\n"); + retval = notifier_kup(UNFREEZE_TO_RUN, PREPARE, true); + if (retval) { + *notify = false; + notifier_state = NOTIFIER_ROLLBACK_DONE; + pr_err("call notifier: %d err\n", UNFREEZE_TO_RUN); + } else { + notifier_state = UNFREEZE_TO_RUN_COMPLETE; + } + + return retval; +} + /* * Threads restoration via sigreturn. Note it's locked * routine and calls for unlock at the end. @@ -642,12 +686,18 @@ long __export_restore_thread(struct thread_restore_args *args)
pr_info("%ld: Restored\n", sys_gettid());
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) { + pr_err("%s: abort by CR_STATE_RESTORE\n", __func__); + goto core_restore_end; + }
if (restore_signals(args->siginfo, args->siginfo_n, false)) goto core_restore_end;
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG)) { + pr_err("%s: abort by CR_STATE_RESTORE_SIGCHLD\n", __func__); + goto core_restore_end; + }
/* * Make sure it's before creds, since it's privileged @@ -663,16 +713,29 @@ long __export_restore_thread(struct thread_restore_args *args) if (ret) BUG();
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG)) { + pr_err("%s: abort by CR_STATE_RESTORE_CREDS\n", __func__); + goto core_restore_end; + }
futex_dec_and_wake(&thread_inprogress); + futex_wait_while(&thread_start, 0); + if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) { + pr_err("%s: abort by thread_start\n", __func__); + goto wait_notifier; + }
new_sp = (long)rt_sigframe + RT_SIGFRAME_OFFSET(rt_sigframe); rst_sigreturn(new_sp, rt_sigframe);
core_restore_end: - pr_err("Restorer abnormal termination for %ld\n", sys_getpid()); - futex_abort_and_wake(&task_entries_local->nr_in_progress); + futex_abort_and_wake(&thread_start); + futex_abort_and_wake(&task_entries_local->start); + +wait_notifier: + pr_err("%s: Restorer abnormal termination for %ld\n", __func__, sys_getpid()); + futex_wait_while(&notifier_done, 0); + sys_exit_group(1); return -1; } @@ -1470,6 +1533,10 @@ long __export_restore_task(struct task_restore_args *args) rt_sigaction_t act; bool has_vdso_proxy;
+ futex_set(&thread_inprogress, 1); + futex_set(&thread_start, 0); + futex_set(&notifier_done, 0); + bootstrap_start = args->bootstrap_start; bootstrap_len = args->bootstrap_len;
@@ -1486,6 +1553,7 @@ long __export_restore_task(struct task_restore_args *args) #ifdef ARCH_HAS_LONG_PAGES __page_size = args->page_size; #endif + args_with_notifier_kup = args->with_notifier_kup;
ksigfillset(&act.rt_sa_mask); act.rt_sa_handler = sigchld_handler; @@ -1496,9 +1564,29 @@ long __export_restore_task(struct task_restore_args *args) pr_err("Failed to set SIGCHLD %ld\n", ret); goto core_restore_end; } + ret = sys_sigaction(SIGSEGV, &act, NULL, sizeof(k_rtsigset_t)); /* same handler as SIGCHLD: it rolls the notifier back on a fatal signal */ + if (ret) { + pr_err("Failed to set SIGSEGV %ld\n", ret); + goto core_restore_end; + } + + ret = sys_sigaction(SIGBUS, &act, NULL, sizeof(k_rtsigset_t)); + if (ret) { + pr_err("Failed to set SIGBUS %ld\n", ret); + goto core_restore_end; + } + + ret = sys_sigaction(SIGILL, &act, NULL, sizeof(k_rtsigset_t)); + if (ret) { + pr_err("Failed to set SIGILL %ld\n", ret); + goto core_restore_end; + }
ksigemptyset(&to_block); ksigaddset(&to_block, SIGCHLD); + ksigaddset(&to_block, SIGSEGV); + ksigaddset(&to_block, SIGBUS); + ksigaddset(&to_block, SIGILL); ret = sys_sigprocmask(SIG_UNBLOCK, &to_block, NULL, sizeof(k_rtsigset_t)); if (ret) { pr_err("Failed to unblock SIGCHLD %ld\n", ret); @@ -1912,7 +2000,8 @@ long __export_restore_task(struct task_restore_args *args) pr_err("Unable to create a thread: %ld\n", ret); mutex_unlock(&task_entries_local->last_pid_mutex); goto core_restore_end; - } + } else + futex_inc(&thread_inprogress); }
mutex_unlock(&task_entries_local->last_pid_mutex); @@ -1936,7 +2025,14 @@ long __export_restore_task(struct task_restore_args *args)
pr_info("%ld: Restored\n", sys_getpid());
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) { + pr_err("%s: abort by CR_STATE_RESTORE\n", __func__); + goto core_restore_end; + } + + ret = do_notifier(&args->with_notifier_kup); + if (ret) + goto core_restore_end;
if (wait_helpers(args) < 0) goto core_restore_end; @@ -1984,7 +2080,8 @@ long __export_restore_task(struct task_restore_args *args) if (ret) goto core_restore_end;
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG)) + goto core_restore_end;
rst_tcp_socks_all(args);
@@ -2006,15 +2103,20 @@ long __export_restore_task(struct task_restore_args *args) ret = ret || restore_pdeath_sig(args->t); ret = ret || restore_child_subreaper(args->child_subreaper);
- futex_set_and_wake(&thread_inprogress, args->nr_threads); - - restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS); + if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG)) + goto core_restore_end;
if (ret) BUG();
/* Wait until children stop to use args->task_entries */ futex_wait_while_gt(&thread_inprogress, 1); + if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) { + pr_err("%s: terminate by main thread futex_start\n", __func__); + goto handle_notifier; + } + + futex_set_and_wake(&thread_start, 1);
sys_close(args->proc_fd); std_log_set_fd(-1); @@ -2052,8 +2154,17 @@ long __export_restore_task(struct task_restore_args *args) rst_sigreturn(new_sp, rt_sigframe);
core_restore_end: - futex_abort_and_wake(&task_entries_local->nr_in_progress); + futex_abort_and_wake(&thread_start); + futex_abort_and_wake(&task_entries_local->start); + +handle_notifier: + do_notifier_rollback(args->with_notifier_kup, notifier_state); + + futex_abort_and_wake(&task_entries_local->nr_in_progress); /* notify the criu main process */ pr_err("Restorer fail %ld\n", sys_getpid()); + + futex_set_and_wake(&notifier_done, 1); /* wake all other threads to exit */ + sys_exit_group(1); return -1; } diff --git a/criu/pie/util.c b/criu/pie/util.c index 4945483..752e5d0 100644 --- a/criu/pie/util.c +++ b/criu/pie/util.c @@ -11,6 +11,7 @@ #include "fcntl.h" #include "log.h" #include "util-pie.h" +#include "util.h"
#ifdef CR_NOGLIBC # include <compel/plugins/std/syscall.h> @@ -52,3 +53,93 @@ err_close: __sys(close)(fd); return -1; } + +#define KUP_BUF_SIZE 256 +/* Write the decimal digits of @number into @buf, NUL-terminated. Returns the digit count, or -1 when @total bytes cannot hold the digits plus the terminator. */ +static int int_to_string(unsigned number, char *buf, size_t total) { + unsigned remainder, quotient, i, len; + + quotient = number; + len = 0; + do { + quotient /= 10; + len += 1; + } while (quotient > 0); + + if (len >= total) + return -1; + + quotient = number; + i = 1; + do { + remainder = quotient % 10; + quotient = quotient / 10; + buf[len-i] = '0' + remainder; + i++; + } while (quotient > 0); + buf[len] = '\0'; + + return len == 0 ? -1 : len; +} +/* Write "<action>:<cmd>" to NOTIFY_PROC_PATH when @enable is set. Returns 0 on success, when disabled, or when open fails with -EACCES; a negative value otherwise. */ +int notifier_kup(enum KUP_HOOK_POINT action, enum nvwa_cmd cmd, bool enable) +{ + int fd, count = 0, retval = 0; + char buf[KUP_BUF_SIZE] = {0}; + + if (!enable) + return 0; + + fd = __sys(open)(NOTIFY_PROC_PATH, O_WRONLY, 0); + if (fd == -EACCES) { + /* no privilege to open the file: this condition is deliberately ignored. */ + pr_info("%s: open %s failed, retval: %d (-EACCES)\n", + __func__, NOTIFY_PROC_PATH, -EACCES); + return 0; + } else if (fd < 0) { + __pr_perror("%s: Can't open %s: %d\n", __func__, NOTIFY_PROC_PATH, fd); + return fd; + } + + retval = int_to_string(action, buf, sizeof(buf)-count); + if (retval <= 0) { + __pr_perror("%s: int_to_string error\n", __func__); + goto err_close; + } + + buf[retval] = ':'; + count = retval + 1; + + retval = int_to_string(cmd, buf+count, sizeof(buf)-count); + if (retval <= 0) { + __pr_perror("%s: int_to_string error\n", __func__); + goto err_close; + } + + count += retval; + retval = __sys(write)(fd, buf, count); + if (retval < 0) + __pr_perror("%s: Can't write to %s\n", __func__, NOTIFY_PROC_PATH); + +err_close: + __sys(close)(fd); + + return retval < 0 ? 
-1 : 0; +} +/* Send the ROLLBACK command for the hook matching @status when @rollback is set; any other state is ignored. */ +void do_notifier_rollback(bool rollback, enum notifier_state status) +{ + if (!rollback) + return; + + switch (status) { + case POST_UPDATE_KERNEL_COMPLETE: + notifier_kup(POST_UPDATE_KERNEL, ROLLBACK, true); + break; + case UNFREEZE_TO_RUN_COMPLETE: + notifier_kup(UNFREEZE_TO_RUN, ROLLBACK, true); + break; + default: + break; + } +} diff --git a/include/common/lock.h b/include/common/lock.h index 4782b63..3db17ae 100644 --- a/include/common/lock.h +++ b/include/common/lock.h @@ -106,6 +106,10 @@ static inline void futex_inc_and_wake(futex_t *f) LOCK_BUG_ON(sys_futex((uint32_t *)&f->raw.counter, FUTEX_WAKE, INT_MAX, NULL, NULL, 0) < 0); }
+static inline uint32_t futex_inc_return(futex_t *f) { /* Increment futex @f value and hand back what atomic_inc_return() returns */ + return atomic_inc_return(&f->raw); +} + /* Plain increment futex @f value */ static inline void futex_inc(futex_t *f) { atomic_inc(&f->raw); }