From: Jingxian He hejingxian@huawei.com
When criu checkpoints/restores a task, it only restores the context itself, not the memory address at which the context is stored.
This handles the problem introduced by a CVE bugfix; details: - https://nvd.nist.gov/vuln/detail/CVE-2016-4565 - https://openfabrics.org/images/2018workshop/presentations/113_MRuhl_Journeyt...
Brief: Refresh the security context address of the file. The infiniband code uses `write()` as a bi-directional `ioctl()`, and there is a `struct cred` address check during the `write()` process. However, criu uses some syscalls, such as `capset()` and `setgroups()`, to regenerate the new cred, while the file cred is fixed by `fcntl(F_SETOWN)`, so the address of the new cred is different from the one held by the file. This patch fixes the `struct cred` address checking problem that results from the CVE fix in the infiniband drivers.
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: luolongjun luolongjun@huawei.com Signed-off-by: fu.lin fu.lin10@huawei.com --- criu/config.c | 1 + criu/cr-restore.c | 35 +++++++++++++++++++++++++++++++++++ criu/crtools.c | 2 ++ criu/include/cr_options.h | 1 + criu/include/fcntl.h | 4 ++++ criu/include/prctl.h | 4 ++++ criu/include/restorer.h | 3 +++ criu/pie/restorer.c | 38 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 88 insertions(+)
diff --git a/criu/config.c b/criu/config.c index e1de191..4d2b709 100644 --- a/criu/config.c +++ b/criu/config.c @@ -545,6 +545,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("pin-memory", &opts.pin_memory), BOOL_OPT("use-fork-pid", &opts.use_fork_pid), BOOL_OPT("with-notifier", &opts.with_notifier_kup), + BOOL_OPT("with-fd-cred", &opts.with_fd_cred), { }, };
diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 1e2ed9a..05de2ef 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -709,6 +709,28 @@ static int __collect_child_pids(struct pstree_item *p, int state, unsigned int * return 0; }
+static int collect_child_fds(int state, unsigned int *n, struct pstree_item *me) +{ + struct list_head *list = &rsti(me)->fds; + struct fdinfo_list_entry *fle, *tmp; + + *n = 0; + list_for_each_entry_safe(fle, tmp, list, ps_list) { + if (fle->fe->type == state) { + int *child; + + child = rst_mem_alloc(sizeof(*child), RM_PRIVATE); + if (!child) + return -1; + + (*n)++; + *child = fle->fe->fd; + } + } + + return 0; +} + static int collect_child_pids(int state, unsigned int *n) { struct pstree_item *pi; @@ -733,6 +755,12 @@ static int collect_child_pids(int state, unsigned int *n) return __collect_child_pids(current, state, n); }
+static int collect_chr_fds(struct pstree_item *me, struct task_restore_args *ta) +{ + ta->setcred_pids = (int *)rst_mem_align_cpos(RM_PRIVATE); + return collect_child_fds(FD_TYPES__CHR, &ta->setcred_pids_n, me); +} + static int collect_helper_pids(struct task_restore_args *ta) { ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE); @@ -938,6 +966,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) if (collect_zombie_pids(ta) < 0) return -1;
+ if (opts.with_fd_cred && collect_chr_fds(current, ta) < 0) + return -1; + if (collect_inotify_fds(ta) < 0) return -1;
@@ -3723,6 +3754,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns RST_MEM_FIXUP_PPTR(task_args->helpers); RST_MEM_FIXUP_PPTR(task_args->zombies); RST_MEM_FIXUP_PPTR(task_args->vma_ios); + if (opts.with_fd_cred) + RST_MEM_FIXUP_PPTR(task_args->setcred_pids); + else + task_args->setcred_pids_n = UINT_MAX; RST_MEM_FIXUP_PPTR(task_args->inotify_fds);
task_args->compatible_mode = core_is_compat(core); diff --git a/criu/crtools.c b/criu/crtools.c index d53be3d..942e683 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -448,6 +448,8 @@ usage: " --with-notifier Allow to checkout/restore kup notifier chain. This\n" " feature needs the kernel's assistance.\n" " Only for the host with these feature.\n" +" --with-fd-cred Allow to make the restored process has the same cred\n" +" as checkout assisted by kernel.\n" "\n" "Check options:\n" " Without options, "criu check" checks availability of absolutely required\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 1acb5ef..5b0ff24 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -179,6 +179,7 @@ struct cr_options { int pin_memory; int use_fork_pid; int with_notifier_kup; + int with_fd_cred; };
extern struct cr_options opts; diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h index ea9d48c..0936337 100644 --- a/criu/include/fcntl.h +++ b/criu/include/fcntl.h @@ -19,6 +19,10 @@ struct f_owner_ex { #define F_GETOWNER_UIDS 17 #endif
+#ifndef F_SETCRED +#define F_SETCRED 18 +#endif + /* * These things are required to compile on CentOS-6 */ diff --git a/criu/include/prctl.h b/criu/include/prctl.h index 8e7fef3..ecbc69a 100644 --- a/criu/include/prctl.h +++ b/criu/include/prctl.h @@ -82,4 +82,8 @@ struct prctl_mm_map { # define PR_GET_THP_DISABLE 42 #endif
+#ifndef PR_DEFAULT_CRED +# define PR_DEFAULT_CRED 54 +#endif + #endif /* __CR_PRCTL_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 7152b34..4afff1b 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -180,6 +180,9 @@ struct task_restore_args { pid_t *zombies; unsigned int zombies_n;
+ int *setcred_pids; + unsigned int setcred_pids_n; + int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */ unsigned int inotify_fds_n;
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index a6245e4..2173c5e 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -78,6 +78,7 @@ static struct task_entries *task_entries_local; static futex_t thread_inprogress; static futex_t thread_start; +static futex_t cred_set; static pid_t *helpers; static int n_helpers; static pid_t *zombies; @@ -345,6 +346,41 @@ static int restore_creds(struct thread_creds_args *args, int procfd, return 0; }
+static int update_cred_ref(struct task_restore_args *ta) +{ + int i; + int ret; + int pid = sys_getpid(); + long int tid = sys_gettid(); + + if (ta->setcred_pids_n == UINT_MAX) { + pr_info("no need to keep the same cred \n"); + return 0; + } + + if (pid == tid) { + /* let main thread finish cred update first */ + ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0); + pr_info("main cred restore \n"); + futex_set_and_wake(&cred_set, 1); + } else { + futex_wait_until(&cred_set, 1); + pr_info("other cred restore \n"); + ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0); + } + + if (ret) + return ret; + + pr_info("%ld (%d) is going to update current cred \n", tid, pid); + + for (i = 0; i < ta->setcred_pids_n; i++) { + sys_fcntl(ta->setcred_pids[i], F_SETCRED, 0); + } + + return 0; +} + /* * This should be done after creds restore, as * some creds changes might drop the value back @@ -708,6 +744,7 @@ long __export_restore_thread(struct thread_restore_args *args)
ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type); + ret = ret || update_cred_ref(args->ta); ret = ret || restore_dumpable_flag(&args->ta->mm); ret = ret || restore_pdeath_sig(args); if (ret) @@ -2099,6 +2136,7 @@ long __export_restore_task(struct task_restore_args *args) */ ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type); + ret = ret || update_cred_ref(args); ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_pdeath_sig(args->t); ret = ret || restore_child_subreaper(args->child_subreaper);