From: Jingxian He hejingxian@huawei.com
To improve criu dump performance, collect the vmas in parallel with the other collecting operations.
To avoid the concurrency problem in `find_unused_fd`, only the vmas of the main root task are collected in parallel.
Usage: criu --parallel
Note: Ensure criu can run on multiple cores; otherwise performance will deteriorate.
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: fu.lin fulin10@huawei.com Signed-off-by: hewenliang hewenliang4@huawei.com Signed-off-by: Jingxian He hejingxian@huawei.com --- criu/Makefile.crtools | 1 + criu/Makefile.packages | 1 + criu/config.c | 1 + criu/cr-dump.c | 55 ++++++++++++----- criu/crtools.c | 3 +- criu/include/cr_options.h | 1 + criu/include/pstree.h | 3 + criu/include/taskqueue.h | 50 +++++++++++++++ criu/namespaces.c | 9 ++- criu/proc_parse.c | 6 ++ criu/taskqueue.c | 124 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 237 insertions(+), 17 deletions(-) create mode 100644 criu/include/taskqueue.h create mode 100644 criu/taskqueue.c
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 70d1b73..6deb855 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -94,6 +94,7 @@ obj-y += mnl.o obj-y += nftables.o obj-y += kmsg.o obj-y += char.o +obj-y += taskqueue.o obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 diff --git a/criu/Makefile.packages b/criu/Makefile.packages index c1d87a5..875df02 100644 --- a/criu/Makefile.packages +++ b/criu/Makefile.packages @@ -37,6 +37,7 @@ endif export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet export LIBS += $(shell pkg-config --libs libmnl) export LIBS += $(shell pkg-config --libs libnftnl) +export LIBS += -lpthread
check-packages-failed: $(warning Can not find some of the required libraries) diff --git a/criu/config.c b/criu/config.c index cdafe17..e09445a 100644 --- a/criu/config.c +++ b/criu/config.c @@ -554,6 +554,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("file-locks-repair", &opts.file_locks_repair), {"reserve-ports", required_argument, 0, 'P' }, BOOL_OPT("use-nft", &opts.use_nft), + BOOL_OPT("parallel", &opts.parallel), { }, };
diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 2a1864c..6caee06 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -18,6 +18,7 @@
#include <sched.h> #include <sys/resource.h> +#include <sys/sysinfo.h>
#include "types.h" #include "protobuf.h" @@ -85,6 +86,8 @@ #include "img-streamer.h" #include "restorer.h"
+#include "taskqueue.h" + /* * Architectures can overwrite this function to restore register sets that * are not covered by ptrace_set/get_regs(). @@ -404,7 +407,7 @@ static int dump_pid_misc(pid_t pid, TaskCoreEntry *tc) return 0; }
-static int dump_filemap(struct vma_area *vma_area, int fd) +int dump_filemap(struct vma_area *vma_area, int fd) { struct fd_parms p = FD_PARMS_INIT; VmaEntry *vma = vma_area->e; @@ -1233,7 +1236,7 @@ err_cure: static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) { pid_t pid = item->pid->real; - struct vm_area_list vmas; + struct vm_area_list *vmas = NULL; struct parasite_ctl *parasite_ctl; int ret, exit_code = -1; struct parasite_dump_misc misc; @@ -1242,8 +1245,6 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) struct proc_posix_timers_stat proc_args; struct mem_dump_ctl mdc;
- vm_area_list_init(&vmas); - pr_info("========================================\n"); pr_info("Dumping task (pid: %d)\n", pid); pr_info("========================================\n"); @@ -1254,12 +1255,23 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) */ return 0;
+ if (!opts.parallel || root_item->pid->real != item->pid->real ) { + vmas = xmalloc(sizeof(struct vm_area_list)); + if (vmas == NULL) { + pr_err("xmalloc no memory\n"); + return -1; + } + vm_area_list_init(vmas); + } else + vmas = item->maps_info.vmas; + pr_info("Obtaining task stat ... \n"); ret = parse_pid_stat(pid, &pps_buf); if (ret < 0) goto err;
- ret = collect_mappings(pid, &vmas, dump_filemap); + ret = (opts.parallel && root_item->pid->real == item->pid->real) ? + 0 : collect_mappings(pid, vmas, dump_filemap); if (ret) { pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret); goto err; @@ -1293,7 +1305,10 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) goto err; }
- parasite_ctl = parasite_infect_seized(pid, item, &vmas); + if (opts.parallel && end_collect_mappings_thread(item)) + goto err; + + parasite_ctl = parasite_infect_seized(pid, item, vmas); if (!parasite_ctl) { pr_err("Can't infect (pid: %d) with parasite\n", pid); goto err; @@ -1317,13 +1332,13 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) goto err_cure_imgset; }
- ret = parasite_fixup_vdso(parasite_ctl, pid, &vmas); + ret = parasite_fixup_vdso(parasite_ctl, pid, vmas); if (ret) { pr_err("Can't fixup vdso VMAs (pid: %d)\n", pid); goto err_cure_imgset; }
- ret = parasite_collect_aios(parasite_ctl, &vmas); /* FIXME -- merge with above */ + ret = parasite_collect_aios(parasite_ctl, vmas); /* FIXME -- merge with above */ if (ret) { pr_err("Failed to check aio rings (pid: %d)\n", pid); goto err_cure_imgset; @@ -1377,7 +1392,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) mdc.stat = &pps_buf; mdc.parent_ie = parent_ie;
- ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); + ret = parasite_dump_pages_seized(item, vmas, &mdc, parasite_ctl); if (ret) goto err_cure;
@@ -1438,7 +1453,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) goto err; }
- ret = dump_task_mm(pid, &pps_buf, &misc, &vmas, cr_imgset); + ret = dump_task_mm(pid, &pps_buf, &misc, vmas, cr_imgset); if (ret) { pr_err("Dump mappings (pid: %d) failed with %d\n", pid, ret); goto err; @@ -1454,7 +1469,8 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) exit_code = 0; err: close_pid_proc(); - free_mappings(&vmas); + free_mappings(vmas); + free(vmas); xfree(dfds); return exit_code;
@@ -1785,6 +1801,7 @@ static int cr_dump_finish(int ret) network_unlock(opts.tree_id); delete_link_remaps(); clean_cr_time_mounts(); + }
if (!ret && opts.lazy_pages) @@ -1839,6 +1856,13 @@ static int cr_dump_finish(int ret) write_stats(DUMP_STATS); pr_info("Dumping finished successfully\n"); } + + /* + * Don't care threads' status and ignore unfree resources, use + * `exit_group()` to ensure exit all threads. + */ + syscall(SYS_exit_group, post_dump_ret ? : (ret != 0)); + return post_dump_ret ? : (ret != 0); }
@@ -1864,6 +1888,9 @@ int cr_dump_tasks(pid_t pid) if (opts.dump_char_dev && parse_devname() < 0) goto err;
+ if (opts.parallel && init_parallel_env() != 0) + goto err; + root_item = alloc_pstree_item(); if (!root_item) goto err; @@ -1941,13 +1968,13 @@ int cr_dump_tasks(pid_t pid) if (collect_file_locks()) goto err;
- if (collect_namespaces(true) < 0) - goto err; - glob_imgset = cr_glob_imgset_open(O_DUMP); if (!glob_imgset) goto err;
+ if (collect_namespaces(true) < 0) + goto err; + if (seccomp_collect_dump_filters() < 0) goto err;
diff --git a/criu/crtools.c b/criu/crtools.c index bac2992..18945e7 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -488,7 +488,8 @@ usage: " --weak-file-check Allow file size and mod larger than dumping value\n" " --file-locks-repair Use repair mode to dump and restore file locks\n" " --reserve-ports Reserve src ports in kernel\n" -" --use-nft Use nft API instead of iptables cmd in network locking" +" --use-nft Use nft API instead of iptables cmd in network locking\n" +" --parallel Parallel to accellrate dumping speed\n" "\n" "Check options:\n" " Without options, "criu check" checks availability of absolutely required\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 236d1c7..cc8d1ae 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -188,6 +188,7 @@ struct cr_options { char *share_src_ports; int reserve_ports; int use_nft; + int parallel; };
extern struct cr_options opts; diff --git a/criu/include/pstree.h b/criu/include/pstree.h index 61ab0ce..be0942a 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -1,6 +1,8 @@ #ifndef __CR_PSTREE_H__ #define __CR_PSTREE_H__
+#include "taskqueue.h" + #include "common/list.h" #include "common/lock.h" #include "pid.h" @@ -30,6 +32,7 @@ struct pstree_item { futex_t task_st; unsigned long task_st_le_bits; }; + struct mappings_info maps_info; };
static inline pid_t vpid(const struct pstree_item *i) diff --git a/criu/include/taskqueue.h b/criu/include/taskqueue.h new file mode 100644 index 0000000..16f9e3d --- /dev/null +++ b/criu/include/taskqueue.h @@ -0,0 +1,50 @@ +#ifndef __CR_TASKQUEUE_H__ +#define __CR_TASKQUEUE_H__ + +#include <stdbool.h> +#include <pthread.h> +#include <semaphore.h> + +#include "vma.h" +#include "pstree.h" + +#include "common/list.h" + +#define TASKQUEUE_HASH_SIZE 8 + +struct taskqueue { + pthread_t task; + void *(*routine)(void *); + void *arg; + int result; +}; +#define queue_task queue.task +#define queue_routine queue.routine +#define queue_arg queue.arg +#define queue_result queue.result + +int init_parallel_env(void); + +static inline int taskqueue_create(struct taskqueue *queue) +{ + return pthread_create(&queue->task, NULL, queue->routine, queue->arg); /* 0 or an error number */ +} + +static inline int taskqueue_join(struct taskqueue *queue) +{ + return pthread_join(queue->task, NULL); /* 0 or an error number */ +} + +/* State handed to the thread that collects one task's mappings in parallel */ +struct mappings_info { + struct hlist_node hash; + pid_t pid; + struct vm_area_list *vmas; + dump_filemap_t dump_file; + struct taskqueue queue; +}; + +int start_collect_mappings_thread(void); +int end_collect_mappings_thread(struct pstree_item *item); + +#endif /* __CR_TASKQUEUE_H__ */ diff --git a/criu/namespaces.c b/criu/namespaces.c index 9ffcd16..e71817f 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -27,6 +27,7 @@ #include "net.h" #include "cgroup.h" #include "fdstore.h" +#include "taskqueue.h"
#include "protobuf.h" #include "util.h" @@ -1570,11 +1571,15 @@ int collect_namespaces(bool for_dump) { int ret;
- ret = collect_user_namespaces(for_dump); + ret = collect_mnt_namespaces(for_dump); if (ret < 0) return ret;
- ret = collect_mnt_namespaces(for_dump); + /* need mnt info provided by `mntinfo` */ + if (opts.parallel && start_collect_mappings_thread()) + return -1; + + ret = collect_user_namespaces(for_dump); if (ret < 0) return ret;
diff --git a/criu/proc_parse.c b/criu/proc_parse.c index b3d1c0b..4a6a598 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -64,6 +64,12 @@
#define BUF_SIZE 4096 /* Good enough value - can be changed */
+/* cancel log to optimize performance because of the lock contention of print */ +#undef pr_info +#undef pr_debug +#define pr_info(fmt, ...) +#define pr_debug(fmt, ...) + struct buffer { char buf[BUF_SIZE]; char end; /* '\0' */ diff --git a/criu/taskqueue.c b/criu/taskqueue.c new file mode 100644 index 0000000..1196a5e --- /dev/null +++ b/criu/taskqueue.c @@ -0,0 +1,124 @@ +/* + * Helpers for the parallel dump: collect the root task's mappings in a thread. + */ + +#include <string.h> +#include <errno.h> +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "pstree.h" +#include "log.h" +#include "taskqueue.h" + +/* + * criu may be confined to a single cpu, which defeats the helper thread, so + * allow it to run on every configured cpu. Returns 0 on success, -1 on error. + */ +static int set_cpuaffinity(void) +{ + int num_cpus = get_nprocs_conf(); + size_t cpusetsize = CPU_ALLOC_SIZE(num_cpus); + cpu_set_t *set = CPU_ALLOC(num_cpus); + int retval; + + if (set == NULL) { + pr_err("CPU_ALLOC failed for %d cpus\n", num_cpus); + return -1; + } + memset(set, 0xff, cpusetsize); + + retval = sched_setaffinity(getpid(), cpusetsize, set); + if (retval != 0) + pr_err("sched_setaffinity failed: %s\n", strerror(errno)); + CPU_FREE(set); + return retval; +} + +int init_parallel_env(void) +{ + return set_cpuaffinity(); +} + +static void *collect_mappings_routine(void *_arg) +{ + struct mappings_info *info = _arg; + + info->queue_result = collect_mappings(info->pid, info->vmas, info->dump_file); + return NULL; +} + +int dump_filemap(struct vma_area *vma_area, int fd); /* defined in criu/cr-dump.c */ + +int start_collect_mappings_thread(void) +{ + struct pstree_item *pi; + struct mappings_info *info; + int ret; + + for_each_pstree_item(pi) { + /* Only the root task is collected in parallel; the other tasks + * are skipped to avoid concurrency problems. 
+ */ + if (pi->pid->real != root_item->pid->real) + continue; + + info = &pi->maps_info; + + info->vmas = xmalloc(sizeof(struct vm_area_list)); + if (info->vmas == NULL) { + pr_err("xzalloc vmas no memory\n"); + return -1; + } + vm_area_list_init(info->vmas); + + info->pid = pi->pid->real; + info->dump_file = dump_filemap; + info->queue_routine = collect_mappings_routine; + info->queue_arg = info; + + pr_info("Start thread to collect %d mappings\n", info->pid); + + /* pthread_create() returns an error number; errno is not set */ + ret = taskqueue_create(&info->queue); + if (ret != 0) { + pr_err("parallel_collect_mappings failed: %s\n", strerror(ret)); + free(info->vmas); + info->vmas = NULL; + return -1; + } + } + + return 0; +} + +int end_collect_mappings_thread(struct pstree_item *item) +{ + struct mappings_info *info = &item->maps_info; + int retval; + + /* Only the root task has a collector thread, so there is nothing + * to join for any other task. + */ + if (root_item->pid->real != item->pid->real) + return 0; + + retval = taskqueue_join(&info->queue); + if (retval != 0 || info->queue_result != 0) { + pr_err("taskqueue_join failed, retval %d(errno %d: %s)," + " queue_result: %d\n", + retval, + retval, + retval == 0 ? "nil" : strerror(retval), + info->queue_result); + retval = -1; + } + + pr_info("End thread to collect %d mappings\n", info->pid); + + /* + * Other threads' status is ignored here: the dump finishes with + * `exit_group()`, which makes all remaining threads exit. + */ + return retval; +}