From: Luo Longjun luolongjun@huawei.com
When dumping a unix stream socket that has an external connection, we tell the kernel to turn repair mode on for this socket. The kernel then keeps the socket until it is restored. During this process, the other socket that communicates with the socket in repair mode will either get EAGAIN or block.
Signed-off-by: Luo Longjun luolongjun@huawei.com
Fix unix socket dump and restore error
Fix the dump and restore problem for name-less unix sockets.
Signed-off-by: Jingxian He hejingxian@huawei.com
unix socket: ignore repair errors from the kernel
Leave the error for applications to deal with.
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: Luo Longjun luolongjun@huawei.com
- enable this feature by checking the cmdline parameter `unix_stream_restore_enable` - don't set repair mode for non-external sockets
Signed-off-by: fu.lin fulin10@huawei.com --- criu/cr-dump.c | 1 + criu/include/kerndat.h | 1 + criu/include/sockets.h | 1 + criu/kerndat.c | 32 ++++++++++ criu/sk-unix.c | 137 ++++++++++++++++++++++++++++++++++++++--- images/sk-unix.proto | 1 + 6 files changed, 164 insertions(+), 9 deletions(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 9ba27a2..2bbcef3 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1715,6 +1715,7 @@ static int cr_dump_finish(int ret)
cr_plugin_fini(CR_PLUGIN_STAGE__DUMP, ret); cgp_fini(); + unix_stream_unlock(ret);
if (!ret) { /* diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index ad5f7d3..665051d 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -68,6 +68,7 @@ struct kerndat_s { bool has_fsopen; bool has_clone3_set_tid; bool has_timens; + bool has_unix_sk_repair; };
extern struct kerndat_s kdat; diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 74c5ae4..c9cf427 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -43,6 +43,7 @@ extern int add_fake_unix_queuers(void); extern int fix_external_unix_sockets(void); extern int prepare_scms(void); extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); +extern void unix_stream_unlock(int ret);
extern struct collect_image_info netlink_sk_cinfo;
diff --git a/criu/kerndat.c b/criu/kerndat.c index b2c47c5..c87f551 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1052,6 +1052,36 @@ static bool kerndat_has_clone3_set_tid(void) return 0; }
+#define UNIX_STREAM_RESTORE_ENABLE_FILE "/sys/module/kernel/parameters/unix_stream_restore_enable" + +static void kerndat_has_unix_sk_repair(void) +{ + FILE *fp; + char ch = 'N'; + + if (access(UNIX_STREAM_RESTORE_ENABLE_FILE, F_OK) < 0) { + pr_debug("C/R external unix stream socket is not support\n"); + return; + } + + fp = fopen(UNIX_STREAM_RESTORE_ENABLE_FILE, "r"); + if (fp == NULL) { + pr_err("failed to open '%s': %s\n", + UNIX_STREAM_RESTORE_ENABLE_FILE, strerror(errno)); + return; + } + + fscanf(fp, "%c", &ch); + if (ch == 'Y') { + pr_debug("enable C/R external unix stream socket support\n"); + kdat.has_unix_sk_repair = true; + } + + fclose(fp); + + return; +} + int kerndat_init(void) { int ret; @@ -1186,6 +1216,8 @@ int kerndat_init(void) ret = -1; }
+ kerndat_has_unix_sk_repair(); + kerndat_lsm(); kerndat_mmap_min_addr(); kerndat_files_stat(); diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 00d09cc..d4c15ce 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -72,6 +72,7 @@ struct unix_sk_desc { char *name; unsigned int nr_icons; unsigned int *icons; + int repair_ino;
unsigned int vfs_dev; unsigned int vfs_ino; @@ -89,9 +90,18 @@ struct unix_sk_desc { struct list_head peer_list; struct list_head peer_node;
+ struct list_head repair_list; + struct list_head repair_node; + struct unix_stream_extern_socket_desc *ext_node; + UnixSkEntry *ue; };
+struct unix_stream_extern_socket_desc { + struct list_head list; + int fd; +}; + /* * The mutex_ghost is accessed from different tasks, * so make sure it is in shared memory. @@ -99,6 +109,7 @@ struct unix_sk_desc { static mutex_t *mutex_ghost;
static LIST_HEAD(unix_sockets); +static LIST_HEAD(unix_stream_external_sockets); static LIST_HEAD(unix_ghost_addr);
static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, @@ -117,6 +128,26 @@ struct unix_sk_listen_icon {
static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
+static int unix_stream_repair_on(int fd) +{ + int ret, aux = 1; + ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux)); + if (ret < 0) + pr_err("Can't turn repair mod for unix stream on. \n"); + + return ret; +} + +static int unix_stream_repair_off(int fd) +{ + int ret, aux = 0; + ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux)); + if (ret < 0) + pr_err("Can't turn repair mod for unix stream off. \n"); + + return ret; +} + static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_ino) { struct unix_sk_listen_icon *ic; @@ -338,6 +369,8 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) FilePermsEntry *perms; FownEntry *fown; void *m; + unsigned int len; + int ret;
m = xmalloc(sizeof(UnixSkEntry) + sizeof(SkOptsEntry) + @@ -382,6 +415,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) ue->fown = fown; ue->opts = skopts; ue->uflags = 0; + ue->repair_ino = 0;
if (unix_resolve_name(lfd, id, sk, ue, p)) goto err; @@ -431,6 +465,35 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) goto err; }
+ /* don't handle non-external unix socket, criu will restore it. */ + if (kdat.has_unix_sk_repair && !sk->sd.already_dumped + && peer->name && ue->type == SOCK_STREAM) { + struct unix_stream_extern_socket_desc *d; + + d = xzalloc(sizeof(*d)); + if (!d) + goto err; + + /* Attention: used for upgrade in the same machine + * May in conflict with original usage + */ + pr_info("set %d(fd %d) unix stream repair on \n", sk->sd.ino, lfd); + ret = unix_stream_repair_on(lfd); + if (ret < 0) + goto err; + + d->fd = dup(lfd); + pr_info("add %d into unix_stream_external_sockets\n", sk->sd.ino); + list_add_tail(&d->list, &unix_stream_external_sockets); + list_add(&sk->repair_node, &peer->repair_list); + sk->ext_node = d; + + len = sizeof(ue->repair_ino); + ret = getsockopt(lfd, SOL_TCP, TCP_REPAIR_OPTIONS, &ue->repair_ino, &len); + if (ret < 0) + goto err; + } + /* * Peer should have us as peer or have a name by which * we can access one. @@ -535,6 +598,26 @@ dump:
sk->sd.already_dumped = 1;
+ while (!list_empty(&sk->repair_list)) { + struct unix_sk_desc *psk; + struct unix_stream_extern_socket_desc *d; + + psk = list_first_entry(&sk->repair_list, struct unix_sk_desc, repair_node); + list_del_init(&psk->repair_node); + + pr_info("delete ino %d into unix_stream_external_sockets\n", psk->sd.ino); + + d = psk->ext_node; + list_del_init(&d->list); + psk->ext_node = NULL; + /* ino start from 1, using 0 to tag the non-repairing socket is safe. */ + psk->ue->repair_ino = 0; + + unix_stream_repair_off(d->fd); + close_safe(&d->fd); + xfree(d); + } + while (!list_empty(&sk->peer_list)) { struct unix_sk_desc *psk; psk = list_first_entry(&sk->peer_list, struct unix_sk_desc, peer_node); @@ -697,6 +780,8 @@ static int unix_collect_one(const struct unix_diag_msg *m,
INIT_LIST_HEAD(&d->peer_list); INIT_LIST_HEAD(&d->peer_node); + INIT_LIST_HEAD(&d->repair_list); + INIT_LIST_HEAD(&d->repair_node); d->fd = -1;
if (tb[UNIX_DIAG_SHUTDOWN]) @@ -810,16 +895,18 @@ static int __dump_external_socket(struct unix_sk_desc *sk, return -1; }
- if (peer->type != SOCK_DGRAM) { - show_one_unix("Ext stream not supported", peer); - pr_err("Can't dump half of stream unix connection.\n"); + if (peer->type != SOCK_DGRAM && + peer->type != SOCK_STREAM) { + show_one_unix("Ext unix type not supported", peer); + pr_err("Can't dump this kind of unix connection.\n"); return -1; }
- if (!peer->name) { + /* part 1: prevent NULL pointer oops */ + if (!peer->name && !sk->name) { show_one_unix("Ext dgram w/o name", peer); + show_one_unix("Ext dgram w/o name", sk); pr_err("Can't dump name-less external socket.\n"); - pr_err("%d\n", sk->fd); return -1; }
@@ -866,7 +953,7 @@ int fix_external_unix_sockets(void)
fd_id_generate_special(NULL, &e.id); e.ino = sk->sd.ino; - e.type = SOCK_DGRAM; + e.type = sk->type; e.state = TCP_LISTEN; e.name.data = (void *)sk->name; e.name.len = (size_t)sk->namelen; @@ -893,6 +980,19 @@ err: return -1; }
+void unix_stream_unlock(int ret) +{ + struct unix_stream_extern_socket_desc *d; + pr_debug("Unlocking unix stream sockets\n"); + list_for_each_entry(d, &unix_stream_external_sockets, list) { + if (ret) { + pr_debug("unlock fd %d \n", d->fd); + unix_stream_repair_off(d->fd); + } + close_safe(&d->fd); + } +} + struct unix_sk_info { UnixSkEntry *ue; struct list_head list; @@ -1278,6 +1378,7 @@ static int post_open_standalone(struct file_desc *d, int fd) struct unix_sk_info *peer; struct sockaddr_un addr; int cwd_fd = -1, root_fd = -1, ns_fd = -1; + int ret, value;
ui = container_of(d, struct unix_sk_info, d); BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) || @@ -1335,7 +1436,23 @@ static int post_open_standalone(struct file_desc *d, int fd) * while we're connecting in sake of ghost sockets. */ mutex_lock(mutex_ghost); - if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { + + /* we handle unix stream with external connections here */ + if (kdat.has_unix_sk_repair && peer->name + && ui->ue->type == SOCK_STREAM && ui->ue->repair_ino != 0) { + value = ui->ue->repair_ino; + ret = setsockopt(fd, SOL_TCP, TCP_REPAIR, &value, sizeof(value)); + if (ret < 0) { + /* permit the unix sk resume successfully when the peer has been + * closed, just warn here */ + pr_warn("Can't repair %d socket\n", value); + } + + ret = unix_stream_repair_off(fd); + if (ret < 0) { + goto err_revert_and_exit; + } + } else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { pr_perror("Can't connect %d socket", ui->ue->ino); goto err_revert_and_exit; } @@ -2037,8 +2154,10 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) }
ui->name = (void *)ue->name.data; - } else - ui->name = NULL; + } else { + /* part 2: prevent NULL pointer oops */ + ui->name = ""; + } ui->name_dir = (void *)ue->name_dir;
ui->flags = 0; diff --git a/images/sk-unix.proto b/images/sk-unix.proto index 2a3a7cc..610080a 100644 --- a/images/sk-unix.proto +++ b/images/sk-unix.proto @@ -52,4 +52,5 @@ message unix_sk_entry { optional uint32 ns_id = 16; optional sint32 mnt_id = 17 [default = -1]; /* Please, don't use field with number 18. */ + required sint32 repair_ino = 19; }