We should try our best to ensure the success of criu. For unix dgram sockets, unlike unix stream sockets, criu uses re-connect instead of repair. Therefore, this patch does the following things:
- detect the unix dgram socket file when criu dumps a unix dgram socket - add fault tolerance when connecting a unix dgram socket (focusing on the case where `/dev/log` disappears while rsyslog restarts)
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: fu.lin fulin10@huawei.com --- criu/sk-unix.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-)
diff --git a/criu/sk-unix.c b/criu/sk-unix.c index d4c15ce..b4c24ed 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <dlfcn.h> #include <libgen.h> +#include <time.h>
#include "libnetlink.h" #include "cr_options.h" @@ -1371,6 +1372,33 @@ err: return -1; }
+/* + * Sometimes `/dev/log` disappears briefly because rsyslog restarts when + * rotating logs, so connecting to it fails at that moment. To give the criu + * restore its best chance, retry connect() up to three times, sleeping 0.5s + * before each attempt. Returns 0 on success, else the last connect() result. + */ +static int unix_dgram_reconnect(int fd, struct sockaddr_un *addr, int len) +{ + int retval = 0; + struct timespec tim = { + .tv_sec = 0, + .tv_nsec = 5e+8, + }; + + for (int i = 0; i < 3; i++) { + nanosleep(&tim, NULL); + pr_warn("Can't connect unix socket(%s), %d retry\n", + addr->sun_path, i); + retval = connect(fd, (struct sockaddr *)addr, + sizeof(addr->sun_family) + len); + if (retval == 0) + break; + } + + return retval; +} + static int post_open_standalone(struct file_desc *d, int fd) { int fdstore_fd = -1, procfs_self_dir = -1, len; @@ -1453,8 +1481,11 @@ static int post_open_standalone(struct file_desc *d, int fd) goto err_revert_and_exit; } } else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { - pr_perror("Can't connect %d socket", ui->ue->ino); - goto err_revert_and_exit; + if (ui->ue->type != SOCK_DGRAM || errno != ENOENT + || unix_dgram_reconnect(fd, &addr, len) != 0) { + pr_perror("Can't connect %d socket", ui->ue->ino); + goto err_revert_and_exit; + } } mutex_unlock(mutex_ghost);