diff options
Diffstat (limited to 'pkg/rootless')
-rw-r--r-- | pkg/rootless/rootless_linux.c | 149 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 133 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 14 |
3 files changed, 248 insertions, 48 deletions
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 098ca7830..2356882e7 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -69,6 +69,19 @@ rootless_gid () static void do_pause () { + int i; + struct sigaction act; + int const sig[] = + { + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, SIGPOLL, + SIGPROF, SIGVTALRM, SIGXCPU, SIGXFSZ, 0 + }; + + act.sa_handler = SIG_IGN; + + for (i = 0; sig[i]; i++) + sigaction (sig[i], &act, NULL); + prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL); while (1) pause (); @@ -333,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack) #endif } +int +reexec_in_user_namespace_wait (int pid, int options) +{ + pid_t p; + int status; + + do + p = waitpid (pid, &status, 0); + while (p < 0 && errno == EINTR); + + if (p < 0) + return -1; + + if (WIFEXITED (status)) + return WEXITSTATUS (status); + if (WIFSIGNALED (status)) + return 128 + WTERMSIG (status); + return -1; +} + static int create_pause_process (const char *pause_pid_file_path, char **argv) { @@ -356,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv) while (r < 0 && errno == EINTR); close (p[0]); + reexec_in_user_namespace_wait(r, 0); + return r == 1 && b == '0' ? 0 : -1; } else @@ -560,8 +595,51 @@ check_proc_sys_userns_file (const char *path) } } +static int +copy_file_to_fd (const char *file_to_read, int outfd) +{ + char buf[512]; + int fd; + + fd = open (file_to_read, O_RDONLY); + if (fd < 0) + return fd; + + for (;;) + { + ssize_t r, w, t = 0; + + do + r = read (fd, buf, sizeof buf); + while (r < 0 && errno == EINTR); + if (r < 0) + { + close (fd); + return r; + } + + if (r == 0) + break; + + while (t < r) + { + do + w = write (outfd, &buf[t], r - t); + while (w < 0 && errno == EINTR); + if (w < 0) + { + close (fd); + return w; + } + t += w; + } + } + close (fd); + return 0; +} + int -reexec_in_user_namespace (int ready, char *pause_pid_file_path) +reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd) { int ret; pid_t pid; @@ -574,6 +652,7 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) char *listen_pid = NULL; bool do_socket_activation = false; char *cwd = getcwd (NULL, 0); + sigset_t sigset, oldsigset; if (cwd == NULL) { @@ -584,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) listen_pid = getenv("LISTEN_PID"); listen_fds = getenv("LISTEN_FDS"); - if (listen_pid != NULL && listen_fds != NULL) { - if (strtol(listen_pid, NULL, 10) == getpid()) { - do_socket_activation = true; + if (listen_pid != NULL && listen_fds != NULL) + { + if (strtol(listen_pid, NULL, 10) == getpid()) + do_socket_activation = true; } - } sprintf (uid, "%d", geteuid ()); sprintf (gid, "%d", getegid ()); @@ -621,6 +700,22 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) return pid; } + if (sigfillset (&sigset) < 0) + { + fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (sigdelset (&sigset, SIGCHLD) < 0) + { + fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0) + { + fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + argv = get_cmd_line_args (ppid); if (argv == NULL) { @@ -628,11 +723,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) _exit (EXIT_FAILURE); } - if (do_socket_activation) { - char s[32]; - sprintf (s, "%d", getpid()); - setenv ("LISTEN_PID", s, true); - } + if (do_socket_activation) + { + char s[32]; + sprintf (s, "%d", getpid()); + setenv ("LISTEN_PID", s, true); + } setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); @@ -685,27 +781,20 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) while (ret < 0 && errno == EINTR); close (ready); - execvp (argv[0], argv); - - _exit (EXIT_FAILURE); -} - -int -reexec_in_user_namespace_wait (int pid) -{ - pid_t p; - int status; + if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0) + { + fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } - do - p = waitpid (pid, &status, 0); - while (p < 0 && errno == EINTR); + if (file_to_read && file_to_read[0]) + { + ret = copy_file_to_fd (file_to_read, outputfd); + close (outputfd); + _exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); + } - if (p < 0) - return -1; + execvp (argv[0], argv); - if (WIFEXITED (status)) - return WEXITSTATUS (status); - if (WIFSIGNALED (status)) - return 128 + WTERMSIG (status); - return -1; + _exit (EXIT_FAILURE); } diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 9132c0fe5..d302b1777 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -26,8 +26,8 @@ import ( #include <stdlib.h> extern uid_t rootless_uid(); extern uid_t rootless_gid(); -extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); -extern int reexec_in_user_namespace_wait(int pid); +extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd); +extern int reexec_in_user_namespace_wait(int pid, int options); extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); */ import "C" @@ -194,10 +194,24 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) { } } -// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount +func enableLinger(pausePid string) { + if pausePid == "" { + return + } + // If we are trying to write a pause pid file, make sure we can leave processes + // running longer than the user session. + err := exec.Command("loginctl", "enable-linger", fmt.Sprintf("%d", GetRootlessUID())).Run() + if err != nil { + logrus.Warnf("cannot run `loginctl enable-linger` for the current user: %v", err) + } +} + +// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. -func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { +func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { + enableLinger(pausePid) + if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } @@ -226,7 +240,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, errors.Errorf("cannot re-exec process") } - ret := C.reexec_in_user_namespace_wait(pidC) + ret := C.reexec_in_user_namespace_wait(pidC, 0) if ret < 0 { return false, -1, errors.New("error waiting for the re-exec process") } @@ -234,11 +248,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return true, int(ret), nil } -// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed -// into a new user namespace and the return code from the re-executed podman process. -// If podman was re-executed the caller needs to propagate the error code returned by the child -// process. -func BecomeRootInUserNS(pausePid string) (bool, int, error) { +func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -249,6 +259,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { cPausePid := C.CString(pausePid) defer C.free(unsafe.Pointer(cPausePid)) + cFileToRead := C.CString(fileToRead) + defer C.free(unsafe.Pointer(cFileToRead)) + var fileOutputFD C.int + if fileOutput != nil { + fileOutputFD = C.int(fileOutput.Fd()) + } + runtime.LockOSThread() defer runtime.UnlockOSThread() @@ -262,7 +279,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { defer w.Close() defer w.Write([]byte("0")) - pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) + pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD) pid := int(pidC) if pid < 0 { return false, -1, errors.Errorf("cannot re-exec process") @@ -328,6 +345,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { return false, -1, errors.Wrapf(err, "read from sync pipe") } + if fileOutput != nil { + return true, 0, nil + } + if b[0] == '2' { // We have lost the race for writing the PID file, as probably another // process created a namespace and wrote the PID. @@ -336,7 +357,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { if err == nil { pid, err := strconv.ParseUint(string(data), 10, 0) if err == nil { - return JoinUserAndMountNS(uint(pid), "") + return joinUserAndMountNS(uint(pid), "") } } return false, -1, errors.Wrapf(err, "error setting up the process") @@ -368,10 +389,96 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { } }() - ret := C.reexec_in_user_namespace_wait(pidC) + ret := C.reexec_in_user_namespace_wait(pidC, 0) if ret < 0 { return false, -1, errors.New("error waiting for the re-exec process") } return true, int(ret), nil } + +// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed +// into a new user namespace and the return code from the re-executed podman process. +// If podman was re-executed the caller needs to propagate the error code returned by the child +// process. +func BecomeRootInUserNS(pausePid string) (bool, int, error) { + enableLinger(pausePid) + return becomeRootInUserNS(pausePid, "", nil) +} + +// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. +// This is useful when there are already running containers and we +// don't have a pause process yet. We can use the paths to the conmon +// processes to attempt joining their namespaces. +// If needNewNamespace is set, the file is read from a temporary user +// namespace, this is useful for containers that are running with a +// different uidmap and the unprivileged user has no way to read the +// file owned by the root in the container. +func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { + if len(paths) == 0 { + return BecomeRootInUserNS(pausePidPath) + } + + var lastErr error + var pausePid int + + for _, path := range paths { + if !needNewNamespace { + data, err := ioutil.ReadFile(path) + if err != nil { + lastErr = err + continue + } + + pausePid, err = strconv.Atoi(string(data)) + if err != nil { + lastErr = errors.Wrapf(err, "cannot parse file %s", path) + continue + } + + lastErr = nil + break + } else { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + lastErr = err + continue + } + + r, w := os.NewFile(uintptr(fds[0]), "read file"), os.NewFile(uintptr(fds[1]), "write file") + + defer w.Close() + defer r.Close() + + if _, _, err := becomeRootInUserNS("", path, w); err != nil { + lastErr = err + continue + } + + w.Close() + defer func() { + r.Close() + C.reexec_in_user_namespace_wait(-1, 0) + }() + + b := make([]byte, 32) + + n, err := r.Read(b) + if err != nil { + lastErr = errors.Wrapf(err, "cannot read %s\n", path) + continue + } + + pausePid, err = strconv.Atoi(string(b[:n])) + if err == nil { + lastErr = nil + break + } + } + } + if lastErr != nil { + return false, 0, lastErr + } + + return joinUserAndMountNS(uint(pausePid), pausePidPath) +} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 221baff97..c063adee5 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -29,10 +29,14 @@ func GetRootlessGID() int { return -1 } -// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount -// namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts -// with a default configuration. -func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { +// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. +// This is useful when there are already running containers and we +// don't have a pause process yet. We can use the paths to the conmon +// processes to attempt joining their namespaces. +// If needNewNamespace is set, the file is read from a temporary user +// namespace, this is useful for containers that are running with a +// different uidmap and the unprivileged user has no way to read the +// file owned by the root in the container. +func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } |