From ce26aa701f5d49b3aaac08b34b1fbc1492067f46 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 23 May 2019 12:09:25 +0200 Subject: rootless: block signals for pause block signals for the pause process, so it can't be killed by mistake. Signed-off-by: Giuseppe Scrivano --- pkg/rootless/rootless_linux.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 098ca7830..b08a27fec 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -69,6 +69,19 @@ rootless_gid () static void do_pause () { + int i; + struct sigaction act; + int const sig[] = + { + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, SIGPOLL, + SIGPROF, SIGVTALRM, SIGXCPU, SIGXFSZ, 0 + }; + + act.sa_handler = SIG_IGN; + + for (i = 0; sig[i]; i++) + sigaction (sig[i], &act, NULL); + prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL); while (1) pause (); @@ -574,6 +587,7 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) char *listen_pid = NULL; bool do_socket_activation = false; char *cwd = getcwd (NULL, 0); + sigset_t sigset, oldsigset; if (cwd == NULL) { @@ -621,6 +635,22 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) return pid; } + if (sigfillset (&sigset) < 0) + { + fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (sigdelset (&sigset, SIGCHLD) < 0) + { + fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0) + { + fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + argv = get_cmd_line_args (ppid); if (argv == NULL) { @@ -685,6 +715,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) while (ret < 0 && errno == EINTR); close (ready); + if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0) + { + fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + execvp (argv[0], argv); _exit (EXIT_FAILURE); -- cgit v1.2.3-54-g00ecf From ee11f3bce960cf2eeff0bf9b410c752bbc6af89f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 23 May 2019 11:57:51 +0200 Subject: rootless: new function to join existing conmon processes move the logic for joining existing namespaces down to the rootless package. In main_local we still retrieve the list of conmon pid files and use it from the rootless package. In addition, create a temporary user namespace for reading these files, as the unprivileged user might not have enough privileges for reading the conmon pid file, for example when running with a different uidmap and root in the container is different than the rootless user. Closes: https://github.com/containers/libpod/issues/3187 Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 44 ++++---------- pkg/rootless/rootless_linux.c | 113 +++++++++++++++++++++++++---------- pkg/rootless/rootless_linux.go | 112 ++++++++++++++++++++++++++++++---- pkg/rootless/rootless_unsupported.go | 12 ++++ 4 files changed, 207 insertions(+), 74 deletions(-) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 5af05a11e..b4f21bd0c 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -4,11 +4,9 @@ package main import ( "context" - "io/ioutil" "log/syslog" "os" "runtime/pprof" - "strconv" "strings" "syscall" @@ -120,18 +118,10 @@ func setupRootless(cmd *cobra.Command, args []string) error { return errors.Wrapf(err, "could not get pause process pid file path") } - data, err := ioutil.ReadFile(pausePidPath) - if err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath) - } - if err == nil { - pausePid, err := strconv.Atoi(string(data)) - if err != nil { - return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath) - } - became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "") + if _, err := os.Stat(pausePidPath); err == nil { + became, ret, err := rootless.TryJoinFromFilePaths("", false, []string{pausePidPath}) if err != nil { - logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath) + logrus.Errorf("cannot join pause process. You may need to remove %s and stop all containers", pausePidPath) logrus.Errorf("you can use `system migrate` to recreate the pause process") logrus.Errorf(err.Error()) os.Exit(1) @@ -154,28 +144,13 @@ func setupRootless(cmd *cobra.Command, args []string) error { logrus.Errorf(err.Error()) os.Exit(1) } - var became bool - var ret int - if len(ctrs) == 0 { - became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) - } else { - for _, ctr := range ctrs { - data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile) - if err != nil { - logrus.Errorf(err.Error()) - continue - } - conmonPid, err := strconv.Atoi(string(data)) - if err != nil { - logrus.Errorf(err.Error()) - continue - } - became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath) - if err == nil { - break - } - } + + paths := []string{} + for _, ctr := range ctrs { + paths = append(paths, ctr.Config().ConmonPidFile) } + + became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths) if err != nil { logrus.Errorf(err.Error()) os.Exit(1) @@ -185,6 +160,7 @@ func setupRootless(cmd *cobra.Command, args []string) error { } return nil } + func setRLimits() error { rlimits := new(syscall.Rlimit) rlimits.Cur = 1048576 diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index b08a27fec..2356882e7 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -346,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack) #endif } +int +reexec_in_user_namespace_wait (int pid, int options) +{ + pid_t p; + int status; + + do + p = waitpid (pid, &status, 0); + while (p < 0 && errno == EINTR); + + if (p < 0) + return -1; + + if (WIFEXITED (status)) + return WEXITSTATUS (status); + if (WIFSIGNALED (status)) + return 128 + WTERMSIG (status); + return -1; +} + static int create_pause_process (const char *pause_pid_file_path, char **argv) { @@ -369,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv) while (r < 0 && errno == EINTR); close (p[0]); + reexec_in_user_namespace_wait(r, 0); + return r == 1 && b == '0' ? 0 : -1; } else @@ -573,8 +595,51 @@ check_proc_sys_userns_file (const char *path) } } +static int +copy_file_to_fd (const char *file_to_read, int outfd) +{ + char buf[512]; + int fd; + + fd = open (file_to_read, O_RDONLY); + if (fd < 0) + return fd; + + for (;;) + { + ssize_t r, w, t = 0; + + do + r = read (fd, buf, sizeof buf); + while (r < 0 && errno == EINTR); + if (r < 0) + { + close (fd); + return r; + } + + if (r == 0) + break; + + while (t < r) + { + do + w = write (outfd, &buf[t], r - t); + while (w < 0 && errno == EINTR); + if (w < 0) + { + close (fd); + return w; + } + t += w; + } + } + close (fd); + return 0; +} + int -reexec_in_user_namespace (int ready, char *pause_pid_file_path) +reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd) { int ret; pid_t pid; @@ -598,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) listen_pid = getenv("LISTEN_PID"); listen_fds = getenv("LISTEN_FDS"); - if (listen_pid != NULL && listen_fds != NULL) { - if (strtol(listen_pid, NULL, 10) == getpid()) { - do_socket_activation = true; + if (listen_pid != NULL && listen_fds != NULL) + { + if (strtol(listen_pid, NULL, 10) == getpid()) + do_socket_activation = true; } - } sprintf (uid, "%d", geteuid ()); sprintf (gid, "%d", getegid ()); @@ -658,11 +723,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) _exit (EXIT_FAILURE); } - if (do_socket_activation) { - char s[32]; - sprintf (s, "%d", getpid()); - setenv ("LISTEN_PID", s, true); - } + if (do_socket_activation) + { + char s[32]; + sprintf (s, "%d", getpid()); + setenv ("LISTEN_PID", s, true); + } setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); @@ -721,27 +787,14 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path) _exit (EXIT_FAILURE); } + if (file_to_read && file_to_read[0]) + { + ret = copy_file_to_fd (file_to_read, outputfd); + close (outputfd); + _exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); + } + execvp (argv[0], argv); _exit (EXIT_FAILURE); } - -int -reexec_in_user_namespace_wait (int pid) -{ - pid_t p; - int status; - - do - p = waitpid (pid, &status, 0); - while (p < 0 && errno == EINTR); - - if (p < 0) - return -1; - - if (WIFEXITED (status)) - return WEXITSTATUS (status); - if (WIFSIGNALED (status)) - return 128 + WTERMSIG (status); - return -1; -} diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 9132c0fe5..27d64d1fc 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -26,8 +26,8 @@ import ( #include extern uid_t rootless_uid(); extern uid_t rootless_gid(); -extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); -extern int reexec_in_user_namespace_wait(int pid); +extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd); +extern int reexec_in_user_namespace_wait(int pid, int options); extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); */ import "C" @@ -226,7 +226,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, errors.Errorf("cannot re-exec process") } - ret := C.reexec_in_user_namespace_wait(pidC) + ret := C.reexec_in_user_namespace_wait(pidC, 0) if ret < 0 { return false, -1, errors.New("error waiting for the re-exec process") } @@ -234,11 +234,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return true, int(ret), nil } -// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed -// into a new user namespace and the return code from the re-executed podman process. -// If podman was re-executed the caller needs to propagate the error code returned by the child -// process. -func BecomeRootInUserNS(pausePid string) (bool, int, error) { +func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -249,6 +245,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { cPausePid := C.CString(pausePid) defer C.free(unsafe.Pointer(cPausePid)) + cFileToRead := C.CString(fileToRead) + defer C.free(unsafe.Pointer(cFileToRead)) + var fileOutputFD C.int + if fileOutput != nil { + fileOutputFD = C.int(fileOutput.Fd()) + } + runtime.LockOSThread() defer runtime.UnlockOSThread() @@ -262,7 +265,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { defer w.Close() defer w.Write([]byte("0")) - pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) + pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD) pid := int(pidC) if pid < 0 { return false, -1, errors.Errorf("cannot re-exec process") @@ -328,6 +331,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { return false, -1, errors.Wrapf(err, "read from sync pipe") } + if fileOutput != nil { + return true, 0, nil + } + if b[0] == '2' { // We have lost the race for writing the PID file, as probably another // process created a namespace and wrote the PID. @@ -368,10 +375,95 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) { } }() - ret := C.reexec_in_user_namespace_wait(pidC) + ret := C.reexec_in_user_namespace_wait(pidC, 0) if ret < 0 { return false, -1, errors.New("error waiting for the re-exec process") } return true, int(ret), nil } + +// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed +// into a new user namespace and the return code from the re-executed podman process. +// If podman was re-executed the caller needs to propagate the error code returned by the child +// process. +func BecomeRootInUserNS(pausePid string) (bool, int, error) { + return becomeRootInUserNS(pausePid, "", nil) +} + +// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. +// This is useful when there are already running containers and we +// don't have a pause process yet. We can use the paths to the conmon +// processes to attempt joining their namespaces. +// If needNewNamespace is set, the file is read from a temporary user +// namespace, this is useful for containers that are running with a +// different uidmap and the unprivileged user has no way to read the +// file owned by the root in the container. +func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { + if len(paths) == 0 { + return BecomeRootInUserNS(pausePidPath) + } + + var lastErr error + var pausePid int + + for _, path := range paths { + if !needNewNamespace { + data, err := ioutil.ReadFile(path) + if err != nil { + lastErr = err + continue + } + + pausePid, err = strconv.Atoi(string(data)) + if err != nil { + lastErr = errors.Wrapf(err, "cannot parse file %s", path) + continue + } + + lastErr = nil + break + } else { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + lastErr = err + continue + } + + r, w := os.NewFile(uintptr(fds[0]), "read file"), os.NewFile(uintptr(fds[1]), "write file") + + defer w.Close() + defer r.Close() + + if _, _, err := becomeRootInUserNS("", path, w); err != nil { + lastErr = err + continue + } + + w.Close() + defer func() { + r.Close() + C.reexec_in_user_namespace_wait(-1, 0) + }() + + b := make([]byte, 32) + + n, err := r.Read(b) + if err != nil { + lastErr = errors.Wrapf(err, "cannot read %s\n", path) + continue + } + + pausePid, err = strconv.Atoi(string(b[:n])) + if err == nil { + lastErr = nil + break + } + } + } + if lastErr != nil { + return false, 0, lastErr + } + + return JoinUserAndMountNS(uint(pausePid), pausePidPath) +} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 221baff97..06781e6ce 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -36,3 +36,15 @@ func GetRootlessGID() int { func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } + +// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. +// This is useful when there are already running containers and we +// don't have a pause process yet. We can use the paths to the conmon +// processes to attempt joining their namespaces. +// If needNewNamespace is set, the file is read from a temporary user +// namespace, this is useful for containers that are running with a +// different uidmap and the unprivileged user has no way to read the +// file owned by the root in the container. +func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { + return false, -1, errors.New("this function is not supported on this os") +} -- cgit v1.2.3-54-g00ecf From 30ef6ba125536077923f19ed079b321f264ede99 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 23 May 2019 18:13:21 +0200 Subject: rootless: enable loginctl linger otherwise the processes we leave around will be killed once the session terminates. Signed-off-by: Giuseppe Scrivano --- pkg/rootless/rootless_linux.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 27d64d1fc..3743911b8 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -194,10 +194,24 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) { } } +func enableLinger(pausePid string) { + if pausePid == "" { + return + } + // If we are trying to write a pause pid file, make sure we can leave processes + // running longer than the user session. + err := exec.Command("loginctl", "enable-linger", fmt.Sprintf("%d", GetRootlessUID())).Run() + if err != nil { + logrus.Warnf("cannot run `loginctl enable-linger` for the current user: %v", err) + } +} + // JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { + enableLinger(pausePid) + if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } @@ -388,6 +402,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, // If podman was re-executed the caller needs to propagate the error code returned by the child // process. func BecomeRootInUserNS(pausePid string) (bool, int, error) { + enableLinger(pausePid) return becomeRootInUserNS(pausePid, "", nil) } -- cgit v1.2.3-54-g00ecf From c4dedd302182fa44492baf7b0f9da3f148a08ae4 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 24 May 2019 09:06:22 +0200 Subject: Revert "rootless: change default path for conmon.pid" since we now enter the user namespace prior to read the conmon.pid, we can write the conmon.pid file again to the runtime dir. This reverts commit 6c6a8654363457a9638d58265d0a7e8743575d7a. Signed-off-by: Giuseppe Scrivano --- libpod/runtime_ctr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index c7758055f..cba8bdb1a 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -167,7 +167,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. }() if rootless.IsRootless() && ctr.config.ConmonPidFile == "" { - ctr.config.ConmonPidFile = filepath.Join(ctr.config.StaticDir, "conmon.pid") + ctr.config.ConmonPidFile = filepath.Join(ctr.state.RunDir, "conmon.pid") } // Go through named volumes and add them. -- cgit v1.2.3-54-g00ecf From 153503e3916accf3da27c4893a37ea94731ac2c7 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 24 May 2019 09:41:06 +0200 Subject: rootless: make JoinUserAndMountNS private as it is used only by the rootless package now. Signed-off-by: Giuseppe Scrivano --- pkg/rootless/rootless_linux.go | 8 ++++---- pkg/rootless/rootless_unsupported.go | 8 -------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 3743911b8..d302b1777 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -206,10 +206,10 @@ func enableLinger(pausePid string) { } } -// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount +// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. -func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { +func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { enableLinger(pausePid) if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { @@ -357,7 +357,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, if err == nil { pid, err := strconv.ParseUint(string(data), 10, 0) if err == nil { - return JoinUserAndMountNS(uint(pid), "") + return joinUserAndMountNS(uint(pid), "") } } return false, -1, errors.Wrapf(err, "error setting up the process") @@ -480,5 +480,5 @@ func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []st return false, 0, lastErr } - return JoinUserAndMountNS(uint(pausePid), pausePidPath) + return joinUserAndMountNS(uint(pausePid), pausePidPath) } diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 06781e6ce..c063adee5 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -29,14 +29,6 @@ func GetRootlessGID() int { return -1 } -// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount -// namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts -// with a default configuration. -func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - // TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. // This is useful when there are already running containers and we // don't have a pause process yet. We can use the paths to the conmon -- cgit v1.2.3-54-g00ecf