From 2e0fef51b3928337ef46629b4627ff1700a918d1 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 8 May 2019 13:44:06 +0200 Subject: migrate: not create a new namespace this leaves the containers stopped but we won't risk to use the wrong user namespace. Signed-off-by: Giuseppe Scrivano --- libpod/runtime.go | 12 ------------ libpod/runtime_migrate.go | 9 ++------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/libpod/runtime.go b/libpod/runtime.go index 18e9dfeb3..f9b34e315 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -966,18 +966,6 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { runtime.valid = true if runtime.doMigrate { - if os.Geteuid() != 0 { - aliveLock.Unlock() - locked = false - - became, ret, err := rootless.BecomeRootInUserNS() - if err != nil { - return err - } - if became { - os.Exit(ret) - } - } if err := runtime.migrate(ctx); err != nil { return err } diff --git a/libpod/runtime_migrate.go b/libpod/runtime_migrate.go index 0bb8e952f..116885d3a 100644 --- a/libpod/runtime_migrate.go +++ b/libpod/runtime_migrate.go @@ -2,6 +2,7 @@ package libpod import ( "context" + "fmt" "path/filepath" "github.com/pkg/errors" @@ -21,7 +22,7 @@ func (r *Runtime) migrate(ctx context.Context) error { logrus.Infof("stopping all containers") for _, ctr := range runningContainers { - logrus.Infof("stopping %s", ctr.ID()) + fmt.Printf("stopped %s\n", ctr.ID()) if err := ctr.Stop(); err != nil { return errors.Wrapf(err, "cannot stop container %s", ctr.ID()) } @@ -38,11 +39,5 @@ func (r *Runtime) migrate(ctx context.Context) error { } } - for _, ctr := range runningContainers { - if err := ctr.Start(ctx, true); err != nil { - logrus.Errorf("error restarting container %s", ctr.ID()) - } - } - return nil } -- cgit v1.2.3-54-g00ecf From 791d53a21421fba249156ea3a503e9e04a4912e4 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 8 May 2019 13:49:07 +0200 Subject: rootless: use a pause process use a pause 
process to keep the user and mount namespace alive. The pause process is created immediately on reload, and all successive Podman processes will refer to it for joining the user&mount namespace. This solves all the race conditions we had on joining the correct namespaces using the conmon processes. As a fallback if the join fails for any reason (e.g. the pause process was killed), then we try to join the running containers as we were doing before. Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 37 +++++- cmd/podman/mount.go | 2 +- libpod/runtime.go | 6 +- pkg/rootless/rootless_linux.c | 240 +++++++++++++++++++++++++++++------ pkg/rootless/rootless_linux.go | 49 +++++-- pkg/rootless/rootless_unsupported.go | 4 +- pkg/util/utils_supported.go | 10 ++ pkg/util/utils_windows.go | 6 + 8 files changed, 301 insertions(+), 53 deletions(-) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 7452965a2..2024d4b31 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -16,6 +16,7 @@ import ( "github.com/containers/libpod/cmd/podman/libpodruntime" "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/tracing" + "github.com/containers/libpod/pkg/util" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -113,6 +114,34 @@ func setupRootless(cmd *cobra.Command, args []string) error { MainGlobalOpts, remoteclient, } + + pausePidPath, err := util.GetRootlessPauseProcessPidPath() + if err != nil { + return errors.Wrapf(err, "could not get pause process pid file path") + } + + data, err := ioutil.ReadFile(pausePidPath) + if err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath) + } + if err == nil { + pausePid, err := strconv.Atoi(string(data)) + if err != nil { + return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath) + } + became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), 
"") + if err != nil { + logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath) + logrus.Errorf(err.Error()) + os.Exit(1) + } + if became { + os.Exit(ret) + } + } + + // if there is no pid file, try to join existing containers, and create a pause process. + runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) if err != nil { return errors.Wrapf(err, "could not get runtime") @@ -127,20 +156,20 @@ func setupRootless(cmd *cobra.Command, args []string) error { var became bool var ret int if len(ctrs) == 0 { - became, ret, err = rootless.BecomeRootInUserNS() + became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) } else { for _, ctr := range ctrs { data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile) if err != nil { logrus.Errorf(err.Error()) - os.Exit(1) + continue } conmonPid, err := strconv.Atoi(string(data)) if err != nil { logrus.Errorf(err.Error()) - os.Exit(1) + continue } - became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid)) + became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath) if err == nil { break } diff --git a/cmd/podman/mount.go b/cmd/podman/mount.go index 7c9150d1b..662fb0a28 100644 --- a/cmd/podman/mount.go +++ b/cmd/podman/mount.go @@ -78,7 +78,7 @@ func mountCmd(c *cliconfig.MountValues) error { return fmt.Errorf("cannot mount using driver %s in rootless mode", driver) } - became, ret, err := rootless.BecomeRootInUserNS() + became, ret, err := rootless.BecomeRootInUserNS("") if err != nil { return err } diff --git a/libpod/runtime.go b/libpod/runtime.go index f9b34e315..def7ba639 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -892,7 +892,11 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { // we will need to access the storage. 
if os.Geteuid() != 0 { aliveLock.Unlock() - became, ret, err := rootless.BecomeRootInUserNS() + pausePid, err := util.GetRootlessPauseProcessPidPath() + if err != nil { + return errors.Wrapf(err, "could not get pause process pid file path") + } + became, ret, err := rootless.BecomeRootInUserNS(pausePid) if err != nil { return err } diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 1d32b1adb..b87deb86e 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -17,6 +17,22 @@ #include #include #include +#include + +#ifndef RENAME_NOREPLACE +# define RENAME_NOREPLACE (1 << 0) + +int renameat2 (int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags) +{ +# ifdef __NR_renameat2 + return (int) syscall (__NR_renameat2, olddirfd, oldpath, newdirfd, newpath, flags); +# else + /* no way to implement it atomically. */ + errno = ENOSYS; + return -1; +# endif +} +#endif static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces"; static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone"; @@ -24,32 +40,6 @@ static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivilege static int open_files_max_fd; fd_set open_files_set; -static void __attribute__((constructor)) init() -{ - DIR *d; - - /* Store how many FDs were open before the Go runtime kicked in. 
*/ - d = opendir ("/proc/self/fd"); - if (d) - { - struct dirent *ent; - - FD_ZERO (&open_files_set); - for (ent = readdir (d); ent; ent = readdir (d)) - { - int fd = atoi (ent->d_name); - if (fd != dirfd (d)) - { - if (fd > open_files_max_fd) - open_files_max_fd = fd; - FD_SET (fd, &open_files_set); - } - } - closedir (d); - } -} - - static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) { @@ -62,14 +52,12 @@ syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid) return (int) syscall (__NR_setresgid, rgid, egid, sgid); } -static int -syscall_clone (unsigned long flags, void *child_stack) +static void +do_pause () { -#if defined(__s390__) || defined(__CRIS__) - return (int) syscall (__NR_clone, child_stack, flags); -#else - return (int) syscall (__NR_clone, flags, child_stack); -#endif + prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL); + while (1) + pause (); } static char ** @@ -139,8 +127,164 @@ get_cmd_line_args (pid_t pid) return argv; } +static void __attribute__((constructor)) init() +{ + const char *xdg_runtime_dir; + const char *pause; + DIR *d; + + pause = getenv ("_PODMAN_PAUSE"); + if (pause && pause[0]) + { + do_pause (); + _exit (EXIT_FAILURE); + } + + /* Store how many FDs were open before the Go runtime kicked in. 
*/ + d = opendir ("/proc/self/fd"); + if (d) + { + struct dirent *ent; + + FD_ZERO (&open_files_set); + for (ent = readdir (d); ent; ent = readdir (d)) + { + int fd = atoi (ent->d_name); + if (fd != dirfd (d)) + { + if (fd > open_files_max_fd) + open_files_max_fd = fd; + FD_SET (fd, &open_files_set); + } + } + closedir (d); + } +} + +static int +syscall_clone (unsigned long flags, void *child_stack) +{ +#if defined(__s390__) || defined(__CRIS__) + return (int) syscall (__NR_clone, child_stack, flags); +#else + return (int) syscall (__NR_clone, flags, child_stack); +#endif +} + +static int +create_pause_process (const char *pause_pid_file_path, char **argv) +{ + int r, p[2]; + + if (pipe (p) < 0) + _exit (EXIT_FAILURE); + + r = fork (); + if (r < 0) + _exit (EXIT_FAILURE); + + if (r) + { + char b; + + close (p[1]); + /* Block until we write the pid file. */ + do + r = read (p[0], &b, 1); + while (r < 0 && errno == EINTR); + close (p[0]); + + return r == 1 && b == '0' ? 0 : -1; + } + else + { + int fd; + pid_t pid; + + close (p[0]); + + setsid (); + pid = fork (); + if (r < 0) + _exit (EXIT_FAILURE); + + if (pid) + { + char pid_str[12]; + char *tmp_file_path = NULL; + + sprintf (pid_str, "%d", pid); + + asprintf (&tmp_file_path, "%s.XXXXXX", pause_pid_file_path); + if (tmp_file_path == NULL) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + fd = mkstemp (tmp_file_path); + if (fd < 0) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + do + r = write (fd, pid_str, strlen (pid_str)); + while (r < 0 && errno == EINTR); + if (r < 0) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + close (fd); + + /* There can be another process at this point trying to configure the user namespace and the pause + process, do not override the pid file if it already exists. 
*/ + if (renameat2 (AT_FDCWD, tmp_file_path, AT_FDCWD, pause_pid_file_path, RENAME_NOREPLACE) < 0) + { + unlink (tmp_file_path); + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + do + r = write (p[1], "0", 1); + while (r < 0 && errno == EINTR); + close (p[1]); + + _exit (EXIT_SUCCESS); + } + else + { + int null; + + close (p[1]); + + null = open ("/dev/null", O_RDWR); + if (null >= 0) + { + dup2 (null, 0); + dup2 (null, 1); + dup2 (null, 2); + close (null); + } + + for (fd = 3; fd < open_files_max_fd + 16; fd++) + close (fd); + + setenv ("_PODMAN_PAUSE", "1", 1); + execlp (argv[0], NULL); + + /* If the execve fails, then do the pause here. */ + do_pause (); + _exit (EXIT_FAILURE); + } + } +} + int -reexec_userns_join (int userns, int mountns) +reexec_userns_join (int userns, int mountns, char *pause_pid_file_path) { pid_t ppid = getpid (); char uid[16]; @@ -221,6 +365,12 @@ reexec_userns_join (int userns, int mountns) } free (cwd); + if (pause_pid_file_path && pause_pid_file_path[0] != '\0') + { + /* We ignore errors here as we didn't create the namespace anyway. 
*/ + create_pause_process (pause_pid_file_path, argv); + } + execvp (argv[0], argv); _exit (EXIT_FAILURE); @@ -246,7 +396,7 @@ check_proc_sys_userns_file (const char *path) } int -reexec_in_user_namespace (int ready) +reexec_in_user_namespace (int ready, char *pause_pid_file_path) { int ret; pid_t pid; @@ -328,29 +478,45 @@ reexec_in_user_namespace (int ready) fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno)); _exit (EXIT_FAILURE); } - close (ready); - if (b != '1') + if (b != '0') _exit (EXIT_FAILURE); if (syscall_setresgid (0, 0, 0) < 0) { fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } if (syscall_setresuid (0, 0, 0) < 0) { fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } if (chdir (cwd) < 0) { fprintf (stderr, "cannot chdir: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } free (cwd); + if (pause_pid_file_path && pause_pid_file_path[0] != '\0') + { + if (create_pause_process (pause_pid_file_path, argv) < 0) + { + write (ready, "2", 1); + _exit (EXIT_FAILURE); + } + } + + do + ret = write (ready, "0", 1) < 0; + while (ret < 0 && errno == EINTR); + close (ready); + execvp (argv[0], argv); _exit (EXIT_FAILURE); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 2c99f41a4..0390bbb6a 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -22,9 +22,10 @@ import ( ) /* -extern int reexec_in_user_namespace(int ready); +#include +extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); extern int reexec_in_user_namespace_wait(int pid); -extern int reexec_userns_join(int userns, int mountns); +extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); */ import "C" @@ -168,11 +169,14 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) { // JoinUserAndMountNS re-exec podman in a new userNS and 
join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. -func JoinUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } + cPausePid := C.CString(pausePid) + defer C.free(unsafe.Pointer(cPausePid)) + userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid)) if err != nil { return false, -1, err @@ -189,7 +193,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) { if err != nil { return false, -1, err } - pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd())) + pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } @@ -206,7 +210,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) { // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child // process. 
-func BecomeRootInUserNS() (bool, int, error) { +func BecomeRootInUserNS(pausePid string) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -214,18 +218,23 @@ func BecomeRootInUserNS() (bool, int, error) { return false, 0, nil } + cPausePid := C.CString(pausePid) + defer C.free(unsafe.Pointer(cPausePid)) + runtime.LockOSThread() defer runtime.UnlockOSThread() - r, w, err := os.Pipe() + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) if err != nil { return false, -1, err } + r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child") + defer r.Close() defer w.Close() defer w.Write([]byte("0")) - pidC := C.reexec_in_user_namespace(C.int(r.Fd())) + pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) pid := int(pidC) if pid < 0 { return false, -1, errors.Errorf("cannot re-exec process") @@ -280,11 +289,35 @@ func BecomeRootInUserNS() (bool, int, error) { } } - _, err = w.Write([]byte("1")) + _, err = w.Write([]byte("0")) if err != nil { return false, -1, errors.Wrapf(err, "write to sync pipe") } + b := make([]byte, 1, 1) + _, err = w.Read(b) + if err != nil { + return false, -1, errors.Wrapf(err, "read from sync pipe") + } + + if b[0] == '2' { + // We have lost the race for writing the PID file, as probably another + // process created a namespace and wrote the PID. + // Try to join it. 
+ data, err := ioutil.ReadFile(pausePid) + if err == nil { + pid, err := strconv.ParseUint(string(data), 10, 0) + if err == nil { + return JoinUserAndMountNS(uint(pid), "") + } + } + return false, -1, errors.Wrapf(err, "error setting up the process") + } + + if b[0] != '0' { + return false, -1, errors.Wrapf(err, "error setting up the process") + } + c := make(chan os.Signal, 1) signals := []os.Signal{} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 47b5dd7cc..42f8f3aec 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -15,7 +15,7 @@ func IsRootless() bool { // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child // process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration. -func BecomeRootInUserNS() (bool, int, error) { +func BecomeRootInUserNS(pausePid string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } @@ -28,6 +28,6 @@ func GetRootlessUID() int { // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts // with a default configuration. 
-func JoinUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } diff --git a/pkg/util/utils_supported.go b/pkg/util/utils_supported.go index 8b98658c2..3d9140a23 100644 --- a/pkg/util/utils_supported.go +++ b/pkg/util/utils_supported.go @@ -82,3 +82,13 @@ func GetRootlessRuntimeDir() (string, error) { } return rootlessRuntimeDir, nil } + +// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for +// the pause process +func GetRootlessPauseProcessPidPath() (string, error) { + runtimeDir, err := GetRootlessRuntimeDir() + if err != nil { + return "", err + } + return filepath.Join(runtimeDir, "libpod", "pause.pid"), nil +} diff --git a/pkg/util/utils_windows.go b/pkg/util/utils_windows.go index b33733da9..3faa6f10c 100644 --- a/pkg/util/utils_windows.go +++ b/pkg/util/utils_windows.go @@ -15,3 +15,9 @@ func GetRootlessRuntimeDir() (string, error) { func IsCgroup2UnifiedMode() (bool, error) { return false, errors.New("this function is not implemented for windows") } + +// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for +// the pause process +func GetRootlessPauseProcessPidPath() (string, error) { + return "", errors.New("this function is not implemented for windows") +} -- cgit v1.2.3-54-g00ecf From 562357ebb26cacbe9a97c8c0a87c9524345158d0 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 8 May 2019 13:49:46 +0200 Subject: rootless: join namespace immediately when possible add a shortcut for joining immediately the namespace so we don't need to re-exec Podman. With the pause process simplification, we can now attempt to join the namespaces as soon as Podman starts (and before the Go runtime kicks in), so that we don't need to re-exec and use just one process. 
Signed-off-by: Giuseppe Scrivano --- pkg/rootless/rootless_linux.c | 172 ++++++++++++++++++++++++++++++++++++++--- pkg/rootless/rootless_linux.go | 8 ++ 2 files changed, 170 insertions(+), 10 deletions(-) diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index b87deb86e..a08cfd36a 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -39,6 +39,7 @@ static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivilege static int open_files_max_fd; fd_set open_files_set; +static uid_t rootless_uid_init; static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) @@ -52,6 +53,12 @@ syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid) return (int) syscall (__NR_setresgid, rgid, egid, sgid); } +uid_t +rootless_uid () +{ + return rootless_uid_init; +} + static void do_pause () { @@ -72,7 +79,10 @@ get_cmd_line_args (pid_t pid) int i, argc = 0; char **argv; - sprintf (path, "/proc/%d/cmdline", pid); + if (pid) + sprintf (path, "/proc/%d/cmdline", pid); + else + strcpy (path, "/proc/self/cmdline"); fd = open (path, O_RDONLY); if (fd < 0) return NULL; @@ -87,7 +97,10 @@ get_cmd_line_args (pid_t pid) ret = read (fd, buffer + used, allocated - used); while (ret < 0 && errno == EINTR); if (ret < 0) - return NULL; + { + free (buffer); + return NULL; + } if (ret == 0) break; @@ -97,11 +110,12 @@ get_cmd_line_args (pid_t pid) { allocated += 512; char *tmp = realloc (buffer, allocated); - if (buffer == NULL) { - free(buffer); - return NULL; - } - buffer=tmp; + if (buffer == NULL) + { + free (buffer); + return NULL; + } + buffer = tmp; } } close (fd); @@ -110,11 +124,17 @@ get_cmd_line_args (pid_t pid) if (buffer[i] == '\0') argc++; if (argc == 0) - return NULL; + { + free (buffer); + return NULL; + } argv = malloc (sizeof (char *) * (argc + 1)); if (argv == NULL) - return NULL; + { + free (buffer); + return NULL; + } argc = 0; argv[argc++] = buffer; @@ -127,6 +147,40 @@ get_cmd_line_args (pid_t pid) return 
argv; } +static bool +can_use_shortcut () +{ + int argc; + char **argv; + bool ret = true; + +#ifdef DISABLE_JOIN_SHORTCUT + return false; +#endif + + argv = get_cmd_line_args (0); + if (argv == NULL) + return NULL; + + for (argc = 0; argv[argc]; argc++) + { + if (argc == 0 || argv[argc][0] == '-') + continue; + + if (strcmp (argv[argc], "mount") == 0 + || strcmp (argv[argc], "search") == 0 + || strcmp (argv[argc], "system") == 0) + { + ret = false; + break; + } + } + + free (argv[0]); + free (argv); + return ret; +} + static void __attribute__((constructor)) init() { const char *xdg_runtime_dir; @@ -159,6 +213,104 @@ static void __attribute__((constructor)) init() } closedir (d); } + + /* Shortcut. If we are able to join the pause pid file, do it now so we don't + need to re-exec. */ + xdg_runtime_dir = getenv ("XDG_RUNTIME_DIR"); + if (xdg_runtime_dir && xdg_runtime_dir[0] && can_use_shortcut ()) + { + int r; + int fd; + long pid; + char buf[12]; + uid_t uid; + char path[PATH_MAX]; + const char *const suffix = "/libpod/pause.pid"; + char *cwd = getcwd (NULL, 0); + + if (cwd == NULL) + { + fprintf (stderr, "error getting current working directory: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (strlen (xdg_runtime_dir) >= PATH_MAX - strlen (suffix)) + { + fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %s", strerror (ENAMETOOLONG)); + exit (EXIT_FAILURE); + } + + sprintf (path, "%s%s", xdg_runtime_dir, suffix); + fd = open (path, O_RDONLY); + if (fd < 0) + { + free (cwd); + return; + } + + r = read (fd, buf, sizeof (buf)); + close (fd); + if (r < 0) + { + free (cwd); + return; + } + pid = strtol (buf, NULL, 10); + if (pid == LONG_MAX) + { + free (cwd); + return; + } + + uid = geteuid (); + + sprintf (path, "/proc/%d/ns/user", pid); + fd = open (path, O_RDONLY); + if (fd < 0 || setns (fd, 0) < 0) + { + free (cwd); + return; + } + close (fd); + + /* Errors here cannot be ignored as we already joined a ns. 
*/ + sprintf (path, "/proc/%d/ns/mnt", pid); + fd = open (path, O_RDONLY); + if (fd < 0) + { + fprintf (stderr, "cannot open %s: %s", path, strerror (errno)); + exit (EXIT_FAILURE); + } + + r = setns (fd, 0); + if (r < 0) + { + fprintf (stderr, "cannot join mount namespace for %d: %s", pid, strerror (errno)); + exit (EXIT_FAILURE); + } + close (fd); + + if (syscall_setresgid (0, 0, 0) < 0) + { + fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (syscall_setresuid (0, 0, 0) < 0) + { + fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (chdir (cwd) < 0) + { + fprintf (stderr, "cannot chdir: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + free (cwd); + rootless_uid_init = uid; + } } static int @@ -344,7 +496,7 @@ reexec_userns_join (int userns, int mountns, char *pause_pid_file_path) fprintf (stderr, "cannot setns: %s\n", strerror (errno)); _exit (EXIT_FAILURE); } - close (userns); + close (mountns); if (syscall_setresgid (0, 0, 0) < 0) { diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 0390bbb6a..ddf881368 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -22,7 +22,9 @@ import ( ) /* +#cgo remoteclient CFLAGS: -DDISABLE_JOIN_SHORTCUT #include +extern uid_t rootless_uid(); extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); extern int reexec_in_user_namespace_wait(int pid); extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); @@ -46,6 +48,12 @@ var ( // IsRootless tells us if we are running in rootless mode func IsRootless() bool { isRootlessOnce.Do(func() { + rootlessUIDInit := int(C.rootless_uid()) + if rootlessUIDInit != 0 { + // This happens if we joined the user+mount namespace as part of + os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done") + os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit)) + } isRootless = 
os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" }) return isRootless -- cgit v1.2.3-54-g00ecf From 9dabb16e6541a1b7bbb1c5a27a91e08863a5b491 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 9 May 2019 19:06:46 +0200 Subject: system: migrate stops the pause process Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 1 + libpod/runtime_migrate.go | 35 ++++++++++++++++++++++++++++++++++- libpod/runtime_migrate_unsupported.go | 11 +++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 libpod/runtime_migrate_unsupported.go diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 2024d4b31..5af05a11e 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -132,6 +132,7 @@ func setupRootless(cmd *cobra.Command, args []string) error { became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "") if err != nil { logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath) + logrus.Errorf("you can use `system migrate` to recreate the pause process") logrus.Errorf(err.Error()) os.Exit(1) } diff --git a/libpod/runtime_migrate.go b/libpod/runtime_migrate.go index 116885d3a..e32e6edf6 100644 --- a/libpod/runtime_migrate.go +++ b/libpod/runtime_migrate.go @@ -1,14 +1,47 @@ +// +build linux + package libpod import ( "context" "fmt" + "io/ioutil" + "os" "path/filepath" + "strconv" + "syscall" + "github.com/containers/libpod/pkg/rootless" + "github.com/containers/libpod/pkg/util" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) +func stopPauseProcess() error { + if rootless.IsRootless() { + pausePidPath, err := util.GetRootlessPauseProcessPidPath() + if err != nil { + return errors.Wrapf(err, "could not get pause process pid file path") + } + data, err := ioutil.ReadFile(pausePidPath) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return errors.Wrapf(err, "cannot read pause process pid file %s", 
pausePidPath) + } + pausePid, err := strconv.Atoi(string(data)) + if err != nil { + return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath) + } + if err := os.Remove(pausePidPath); err != nil { + return errors.Wrapf(err, "cannot delete pause pid file %s", pausePidPath) + } + syscall.Kill(pausePid, syscall.SIGKILL) + } + return nil +} + func (r *Runtime) migrate(ctx context.Context) error { runningContainers, err := r.GetRunningContainers() if err != nil { @@ -39,5 +72,5 @@ func (r *Runtime) migrate(ctx context.Context) error { } } - return nil + return stopPauseProcess() } diff --git a/libpod/runtime_migrate_unsupported.go b/libpod/runtime_migrate_unsupported.go new file mode 100644 index 000000000..1a9e46fdc --- /dev/null +++ b/libpod/runtime_migrate_unsupported.go @@ -0,0 +1,11 @@ +// +build !linux + +package libpod + +import ( + "context" +) + +func (r *Runtime) migrate(ctx context.Context) error { + return nil +} -- cgit v1.2.3-54-g00ecf From 53a76223ee5bded3be3e0ed957517513ad357a0e Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 9 May 2019 11:03:11 +0200 Subject: troubleshooting.md: add note about updating subuid/subgid Signed-off-by: Giuseppe Scrivano --- troubleshooting.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/troubleshooting.md b/troubleshooting.md index 08d79723a..64aec475e 100644 --- a/troubleshooting.md +++ b/troubleshooting.md @@ -247,6 +247,11 @@ would potentially allow one user to attack another user. You could also use the usermod program to assign UIDs to a user. +If you update either the /etc/subuid or /etc/subgid file, you need to +stop all running containers and kill the pause process. This is done +automatically by the `system migrate` command, which can also be used +to stop all the containers and kill the pause process. + ``` usermod --add-subuids 200000-201000 --add-subgids 200000-201000 johndoe grep johndoe /etc/subuid /etc/subgid -- cgit v1.2.3-54-g00ecf