summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2019-05-21 22:08:08 +0200
committerGitHub <noreply@github.com>2019-05-21 22:08:08 +0200
commit536fd6adddd9693649457441bd4721c3a774ff0b (patch)
treee4837741f40bc2a6476d6416bfc5566dcd672061
parent8f43d08d966b9519011cb8ca86e2db9f1f18dfcb (diff)
parent53a76223ee5bded3be3e0ed957517513ad357a0e (diff)
downloadpodman-536fd6adddd9693649457441bd4721c3a774ff0b.tar.gz
podman-536fd6adddd9693649457441bd4721c3a774ff0b.tar.bz2
podman-536fd6adddd9693649457441bd4721c3a774ff0b.zip
Merge pull request #3084 from giuseppe/rootless-pause-process
rootless: use a pause process to keep namespaces alive
-rw-r--r--cmd/podman/main_local.go38
-rw-r--r--cmd/podman/mount.go2
-rw-r--r--libpod/runtime.go18
-rw-r--r--libpod/runtime_migrate.go44
-rw-r--r--libpod/runtime_migrate_unsupported.go11
-rw-r--r--pkg/rootless/rootless_linux.c412
-rw-r--r--pkg/rootless/rootless_linux.go57
-rw-r--r--pkg/rootless/rootless_unsupported.go4
-rw-r--r--pkg/util/utils_supported.go10
-rw-r--r--pkg/util/utils_windows.go6
-rw-r--r--troubleshooting.md5
11 files changed, 524 insertions, 83 deletions
diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go
index 7452965a2..5af05a11e 100644
--- a/cmd/podman/main_local.go
+++ b/cmd/podman/main_local.go
@@ -16,6 +16,7 @@ import (
"github.com/containers/libpod/cmd/podman/libpodruntime"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/tracing"
+ "github.com/containers/libpod/pkg/util"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -113,6 +114,35 @@ func setupRootless(cmd *cobra.Command, args []string) error {
MainGlobalOpts,
remoteclient,
}
+
+ pausePidPath, err := util.GetRootlessPauseProcessPidPath()
+ if err != nil {
+ return errors.Wrapf(err, "could not get pause process pid file path")
+ }
+
+ data, err := ioutil.ReadFile(pausePidPath)
+ if err != nil && !os.IsNotExist(err) {
+ return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
+ }
+ if err == nil {
+ pausePid, err := strconv.Atoi(string(data))
+ if err != nil {
+ return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath)
+ }
+ became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "")
+ if err != nil {
+ logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath)
+ logrus.Errorf("you can use `system migrate` to recreate the pause process")
+ logrus.Errorf(err.Error())
+ os.Exit(1)
+ }
+ if became {
+ os.Exit(ret)
+ }
+ }
+
+ // if there is no pid file, try to join existing containers, and create a pause process.
+
runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd)
if err != nil {
return errors.Wrapf(err, "could not get runtime")
@@ -127,20 +157,20 @@ func setupRootless(cmd *cobra.Command, args []string) error {
var became bool
var ret int
if len(ctrs) == 0 {
- became, ret, err = rootless.BecomeRootInUserNS()
+ became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
} else {
for _, ctr := range ctrs {
data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile)
if err != nil {
logrus.Errorf(err.Error())
- os.Exit(1)
+ continue
}
conmonPid, err := strconv.Atoi(string(data))
if err != nil {
logrus.Errorf(err.Error())
- os.Exit(1)
+ continue
}
- became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid))
+ became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath)
if err == nil {
break
}
diff --git a/cmd/podman/mount.go b/cmd/podman/mount.go
index 7c9150d1b..662fb0a28 100644
--- a/cmd/podman/mount.go
+++ b/cmd/podman/mount.go
@@ -78,7 +78,7 @@ func mountCmd(c *cliconfig.MountValues) error {
return fmt.Errorf("cannot mount using driver %s in rootless mode", driver)
}
- became, ret, err := rootless.BecomeRootInUserNS()
+ became, ret, err := rootless.BecomeRootInUserNS("")
if err != nil {
return err
}
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 18e9dfeb3..def7ba639 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -892,7 +892,11 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
// we will need to access the storage.
if os.Geteuid() != 0 {
aliveLock.Unlock()
- became, ret, err := rootless.BecomeRootInUserNS()
+ pausePid, err := util.GetRootlessPauseProcessPidPath()
+ if err != nil {
+ return errors.Wrapf(err, "could not get pause process pid file path")
+ }
+ became, ret, err := rootless.BecomeRootInUserNS(pausePid)
if err != nil {
return err
}
@@ -966,18 +970,6 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
runtime.valid = true
if runtime.doMigrate {
- if os.Geteuid() != 0 {
- aliveLock.Unlock()
- locked = false
-
- became, ret, err := rootless.BecomeRootInUserNS()
- if err != nil {
- return err
- }
- if became {
- os.Exit(ret)
- }
- }
if err := runtime.migrate(ctx); err != nil {
return err
}
diff --git a/libpod/runtime_migrate.go b/libpod/runtime_migrate.go
index 0bb8e952f..e32e6edf6 100644
--- a/libpod/runtime_migrate.go
+++ b/libpod/runtime_migrate.go
@@ -1,13 +1,47 @@
+// +build linux
+
package libpod
import (
"context"
+ "fmt"
+ "io/ioutil"
+ "os"
"path/filepath"
+ "strconv"
+ "syscall"
+ "github.com/containers/libpod/pkg/rootless"
+ "github.com/containers/libpod/pkg/util"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
+// stopPauseProcess removes the rootless pause process pid file and kills the
+// process it refers to.  It is a no-op when podman is not running rootless or
+// when the pid file does not exist.  An error is returned when the pid file
+// cannot be located, read, parsed or deleted, or when the process cannot be
+// killed.
+func stopPauseProcess() error {
+	if !rootless.IsRootless() {
+		return nil
+	}
+	pausePidPath, err := util.GetRootlessPauseProcessPidPath()
+	if err != nil {
+		return errors.Wrapf(err, "could not get pause process pid file path")
+	}
+	data, err := ioutil.ReadFile(pausePidPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
+	}
+	pausePid, err := strconv.Atoi(string(data))
+	if err != nil {
+		return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath)
+	}
+	// kill(2) treats 0 and negative pids as "process group" / "every process",
+	// so never pass such a value through from the file.
+	if pausePid <= 0 {
+		return errors.Errorf("invalid pause process pid %d in %s", pausePid, pausePidPath)
+	}
+	if err := os.Remove(pausePidPath); err != nil {
+		return errors.Wrapf(err, "cannot delete pause pid file %s", pausePidPath)
+	}
+	// A pause process that is already gone (ESRCH) is the desired end state;
+	// any other failure is reported instead of being silently dropped.
+	if err := syscall.Kill(pausePid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
+		return errors.Wrapf(err, "cannot kill pause process %d", pausePid)
+	}
+	return nil
+}
+
func (r *Runtime) migrate(ctx context.Context) error {
runningContainers, err := r.GetRunningContainers()
if err != nil {
@@ -21,7 +55,7 @@ func (r *Runtime) migrate(ctx context.Context) error {
logrus.Infof("stopping all containers")
for _, ctr := range runningContainers {
- logrus.Infof("stopping %s", ctr.ID())
+ fmt.Printf("stopped %s\n", ctr.ID())
if err := ctr.Stop(); err != nil {
return errors.Wrapf(err, "cannot stop container %s", ctr.ID())
}
@@ -38,11 +72,5 @@ func (r *Runtime) migrate(ctx context.Context) error {
}
}
- for _, ctr := range runningContainers {
- if err := ctr.Start(ctx, true); err != nil {
- logrus.Errorf("error restarting container %s", ctr.ID())
- }
- }
-
- return nil
+ return stopPauseProcess()
}
diff --git a/libpod/runtime_migrate_unsupported.go b/libpod/runtime_migrate_unsupported.go
new file mode 100644
index 000000000..1a9e46fdc
--- /dev/null
+++ b/libpod/runtime_migrate_unsupported.go
@@ -0,0 +1,11 @@
+// +build !linux
+
+package libpod
+
+import (
+ "context"
+)
+
+// migrate is the implementation used when the build is not for linux (see the
+// build constraint at the top of this file): it does nothing and returns nil.
+func (r *Runtime) migrate(ctx context.Context) error {
+ return nil
+}
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 1d32b1adb..a08cfd36a 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -17,38 +17,29 @@
#include <sys/prctl.h>
#include <dirent.h>
#include <sys/select.h>
+#include <stdio.h>
+
+#ifndef RENAME_NOREPLACE
+# define RENAME_NOREPLACE (1 << 0)
+
+/* Fallback definition, compiled only when RENAME_NOREPLACE was not already
+   defined by the included headers.  When the renameat2 syscall number is
+   known it invokes the syscall directly; otherwise it fails with ENOSYS and
+   returns -1.  */
+int renameat2 (int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags)
+{
+# ifdef __NR_renameat2
+ return (int) syscall (__NR_renameat2, olddirfd, oldpath, newdirfd, newpath, flags);
+# else
+ /* no way to implement it atomically. */
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+#endif
static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces";
static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone";
static int open_files_max_fd;
fd_set open_files_set;
-
-static void __attribute__((constructor)) init()
-{
- DIR *d;
-
- /* Store how many FDs were open before the Go runtime kicked in. */
- d = opendir ("/proc/self/fd");
- if (d)
- {
- struct dirent *ent;
-
- FD_ZERO (&open_files_set);
- for (ent = readdir (d); ent; ent = readdir (d))
- {
- int fd = atoi (ent->d_name);
- if (fd != dirfd (d))
- {
- if (fd > open_files_max_fd)
- open_files_max_fd = fd;
- FD_SET (fd, &open_files_set);
- }
- }
- closedir (d);
- }
-}
-
+static uid_t rootless_uid_init;
static int
syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid)
@@ -62,14 +53,18 @@ syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid)
return (int) syscall (__NR_setresgid, rgid, egid, sgid);
}
-static int
-syscall_clone (unsigned long flags, void *child_stack)
+uid_t
+rootless_uid ()
{
-#if defined(__s390__) || defined(__CRIS__)
- return (int) syscall (__NR_clone, child_stack, flags);
-#else
- return (int) syscall (__NR_clone, flags, child_stack);
-#endif
+ return rootless_uid_init;
+}
+
+/* Body of the pause process: set the process name to "podman pause" and then
+   sleep in pause () forever.  The loop has no exit, so this never returns.  */
+static void
+do_pause ()
+{
+ prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL);
+ /* pause () returns whenever a signal handler runs; go back to sleep.  */
+ while (1)
+ pause ();
}
static char **
@@ -84,7 +79,10 @@ get_cmd_line_args (pid_t pid)
int i, argc = 0;
char **argv;
- sprintf (path, "/proc/%d/cmdline", pid);
+ if (pid)
+ sprintf (path, "/proc/%d/cmdline", pid);
+ else
+ strcpy (path, "/proc/self/cmdline");
fd = open (path, O_RDONLY);
if (fd < 0)
return NULL;
@@ -99,7 +97,10 @@ get_cmd_line_args (pid_t pid)
ret = read (fd, buffer + used, allocated - used);
while (ret < 0 && errno == EINTR);
if (ret < 0)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
if (ret == 0)
break;
@@ -109,11 +110,12 @@ get_cmd_line_args (pid_t pid)
{
allocated += 512;
char *tmp = realloc (buffer, allocated);
- if (buffer == NULL) {
- free(buffer);
- return NULL;
- }
- buffer=tmp;
+ if (buffer == NULL)
+ {
+ free (buffer);
+ return NULL;
+ }
+ buffer = tmp;
}
}
close (fd);
@@ -122,11 +124,17 @@ get_cmd_line_args (pid_t pid)
if (buffer[i] == '\0')
argc++;
if (argc == 0)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
argv = malloc (sizeof (char *) * (argc + 1));
if (argv == NULL)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
argc = 0;
argv[argc++] = buffer;
@@ -139,8 +147,296 @@ get_cmd_line_args (pid_t pid)
return argv;
}
+/* Tell whether init () may join the pause process namespaces directly instead
+   of going through a re-exec.
+
+   Returns false when built with DISABLE_JOIN_SHORTCUT, when the command line
+   of the current process cannot be read, or when any argument after argv[0]
+   that does not start with '-' is "mount", "search" or "system".  Returns
+   true otherwise.  */
+static bool
+can_use_shortcut ()
+{
+  int argc;
+  char **argv;
+  bool ret = true;
+
+#ifdef DISABLE_JOIN_SHORTCUT
+  return false;
+#endif
+
+  /* Pid 0 makes get_cmd_line_args read /proc/self/cmdline.  */
+  argv = get_cmd_line_args (0);
+  if (argv == NULL)
+    return false;
+
+  for (argc = 0; argv[argc]; argc++)
+    {
+      /* Skip the program name and anything that looks like an option.  */
+      if (argc == 0 || argv[argc][0] == '-')
+        continue;
+
+      if (strcmp (argv[argc], "mount") == 0
+          || strcmp (argv[argc], "search") == 0
+          || strcmp (argv[argc], "system") == 0)
+        {
+          ret = false;
+          break;
+        }
+    }
+
+  /* argv[0] points at the single buffer that backs every argument.  */
+  free (argv[0]);
+  free (argv);
+  return ret;
+}
+
+/* Constructor, run when the binary is loaded.  It does three things:
+
+   1. If _PODMAN_PAUSE is set and non-empty, the process is a pause process:
+      it blocks in do_pause () and never continues.
+   2. It records the file descriptors that are currently open in
+      open_files_set / open_files_max_fd.
+   3. Shortcut: if XDG_RUNTIME_DIR is set and can_use_shortcut () allows it,
+      it reads the pause process pid from $XDG_RUNTIME_DIR/libpod/pause.pid,
+      joins that process' user and mount namespaces, switches to uid/gid 0,
+      restores the working directory and stores the original effective uid in
+      rootless_uid_init.
+
+   Failures before the user namespace is joined silently abandon the shortcut;
+   failures after that point terminate the process.  */
+static void __attribute__((constructor)) init()
+{
+  const char *xdg_runtime_dir;
+  const char *pause;
+  DIR *d;
+
+  pause = getenv ("_PODMAN_PAUSE");
+  if (pause && pause[0])
+    {
+      do_pause ();
+      _exit (EXIT_FAILURE);
+    }
+
+  /* Store how many FDs were open before the Go runtime kicked in.  */
+  d = opendir ("/proc/self/fd");
+  if (d)
+    {
+      struct dirent *ent;
+
+      FD_ZERO (&open_files_set);
+      for (ent = readdir (d); ent; ent = readdir (d))
+        {
+          int fd = atoi (ent->d_name);
+          if (fd != dirfd (d))
+            {
+              if (fd > open_files_max_fd)
+                open_files_max_fd = fd;
+              FD_SET (fd, &open_files_set);
+            }
+        }
+      closedir (d);
+    }
+
+  /* Shortcut.  If we are able to join the pause pid file, do it now so we don't
+     need to re-exec.  */
+  xdg_runtime_dir = getenv ("XDG_RUNTIME_DIR");
+  if (xdg_runtime_dir && xdg_runtime_dir[0] && can_use_shortcut ())
+    {
+      int r;
+      int fd;
+      long pid;
+      char buf[12];
+      uid_t uid;
+      char path[PATH_MAX];
+      const char *const suffix = "/libpod/pause.pid";
+      char *cwd = getcwd (NULL, 0);
+
+      if (cwd == NULL)
+        {
+          fprintf (stderr, "error getting current working directory: %s\n", strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+
+      if (strlen (xdg_runtime_dir) >= PATH_MAX - strlen (suffix))
+        {
+          fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %s\n", strerror (ENAMETOOLONG));
+          _exit (EXIT_FAILURE);
+        }
+
+      sprintf (path, "%s%s", xdg_runtime_dir, suffix);
+      fd = open (path, O_RDONLY);
+      if (fd < 0)
+        {
+          free (cwd);
+          return;
+        }
+
+      /* Keep one byte for the terminator: strtol needs a NUL-terminated string.  */
+      r = read (fd, buf, sizeof (buf) - 1);
+      close (fd);
+      if (r < 0)
+        {
+          free (cwd);
+          return;
+        }
+      buf[r] = '\0';
+
+      pid = strtol (buf, NULL, 10);
+      if (pid <= 0 || pid == LONG_MAX)
+        {
+          free (cwd);
+          return;
+        }
+
+      /* Remember the uid before it is changed by joining the user namespace.  */
+      uid = geteuid ();
+
+      sprintf (path, "/proc/%ld/ns/user", pid);
+      fd = open (path, O_RDONLY);
+      if (fd < 0)
+        {
+          free (cwd);
+          return;
+        }
+      if (setns (fd, 0) < 0)
+        {
+          close (fd);
+          free (cwd);
+          return;
+        }
+      close (fd);
+
+      /* Errors here cannot be ignored as we already joined a ns.  */
+      sprintf (path, "/proc/%ld/ns/mnt", pid);
+      fd = open (path, O_RDONLY);
+      if (fd < 0)
+        {
+          fprintf (stderr, "cannot open %s: %s\n", path, strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+
+      r = setns (fd, 0);
+      if (r < 0)
+        {
+          fprintf (stderr, "cannot join mount namespace for %ld: %s\n", pid, strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+      close (fd);
+
+      if (syscall_setresgid (0, 0, 0) < 0)
+        {
+          fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+
+      if (syscall_setresuid (0, 0, 0) < 0)
+        {
+          fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+
+      if (chdir (cwd) < 0)
+        {
+          fprintf (stderr, "cannot chdir: %s\n", strerror (errno));
+          _exit (EXIT_FAILURE);
+        }
+
+      free (cwd);
+      rootless_uid_init = uid;
+    }
+}
+
+/* Invoke the raw clone syscall with FLAGS and CHILD_STACK and return its
+   result as an int.  On s390 and CRIS the child stack is passed as the first
+   syscall argument and the flags as the second; on every other architecture
+   the order is flags first, child stack second.  */
+static int
+syscall_clone (unsigned long flags, void *child_stack)
+{
+#if defined(__s390__) || defined(__CRIS__)
+ return (int) syscall (__NR_clone, child_stack, flags);
+#else
+ return (int) syscall (__NR_clone, flags, child_stack);
+#endif
+}
+
+/* Create a detached pause process and record its pid in PAUSE_PID_FILE_PATH.
+
+   The calling process forks an intermediate child and waits on a pipe.  The
+   intermediate child starts a new session, forks the pause process, writes
+   its pid to a temporary file and renames that file to PAUSE_PID_FILE_PATH
+   with RENAME_NOREPLACE, so an already existing pid file is never
+   overwritten.  The pause process re-executes ARGV[0] with _PODMAN_PAUSE=1
+   set, falling back to do_pause () if the exec fails.
+
+   Returns 0 in the calling process once the pid file is in place, -1 if the
+   intermediate child failed.  If the initial pipe or fork fails, the calling
+   process itself exits.  */
+static int
+create_pause_process (const char *pause_pid_file_path, char **argv)
+{
+  int r, p[2];
+
+  if (pipe (p) < 0)
+    _exit (EXIT_FAILURE);
+
+  r = fork ();
+  if (r < 0)
+    _exit (EXIT_FAILURE);
+
+  if (r)
+    {
+      char b;
+
+      close (p[1]);
+      /* Block until we write the pid file.  */
+      do
+        r = read (p[0], &b, 1);
+      while (r < 0 && errno == EINTR);
+      close (p[0]);
+
+      /* EOF without a byte means the intermediate child died early.  */
+      return r == 1 && b == '0' ? 0 : -1;
+    }
+  else
+    {
+      int fd;
+      pid_t pid;
+
+      close (p[0]);
+
+      setsid ();
+      pid = fork ();
+      /* Check the result of this fork, not the stale value of the first one.  */
+      if (pid < 0)
+        _exit (EXIT_FAILURE);
+
+      if (pid)
+        {
+          char pid_str[12];
+          char *tmp_file_path = NULL;
+
+          sprintf (pid_str, "%d", pid);
+
+          /* On failure asprintf leaves the pointer undefined, so test the
+             return value rather than the pointer alone.  */
+          if (asprintf (&tmp_file_path, "%s.XXXXXX", pause_pid_file_path) < 0
+              || tmp_file_path == NULL)
+            {
+              kill (pid, SIGKILL);
+              _exit (EXIT_FAILURE);
+            }
+
+          fd = mkstemp (tmp_file_path);
+          if (fd < 0)
+            {
+              kill (pid, SIGKILL);
+              _exit (EXIT_FAILURE);
+            }
+
+          do
+            r = write (fd, pid_str, strlen (pid_str));
+          while (r < 0 && errno == EINTR);
+          if (r < 0)
+            {
+              /* Do not leave the temporary file behind.  */
+              unlink (tmp_file_path);
+              kill (pid, SIGKILL);
+              _exit (EXIT_FAILURE);
+            }
+          close (fd);
+
+          /* There can be another process at this point trying to configure the user namespace and the pause
+             process, do not override the pid file if it already exists.  */
+          if (renameat2 (AT_FDCWD, tmp_file_path, AT_FDCWD, pause_pid_file_path, RENAME_NOREPLACE) < 0)
+            {
+              unlink (tmp_file_path);
+              kill (pid, SIGKILL);
+              _exit (EXIT_FAILURE);
+            }
+
+          /* Tell the original process that the pid file is ready.  */
+          do
+            r = write (p[1], "0", 1);
+          while (r < 0 && errno == EINTR);
+          close (p[1]);
+
+          _exit (EXIT_SUCCESS);
+        }
+      else
+        {
+          int null;
+
+          close (p[1]);
+
+          /* Detach the standard streams from the caller's terminal.  */
+          null = open ("/dev/null", O_RDWR);
+          if (null >= 0)
+            {
+              dup2 (null, 0);
+              dup2 (null, 1);
+              dup2 (null, 2);
+              close (null);
+            }
+
+          for (fd = 3; fd < open_files_max_fd + 16; fd++)
+            close (fd);
+
+          setenv ("_PODMAN_PAUSE", "1", 1);
+          /* The first variadic argument is the new program's argv[0].  */
+          execlp (argv[0], argv[0], (char *) NULL);
+
+          /* If the execve fails, then do the pause here.  */
+          do_pause ();
+          _exit (EXIT_FAILURE);
+        }
+    }
+}
+
int
-reexec_userns_join (int userns, int mountns)
+reexec_userns_join (int userns, int mountns, char *pause_pid_file_path)
{
pid_t ppid = getpid ();
char uid[16];
@@ -200,7 +496,7 @@ reexec_userns_join (int userns, int mountns)
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
- close (userns);
+ close (mountns);
if (syscall_setresgid (0, 0, 0) < 0)
{
@@ -221,6 +517,12 @@ reexec_userns_join (int userns, int mountns)
}
free (cwd);
+ if (pause_pid_file_path && pause_pid_file_path[0] != '\0')
+ {
+ /* We ignore errors here as we didn't create the namespace anyway. */
+ create_pause_process (pause_pid_file_path, argv);
+ }
+
execvp (argv[0], argv);
_exit (EXIT_FAILURE);
@@ -246,7 +548,7 @@ check_proc_sys_userns_file (const char *path)
}
int
-reexec_in_user_namespace (int ready)
+reexec_in_user_namespace (int ready, char *pause_pid_file_path)
{
int ret;
pid_t pid;
@@ -328,29 +630,45 @@ reexec_in_user_namespace (int ready)
fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
- close (ready);
- if (b != '1')
+ if (b != '0')
_exit (EXIT_FAILURE);
if (syscall_setresgid (0, 0, 0) < 0)
{
fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
if (syscall_setresuid (0, 0, 0) < 0)
{
fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
if (chdir (cwd) < 0)
{
fprintf (stderr, "cannot chdir: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
free (cwd);
+ if (pause_pid_file_path && pause_pid_file_path[0] != '\0')
+ {
+ if (create_pause_process (pause_pid_file_path, argv) < 0)
+ {
+ write (ready, "2", 1);
+ _exit (EXIT_FAILURE);
+ }
+ }
+
+ do
+ ret = write (ready, "0", 1) < 0;
+ while (ret < 0 && errno == EINTR);
+ close (ready);
+
execvp (argv[0], argv);
_exit (EXIT_FAILURE);
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index 2c99f41a4..ddf881368 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -22,9 +22,12 @@ import (
)
/*
-extern int reexec_in_user_namespace(int ready);
+#cgo remoteclient CFLAGS: -DDISABLE_JOIN_SHORTCUT
+#include <stdlib.h>
+extern uid_t rootless_uid();
+extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid);
-extern int reexec_userns_join(int userns, int mountns);
+extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
*/
import "C"
@@ -45,6 +48,12 @@ var (
// IsRootless tells us if we are running in rootless mode
func IsRootless() bool {
isRootlessOnce.Do(func() {
+ rootlessUIDInit := int(C.rootless_uid())
+ if rootlessUIDInit != 0 {
+ // This happens if we joined the user+mount namespace as part of
+ os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done")
+ os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit))
+ }
isRootless = os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != ""
})
return isRootless
@@ -168,11 +177,14 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) {
// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process.
-func JoinUserAndMountNS(pid uint) (bool, int, error) {
+func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
return false, -1, nil
}
+ cPausePid := C.CString(pausePid)
+ defer C.free(unsafe.Pointer(cPausePid))
+
userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid))
if err != nil {
return false, -1, err
@@ -189,7 +201,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) {
if err != nil {
return false, -1, err
}
- pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()))
+ pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid)
if int(pidC) < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
}
@@ -206,7 +218,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) {
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
-func BecomeRootInUserNS() (bool, int, error) {
+func BecomeRootInUserNS(pausePid string) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
@@ -214,18 +226,23 @@ func BecomeRootInUserNS() (bool, int, error) {
return false, 0, nil
}
+ cPausePid := C.CString(pausePid)
+ defer C.free(unsafe.Pointer(cPausePid))
+
runtime.LockOSThread()
defer runtime.UnlockOSThread()
- r, w, err := os.Pipe()
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
if err != nil {
return false, -1, err
}
+ r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child")
+
defer r.Close()
defer w.Close()
defer w.Write([]byte("0"))
- pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
+ pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pid := int(pidC)
if pid < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
@@ -280,11 +297,35 @@ func BecomeRootInUserNS() (bool, int, error) {
}
}
- _, err = w.Write([]byte("1"))
+ _, err = w.Write([]byte("0"))
if err != nil {
return false, -1, errors.Wrapf(err, "write to sync pipe")
}
+ b := make([]byte, 1, 1)
+ _, err = w.Read(b)
+ if err != nil {
+ return false, -1, errors.Wrapf(err, "read from sync pipe")
+ }
+
+ if b[0] == '2' {
+ // We have lost the race for writing the PID file, as probably another
+ // process created a namespace and wrote the PID.
+ // Try to join it.
+ data, err := ioutil.ReadFile(pausePid)
+ if err == nil {
+ pid, err := strconv.ParseUint(string(data), 10, 0)
+ if err == nil {
+ return JoinUserAndMountNS(uint(pid), "")
+ }
+ }
+ return false, -1, errors.Wrapf(err, "error setting up the process")
+ }
+
+ if b[0] != '0' {
+ return false, -1, errors.Wrapf(err, "error setting up the process")
+ }
+
c := make(chan os.Signal, 1)
signals := []os.Signal{}
diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go
index 47b5dd7cc..42f8f3aec 100644
--- a/pkg/rootless/rootless_unsupported.go
+++ b/pkg/rootless/rootless_unsupported.go
@@ -15,7 +15,7 @@ func IsRootless() bool {
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration.
-func BecomeRootInUserNS() (bool, int, error) {
+func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
@@ -28,6 +28,6 @@ func GetRootlessUID() int {
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts
// with a default configuration.
-func JoinUserAndMountNS(pid uint) (bool, int, error) {
+func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
diff --git a/pkg/util/utils_supported.go b/pkg/util/utils_supported.go
index 8b98658c2..3d9140a23 100644
--- a/pkg/util/utils_supported.go
+++ b/pkg/util/utils_supported.go
@@ -82,3 +82,13 @@ func GetRootlessRuntimeDir() (string, error) {
}
return rootlessRuntimeDir, nil
}
+
+// GetRootlessPauseProcessPidPath returns the location of the file in which the
+// pid of the pause process is stored: "libpod/pause.pid" below the rootless
+// runtime directory.  Any error from looking up that directory is returned
+// unchanged together with an empty path.
+func GetRootlessPauseProcessPidPath() (string, error) {
+	dir, err := GetRootlessRuntimeDir()
+	if err == nil {
+		return filepath.Join(dir, "libpod", "pause.pid"), nil
+	}
+	return "", err
+}
diff --git a/pkg/util/utils_windows.go b/pkg/util/utils_windows.go
index b33733da9..3faa6f10c 100644
--- a/pkg/util/utils_windows.go
+++ b/pkg/util/utils_windows.go
@@ -15,3 +15,9 @@ func GetRootlessRuntimeDir() (string, error) {
func IsCgroup2UnifiedMode() (bool, error) {
return false, errors.New("this function is not implemented for windows")
}
+
+// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for
+// the pause process.  It is not implemented for windows: it always returns an
+// empty path together with an error.
+func GetRootlessPauseProcessPidPath() (string, error) {
+ return "", errors.New("this function is not implemented for windows")
+}
diff --git a/troubleshooting.md b/troubleshooting.md
index 08d79723a..64aec475e 100644
--- a/troubleshooting.md
+++ b/troubleshooting.md
@@ -247,6 +247,11 @@ would potentially allow one user to attack another user.
You could also use the usermod program to assign UIDs to a user.
+If you update either the /etc/subuid or /etc/subgid file, you need to
+stop all running containers and kill the pause process for the change
+to take effect. The `system migrate` command does both automatically:
+it stops all the containers and kills the pause process.
+
```
usermod --add-subuids 200000-201000 --add-subgids 200000-201000 johndoe
grep johndoe /etc/subuid /etc/subgid