aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cmd/podman/main_local.go44
-rw-r--r--libpod/runtime_ctr.go2
-rw-r--r--pkg/rootless/rootless_linux.c149
-rw-r--r--pkg/rootless/rootless_linux.go133
-rw-r--r--pkg/rootless/rootless_unsupported.go14
5 files changed, 259 insertions, 83 deletions
diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go
index 5af05a11e..b4f21bd0c 100644
--- a/cmd/podman/main_local.go
+++ b/cmd/podman/main_local.go
@@ -4,11 +4,9 @@ package main
import (
"context"
- "io/ioutil"
"log/syslog"
"os"
"runtime/pprof"
- "strconv"
"strings"
"syscall"
@@ -120,18 +118,10 @@ func setupRootless(cmd *cobra.Command, args []string) error {
return errors.Wrapf(err, "could not get pause process pid file path")
}
- data, err := ioutil.ReadFile(pausePidPath)
- if err != nil && !os.IsNotExist(err) {
- return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
- }
- if err == nil {
- pausePid, err := strconv.Atoi(string(data))
- if err != nil {
- return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath)
- }
- became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "")
+ if _, err := os.Stat(pausePidPath); err == nil {
+ became, ret, err := rootless.TryJoinFromFilePaths("", false, []string{pausePidPath})
if err != nil {
- logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath)
+ logrus.Errorf("cannot join pause process. You may need to remove %s and stop all containers", pausePidPath)
logrus.Errorf("you can use `system migrate` to recreate the pause process")
logrus.Errorf(err.Error())
os.Exit(1)
@@ -154,28 +144,13 @@ func setupRootless(cmd *cobra.Command, args []string) error {
logrus.Errorf(err.Error())
os.Exit(1)
}
- var became bool
- var ret int
- if len(ctrs) == 0 {
- became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
- } else {
- for _, ctr := range ctrs {
- data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile)
- if err != nil {
- logrus.Errorf(err.Error())
- continue
- }
- conmonPid, err := strconv.Atoi(string(data))
- if err != nil {
- logrus.Errorf(err.Error())
- continue
- }
- became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath)
- if err == nil {
- break
- }
- }
+
+ paths := []string{}
+ for _, ctr := range ctrs {
+ paths = append(paths, ctr.Config().ConmonPidFile)
}
+
+ became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
if err != nil {
logrus.Errorf(err.Error())
os.Exit(1)
@@ -185,6 +160,7 @@ func setupRootless(cmd *cobra.Command, args []string) error {
}
return nil
}
+
func setRLimits() error {
rlimits := new(syscall.Rlimit)
rlimits.Cur = 1048576
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index c7758055f..cba8bdb1a 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -167,7 +167,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
}()
if rootless.IsRootless() && ctr.config.ConmonPidFile == "" {
- ctr.config.ConmonPidFile = filepath.Join(ctr.config.StaticDir, "conmon.pid")
+ ctr.config.ConmonPidFile = filepath.Join(ctr.state.RunDir, "conmon.pid")
}
// Go through named volumes and add them.
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 098ca7830..2356882e7 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -69,6 +69,19 @@ rootless_gid ()
static void
do_pause ()
{
+  int i;
+  struct sigaction act;
+  int const sig[] =
+    {
+      SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, SIGPOLL,
+      SIGPROF, SIGVTALRM, SIGXCPU, SIGXFSZ, 0
+    };
+
+  /* Zero the whole structure first: sigaction() also reads sa_mask and
+     sa_flags, which would otherwise hold indeterminate stack contents.  */
+  memset (&act, 0, sizeof (act));
+  sigemptyset (&act.sa_mask);
+  act.sa_handler = SIG_IGN;
+
+  /* Ignore every signal in the zero-terminated list above.  */
+  for (i = 0; sig[i]; i++)
+    sigaction (sig[i], &act, NULL);
+
prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL);
while (1)
pause ();
@@ -333,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack)
#endif
}
+/* Wait for the child PID and translate its termination status into an
+   exit code.
+
+   PID and OPTIONS are passed unchanged to waitpid(2); the call is retried
+   when it is interrupted by a signal.
+
+   Returns the exit status if the child exited normally, 128 + the signal
+   number if it was killed by a signal, and -1 if waitpid failed, if no
+   child has changed state yet (possible only when OPTIONS contains
+   WNOHANG), or if the status is neither of the above.  */
+int
+reexec_in_user_namespace_wait (int pid, int options)
+{
+  pid_t p;
+  int status;
+
+  do
+    p = waitpid (pid, &status, options);
+  while (p < 0 && errno == EINTR);
+
+  /* p == 0 means WNOHANG found nothing to report: STATUS was not written,
+     so it must not be inspected.  */
+  if (p <= 0)
+    return -1;
+
+  if (WIFEXITED (status))
+    return WEXITSTATUS (status);
+  if (WIFSIGNALED (status))
+    return 128 + WTERMSIG (status);
+  return -1;
+}
+
+
static int
create_pause_process (const char *pause_pid_file_path, char **argv)
{
@@ -356,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
while (r < 0 && errno == EINTR);
close (p[0]);
+ reexec_in_user_namespace_wait(r, 0);
+
return r == 1 && b == '0' ? 0 : -1;
}
else
@@ -560,8 +595,51 @@ check_proc_sys_userns_file (const char *path)
}
}
+/* Copy the whole content of FILE_TO_READ to the descriptor OUTFD.
+
+   Reads and writes interrupted by a signal (EINTR) are retried, and short
+   writes are continued until the current chunk has been fully written.
+   OUTFD is left open.
+
+   Returns 0 on success and a negative value if the file cannot be opened
+   or if a read or a write fails.  */
+static int
+copy_file_to_fd (const char *file_to_read, int outfd)
+{
+  char chunk[512];
+  int result = 0;
+  int infd = open (file_to_read, O_RDONLY);
+
+  if (infd < 0)
+    return infd;
+
+  while (result == 0)
+    {
+      ssize_t nread, nwritten, done;
+
+      do
+        nread = read (infd, chunk, sizeof chunk);
+      while (nread < 0 && errno == EINTR);
+
+      if (nread < 0)
+        result = nread;
+      /* Stop on error as well as on end of file.  */
+      if (nread <= 0)
+        break;
+
+      /* Flush the chunk; a single write may consume only part of it.  */
+      for (done = 0; done < nread; done += nwritten)
+        {
+          do
+            nwritten = write (outfd, &chunk[done], nread - done);
+          while (nwritten < 0 && errno == EINTR);
+
+          if (nwritten < 0)
+            {
+              result = nwritten;
+              break;
+            }
+        }
+    }
+
+  close (infd);
+  return result;
+}
+
+
int
-reexec_in_user_namespace (int ready, char *pause_pid_file_path)
+reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
{
int ret;
pid_t pid;
@@ -574,6 +652,7 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
char *listen_pid = NULL;
bool do_socket_activation = false;
char *cwd = getcwd (NULL, 0);
+ sigset_t sigset, oldsigset;
if (cwd == NULL)
{
@@ -584,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
listen_pid = getenv("LISTEN_PID");
listen_fds = getenv("LISTEN_FDS");
- if (listen_pid != NULL && listen_fds != NULL) {
- if (strtol(listen_pid, NULL, 10) == getpid()) {
- do_socket_activation = true;
+ if (listen_pid != NULL && listen_fds != NULL)
+ {
+ if (strtol(listen_pid, NULL, 10) == getpid())
+ do_socket_activation = true;
}
- }
sprintf (uid, "%d", geteuid ());
sprintf (gid, "%d", getegid ());
@@ -621,6 +700,22 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
return pid;
}
+ if (sigfillset (&sigset) < 0)
+ {
+ fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+ if (sigdelset (&sigset, SIGCHLD) < 0)
+ {
+ fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+ if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0)
+ {
+ fprintf (stderr, "cannot block signals: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
argv = get_cmd_line_args (ppid);
if (argv == NULL)
{
@@ -628,11 +723,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
_exit (EXIT_FAILURE);
}
- if (do_socket_activation) {
- char s[32];
- sprintf (s, "%d", getpid());
- setenv ("LISTEN_PID", s, true);
- }
+ if (do_socket_activation)
+ {
+ char s[32];
+ sprintf (s, "%d", getpid());
+ setenv ("LISTEN_PID", s, true);
+ }
setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
@@ -685,27 +781,20 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
while (ret < 0 && errno == EINTR);
close (ready);
- execvp (argv[0], argv);
-
- _exit (EXIT_FAILURE);
-}
-
-int
-reexec_in_user_namespace_wait (int pid)
-{
- pid_t p;
- int status;
+  /* Put back the signal mask that was saved in OLDSIGSET when the signals
+     were blocked earlier in this function.  */
+  if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0)
+    {
+      fprintf (stderr, "cannot restore signal mask: %s\n", strerror (errno));
+      _exit (EXIT_FAILURE);
+    }
- do
- p = waitpid (pid, &status, 0);
- while (p < 0 && errno == EINTR);
+ if (file_to_read && file_to_read[0])
+ {
+ ret = copy_file_to_fd (file_to_read, outputfd);
+ close (outputfd);
+ _exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ }
- if (p < 0)
- return -1;
+ execvp (argv[0], argv);
- if (WIFEXITED (status))
- return WEXITSTATUS (status);
- if (WIFSIGNALED (status))
- return 128 + WTERMSIG (status);
- return -1;
+ _exit (EXIT_FAILURE);
}
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index 9132c0fe5..d302b1777 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -26,8 +26,8 @@ import (
#include <stdlib.h>
extern uid_t rootless_uid();
extern uid_t rootless_gid();
-extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
-extern int reexec_in_user_namespace_wait(int pid);
+extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
+extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
*/
import "C"
@@ -194,10 +194,24 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) {
}
}
-// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
+// enableLinger runs `loginctl enable-linger` for the rootless UID so that
+// processes can be left running longer than the user session. It does nothing
+// when pausePid is empty, i.e. when no pause pid file is going to be written.
+// A failure of the command is only logged as a warning.
+func enableLinger(pausePid string) {
+	if pausePid != "" {
+		uid := fmt.Sprintf("%d", GetRootlessUID())
+		if err := exec.Command("loginctl", "enable-linger", uid).Run(); err != nil {
+			logrus.Warnf("cannot run `loginctl enable-linger` for the current user: %v", err)
+		}
+	}
+}
+
+// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process.
-func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
+func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
+ enableLinger(pausePid)
+
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
return false, -1, nil
}
@@ -226,7 +240,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.Errorf("cannot re-exec process")
}
- ret := C.reexec_in_user_namespace_wait(pidC)
+ ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process")
}
@@ -234,11 +248,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return true, int(ret), nil
}
-// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
-// into a new user namespace and the return code from the re-executed podman process.
-// If podman was re-executed the caller needs to propagate the error code returned by the child
-// process.
-func BecomeRootInUserNS(pausePid string) (bool, int, error) {
+func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
@@ -249,6 +259,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))
+ cFileToRead := C.CString(fileToRead)
+ defer C.free(unsafe.Pointer(cFileToRead))
+ var fileOutputFD C.int
+ if fileOutput != nil {
+ fileOutputFD = C.int(fileOutput.Fd())
+ }
+
runtime.LockOSThread()
defer runtime.UnlockOSThread()
@@ -262,7 +279,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
defer w.Close()
defer w.Write([]byte("0"))
- pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
+ pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pid := int(pidC)
if pid < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
@@ -328,6 +345,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return false, -1, errors.Wrapf(err, "read from sync pipe")
}
+ if fileOutput != nil {
+ return true, 0, nil
+ }
+
if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID.
@@ -336,7 +357,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
if err == nil {
pid, err := strconv.ParseUint(string(data), 10, 0)
if err == nil {
- return JoinUserAndMountNS(uint(pid), "")
+ return joinUserAndMountNS(uint(pid), "")
}
}
return false, -1, errors.Wrapf(err, "error setting up the process")
@@ -368,10 +389,96 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
}
}()
- ret := C.reexec_in_user_namespace_wait(pidC)
+ ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process")
}
return true, int(ret), nil
}
+
+// BecomeRootInUserNS re-executes podman in a new user namespace.
+// The first result reports whether podman was re-executed into a new user
+// namespace, the second one is the return code of the re-executed podman
+// process. When podman was re-executed, the caller must propagate the return
+// code of the child process.
+func BecomeRootInUserNS(pausePid string) (bool, int, error) {
+	// Lingering is requested first; enableLinger is a no-op for an empty pausePid.
+	enableLinger(pausePid)
+
+	// No file has to be read from the new namespace, hence no output file either.
+	const noFileToRead = ""
+	return becomeRootInUserNS(pausePid, noFileToRead, nil)
+}
+
+// TryJoinFromFilePaths attempts to join the user and mount namespaces of a
+// process whose PID is stored in one of the pid files listed in paths.
+// This is useful when there are already running containers and we
+// don't have a pause process yet: the conmon pid files can be used to
+// join the namespaces of the conmon processes.
+//
+// The files are tried in order and the first one that yields a valid PID is
+// used; the namespaces of that PID are joined and pausePidPath is passed on
+// to the join. If paths is empty, BecomeRootInUserNS(pausePidPath) is
+// called instead. If no file yields a PID, the error of the last attempt is
+// returned.
+//
+// If needNewNamespace is set, each file is read from a temporary user
+// namespace. This is needed for containers that run with a different
+// uidmap, where the unprivileged user has no way to read a file owned by
+// root in the container.
+func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
+	if len(paths) == 0 {
+		return BecomeRootInUserNS(pausePidPath)
+	}
+
+	readPid := pidFromFile
+	if needNewNamespace {
+		readPid = pidFromFileInNewUserNS
+	}
+
+	var lastErr error
+	for _, path := range paths {
+		pid, err := readPid(path)
+		if err != nil {
+			lastErr = err
+			continue
+		}
+		return joinUserAndMountNS(uint(pid), pausePidPath)
+	}
+	return false, 0, lastErr
+}
+
+// parsePidFileData converts the content of the pid file path into a PID.
+// Non-positive values are rejected so that they are never converted to uint.
+func parsePidFileData(data []byte, path string) (int, error) {
+	pid, err := strconv.Atoi(string(data))
+	if err != nil {
+		return 0, errors.Wrapf(err, "cannot parse file %s", path)
+	}
+	if pid <= 0 {
+		return 0, errors.Errorf("invalid pid %d in file %s", pid, path)
+	}
+	return pid, nil
+}
+
+// pidFromFile reads the pid file at path directly from the current process.
+func pidFromFile(path string) (int, error) {
+	data, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+	return parsePidFileData(data, path)
+}
+
+// pidFromFileInNewUserNS reads the pid file at path through a helper process
+// started by becomeRootInUserNS, which sends the file content back over a
+// socket pair. Being a separate function, its descriptors are closed and the
+// helper is reaped once per file instead of piling up in the caller's loop.
+func pidFromFileInNewUserNS(path string) (int, error) {
+	fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
+	if err != nil {
+		return 0, err
+	}
+
+	r := os.NewFile(uintptr(fds[0]), "read file")
+	w := os.NewFile(uintptr(fds[1]), "write file")
+
+	if _, _, err := becomeRootInUserNS("", path, w); err != nil {
+		w.Close()
+		r.Close()
+		return 0, err
+	}
+
+	// The helper owns its own copy of the write end; ours is not needed anymore.
+	w.Close()
+	defer func() {
+		r.Close()
+		// Reap the helper process.
+		C.reexec_in_user_namespace_wait(-1, 0)
+	}()
+
+	b := make([]byte, 32)
+	n, err := r.Read(b)
+	if err != nil {
+		return 0, errors.Wrapf(err, "cannot read %s", path)
+	}
+	return parsePidFileData(b[:n], path)
+}
diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go
index 221baff97..c063adee5 100644
--- a/pkg/rootless/rootless_unsupported.go
+++ b/pkg/rootless/rootless_unsupported.go
@@ -29,10 +29,14 @@ func GetRootlessGID() int {
return -1
}
-// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
-// namespace of the specified PID without looking up its parent. Useful to join directly
-// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts
-// with a default configuration.
-func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
+// TryJoinFromFilePaths attempts to join the namespaces of the processes whose
+// PIDs are stored in the pid files listed in paths.
+// This is useful when there are already running containers and we
+// don't have a pause process yet: the conmon pid files can be used
+// to join the namespaces of the conmon processes.
+// If needNewNamespace is set, each file is read from a temporary user
+// namespace. This is needed for containers that run with a
+// different uidmap, where the unprivileged user has no way to read a
+// file owned by root in the container.
+func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}