diff options
-rw-r--r-- | libpod/container_api.go | 6 | ||||
-rw-r--r-- | libpod/oci.go | 14 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 95 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 7 | ||||
-rw-r--r-- | test/e2e/rootless_test.go | 31 |
5 files changed, 138 insertions, 15 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 56947eb3a..5df7e2f0e 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -335,11 +335,7 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user string) e
 
 	execCmd, err := c.runtime.ociRuntime.execContainer(c, cmd, capList, env, tty, hostUser, sessionID)
 	if err != nil {
-		return errors.Wrapf(err, "error creating exec command for container %s", c.ID())
-	}
-
-	if err := execCmd.Start(); err != nil {
-		return errors.Wrapf(err, "error starting exec command for container %s", c.ID())
+		return errors.Wrapf(err, "error exec %s", c.ID())
 	}
 
 	pidFile := c.execPidPath(sessionID)
diff --git a/libpod/oci.go b/libpod/oci.go
index da054eceb..4f0fbe8e9 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -682,15 +682,23 @@ func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty
 
 	execCmd := exec.Command(r.path, args...)
 	if rootless.IsRootless() {
-		args = append([]string{"--preserve-credentials", "-U", "-t", fmt.Sprintf("%d", c.state.PID), r.path}, args...)
-		// using nsenter might not be correct if the target PID joined a different user namespace.
-		// A better way would be to retrieve the parent ns (NS_GET_PARENT) until it is a child of the current namespace.
+		args = append([]string{"--preserve-credentials", "--user=/proc/self/fd/3", r.path}, args...)
+		f, err := rootless.GetUserNSForPid(uint(c.state.PID))
+		if err != nil {
+			return nil, err
+		}
 		execCmd = exec.Command("nsenter", args...)
+		execCmd.ExtraFiles = append(execCmd.ExtraFiles, f)
 	}
 	execCmd.Stdout = os.Stdout
 	execCmd.Stderr = os.Stderr
 	execCmd.Stdin = os.Stdin
 	execCmd.Env = append(execCmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+
+	if err := execCmd.Start(); err != nil {
+		return nil, errors.Wrapf(err, "cannot start container %s", c.ID())
+	}
+
 	return execCmd, nil
 }
 
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index 26f4b0b18..904d22ee2 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -9,9 +9,11 @@ import (
 	"os/exec"
 	gosignal "os/signal"
 	"os/user"
+	"path/filepath"
 	"runtime"
 	"strconv"
 	"syscall"
+	"unsafe"
 
 	"github.com/containers/storage/pkg/idtools"
 	"github.com/docker/docker/pkg/signal"
@@ -186,3 +188,96 @@ func BecomeRootInUserNS() (bool, int, error) {
 
 	return true, int(ret), nil
 }
+
+// readUserNs returns the target of the symbolic link at path.
+func readUserNs(path string) (string, error) {
+	b := make([]byte, 256)
+	n, err := syscall.Readlink(path, b)
+	if err != nil {
+		return "", err
+	}
+	return string(b[:n]), nil
+}
+
+// readUserNsFd returns the link target of /proc/self/fd/<fd>.
+func readUserNsFd(fd uintptr) (string, error) {
+	return readUserNs(filepath.Join("/proc/self/fd", fmt.Sprintf("%d", fd)))
+}
+
+// getParentUserNs issues the NS_GET_PARENT ioctl on fd and returns the resulting FD.
+func getParentUserNs(fd uintptr) (uintptr, error) {
+	const nsGetParent = 0xb702
+	ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
+	if errno != 0 {
+		return 0, errno
+	}
+	return (uintptr)(unsafe.Pointer(ret)), nil
+}
+
+// GetUserNSForPid returns an open FD, with close-on-exec cleared, for the user namespace that is a direct
+// child of the current one and is either the namespace of the process pid or one of its ancestors.
+// Each container creates a new user namespace where the runtime runs. A process in the container might
+// have created new user namespaces below it, so the function walks up the parents starting from pid's one.
+//
+//                                     current ns
+//                                       /     \
+//                           TARGET ->  a   [other containers]
+//                                     /
+//                                    b
+//                                   /
+//        NS READ USING THE PID ->  c
+func GetUserNSForPid(pid uint) (*os.File, error) {
+	currentNS, err := readUserNs("/proc/self/ns/user")
+	if err != nil {
+		return nil, err
+	}
+
+	path := filepath.Join("/proc", fmt.Sprintf("%d", pid), "ns/user")
+	// Use a raw FD rather than an *os.File: the finalizer of a discarded
+	// *os.File would close the descriptor while it is still in use.
+	rawFd, err := syscall.Open(path, syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	if err != nil {
+		return nil, errors.Wrapf(err, "cannot open %s", path)
+	}
+
+	fd := uintptr(rawFd)
+	ns, err := readUserNsFd(fd)
+	if err != nil {
+		syscall.Close(int(fd))
+		return nil, errors.Wrapf(err, "cannot read user namespace")
+	}
+	if ns == currentNS {
+		syscall.Close(int(fd))
+		return nil, errors.New("process running in the same user namespace")
+	}
+
+	for {
+		nextFd, err := getParentUserNs(fd)
+		if err != nil {
+			syscall.Close(int(fd))
+			return nil, errors.Wrapf(err, "cannot get parent user namespace")
+		}
+
+		ns, err = readUserNsFd(nextFd)
+		if err != nil {
+			syscall.Close(int(nextFd))
+			syscall.Close(int(fd))
+			return nil, errors.Wrapf(err, "cannot read user namespace")
+		}
+
+		if ns == currentNS {
+			syscall.Close(int(nextFd))
+
+			// Drop O_CLOEXEC for the fd.
+			_, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0)
+			if errno != 0 {
+				syscall.Close(int(fd))
+				return nil, errno
+			}
+
+			return os.NewFile(fd, "userns child"), nil
+		}
+		syscall.Close(int(fd))
+		fd = nextFd
+	}
+}
diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go
index 11dfd5aa4..93b04adfd 100644
--- a/pkg/rootless/rootless_unsupported.go
+++ b/pkg/rootless/rootless_unsupported.go
@@ -3,6 +3,8 @@
 package rootless
 
 import (
+	"os"
+
 	"github.com/pkg/errors"
 )
 
@@ -30,3 +32,8 @@ func SetSkipStorageSetup(bool) {
 func SkipStorageSetup() bool {
 	return false
 }
+
+// GetUserNSForPid is not supported on this OS and always returns an error.
+func GetUserNSForPid(pid uint) (*os.File, error) {
+	return nil, errors.New("this function is not supported on this os")
+}
diff --git a/test/e2e/rootless_test.go b/test/e2e/rootless_test.go
index 8813d040d..195f403e1 100644
--- a/test/e2e/rootless_test.go
+++ b/test/e2e/rootless_test.go
@@ -6,11 +6,26 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"syscall"
 
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 )
 
+// canExec reports whether the kernel supports the NS_GET_PARENT ioctl that rootless exec relies on.
+func canExec() bool {
+	const nsGetParent = 0xb702
+
+	u, err := os.Open("/proc/self/ns/user")
+	if err != nil {
+		return false
+	}
+	defer u.Close()
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, u.Fd(), uintptr(nsGetParent), 0)
+	return errno != syscall.ENOTTY
+}
+
 var _ = Describe("Podman rootless", func() {
 	var (
 		tempdir string
@@ -100,18 +115,20 @@ var _ = Describe("Podman rootless", func() {
 		allArgs = append(allArgs, "--rootfs", mountPath, "echo", "hello")
 		cmd := podmanTest.PodmanAsUser(allArgs, 1000, 1000, env)
 		cmd.WaitWithDefaultTimeout()
-		Expect(cmd.LineInOutputContains("hello")).To(BeTrue())
 		Expect(cmd.ExitCode()).To(Equal(0))
+		Expect(cmd.LineInOutputContains("hello")).To(BeTrue())
 
-		allArgsD := append([]string{"run", "-d"}, args...)
-		allArgsD = append(allArgsD, "--rootfs", mountPath, "sleep", "1d")
-		cmd = podmanTest.PodmanAsUser(allArgsD, 1000, 1000, env)
+		allArgs = append([]string{"run", "-d"}, args...)
+		allArgs = append(allArgs, "--security-opt", "seccomp=unconfined", "--rootfs", mountPath, "unshare", "-r", "unshare", "-r", "top")
+		cmd = podmanTest.PodmanAsUser(allArgs, 1000, 1000, env)
 		cmd.WaitWithDefaultTimeout()
 		Expect(cmd.ExitCode()).To(Equal(0))
-		cid := cmd.OutputToStringArray()[0]
 
-		allArgsE := []string{"exec", cid, "echo", "hello"}
-		cmd = podmanTest.PodmanAsUser(allArgsE, 1000, 1000, env)
+		if !canExec() {
+			Skip("ioctl(NS_GET_PARENT) not supported.")
+		}
+
+		cmd = podmanTest.PodmanAsUser([]string{"exec", "-l", "echo", "hello"}, 1000, 1000, env)
 		cmd.WaitWithDefaultTimeout()
 		Expect(cmd.ExitCode()).To(Equal(0))
 		Expect(cmd.LineInOutputContains("hello")).To(BeTrue())