diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/adapter/runtime.go | 73 | ||||
-rw-r--r-- | pkg/adapter/runtime_remote.go | 7 | ||||
-rw-r--r-- | pkg/rootless/rootless.go | 9 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.c | 41 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 277 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 45 | ||||
-rw-r--r-- | pkg/spec/createconfig.go | 3 |
7 files changed, 115 insertions, 340 deletions
diff --git a/pkg/adapter/runtime.go b/pkg/adapter/runtime.go index dd51c7233..182a04044 100644 --- a/pkg/adapter/runtime.go +++ b/pkg/adapter/runtime.go @@ -8,7 +8,6 @@ import ( "io" "io/ioutil" "os" - "strconv" "text/template" "github.com/containers/buildah" @@ -124,38 +123,6 @@ func (r *LocalRuntime) Export(name string, path string) error { if err != nil { return errors.Wrapf(err, "error looking up container %q", name) } - if os.Geteuid() != 0 { - state, err := ctr.State() - if err != nil { - return errors.Wrapf(err, "cannot read container state %q", ctr.ID()) - } - if state == libpod.ContainerStateRunning || state == libpod.ContainerStatePaused { - data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile) - if err != nil { - return errors.Wrapf(err, "cannot read conmon PID file %q", ctr.Config().ConmonPidFile) - } - conmonPid, err := strconv.Atoi(string(data)) - if err != nil { - return errors.Wrapf(err, "cannot parse PID %q", data) - } - became, ret, err := rootless.JoinDirectUserAndMountNS(uint(conmonPid)) - if err != nil { - return err - } - if became { - os.Exit(ret) - } - } else { - became, ret, err := rootless.BecomeRootInUserNS() - if err != nil { - return err - } - if became { - os.Exit(ret) - } - } - } - return ctr.Export(path) } @@ -343,46 +310,6 @@ func (r *LocalRuntime) HealthCheck(c *cliconfig.HealthCheckValues) (libpod.Healt return r.Runtime.HealthCheck(c.InputArgs[0]) } -// JoinOrCreateRootlessPod joins the specified pod if it is running or it creates a new user namespace -// if the pod is stopped -func (r *LocalRuntime) JoinOrCreateRootlessPod(pod *Pod) (bool, int, error) { - if os.Geteuid() == 0 { - return false, 0, nil - } - opts := rootless.Opts{ - Argument: pod.ID(), - } - - inspect, err := pod.Inspect() - if err != nil { - return false, 0, err - } - for _, ctr := range inspect.Containers { - prevCtr, err := r.LookupContainer(ctr.ID) - if err != nil { - return false, -1, err - } - s, err := prevCtr.State() - if err != nil { - return false, -1, err - } - if s != libpod.ContainerStateRunning && s != libpod.ContainerStatePaused { - continue - } - data, err := ioutil.ReadFile(prevCtr.Config().ConmonPidFile) - if err != nil { - return false, -1, errors.Wrapf(err, "cannot read conmon PID file %q", prevCtr.Config().ConmonPidFile) - } - conmonPid, err := strconv.Atoi(string(data)) - if err != nil { - return false, -1, errors.Wrapf(err, "cannot parse PID %q", data) - } - return rootless.JoinDirectUserAndMountNSWithOpts(uint(conmonPid), &opts) - } - - return rootless.BecomeRootInUserNSWithOpts(&opts) -} - // Events is a wrapper to libpod to obtain libpod/podman events func (r *LocalRuntime) Events(c *cliconfig.EventValues) error { var ( diff --git a/pkg/adapter/runtime_remote.go b/pkg/adapter/runtime_remote.go index c3a4f322d..978c9ffd8 100644 --- a/pkg/adapter/runtime_remote.go +++ b/pkg/adapter/runtime_remote.go @@ -755,13 +755,6 @@ func (r *LocalRuntime) HealthCheck(c *cliconfig.HealthCheckValues) (libpod.Healt return -1, libpod.ErrNotImplemented } -// JoinOrCreateRootlessPod joins the specified pod if it is running or it creates a new user namespace -// if the pod is stopped -func (r *LocalRuntime) JoinOrCreateRootlessPod(pod *Pod) (bool, int, error) { - // Nothing to do in the remote case - return true, 0, nil -} - // Events monitors libpod/podman events over a varlink connection func (r *LocalRuntime) Events(c *cliconfig.EventValues) error { var more uint64 diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go deleted file mode 100644 index a531e43ce..000000000 --- a/pkg/rootless/rootless.go +++ /dev/null @@ -1,9 +0,0 @@ -package rootless - -// Opts allows to customize how re-execing to a rootless process is done -type Opts struct { - // Argument overrides the arguments on the command line - // for the re-execed process. The process in the namespace - // must use rootless.Argument() to read its value. - Argument string -} diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 2e2c3acac..9cb79ed4d 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -13,10 +13,36 @@ #include <sys/wait.h> #include <string.h> #include <stdbool.h> +#include <sys/types.h> +#include <sys/prctl.h> +#include <dirent.h> static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces"; static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone"; +static int n_files; + +static void __attribute__((constructor)) init() +{ + DIR *d; + + /* Store how many FDs were open before the Go runtime kicked in. */ + d = opendir ("/proc/self/fd"); + if (d) + { + struct dirent *ent; + + for (ent = readdir (d); ent; ent = readdir (d)) + { + int fd = atoi (ent->d_name); + if (fd > n_files && fd != dirfd (d)) + n_files = fd; + } + closedir (d); + } +} + + static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) { @@ -133,12 +159,25 @@ reexec_userns_join (int userns, int mountns) pid = fork (); if (pid < 0) fprintf (stderr, "cannot fork: %s\n", strerror (errno)); + if (pid) - return pid; + { + /* We passed down these fds, close them. */ + int f; + for (f = 3; f < n_files; f++) + close (f); + return pid; + } setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); + if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0) + { + fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (setns (userns, 0) < 0) { fprintf (stderr, "cannot setns: %s\n", strerror (errno)); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 0be0e08bf..1d1b1713d 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -46,25 +46,6 @@ func IsRootless() bool { return isRootless } -var ( - skipStorageSetup = false -) - -// SetSkipStorageSetup tells the runtime to not setup containers/storage -func SetSkipStorageSetup(v bool) { - skipStorageSetup = v -} - -// SkipStorageSetup tells if we should skip the containers/storage setup -func SkipStorageSetup() bool { - return skipStorageSetup -} - -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return os.Getenv("_CONTAINERS_ROOTLESS_ARG") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") @@ -104,51 +85,86 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) return nil } -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil +func readUserNs(path string) (string, error) { + b := make([]byte, 256) + _, err := syscall.Readlink(path, b) + if err != nil { + return "", err } + return string(b), nil +} - userNS, err := getUserNSForPid(pid) +func readUserNsFd(fd uintptr) (string, error) { + return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) +} + +func getParentUserNs(fd uintptr) (uintptr, error) { + const nsGetParent = 0xb702 + ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) + if errno != 0 { + return 0, errno + } + return (uintptr)(unsafe.Pointer(ret)), nil +} + +// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process +// Each container creates a new user namespace where the runtime runs. The current process in the container +// might have created new user namespaces that are child of the initial namespace we created. +// This function finds the initial namespace created for the container that is a child of the current namespace. +// +// current ns +// / \ +// TARGET -> a [other containers] +// / +// b +// / +// NS READ USING THE PID -> c +func getUserNSFirstChild(fd uintptr) (*os.File, error) { + currentNS, err := readUserNs("/proc/self/ns/user") if err != nil { - return false, -1, err + return nil, err } - defer userNS.Close() - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") + ns, err := readUserNsFd(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") } - if preserveFDs > 0 { - for fd := 3; fd < 3+preserveFDs; fd++ { - // These fds were passed down to the runtime. Close them - // and not interfere - os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close() - } + if ns == currentNS { + return nil, errors.New("process running in the same user namespace") } - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } + for { + nextFd, err := getParentUserNs(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot get parent user namespace") + } - return true, int(ret), nil -} + ns, err = readUserNsFd(nextFd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") + } -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount -// namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts -// with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { - return JoinDirectUserAndMountNSWithOpts(pid, nil) + if ns == currentNS { + syscall.Close(int(nextFd)) + + // Drop O_CLOEXEC for the fd. + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) + if errno != 0 { + syscall.Close(int(fd)) + return nil, errno + } + + return os.NewFile(fd, "userns child"), nil + } + syscall.Close(int(fd)) + fd = nextFd + } } -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount +// namespace of the specified PID without looking up its parent. Useful to join directly +// the conmon process. +func JoinUserAndMountNS(pid uint) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } @@ -165,39 +181,11 @@ func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { } defer userNS.Close() - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), C.int(mountNS.Fd())) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") - } - - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } - - return true, int(ret), nil -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil - } - - userNS, err := getUserNSForPath(path) + fd, err := getUserNSFirstChild(userNS.Fd()) if err != nil { return false, -1, err } - defer userNS.Close() - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) + pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd())) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } @@ -213,16 +201,8 @@ func JoinNSPath(path string) (bool, int, error) { // BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child -// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration. -func BecomeRootInUserNS() (bool, int, error) { - return BecomeRootInUserNSWithOpts(nil) -} - -// BecomeRootInUserNSWithOpts re-exec podman in a new userNS. It returns whether podman was -// re-execute into a new user namespace and the return code from the re-executed podman process. -// If podman was re-executed the caller needs to propagate the error code returned by the child // process. -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { +func BecomeRootInUserNS() (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -241,12 +221,6 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { defer w.Close() defer w.Write([]byte("0")) - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - pidC := C.reexec_in_user_namespace(C.int(r.Fd())) pid := int(pidC) if pid < 0 { @@ -328,112 +302,3 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { return true, int(ret), nil } - -func readUserNs(path string) (string, error) { - b := make([]byte, 256) - _, err := syscall.Readlink(path, b) - if err != nil { - return "", err - } - return string(b), nil -} - -func readUserNsFd(fd uintptr) (string, error) { - return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) -} - -func getOwner(fd uintptr) (uintptr, error) { - const nsGetUserns = 0xb701 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getParentUserNs(fd uintptr) (uintptr, error) { - const nsGetParent = 0xb702 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getUserNSForPath(path string) (*os.File, error) { - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - defer u.Close() - fd, err := getOwner(u.Fd()) - if err != nil { - return nil, err - } - - return getUserNSFirstChild(fd) -} - -func getUserNSForPid(pid uint) (*os.File, error) { - path := fmt.Sprintf("/proc/%d/ns/user", pid) - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - - return getUserNSFirstChild(u.Fd()) -} - -// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process -// Each container creates a new user namespace where the runtime runs. The current process in the container -// might have created new user namespaces that are child of the initial namespace we created. -// This function finds the initial namespace created for the container that is a child of the current namespace. -// -// current ns -// / \ -// TARGET -> a [other containers] -// / -// b -// / -// NS READ USING THE PID -> c -func getUserNSFirstChild(fd uintptr) (*os.File, error) { - currentNS, err := readUserNs("/proc/self/ns/user") - if err != nil { - return nil, err - } - - ns, err := readUserNsFd(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - if ns == currentNS { - return nil, errors.New("process running in the same user namespace") - } - - for { - nextFd, err := getParentUserNs(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot get parent user namespace") - } - - ns, err = readUserNsFd(nextFd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - - if ns == currentNS { - syscall.Close(int(nextFd)) - - // Drop O_CLOEXEC for the fd. - _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) - if errno != 0 { - syscall.Close(int(fd)) - return nil, errno - } - - return os.NewFile(fd, "userns child"), nil - } - syscall.Close(int(fd)) - fd = nextFd - } -} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index e01d7855c..47b5dd7cc 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -19,54 +19,15 @@ func BecomeRootInUserNS() (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } -// BecomeRootInUserNS is a stub function that always returns false and an -// error on unsupported OS's -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { return -1 } -// SetSkipStorageSetup tells the runtime to not setup containers/storage -func SetSkipStorageSetup(bool) { -} - -// SkipStorageSetup tells if we should skip the containers/storage setup -func SkipStorageSetup() bool { - return false -} - -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts +// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts // with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } - -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return "" -} diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 0a12e3dca..a433fc16d 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -12,7 +12,6 @@ import ( "github.com/containers/image/manifest" "github.com/containers/libpod/libpod" "github.com/containers/libpod/pkg/namespaces" - "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage" "github.com/containers/storage/pkg/stringid" "github.com/cri-o/ocicni/pkg/ocicni" @@ -271,7 +270,7 @@ func (c *CreateConfig) GetVolumeMounts(specMounts []spec.Mount) ([]spec.Mount, e func (c *CreateConfig) GetVolumesFrom() error { var options string - if rootless.SkipStorageSetup() { + if os.Geteuid() != 0 { return nil } |