diff options
Diffstat (limited to 'pkg/rootless')
-rw-r--r-- | pkg/rootless/rootless.go | 9 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.c | 41 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 263 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 36 |
4 files changed, 114 insertions, 235 deletions
diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go deleted file mode 100644 index a531e43ce..000000000 --- a/pkg/rootless/rootless.go +++ /dev/null @@ -1,9 +0,0 @@ -package rootless - -// Opts allows to customize how re-execing to a rootless process is done -type Opts struct { - // Argument overrides the arguments on the command line - // for the re-execed process. The process in the namespace - // must use rootless.Argument() to read its value. - Argument string -} diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 2e2c3acac..9cb79ed4d 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -13,10 +13,36 @@ #include <sys/wait.h> #include <string.h> #include <stdbool.h> +#include <sys/types.h> +#include <sys/prctl.h> +#include <dirent.h> static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces"; static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone"; +static int n_files; + +static void __attribute__((constructor)) init() +{ + DIR *d; + + /* Store how many FDs were open before the Go runtime kicked in. */ + d = opendir ("/proc/self/fd"); + if (d) + { + struct dirent *ent; + + for (ent = readdir (d); ent; ent = readdir (d)) + { + int fd = atoi (ent->d_name); + if (fd > n_files && fd != dirfd (d)) + n_files = fd; + } + closedir (d); + } +} + + static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) { @@ -133,12 +159,25 @@ reexec_userns_join (int userns, int mountns) pid = fork (); if (pid < 0) fprintf (stderr, "cannot fork: %s\n", strerror (errno)); + if (pid) - return pid; + { + /* We passed down these fds, close them. */ + int f; + for (f = 3; f < n_files; f++) + close (f); + return pid; + } setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); + if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0) + { + fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + if (setns (userns, 0) < 0) { fprintf (stderr, "cannot setns: %s\n", strerror (errno)); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index c753228f1..1d1b1713d 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -46,11 +46,6 @@ func IsRootless() bool { return isRootless } -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return os.Getenv("_CONTAINERS_ROOTLESS_ARG") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") @@ -90,51 +85,86 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) return nil } -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil +func readUserNs(path string) (string, error) { + b := make([]byte, 256) + _, err := syscall.Readlink(path, b) + if err != nil { + return "", err + } + return string(b), nil +} + +func readUserNsFd(fd uintptr) (string, error) { + return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) +} + +func getParentUserNs(fd uintptr) (uintptr, error) { + const nsGetParent = 0xb702 + ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) + if errno != 0 { + return 0, errno } + return (uintptr)(unsafe.Pointer(ret)), nil +} - userNS, err := getUserNSForPid(pid) +// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process +// Each container creates a new user namespace where the runtime runs. The current process in the container +// might have created new user namespaces that are child of the initial namespace we created. +// This function finds the initial namespace created for the container that is a child of the current namespace. +// +// current ns +// / \ +// TARGET -> a [other containers] +// / +// b +// / +// NS READ USING THE PID -> c +func getUserNSFirstChild(fd uintptr) (*os.File, error) { + currentNS, err := readUserNs("/proc/self/ns/user") if err != nil { - return false, -1, err + return nil, err } - defer userNS.Close() - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") + ns, err := readUserNsFd(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") } - if preserveFDs > 0 { - for fd := 3; fd < 3+preserveFDs; fd++ { - // These fds were passed down to the runtime. Close them - // and not interfere - os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close() - } + if ns == currentNS { + return nil, errors.New("process running in the same user namespace") } - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } + for { + nextFd, err := getParentUserNs(fd) + if err != nil { + return nil, errors.Wrapf(err, "cannot get parent user namespace") + } - return true, int(ret), nil -} + ns, err = readUserNsFd(nextFd) + if err != nil { + return nil, errors.Wrapf(err, "cannot read user namespace") + } -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount -// namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts -// with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { - return JoinDirectUserAndMountNSWithOpts(pid, nil) + if ns == currentNS { + syscall.Close(int(nextFd)) + + // Drop O_CLOEXEC for the fd. + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) + if errno != 0 { + syscall.Close(int(fd)) + return nil, errno + } + + return os.NewFile(fd, "userns child"), nil + } + syscall.Close(int(fd)) + fd = nextFd + } } -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount +// namespace of the specified PID without looking up its parent. Useful to join directly +// the conmon process. +func JoinUserAndMountNS(pid uint) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } @@ -151,39 +181,11 @@ func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { } defer userNS.Close() - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), C.int(mountNS.Fd())) - if int(pidC) < 0 { - return false, -1, errors.Errorf("cannot re-exec process") - } - - ret := C.reexec_in_user_namespace_wait(pidC) - if ret < 0 { - return false, -1, errors.New("error waiting for the re-exec process") - } - - return true, int(ret), nil -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { - return false, -1, nil - } - - userNS, err := getUserNSForPath(path) + fd, err := getUserNSFirstChild(userNS.Fd()) if err != nil { return false, -1, err } - defer userNS.Close() - - pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1) + pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd())) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } @@ -199,16 +201,8 @@ func JoinNSPath(path string) (bool, int, error) { // BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child -// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration. -func BecomeRootInUserNS() (bool, int, error) { - return BecomeRootInUserNSWithOpts(nil) -} - -// BecomeRootInUserNSWithOpts re-exec podman in a new userNS. It returns whether podman was -// re-execute into a new user namespace and the return code from the re-executed podman process. -// If podman was re-executed the caller needs to propagate the error code returned by the child // process. -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { +func BecomeRootInUserNS() (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -227,12 +221,6 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { defer w.Close() defer w.Write([]byte("0")) - if opts != nil && opts.Argument != "" { - if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil { - return false, -1, err - } - } - pidC := C.reexec_in_user_namespace(C.int(r.Fd())) pid := int(pidC) if pid < 0 { @@ -314,112 +302,3 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { return true, int(ret), nil } - -func readUserNs(path string) (string, error) { - b := make([]byte, 256) - _, err := syscall.Readlink(path, b) - if err != nil { - return "", err - } - return string(b), nil -} - -func readUserNsFd(fd uintptr) (string, error) { - return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) -} - -func getOwner(fd uintptr) (uintptr, error) { - const nsGetUserns = 0xb701 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getParentUserNs(fd uintptr) (uintptr, error) { - const nsGetParent = 0xb702 - ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -func getUserNSForPath(path string) (*os.File, error) { - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - defer u.Close() - fd, err := getOwner(u.Fd()) - if err != nil { - return nil, err - } - - return getUserNSFirstChild(fd) -} - -func getUserNSForPid(pid uint) (*os.File, error) { - path := fmt.Sprintf("/proc/%d/ns/user", pid) - u, err := os.Open(path) - if err != nil { - return nil, errors.Wrapf(err, "cannot open %s", path) - } - - return getUserNSFirstChild(u.Fd()) -} - -// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process -// Each container creates a new user namespace where the runtime runs. The current process in the container -// might have created new user namespaces that are child of the initial namespace we created. -// This function finds the initial namespace created for the container that is a child of the current namespace. -// -// current ns -// / \ -// TARGET -> a [other containers] -// / -// b -// / -// NS READ USING THE PID -> c -func getUserNSFirstChild(fd uintptr) (*os.File, error) { - currentNS, err := readUserNs("/proc/self/ns/user") - if err != nil { - return nil, err - } - - ns, err := readUserNsFd(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - if ns == currentNS { - return nil, errors.New("process running in the same user namespace") - } - - for { - nextFd, err := getParentUserNs(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot get parent user namespace") - } - - ns, err = readUserNsFd(nextFd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - - if ns == currentNS { - syscall.Close(int(nextFd)) - - // Drop O_CLOEXEC for the fd. - _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0) - if errno != 0 { - syscall.Close(int(fd)) - return nil, errno - } - - return os.NewFile(fd, "userns child"), nil - } - syscall.Close(int(fd)) - fd = nextFd - } -} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 24009610a..47b5dd7cc 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -19,45 +19,15 @@ func BecomeRootInUserNS() (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } -// BecomeRootInUserNS is a stub function that always returns false and an -// error on unsupported OS's -func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - // GetRootlessUID returns the UID of the user in the parent userNS func GetRootlessUID() int { return -1 } -// JoinNS re-exec podman in a new userNS and join the user namespace of the specified -// PID. -func JoinNS(pid uint, preserveFDs int) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the -// specified path. -func JoinNSPath(path string) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and -// mount namespace of the specified PID without looking up its parent. Useful to join -// directly the conmon process. -func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) { - return false, -1, errors.New("this function is not supported on this os") -} - -// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount +// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly -// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts +// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts // with a default configuration. -func JoinDirectUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } - -// Argument returns the argument that was set for the rootless session. -func Argument() string { - return "" -} |