summary refs log tree commit diff
path: root/pkg/rootless
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/rootless')
-rw-r--r-- pkg/rootless/rootless.go 9
-rw-r--r-- pkg/rootless/rootless_linux.c 41
-rw-r--r-- pkg/rootless/rootless_linux.go 263
-rw-r--r-- pkg/rootless/rootless_unsupported.go 36
4 files changed, 114 insertions, 235 deletions
diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go
deleted file mode 100644
index a531e43ce..000000000
--- a/pkg/rootless/rootless.go
+++ /dev/null
@@ -1,9 +0,0 @@
-package rootless
-
-// Opts allows to customize how re-execing to a rootless process is done
-type Opts struct {
- // Argument overrides the arguments on the command line
- // for the re-execed process. The process in the namespace
- // must use rootless.Argument() to read its value.
- Argument string
-}
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 2e2c3acac..9cb79ed4d 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -13,10 +13,36 @@
#include <sys/wait.h>
#include <string.h>
#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <dirent.h>
static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces";
static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone";
+static int n_files;
+
+/*
+ * init is registered as a constructor.  It scans /proc/self/fd and stores in
+ * n_files the highest descriptor number it finds, ignoring the descriptor that
+ * backs the directory stream itself.  If /proc/self/fd cannot be opened,
+ * n_files keeps its zero-initialized value.
+ *
+ * NOTE(review): n_files is a maximum descriptor number, not a count.  The
+ * `f < n_files` close loop in reexec_userns_join therefore stops one short of
+ * that descriptor - confirm this is intended.
+ */
+static void __attribute__((constructor)) init()
+{
+ DIR *d;
+
+ /* Record the highest FD number that was open before the Go runtime kicked in. */
+ d = opendir ("/proc/self/fd");
+ if (d)
+ {
+ struct dirent *ent;
+
+ for (ent = readdir (d); ent; ent = readdir (d))
+ {
+ /* Non-numeric entries such as "." and ".." parse as 0 and are never selected. */
+ int fd = atoi (ent->d_name);
+ if (fd > n_files && fd != dirfd (d))
+ n_files = fd;
+ }
+ closedir (d);
+ }
+}
+
+
static int
syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid)
{
@@ -133,12 +159,25 @@ reexec_userns_join (int userns, int mountns)
pid = fork ();
if (pid < 0)
fprintf (stderr, "cannot fork: %s\n", strerror (errno));
+
if (pid)
- return pid;
+ {
+ /* We passed down these fds, close them. */
+ int f;
+ for (f = 3; f < n_files; f++)
+ close (f);
+ return pid;
+ }
setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
+ if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0)
+ {
+ fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
if (setns (userns, 0) < 0)
{
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index c753228f1..1d1b1713d 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -46,11 +46,6 @@ func IsRootless() bool {
return isRootless
}
-// Argument returns the argument that was set for the rootless session.
-func Argument() string {
- return os.Getenv("_CONTAINERS_ROOTLESS_ARG")
-}
-
// GetRootlessUID returns the UID of the user in the parent userNS
func GetRootlessUID() int {
uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
@@ -90,51 +85,86 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap)
return nil
}
-// JoinNS re-exec podman in a new userNS and join the user namespace of the specified
-// PID.
-func JoinNS(pid uint, preserveFDs int) (bool, int, error) {
- if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
- return false, -1, nil
+// readUserNs returns the target of the symbolic link at path. In this file it
+// is called with /proc/self/ns/user and /proc/self/fd/<n>, and the results are
+// only compared with each other to tell whether two handles name the same
+// user namespace.
+//
+// Only the bytes actually written by readlink are returned: readlink does not
+// NUL-terminate its output, so converting the whole 256-byte buffer would
+// append padding NUL bytes to the name.
+func readUserNs(path string) (string, error) {
+	b := make([]byte, 256)
+	n, err := syscall.Readlink(path, b)
+	if err != nil {
+		return "", err
+	}
+	return string(b[:n]), nil
+}
+
+// readUserNsFd resolves the symbolic link /proc/self/fd/<fd> through
+// readUserNs and returns its target, i.e. the name of whatever the open
+// descriptor fd refers to.
+func readUserNsFd(fd uintptr) (string, error) {
+ return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd))
+}
+
+// getParentUserNs performs an ioctl with request 0xb702 (named nsGetParent
+// here) on fd and returns the ioctl's result as a uintptr. Callers in this
+// file treat that result as a new file descriptor and close it themselves.
+// A non-zero errno from the syscall is returned as the error.
+func getParentUserNs(fd uintptr) (uintptr, error) {
+ const nsGetParent = 0xb702
+ ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
+ if errno != 0 {
+ return 0, errno
}
+ // NOTE(review): ret is already a uintptr, so the round trip through
+ // unsafe.Pointer leaves the value unchanged - consider returning ret directly.
+ return (uintptr)(unsafe.Pointer(ret)), nil
+}
- userNS, err := getUserNSForPid(pid)
+// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process
+// Each container creates a new user namespace where the runtime runs. The current process in the container
+// might have created new user namespaces that are child of the initial namespace we created.
+// This function finds the initial namespace created for the container that is a child of the current namespace.
+//
+// current ns
+// / \
+// TARGET -> a [other containers]
+// /
+// b
+// /
+// NS READ USING THE PID -> c
+func getUserNSFirstChild(fd uintptr) (*os.File, error) {
+ currentNS, err := readUserNs("/proc/self/ns/user")
if err != nil {
- return false, -1, err
+ return nil, err
}
- defer userNS.Close()
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1)
- if int(pidC) < 0 {
- return false, -1, errors.Errorf("cannot re-exec process")
+ ns, err := readUserNsFd(fd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot read user namespace")
}
- if preserveFDs > 0 {
- for fd := 3; fd < 3+preserveFDs; fd++ {
- // These fds were passed down to the runtime. Close them
- // and not interfere
- os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close()
- }
+ if ns == currentNS {
+ return nil, errors.New("process running in the same user namespace")
}
- ret := C.reexec_in_user_namespace_wait(pidC)
- if ret < 0 {
- return false, -1, errors.New("error waiting for the re-exec process")
- }
+ for {
+ nextFd, err := getParentUserNs(fd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot get parent user namespace")
+ }
- return true, int(ret), nil
-}
+ ns, err = readUserNsFd(nextFd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot read user namespace")
+ }
-// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount
-// namespace of the specified PID without looking up its parent. Useful to join directly
-// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts
-// with a default configuration.
-func JoinDirectUserAndMountNS(pid uint) (bool, int, error) {
- return JoinDirectUserAndMountNSWithOpts(pid, nil)
+ if ns == currentNS {
+ syscall.Close(int(nextFd))
+
+ // Drop O_CLOEXEC for the fd.
+ _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0)
+ if errno != 0 {
+ syscall.Close(int(fd))
+ return nil, errno
+ }
+
+ return os.NewFile(fd, "userns child"), nil
+ }
+ syscall.Close(int(fd))
+ fd = nextFd
+ }
}
-// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and
-// mount namespace of the specified PID without looking up its parent. Useful to join
-// directly the conmon process.
-func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) {
+// JoinUserAndMountNS re-execs podman in a new userNS and joins the mount namespace
+// of the specified PID together with the user namespace that getUserNSFirstChild
+// selects for it. Useful to join the conmon process directly.
+func JoinUserAndMountNS(pid uint) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
return false, -1, nil
}
@@ -151,39 +181,11 @@ func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) {
}
defer userNS.Close()
- if opts != nil && opts.Argument != "" {
- if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil {
- return false, -1, err
- }
- }
-
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), C.int(mountNS.Fd()))
- if int(pidC) < 0 {
- return false, -1, errors.Errorf("cannot re-exec process")
- }
-
- ret := C.reexec_in_user_namespace_wait(pidC)
- if ret < 0 {
- return false, -1, errors.New("error waiting for the re-exec process")
- }
-
- return true, int(ret), nil
-}
-
-// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the
-// specified path.
-func JoinNSPath(path string) (bool, int, error) {
- if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
- return false, -1, nil
- }
-
- userNS, err := getUserNSForPath(path)
+ fd, err := getUserNSFirstChild(userNS.Fd())
if err != nil {
return false, -1, err
}
- defer userNS.Close()
-
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1)
+ pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()))
if int(pidC) < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
}
@@ -199,16 +201,8 @@ func JoinNSPath(path string) (bool, int, error) {
// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
-// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration.
-func BecomeRootInUserNS() (bool, int, error) {
- return BecomeRootInUserNSWithOpts(nil)
-}
-
-// BecomeRootInUserNSWithOpts re-exec podman in a new userNS. It returns whether podman was
-// re-execute into a new user namespace and the return code from the re-executed podman process.
-// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
-func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
+func BecomeRootInUserNS() (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
@@ -227,12 +221,6 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
defer w.Close()
defer w.Write([]byte("0"))
- if opts != nil && opts.Argument != "" {
- if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil {
- return false, -1, err
- }
- }
-
pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
pid := int(pidC)
if pid < 0 {
@@ -314,112 +302,3 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
return true, int(ret), nil
}
-
-func readUserNs(path string) (string, error) {
- b := make([]byte, 256)
- _, err := syscall.Readlink(path, b)
- if err != nil {
- return "", err
- }
- return string(b), nil
-}
-
-func readUserNsFd(fd uintptr) (string, error) {
- return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd))
-}
-
-func getOwner(fd uintptr) (uintptr, error) {
- const nsGetUserns = 0xb701
- ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0)
- if errno != 0 {
- return 0, errno
- }
- return (uintptr)(unsafe.Pointer(ret)), nil
-}
-
-func getParentUserNs(fd uintptr) (uintptr, error) {
- const nsGetParent = 0xb702
- ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
- if errno != 0 {
- return 0, errno
- }
- return (uintptr)(unsafe.Pointer(ret)), nil
-}
-
-func getUserNSForPath(path string) (*os.File, error) {
- u, err := os.Open(path)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot open %s", path)
- }
- defer u.Close()
- fd, err := getOwner(u.Fd())
- if err != nil {
- return nil, err
- }
-
- return getUserNSFirstChild(fd)
-}
-
-func getUserNSForPid(pid uint) (*os.File, error) {
- path := fmt.Sprintf("/proc/%d/ns/user", pid)
- u, err := os.Open(path)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot open %s", path)
- }
-
- return getUserNSFirstChild(u.Fd())
-}
-
-// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process
-// Each container creates a new user namespace where the runtime runs. The current process in the container
-// might have created new user namespaces that are child of the initial namespace we created.
-// This function finds the initial namespace created for the container that is a child of the current namespace.
-//
-// current ns
-// / \
-// TARGET -> a [other containers]
-// /
-// b
-// /
-// NS READ USING THE PID -> c
-func getUserNSFirstChild(fd uintptr) (*os.File, error) {
- currentNS, err := readUserNs("/proc/self/ns/user")
- if err != nil {
- return nil, err
- }
-
- ns, err := readUserNsFd(fd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot read user namespace")
- }
- if ns == currentNS {
- return nil, errors.New("process running in the same user namespace")
- }
-
- for {
- nextFd, err := getParentUserNs(fd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot get parent user namespace")
- }
-
- ns, err = readUserNsFd(nextFd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot read user namespace")
- }
-
- if ns == currentNS {
- syscall.Close(int(nextFd))
-
- // Drop O_CLOEXEC for the fd.
- _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0)
- if errno != 0 {
- syscall.Close(int(fd))
- return nil, errno
- }
-
- return os.NewFile(fd, "userns child"), nil
- }
- syscall.Close(int(fd))
- fd = nextFd
- }
-}
diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go
index 24009610a..47b5dd7cc 100644
--- a/pkg/rootless/rootless_unsupported.go
+++ b/pkg/rootless/rootless_unsupported.go
@@ -19,45 +19,15 @@ func BecomeRootInUserNS() (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
-// BecomeRootInUserNS is a stub function that always returns false and an
-// error on unsupported OS's
-func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
- return false, -1, errors.New("this function is not supported on this os")
-}
-
// GetRootlessUID returns the UID of the user in the parent userNS
func GetRootlessUID() int {
return -1
}
-// JoinNS re-exec podman in a new userNS and join the user namespace of the specified
-// PID.
-func JoinNS(pid uint, preserveFDs int) (bool, int, error) {
- return false, -1, errors.New("this function is not supported on this os")
-}
-
-// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the
-// specified path.
-func JoinNSPath(path string) (bool, int, error) {
- return false, -1, errors.New("this function is not supported on this os")
-}
-
-// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and
-// mount namespace of the specified PID without looking up its parent. Useful to join
-// directly the conmon process.
-func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) {
- return false, -1, errors.New("this function is not supported on this os")
-}
-
-// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount
+// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
-// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts
+// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts
// with a default configuration.
-func JoinDirectUserAndMountNS(pid uint) (bool, int, error) {
+func JoinUserAndMountNS(pid uint) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
-
-// Argument returns the argument that was set for the rootless session.
-func Argument() string {
- return ""
-}