diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2020-04-20 16:08:39 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-20 16:08:39 -0400 |
commit | 354088a94b5dbbf2c4bf3315077fd89d98faebb5 (patch) | |
tree | bcef84b40cae5e8115c962f00902af982912b33e | |
parent | 5928e8fe80e1f013049f1a980a4c945ee35570c3 (diff) | |
parent | 788fdc685b00dee5ccb594bef845204250c4c123 (diff) | |
download | podman-354088a94b5dbbf2c4bf3315077fd89d98faebb5.tar.gz podman-354088a94b5dbbf2c4bf3315077fd89d98faebb5.tar.bz2 podman-354088a94b5dbbf2c4bf3315077fd89d98faebb5.zip |
Merge pull request #5889 from giuseppe/rootless-fd-join
rootless: move join namespace inside child process
-rw-r--r-- | pkg/rootless/rootless_linux.c | 45 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 113 |
2 files changed, 33 insertions, 125 deletions
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index da52a7217..72d461cdc 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -535,8 +535,36 @@ create_pause_process (const char *pause_pid_file_path, char **argv) } } +static void +join_namespace_or_die (int pid_to_join, const char *ns_file) +{ + char ns_path[PATH_MAX]; + int ret; + int fd; + + ret = snprintf (ns_path, PATH_MAX, "/proc/%d/ns/%s", pid_to_join, ns_file); + if (ret == PATH_MAX) + { + fprintf (stderr, "internal error: namespace path too long\n"); + _exit (EXIT_FAILURE); + } + + fd = open (ns_path, O_CLOEXEC | O_RDONLY); + if (fd < 0) + { + fprintf (stderr, "cannot open: %s\n", ns_path); + _exit (EXIT_FAILURE); + } + if (setns (fd, 0) < 0) + { + fprintf (stderr, "cannot set namespace to %s: %s\n", ns_path, strerror (errno)); + _exit (EXIT_FAILURE); + } + close (fd); +} + int -reexec_userns_join (int userns, int mountns, char *pause_pid_file_path) +reexec_userns_join (int pid_to_join, char *pause_pid_file_path) { char uid[16]; char gid[16]; @@ -606,19 +634,8 @@ reexec_userns_join (int userns, int mountns, char *pause_pid_file_path) _exit (EXIT_FAILURE); } - if (setns (userns, 0) < 0) - { - fprintf (stderr, "cannot setns: %s\n", strerror (errno)); - _exit (EXIT_FAILURE); - } - close (userns); - - if (mountns >= 0 && setns (mountns, 0) < 0) - { - fprintf (stderr, "cannot setns: %s\n", strerror (errno)); - _exit (EXIT_FAILURE); - } - close (mountns); + join_namespace_or_die (pid_to_join, "user"); + join_namespace_or_die (pid_to_join, "mnt"); if (syscall_setresgid (0, 0, 0) < 0) { diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 5ddfab7ad..3de136f12 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -31,7 +31,7 @@ extern uid_t rootless_uid(); extern uid_t rootless_gid(); extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd); extern int reexec_in_user_namespace_wait(int pid, int options); -extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); +extern int reexec_userns_join(int pid, char *pause_pid_file_path); */ import "C" @@ -124,91 +124,6 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) return nil } -func readUserNs(path string) (string, error) { - b := make([]byte, 256) - _, err := unix.Readlink(path, b) - if err != nil { - return "", err - } - return string(b), nil -} - -func readUserNsFd(fd uintptr) (string, error) { - return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd)) -} - -func getParentUserNs(fd uintptr) (uintptr, error) { - const nsGetParent = 0xb702 - ret, _, errno := unix.Syscall(unix.SYS_IOCTL, fd, uintptr(nsGetParent), 0) - if errno != 0 { - return 0, errno - } - return (uintptr)(unsafe.Pointer(ret)), nil -} - -// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process -// Each container creates a new user namespace where the runtime runs. The current process in the container -// might have created new user namespaces that are child of the initial namespace we created. -// This function finds the initial namespace created for the container that is a child of the current namespace. -// -// current ns -// / \ -// TARGET -> a [other containers] -// / -// b -// / -// NS READ USING THE PID -> c -func getUserNSFirstChild(fd uintptr) (*os.File, error) { - currentNS, err := readUserNs("/proc/self/ns/user") - if err != nil { - return nil, err - } - - ns, err := readUserNsFd(fd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - if ns == currentNS { - return nil, errors.New("process running in the same user namespace") - } - - for { - nextFd, err := getParentUserNs(fd) - if err != nil { - if err == unix.ENOTTY { - return os.NewFile(fd, "userns child"), nil - } - return nil, errors.Wrapf(err, "cannot get parent user namespace") - } - - ns, err = readUserNsFd(nextFd) - if err != nil { - return nil, errors.Wrapf(err, "cannot read user namespace") - } - - if ns == currentNS { - if err := unix.Close(int(nextFd)); err != nil { - return nil, err - } - - // Drop O_CLOEXEC for the fd. - _, _, errno := unix.Syscall(unix.SYS_FCNTL, fd, unix.F_SETFD, 0) - if errno != 0 { - if err := unix.Close(int(fd)); err != nil { - logrus.Errorf("failed to close file descriptor %d", fd) - } - return nil, errno - } - - return os.NewFile(fd, "userns child"), nil - } - if err := unix.Close(int(fd)); err != nil { - return nil, err - } - fd = nextFd - } -} - // joinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. @@ -220,31 +135,7 @@ func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { cPausePid := C.CString(pausePid) defer C.free(unsafe.Pointer(cPausePid)) - userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid)) - if err != nil { - return false, -1, err - } - defer func() { - if err := userNS.Close(); err != nil { - logrus.Errorf("unable to close namespace: %q", err) - } - }() - - mountNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/mnt", pid)) - if err != nil { - return false, -1, err - } - defer func() { - if err := mountNS.Close(); err != nil { - logrus.Errorf("unable to close namespace: %q", err) - } - }() - - fd, err := getUserNSFirstChild(userNS.Fd()) - if err != nil { - return false, -1, err - } - pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid) + pidC := C.reexec_userns_join(C.int(pid), cPausePid) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } |