summaryrefslogtreecommitdiff
path: root/pkg/rootless/rootless_linux.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/rootless/rootless_linux.go')
-rw-r--r--pkg/rootless/rootless_linux.go277
1 files changed, 71 insertions, 206 deletions
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index 0be0e08bf..1d1b1713d 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -46,25 +46,6 @@ func IsRootless() bool {
return isRootless
}
-var (
- skipStorageSetup = false
-)
-
-// SetSkipStorageSetup tells the runtime to not setup containers/storage
-func SetSkipStorageSetup(v bool) {
- skipStorageSetup = v
-}
-
-// SkipStorageSetup tells if we should skip the containers/storage setup
-func SkipStorageSetup() bool {
- return skipStorageSetup
-}
-
-// Argument returns the argument that was set for the rootless session.
-func Argument() string {
- return os.Getenv("_CONTAINERS_ROOTLESS_ARG")
-}
-
// GetRootlessUID returns the UID of the user in the parent userNS
func GetRootlessUID() int {
uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
@@ -104,51 +85,86 @@ func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap)
return nil
}
-// JoinNS re-exec podman in a new userNS and join the user namespace of the specified
-// PID.
-func JoinNS(pid uint, preserveFDs int) (bool, int, error) {
- if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
- return false, -1, nil
+func readUserNs(path string) (string, error) {
+ b := make([]byte, 256)
+ _, err := syscall.Readlink(path, b)
+ if err != nil {
+ return "", err
}
+ return string(b), nil
+}
- userNS, err := getUserNSForPid(pid)
+func readUserNsFd(fd uintptr) (string, error) {
+ return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd))
+}
+
+func getParentUserNs(fd uintptr) (uintptr, error) {
+ const nsGetParent = 0xb702
+ ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
+ if errno != 0 {
+ return 0, errno
+ }
+ return (uintptr)(unsafe.Pointer(ret)), nil
+}
+
+// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process
+// Each container creates a new user namespace where the runtime runs. The current process in the container
+// might have created new user namespaces that are child of the initial namespace we created.
+// This function finds the initial namespace created for the container that is a child of the current namespace.
+//
+// current ns
+// / \
+// TARGET -> a [other containers]
+// /
+// b
+// /
+// NS READ USING THE PID -> c
+func getUserNSFirstChild(fd uintptr) (*os.File, error) {
+ currentNS, err := readUserNs("/proc/self/ns/user")
if err != nil {
- return false, -1, err
+ return nil, err
}
- defer userNS.Close()
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1)
- if int(pidC) < 0 {
- return false, -1, errors.Errorf("cannot re-exec process")
+ ns, err := readUserNsFd(fd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot read user namespace")
}
- if preserveFDs > 0 {
- for fd := 3; fd < 3+preserveFDs; fd++ {
- // These fds were passed down to the runtime. Close them
- // and not interfere
- os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close()
- }
+ if ns == currentNS {
+ return nil, errors.New("process running in the same user namespace")
}
- ret := C.reexec_in_user_namespace_wait(pidC)
- if ret < 0 {
- return false, -1, errors.New("error waiting for the re-exec process")
- }
+ for {
+ nextFd, err := getParentUserNs(fd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot get parent user namespace")
+ }
- return true, int(ret), nil
-}
+ ns, err = readUserNsFd(nextFd)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot read user namespace")
+ }
-// JoinDirectUserAndMountNS re-exec podman in a new userNS and join the user and mount
-// namespace of the specified PID without looking up its parent. Useful to join directly
-// the conmon process. It is a convenience function for JoinDirectUserAndMountNSWithOpts
-// with a default configuration.
-func JoinDirectUserAndMountNS(pid uint) (bool, int, error) {
- return JoinDirectUserAndMountNSWithOpts(pid, nil)
+ if ns == currentNS {
+ syscall.Close(int(nextFd))
+
+ // Drop O_CLOEXEC for the fd.
+ _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0)
+ if errno != 0 {
+ syscall.Close(int(fd))
+ return nil, errno
+ }
+
+ return os.NewFile(fd, "userns child"), nil
+ }
+ syscall.Close(int(fd))
+ fd = nextFd
+ }
}
-// JoinDirectUserAndMountNSWithOpts re-exec podman in a new userNS and join the user and
-// mount namespace of the specified PID without looking up its parent. Useful to join
-// directly the conmon process.
-func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) {
+// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
+// namespace of the specified PID without looking up its parent. Useful to join directly
+// the conmon process.
+func JoinUserAndMountNS(pid uint) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
return false, -1, nil
}
@@ -165,39 +181,11 @@ func JoinDirectUserAndMountNSWithOpts(pid uint, opts *Opts) (bool, int, error) {
}
defer userNS.Close()
- if opts != nil && opts.Argument != "" {
- if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil {
- return false, -1, err
- }
- }
-
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), C.int(mountNS.Fd()))
- if int(pidC) < 0 {
- return false, -1, errors.Errorf("cannot re-exec process")
- }
-
- ret := C.reexec_in_user_namespace_wait(pidC)
- if ret < 0 {
- return false, -1, errors.New("error waiting for the re-exec process")
- }
-
- return true, int(ret), nil
-}
-
-// JoinNSPath re-exec podman in a new userNS and join the owner user namespace of the
-// specified path.
-func JoinNSPath(path string) (bool, int, error) {
- if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
- return false, -1, nil
- }
-
- userNS, err := getUserNSForPath(path)
+ fd, err := getUserNSFirstChild(userNS.Fd())
if err != nil {
return false, -1, err
}
- defer userNS.Close()
-
- pidC := C.reexec_userns_join(C.int(userNS.Fd()), -1)
+ pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()))
if int(pidC) < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
}
@@ -213,16 +201,8 @@ func JoinNSPath(path string) (bool, int, error) {
// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
-// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration.
-func BecomeRootInUserNS() (bool, int, error) {
- return BecomeRootInUserNSWithOpts(nil)
-}
-
-// BecomeRootInUserNSWithOpts re-exec podman in a new userNS. It returns whether podman was
-// re-execute into a new user namespace and the return code from the re-executed podman process.
-// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
-func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
+func BecomeRootInUserNS() (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
@@ -241,12 +221,6 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
defer w.Close()
defer w.Write([]byte("0"))
- if opts != nil && opts.Argument != "" {
- if err := os.Setenv("_CONTAINERS_ROOTLESS_ARG", opts.Argument); err != nil {
- return false, -1, err
- }
- }
-
pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
pid := int(pidC)
if pid < 0 {
@@ -328,112 +302,3 @@ func BecomeRootInUserNSWithOpts(opts *Opts) (bool, int, error) {
return true, int(ret), nil
}
-
-func readUserNs(path string) (string, error) {
- b := make([]byte, 256)
- _, err := syscall.Readlink(path, b)
- if err != nil {
- return "", err
- }
- return string(b), nil
-}
-
-func readUserNsFd(fd uintptr) (string, error) {
- return readUserNs(fmt.Sprintf("/proc/self/fd/%d", fd))
-}
-
-func getOwner(fd uintptr) (uintptr, error) {
- const nsGetUserns = 0xb701
- ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetUserns), 0)
- if errno != 0 {
- return 0, errno
- }
- return (uintptr)(unsafe.Pointer(ret)), nil
-}
-
-func getParentUserNs(fd uintptr) (uintptr, error) {
- const nsGetParent = 0xb702
- ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(nsGetParent), 0)
- if errno != 0 {
- return 0, errno
- }
- return (uintptr)(unsafe.Pointer(ret)), nil
-}
-
-func getUserNSForPath(path string) (*os.File, error) {
- u, err := os.Open(path)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot open %s", path)
- }
- defer u.Close()
- fd, err := getOwner(u.Fd())
- if err != nil {
- return nil, err
- }
-
- return getUserNSFirstChild(fd)
-}
-
-func getUserNSForPid(pid uint) (*os.File, error) {
- path := fmt.Sprintf("/proc/%d/ns/user", pid)
- u, err := os.Open(path)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot open %s", path)
- }
-
- return getUserNSFirstChild(u.Fd())
-}
-
-// getUserNSFirstChild returns an open FD for the first direct child user namespace that created the process
-// Each container creates a new user namespace where the runtime runs. The current process in the container
-// might have created new user namespaces that are child of the initial namespace we created.
-// This function finds the initial namespace created for the container that is a child of the current namespace.
-//
-// current ns
-// / \
-// TARGET -> a [other containers]
-// /
-// b
-// /
-// NS READ USING THE PID -> c
-func getUserNSFirstChild(fd uintptr) (*os.File, error) {
- currentNS, err := readUserNs("/proc/self/ns/user")
- if err != nil {
- return nil, err
- }
-
- ns, err := readUserNsFd(fd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot read user namespace")
- }
- if ns == currentNS {
- return nil, errors.New("process running in the same user namespace")
- }
-
- for {
- nextFd, err := getParentUserNs(fd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot get parent user namespace")
- }
-
- ns, err = readUserNsFd(nextFd)
- if err != nil {
- return nil, errors.Wrapf(err, "cannot read user namespace")
- }
-
- if ns == currentNS {
- syscall.Close(int(nextFd))
-
- // Drop O_CLOEXEC for the fd.
- _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, fd, syscall.F_SETFD, 0)
- if errno != 0 {
- syscall.Close(int(fd))
- return nil, errno
- }
-
- return os.NewFile(fd, "userns child"), nil
- }
- syscall.Close(int(fd))
- fd = nextFd
- }
-}