diff options
Diffstat (limited to 'pkg/rootless')
-rw-r--r-- | pkg/rootless/rootless.go | 163 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.c | 178 |
2 files changed, 341 insertions, 0 deletions
diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go
new file mode 100644
index 000000000..737fc91c7
--- /dev/null
+++ b/pkg/rootless/rootless.go
@@ -0,0 +1,163 @@
+package rootless
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	gosignal "os/signal"
+	"runtime"
+	"syscall"
+
+	"github.com/containers/storage/pkg/idtools"
+	"github.com/docker/docker/pkg/signal"
+	"github.com/pkg/errors"
+)
+
+/*
+extern int reexec_in_user_namespace(int ready);
+extern int reexec_in_user_namespace_wait(int pid);
+*/
+import "C"
+
+// runInUser records that the user namespace setup is complete by setting
+// _LIBPOD_USERNS_CONFIGURED to "done". Any error from Setenv is returned.
+func runInUser() error {
+	return os.Setenv("_LIBPOD_USERNS_CONFIGURED", "done")
+}
+
+// IsRootless tells us if we are running in rootless mode
+func IsRootless() bool {
+	return os.Getuid() != 0 || os.Getenv("_LIBPOD_USERNS_CONFIGURED") != ""
+}
+
+// tryMappingTool runs tool (newuidmap or newgidmap) for pid. The first
+// mapping always maps ID 0 inside the namespace to hostID; each entry of
+// mappings follows with its container ID shifted up by one so that it
+// does not collide with that first mapping.
+func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) error {
+	path, err := exec.LookPath(tool)
+	if err != nil {
+		return err
+	}
+
+	appendTriplet := func(l []string, a, b, c int) []string {
+		return append(l, fmt.Sprintf("%d", a), fmt.Sprintf("%d", b), fmt.Sprintf("%d", c))
+	}
+
+	args := []string{path, fmt.Sprintf("%d", pid)}
+	args = appendTriplet(args, 0, hostID, 1)
+	// Ranging over a nil slice is a no-op, so no nil check is needed.
+	for _, i := range mappings {
+		args = appendTriplet(args, i.ContainerID+1, i.HostID, i.Size)
+	}
+	cmd := exec.Cmd{
+		Path: path,
+		Args: args,
+	}
+	return cmd.Run()
+}
+
+// BecomeRootInUserNS re-exec podman in a new userNS.
+//
+// It returns false with a nil error when no re-exec is needed (the
+// process already runs as UID 0 or _LIBPOD_USERNS_CONFIGURED is set),
+// false with an error when the setup fails, and true once the re-executed
+// child has been waited for. The child's exit status is not propagated.
+func BecomeRootInUserNS() (bool, error) {
+	if os.Getuid() == 0 || os.Getenv("_LIBPOD_USERNS_CONFIGURED") != "" {
+		if os.Getenv("_LIBPOD_USERNS_CONFIGURED") == "init" {
+			return false, runInUser()
+		}
+		return false, nil
+	}
+
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	// The child blocks reading from this pipe until the ID mappings below
+	// have been written.
+	r, w, err := os.Pipe()
+	if err != nil {
+		return false, err
+	}
+	defer r.Close()
+	defer w.Close()
+
+	pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
+	pid := int(pidC)
+	if pid < 0 {
+		return false, errors.Errorf("cannot re-exec process")
+	}
+
+	setgroups := fmt.Sprintf("/proc/%d/setgroups", pid)
+	err = ioutil.WriteFile(setgroups, []byte("deny\n"), 0666)
+	if err != nil {
+		return false, errors.Wrapf(err, "cannot write setgroups file")
+	}
+
+	var uids, gids []idtools.IDMap
+	username := os.Getenv("USER")
+	mappings, err := idtools.NewIDMappings(username, username)
+	if err == nil {
+		uids = mappings.UIDs()
+		gids = mappings.GIDs()
+	}
+
+	// Try newuidmap first; if it is unavailable or fails, fall back to
+	// mapping only the current UID to 0.
+	uidsMapped := false
+	if mappings != nil && uids != nil {
+		uidsMapped = tryMappingTool("newuidmap", pid, os.Getuid(), uids) == nil
+	}
+	if !uidsMapped {
+		uidMap := fmt.Sprintf("/proc/%d/uid_map", pid)
+		err = ioutil.WriteFile(uidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getuid())), 0666)
+		if err != nil {
+			return false, errors.Wrapf(err, "cannot write uid_map")
+		}
+	}
+
+	// Same strategy for the GID mapping.
+	gidsMapped := false
+	if mappings != nil && gids != nil {
+		gidsMapped = tryMappingTool("newgidmap", pid, os.Getgid(), gids) == nil
+	}
+	if !gidsMapped {
+		gidMap := fmt.Sprintf("/proc/%d/gid_map", pid)
+		err = ioutil.WriteFile(gidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getgid())), 0666)
+		if err != nil {
+			return false, errors.Wrapf(err, "cannot write gid_map")
+		}
+	}
+
+	// Tell the child that the mappings are in place.
+	_, err = w.Write([]byte("1"))
+	if err != nil {
+		return false, errors.Wrapf(err, "write to sync pipe")
+	}
+
+	c := make(chan os.Signal, 1)
+
+	// Forward every signal we receive to the child.
+	gosignal.Notify(c)
+	defer gosignal.Reset()
+	go func() {
+		for s := range c {
+			if s == signal.SIGCHLD || s == signal.SIGPIPE {
+				continue
+			}
+
+			// Best effort: the child may already be gone.
+			syscall.Kill(int(pidC), s.(syscall.Signal))
+		}
+	}()
+
+	if C.reexec_in_user_namespace_wait(pidC) < 0 {
+		// err is nil at this point, so it must not be wrapped:
+		// errors.Wrapf returns nil for a nil error.
+		return false, errors.New("error waiting for the re-exec process")
+	}
+
+	return true, nil
+}
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
new file mode 100644
index 000000000..f107af7c7
--- /dev/null
+++ b/pkg/rootless/rootless_linux.c
@@ -0,0 +1,178 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+/* Thin wrapper around the raw clone system call.  */
+static int
+syscall_clone (unsigned long flags, void *child_stack)
+{
+  return (int) syscall (__NR_clone, flags, child_stack);
+}
+
+/* Read /proc/PID/cmdline and split it into a NULL-terminated argv array.
+   Returns NULL on any failure.  On success the array and the buffer its
+   entries point into are heap allocated and are not freed here.  */
+static char **
+get_cmd_line_args (pid_t pid)
+{
+  int fd;
+  char path[PATH_MAX];
+  char *buffer;
+  char *tmp;
+  size_t allocated;
+  size_t used = 0;
+  ssize_t ret;
+  size_t i;
+  int argc = 0;
+  char **argv;
+
+  snprintf (path, sizeof (path), "/proc/%d/cmdline", (int) pid);
+  fd = open (path, O_RDONLY);
+  if (fd < 0)
+    return NULL;
+
+  allocated = 512;
+  buffer = malloc (allocated);
+  if (buffer == NULL)
+    {
+      close (fd);
+      return NULL;
+    }
+  for (;;)
+    {
+      do
+        ret = read (fd, buffer + used, allocated - used);
+      while (ret < 0 && errno == EINTR);
+      if (ret < 0)
+        {
+          free (buffer);
+          close (fd);
+          return NULL;
+        }
+
+      if (ret == 0)
+        break;
+
+      used += ret;
+      if (allocated == used)
+        {
+          allocated += 512;
+          tmp = realloc (buffer, allocated);
+          if (tmp == NULL)
+            {
+              free (buffer);
+              close (fd);
+              return NULL;
+            }
+          buffer = tmp;
+        }
+    }
+  close (fd);
+
+  /* An empty command line cannot be re-executed.  */
+  if (used == 0)
+    {
+      free (buffer);
+      return NULL;
+    }
+
+  /* The read loop grows the buffer whenever it fills up, so at least one
+     spare byte is left.  Terminate the data explicitly in case the last
+     argument is not followed by a NUL.  */
+  buffer[used] = '\0';
+
+  for (i = 0; i < used; i++)
+    if (buffer[i] == '\0')
+      argc++;
+
+  /* One slot per separator, one for a possibly unterminated last
+     argument and one for the final NULL.  */
+  argv = malloc (sizeof (char *) * (argc + 2));
+  if (argv == NULL)
+    {
+      free (buffer);
+      return NULL;
+    }
+  argc = 0;
+
+  argv[argc++] = buffer;
+  for (i = 0; i + 1 < used; i++)
+    if (buffer[i] == '\0')
+      argv[argc++] = buffer + i + 1;
+
+  argv[argc] = NULL;
+
+  return argv;
+}
+
+/* Clone the current process into a new user namespace.  The parent gets
+   the child PID back, or a negative value if the clone failed.  The child
+   waits for one byte on the READY file descriptor and then re-executes
+   the parent's command line with _LIBPOD_USERNS_CONFIGURED set to "init";
+   it exits with status 1 if any step fails.  */
+int
+reexec_in_user_namespace (int ready)
+{
+  ssize_t ret;
+  pid_t pid;
+  char b;
+  pid_t ppid = getpid ();
+  char **argv;
+
+  pid = syscall_clone (CLONE_NEWUSER|SIGCHLD, NULL);
+  if (pid)
+    return pid;
+
+  argv = get_cmd_line_args (ppid);
+  if (argv == NULL)
+    _exit (1);
+
+  if (setenv ("_LIBPOD_USERNS_CONFIGURED", "init", 1) < 0)
+    _exit (1);
+
+  /* Wait for the parent's go-ahead.  Keep the raw result of read so that
+     EINTR is retried and errors or EOF are detected.  */
+  do
+    ret = read (ready, &b, 1);
+  while (ret < 0 && errno == EINTR);
+  if (ret != 1)
+    _exit (1);
+  close (ready);
+
+  /* argv[0] may be a bare command name, so let execvp search PATH.  */
+  execvp (argv[0], argv);
+
+  _exit (1);
+}
+
+/* Wait for PID to terminate.  Returns its exit status, 128 plus the
+   signal number if it was killed by a signal, or -1 on failure.  */
+int
+reexec_in_user_namespace_wait (int pid)
+{
+  pid_t p;
+  int status;
+
+  do
+    p = waitpid (pid, &status, 0);
+  while (p < 0 && errno == EINTR);
+
+  if (p < 0)
+    return -1;
+
+  if (WIFEXITED (status))
+    return WEXITSTATUS (status);
+  if (WIFSIGNALED (status))
+    return 128 + WTERMSIG (status);
+  return -1;
+}