diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/adapter/containers_remote.go | 2 | ||||
-rw-r--r-- | pkg/adapter/runtime_remote.go | 41 | ||||
-rw-r--r-- | pkg/namespaces/namespaces.go | 7 | ||||
-rw-r--r-- | pkg/registries/registries.go | 6 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.c | 428 | ||||
-rw-r--r-- | pkg/rootless/rootless_linux.go | 77 | ||||
-rw-r--r-- | pkg/rootless/rootless_unsupported.go | 9 | ||||
-rw-r--r-- | pkg/spec/storage.go | 36 | ||||
-rw-r--r-- | pkg/util/utils.go | 53 | ||||
-rw-r--r-- | pkg/util/utils_supported.go | 10 | ||||
-rw-r--r-- | pkg/util/utils_windows.go | 6 | ||||
-rw-r--r-- | pkg/varlinkapi/images.go | 70 |
12 files changed, 579 insertions, 166 deletions
diff --git a/pkg/adapter/containers_remote.go b/pkg/adapter/containers_remote.go index 63b0f9d2f..c34495b3d 100644 --- a/pkg/adapter/containers_remote.go +++ b/pkg/adapter/containers_remote.go @@ -561,7 +561,7 @@ func (r *LocalRuntime) attach(ctx context.Context, stdin, stdout *os.File, cid s if err != nil { return nil, err } - resize := make(chan remotecommand.TerminalSize) + resize := make(chan remotecommand.TerminalSize, 5) haveTerminal := terminal.IsTerminal(int(os.Stdin.Fd())) // Check if we are attached to a terminal. If we are, generate resize diff --git a/pkg/adapter/runtime_remote.go b/pkg/adapter/runtime_remote.go index e86287462..e0c0898bd 100644 --- a/pkg/adapter/runtime_remote.go +++ b/pkg/adapter/runtime_remote.go @@ -182,10 +182,7 @@ func (r *LocalRuntime) NewImageFromLocal(name string) (*ContainerImage, error) { // LoadFromArchiveReference creates an image from a local archive func (r *LocalRuntime) LoadFromArchiveReference(ctx context.Context, srcRef types.ImageReference, signaturePolicyPath string, writer io.Writer) ([]*ContainerImage, error) { var iid string - // TODO We need to find a way to leak certDir, creds, and the tlsverify into this function, normally this would - // come from cli options but we don't want want those in here either. - tlsverify := true - reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, srcRef.DockerReference().String(), "", "", signaturePolicyPath, &tlsverify) + reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, srcRef.DockerReference().String()) if err != nil { return nil, err } @@ -217,21 +214,7 @@ func (r *LocalRuntime) New(ctx context.Context, name, signaturePolicyPath, authf if label != nil { return nil, errors.New("the remote client function does not support checking a remote image for a label") } - var ( - tlsVerify bool - tlsVerifyPtr *bool - ) - if dockeroptions.DockerInsecureSkipTLSVerify == types.OptionalBoolFalse { - tlsVerify = true - tlsVerifyPtr = &tlsVerify - - } - if dockeroptions.DockerInsecureSkipTLSVerify == types.OptionalBoolTrue { - tlsVerify = false - tlsVerifyPtr = &tlsVerify - } - - reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, name, dockeroptions.DockerCertPath, "", signaturePolicyPath, tlsVerifyPtr) + reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, name) if err != nil { return nil, err } @@ -429,9 +412,8 @@ func (r *LocalRuntime) Build(ctx context.Context, c *cliconfig.BuildValues, opti Quiet: options.Quiet, RemoteIntermediateCtrs: options.RemoveIntermediateCtrs, // ReportWriter: - RuntimeArgs: options.RuntimeArgs, - SignaturePolicyPath: options.SignaturePolicyPath, - Squash: options.Squash, + RuntimeArgs: options.RuntimeArgs, + Squash: options.Squash, } // tar the file outputFile, err := ioutil.TempFile("", "varlink_tar_send") @@ -570,20 +552,7 @@ func (r *LocalRuntime) RemoveVolumes(ctx context.Context, c *cliconfig.VolumeRmV func (r *LocalRuntime) Push(ctx context.Context, srcName, destination, manifestMIMEType, authfile, signaturePolicyPath string, writer io.Writer, forceCompress bool, signingOptions image.SigningOptions, dockerRegistryOptions *image.DockerRegistryOptions, additionalDockerArchiveTags []reference.NamedTagged) error { - var ( - tls *bool - tlsVerify bool - ) - if dockerRegistryOptions.DockerInsecureSkipTLSVerify == types.OptionalBoolTrue { - tlsVerify = false - tls = &tlsVerify - } - if dockerRegistryOptions.DockerInsecureSkipTLSVerify == types.OptionalBoolFalse { - tlsVerify = true - tls = &tlsVerify - } - - reply, err := iopodman.PushImage().Send(r.Conn, varlink.More, srcName, destination, tls, signaturePolicyPath, "", dockerRegistryOptions.DockerCertPath, forceCompress, manifestMIMEType, signingOptions.RemoveSignatures, signingOptions.SignBy) + reply, err := iopodman.PushImage().Send(r.Conn, varlink.More, srcName, destination, forceCompress, manifestMIMEType, signingOptions.RemoveSignatures, signingOptions.SignBy) if err != nil { return err } diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go index fde6118af..ec9276344 100644 --- a/pkg/namespaces/namespaces.go +++ b/pkg/namespaces/namespaces.go @@ -12,6 +12,11 @@ func (n UsernsMode) IsHost() bool { return n == "host" } +// IsKeepID indicates whether container uses a mapping where the (uid, gid) on the host is lept inside of the namespace. +func (n UsernsMode) IsKeepID() bool { + return n == "keep-id" +} + // IsPrivate indicates whether the container uses the a private userns. func (n UsernsMode) IsPrivate() bool { return !(n.IsHost()) @@ -21,7 +26,7 @@ func (n UsernsMode) IsPrivate() bool { func (n UsernsMode) Valid() bool { parts := strings.Split(string(n), ":") switch mode := parts[0]; mode { - case "", "host": + case "", "host", "keep-id": default: return false } diff --git a/pkg/registries/registries.go b/pkg/registries/registries.go index fdb7f33c5..5c4ecd020 100644 --- a/pkg/registries/registries.go +++ b/pkg/registries/registries.go @@ -51,7 +51,7 @@ func GetRegistries() ([]string, error) { } for _, reg := range registries { if reg.Search { - searchRegistries = append(searchRegistries, reg.URL) + searchRegistries = append(searchRegistries, reg.Location) } } return searchRegistries, nil @@ -66,7 +66,7 @@ func GetBlockedRegistries() ([]string, error) { } for _, reg := range registries { if reg.Blocked { - blockedRegistries = append(blockedRegistries, reg.URL) + blockedRegistries = append(blockedRegistries, reg.Location) } } return blockedRegistries, nil @@ -81,7 +81,7 @@ func GetInsecureRegistries() ([]string, error) { } for _, reg := range registries { if reg.Insecure { - insecureRegistries = append(insecureRegistries, reg.URL) + insecureRegistries = append(insecureRegistries, reg.Location) } } return insecureRegistries, nil diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 1d32b1adb..098ca7830 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -17,38 +17,30 @@ #include <sys/prctl.h> #include <dirent.h> #include <sys/select.h> +#include <stdio.h> + +#ifndef RENAME_NOREPLACE +# define RENAME_NOREPLACE (1 << 0) + +int renameat2 (int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags) +{ +# ifdef __NR_renameat2 + return (int) syscall (__NR_renameat2, olddirfd, oldpath, newdirfd, newpath, flags); +# else + /* no way to implement it atomically. */ + errno = ENOSYS; + return -1; +# endif +} +#endif static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces"; static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone"; static int open_files_max_fd; fd_set open_files_set; - -static void __attribute__((constructor)) init() -{ - DIR *d; - - /* Store how many FDs were open before the Go runtime kicked in. */ - d = opendir ("/proc/self/fd"); - if (d) - { - struct dirent *ent; - - FD_ZERO (&open_files_set); - for (ent = readdir (d); ent; ent = readdir (d)) - { - int fd = atoi (ent->d_name); - if (fd != dirfd (d)) - { - if (fd > open_files_max_fd) - open_files_max_fd = fd; - FD_SET (fd, &open_files_set); - } - } - closedir (d); - } -} - +static uid_t rootless_uid_init; +static gid_t rootless_gid_init; static int syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid) @@ -62,14 +54,24 @@ syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid) return (int) syscall (__NR_setresgid, rgid, egid, sgid); } -static int -syscall_clone (unsigned long flags, void *child_stack) +uid_t +rootless_uid () { -#if defined(__s390__) || defined(__CRIS__) - return (int) syscall (__NR_clone, child_stack, flags); -#else - return (int) syscall (__NR_clone, flags, child_stack); -#endif + return rootless_uid_init; +} + +uid_t +rootless_gid () +{ + return rootless_gid_init; +} + +static void +do_pause () +{ + prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL); + while (1) + pause (); } static char ** @@ -84,7 +86,10 @@ get_cmd_line_args (pid_t pid) int i, argc = 0; char **argv; - sprintf (path, "/proc/%d/cmdline", pid); + if (pid) + sprintf (path, "/proc/%d/cmdline", pid); + else + strcpy (path, "/proc/self/cmdline"); fd = open (path, O_RDONLY); if (fd < 0) return NULL; @@ -99,7 +104,10 @@ get_cmd_line_args (pid_t pid) ret = read (fd, buffer + used, allocated - used); while (ret < 0 && errno == EINTR); if (ret < 0) - return NULL; + { + free (buffer); + return NULL; + } if (ret == 0) break; @@ -109,11 +117,12 @@ get_cmd_line_args (pid_t pid) { allocated += 512; char *tmp = realloc (buffer, allocated); - if (buffer == NULL) { - free(buffer); - return NULL; - } - buffer=tmp; + if (buffer == NULL) + { + free (buffer); + return NULL; + } + buffer = tmp; } } close (fd); @@ -122,11 +131,17 @@ get_cmd_line_args (pid_t pid) if (buffer[i] == '\0') argc++; if (argc == 0) - return NULL; + { + free (buffer); + return NULL; + } argv = malloc (sizeof (char *) * (argc + 1)); if (argv == NULL) - return NULL; + { + free (buffer); + return NULL; + } argc = 0; argv[argc++] = buffer; @@ -139,11 +154,303 @@ get_cmd_line_args (pid_t pid) return argv; } +static bool +can_use_shortcut () +{ + int argc; + char **argv; + bool ret = true; + +#ifdef DISABLE_JOIN_SHORTCUT + return false; +#endif + + argv = get_cmd_line_args (0); + if (argv == NULL) + return NULL; + + for (argc = 0; argv[argc]; argc++) + { + if (argc == 0 || argv[argc][0] == '-') + continue; + + if (strcmp (argv[argc], "mount") == 0 + || strcmp (argv[argc], "search") == 0 + || strcmp (argv[argc], "system") == 0) + { + ret = false; + break; + } + } + + free (argv[0]); + free (argv); + return ret; +} + +static void __attribute__((constructor)) init() +{ + const char *xdg_runtime_dir; + const char *pause; + DIR *d; + + pause = getenv ("_PODMAN_PAUSE"); + if (pause && pause[0]) + { + do_pause (); + _exit (EXIT_FAILURE); + } + + /* Store how many FDs were open before the Go runtime kicked in. */ + d = opendir ("/proc/self/fd"); + if (d) + { + struct dirent *ent; + + FD_ZERO (&open_files_set); + for (ent = readdir (d); ent; ent = readdir (d)) + { + int fd = atoi (ent->d_name); + if (fd != dirfd (d)) + { + if (fd > open_files_max_fd) + open_files_max_fd = fd; + FD_SET (fd, &open_files_set); + } + } + closedir (d); + } + + /* Shortcut. If we are able to join the pause pid file, do it now so we don't + need to re-exec. */ + xdg_runtime_dir = getenv ("XDG_RUNTIME_DIR"); + if (xdg_runtime_dir && xdg_runtime_dir[0] && can_use_shortcut ()) + { + int r; + int fd; + long pid; + char buf[12]; + uid_t uid; + gid_t gid; + char path[PATH_MAX]; + const char *const suffix = "/libpod/pause.pid"; + char *cwd = getcwd (NULL, 0); + + if (cwd == NULL) + { + fprintf (stderr, "error getting current working directory: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (strlen (xdg_runtime_dir) >= PATH_MAX - strlen (suffix)) + { + fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %s", strerror (ENAMETOOLONG)); + exit (EXIT_FAILURE); + } + + sprintf (path, "%s%s", xdg_runtime_dir, suffix); + fd = open (path, O_RDONLY); + if (fd < 0) + { + free (cwd); + return; + } + + r = read (fd, buf, sizeof (buf)); + close (fd); + if (r < 0) + { + free (cwd); + return; + } + pid = strtol (buf, NULL, 10); + if (pid == LONG_MAX) + { + free (cwd); + return; + } + + uid = geteuid (); + gid = getegid (); + + sprintf (path, "/proc/%d/ns/user", pid); + fd = open (path, O_RDONLY); + if (fd < 0 || setns (fd, 0) < 0) + { + free (cwd); + return; + } + close (fd); + + /* Errors here cannot be ignored as we already joined a ns. */ + sprintf (path, "/proc/%d/ns/mnt", pid); + fd = open (path, O_RDONLY); + if (fd < 0) + { + fprintf (stderr, "cannot open %s: %s", path, strerror (errno)); + exit (EXIT_FAILURE); + } + + r = setns (fd, 0); + if (r < 0) + { + fprintf (stderr, "cannot join mount namespace for %d: %s", pid, strerror (errno)); + exit (EXIT_FAILURE); + } + close (fd); + + if (syscall_setresgid (0, 0, 0) < 0) + { + fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (syscall_setresuid (0, 0, 0) < 0) + { + fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + if (chdir (cwd) < 0) + { + fprintf (stderr, "cannot chdir: %s\n", strerror (errno)); + _exit (EXIT_FAILURE); + } + + free (cwd); + rootless_uid_init = uid; + rootless_gid_init = gid; + } +} + +static int +syscall_clone (unsigned long flags, void *child_stack) +{ +#if defined(__s390__) || defined(__CRIS__) + return (int) syscall (__NR_clone, child_stack, flags); +#else + return (int) syscall (__NR_clone, flags, child_stack); +#endif +} + +static int +create_pause_process (const char *pause_pid_file_path, char **argv) +{ + int r, p[2]; + + if (pipe (p) < 0) + _exit (EXIT_FAILURE); + + r = fork (); + if (r < 0) + _exit (EXIT_FAILURE); + + if (r) + { + char b; + + close (p[1]); + /* Block until we write the pid file. */ + do + r = read (p[0], &b, 1); + while (r < 0 && errno == EINTR); + close (p[0]); + + return r == 1 && b == '0' ? 0 : -1; + } + else + { + int fd; + pid_t pid; + + close (p[0]); + + setsid (); + pid = fork (); + if (r < 0) + _exit (EXIT_FAILURE); + + if (pid) + { + char pid_str[12]; + char *tmp_file_path = NULL; + + sprintf (pid_str, "%d", pid); + + asprintf (&tmp_file_path, "%s.XXXXXX", pause_pid_file_path); + if (tmp_file_path == NULL) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + fd = mkstemp (tmp_file_path); + if (fd < 0) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + do + r = write (fd, pid_str, strlen (pid_str)); + while (r < 0 && errno == EINTR); + if (r < 0) + { + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + close (fd); + + /* There can be another process at this point trying to configure the user namespace and the pause + process, do not override the pid file if it already exists. */ + if (renameat2 (AT_FDCWD, tmp_file_path, AT_FDCWD, pause_pid_file_path, RENAME_NOREPLACE) < 0) + { + unlink (tmp_file_path); + kill (pid, SIGKILL); + _exit (EXIT_FAILURE); + } + + do + r = write (p[1], "0", 1); + while (r < 0 && errno == EINTR); + close (p[1]); + + _exit (EXIT_SUCCESS); + } + else + { + int null; + + close (p[1]); + + null = open ("/dev/null", O_RDWR); + if (null >= 0) + { + dup2 (null, 0); + dup2 (null, 1); + dup2 (null, 2); + close (null); + } + + for (fd = 3; fd < open_files_max_fd + 16; fd++) + close (fd); + + setenv ("_PODMAN_PAUSE", "1", 1); + execlp (argv[0], NULL); + + /* If the execve fails, then do the pause here. */ + do_pause (); + _exit (EXIT_FAILURE); + } + } +} + int -reexec_userns_join (int userns, int mountns) +reexec_userns_join (int userns, int mountns, char *pause_pid_file_path) { pid_t ppid = getpid (); char uid[16]; + char gid[16]; char **argv; int pid; char *cwd = getcwd (NULL, 0); @@ -155,6 +462,7 @@ reexec_userns_join (int userns, int mountns) } sprintf (uid, "%d", geteuid ()); + sprintf (gid, "%d", getegid ()); argv = get_cmd_line_args (ppid); if (argv == NULL) @@ -181,6 +489,7 @@ reexec_userns_join (int userns, int mountns) setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); + setenv ("_CONTAINERS_ROOTLESS_GID", gid, 1); if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0) { @@ -200,7 +509,7 @@ reexec_userns_join (int userns, int mountns) fprintf (stderr, "cannot setns: %s\n", strerror (errno)); _exit (EXIT_FAILURE); } - close (userns); + close (mountns); if (syscall_setresgid (0, 0, 0) < 0) { @@ -221,6 +530,12 @@ reexec_userns_join (int userns, int mountns) } free (cwd); + if (pause_pid_file_path && pause_pid_file_path[0] != '\0') + { + /* We ignore errors here as we didn't create the namespace anyway. */ + create_pause_process (pause_pid_file_path, argv); + } + execvp (argv[0], argv); _exit (EXIT_FAILURE); @@ -246,7 +561,7 @@ check_proc_sys_userns_file (const char *path) } int -reexec_in_user_namespace (int ready) +reexec_in_user_namespace (int ready, char *pause_pid_file_path) { int ret; pid_t pid; @@ -254,6 +569,7 @@ reexec_in_user_namespace (int ready) pid_t ppid = getpid (); char **argv; char uid[16]; + char gid[16]; char *listen_fds = NULL; char *listen_pid = NULL; bool do_socket_activation = false; @@ -275,6 +591,7 @@ reexec_in_user_namespace (int ready) } sprintf (uid, "%d", geteuid ()); + sprintf (gid, "%d", getegid ()); pid = syscall_clone (CLONE_NEWUSER|CLONE_NEWNS|SIGCHLD, NULL); if (pid < 0) @@ -319,6 +636,7 @@ reexec_in_user_namespace (int ready) setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); + setenv ("_CONTAINERS_ROOTLESS_GID", gid, 1); do ret = read (ready, &b, 1) < 0; @@ -328,29 +646,45 @@ reexec_in_user_namespace (int ready) fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno)); _exit (EXIT_FAILURE); } - close (ready); - if (b != '1') + if (b != '0') _exit (EXIT_FAILURE); if (syscall_setresgid (0, 0, 0) < 0) { fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } if (syscall_setresuid (0, 0, 0) < 0) { fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } if (chdir (cwd) < 0) { fprintf (stderr, "cannot chdir: %s\n", strerror (errno)); + write (ready, "1", 1); _exit (EXIT_FAILURE); } free (cwd); + if (pause_pid_file_path && pause_pid_file_path[0] != '\0') + { + if (create_pause_process (pause_pid_file_path, argv) < 0) + { + write (ready, "2", 1); + _exit (EXIT_FAILURE); + } + } + + do + ret = write (ready, "0", 1) < 0; + while (ret < 0 && errno == EINTR); + close (ready); + execvp (argv[0], argv); _exit (EXIT_FAILURE); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 2c99f41a4..9132c0fe5 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -22,9 +22,13 @@ import ( ) /* -extern int reexec_in_user_namespace(int ready); +#cgo remoteclient CFLAGS: -DDISABLE_JOIN_SHORTCUT +#include <stdlib.h> +extern uid_t rootless_uid(); +extern uid_t rootless_gid(); +extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); extern int reexec_in_user_namespace_wait(int pid); -extern int reexec_userns_join(int userns, int mountns); +extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); */ import "C" @@ -45,6 +49,14 @@ var ( // IsRootless tells us if we are running in rootless mode func IsRootless() bool { isRootlessOnce.Do(func() { + rootlessUIDInit := int(C.rootless_uid()) + rootlessGIDInit := int(C.rootless_gid()) + if rootlessUIDInit != 0 { + // This happens if we joined the user+mount namespace as part of + os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done") + os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit)) + os.Setenv("_CONTAINERS_ROOTLESS_GID", fmt.Sprintf("%d", rootlessGIDInit)) + } isRootless = os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" }) return isRootless @@ -60,6 +72,23 @@ func GetRootlessUID() int { return os.Geteuid() } +// GetRootlessGID returns the GID of the user in the parent userNS +func GetRootlessGID() int { + gidEnv := os.Getenv("_CONTAINERS_ROOTLESS_GID") + if gidEnv != "" { + u, _ := strconv.Atoi(gidEnv) + return u + } + + /* If the _CONTAINERS_ROOTLESS_UID is set, assume the gid==uid. */ + uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") + if uidEnv != "" { + u, _ := strconv.Atoi(uidEnv) + return u + } + return os.Getegid() +} + func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) error { path, err := exec.LookPath(tool) if err != nil { @@ -168,11 +197,14 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) { // JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. -func JoinUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { return false, -1, nil } + cPausePid := C.CString(pausePid) + defer C.free(unsafe.Pointer(cPausePid)) + userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid)) if err != nil { return false, -1, err @@ -189,7 +221,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) { if err != nil { return false, -1, err } - pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd())) + pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid) if int(pidC) < 0 { return false, -1, errors.Errorf("cannot re-exec process") } @@ -206,7 +238,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) { // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child // process. -func BecomeRootInUserNS() (bool, int, error) { +func BecomeRootInUserNS(pausePid string) (bool, int, error) { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { return false, 0, runInUser() @@ -214,18 +246,23 @@ func BecomeRootInUserNS() (bool, int, error) { return false, 0, nil } + cPausePid := C.CString(pausePid) + defer C.free(unsafe.Pointer(cPausePid)) + runtime.LockOSThread() defer runtime.UnlockOSThread() - r, w, err := os.Pipe() + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) if err != nil { return false, -1, err } + r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child") + defer r.Close() defer w.Close() defer w.Write([]byte("0")) - pidC := C.reexec_in_user_namespace(C.int(r.Fd())) + pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) pid := int(pidC) if pid < 0 { return false, -1, errors.Errorf("cannot re-exec process") @@ -280,11 +317,35 @@ func BecomeRootInUserNS() (bool, int, error) { } } - _, err = w.Write([]byte("1")) + _, err = w.Write([]byte("0")) if err != nil { return false, -1, errors.Wrapf(err, "write to sync pipe") } + b := make([]byte, 1, 1) + _, err = w.Read(b) + if err != nil { + return false, -1, errors.Wrapf(err, "read from sync pipe") + } + + if b[0] == '2' { + // We have lost the race for writing the PID file, as probably another + // process created a namespace and wrote the PID. + // Try to join it. + data, err := ioutil.ReadFile(pausePid) + if err == nil { + pid, err := strconv.ParseUint(string(data), 10, 0) + if err == nil { + return JoinUserAndMountNS(uint(pid), "") + } + } + return false, -1, errors.Wrapf(err, "error setting up the process") + } + + if b[0] != '0' { + return false, -1, errors.Wrapf(err, "error setting up the process") + } + c := make(chan os.Signal, 1) signals := []os.Signal{} diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 47b5dd7cc..221baff97 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -15,7 +15,7 @@ func IsRootless() bool { // into a new user namespace and the return code from the re-executed podman process. // If podman was re-executed the caller needs to propagate the error code returned by the child // process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration. -func BecomeRootInUserNS() (bool, int, error) { +func BecomeRootInUserNS(pausePid string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } @@ -24,10 +24,15 @@ func GetRootlessUID() int { return -1 } +// GetRootlessGID returns the GID of the user in the parent userNS +func GetRootlessGID() int { + return -1 +} + // JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount // namespace of the specified PID without looking up its parent. Useful to join directly // the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts // with a default configuration. -func JoinUserAndMountNS(pid uint) (bool, int, error) { +func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } diff --git a/pkg/spec/storage.go b/pkg/spec/storage.go index 55148b606..dcc149b55 100644 --- a/pkg/spec/storage.go +++ b/pkg/spec/storage.go @@ -135,6 +135,29 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount, unifiedMounts[initMount.Destination] = initMount } + // Before superceding, we need to find volume mounts which conflict with + // named volumes, and vice versa. + // We'll delete the conflicts here as we supercede. + for dest := range unifiedMounts { + if _, ok := baseVolumes[dest]; ok { + delete(baseVolumes, dest) + } + } + for dest := range unifiedVolumes { + if _, ok := baseMounts[dest]; ok { + delete(baseMounts, dest) + } + } + + // Supercede volumes-from/image volumes with unified volumes from above. + // This is an unconditional replacement. + for dest, mount := range unifiedMounts { + baseMounts[dest] = mount + } + for dest, volume := range unifiedVolumes { + baseVolumes[dest] = volume + } + // If requested, add tmpfs filesystems for read-only containers. // Need to keep track of which we created, so we don't modify options // for them later... @@ -146,14 +169,14 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount, if config.ReadOnlyRootfs && config.ReadOnlyTmpfs { options := []string{"rw", "rprivate", "nosuid", "nodev", "tmpcopyup", "size=65536k"} for dest := range readonlyTmpfs { - if _, ok := unifiedMounts[dest]; ok { + if _, ok := baseMounts[dest]; ok { continue } localOpts := options if dest == "/run" { localOpts = append(localOpts, "noexec") } - unifiedMounts[dest] = spec.Mount{ + baseMounts[dest] = spec.Mount{ Destination: dest, Type: "tmpfs", Source: "tmpfs", @@ -163,15 +186,6 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount, } } - // Supercede volumes-from/image volumes with unified volumes from above. - // This is an unconditional replacement. - for dest, mount := range unifiedMounts { - baseMounts[dest] = mount - } - for dest, volume := range unifiedVolumes { - baseVolumes[dest] = volume - } - // Check for conflicts between named volumes and mounts for dest := range baseMounts { if _, ok := baseVolumes[dest]; ok { diff --git a/pkg/util/utils.go b/pkg/util/utils.go index 2a52e5129..a074f276c 100644 --- a/pkg/util/utils.go +++ b/pkg/util/utils.go @@ -3,6 +3,7 @@ package util import ( "fmt" "os" + ouser "os/user" "path/filepath" "strings" "sync" @@ -11,6 +12,8 @@ import ( "github.com/BurntSushi/toml" "github.com/containers/image/types" "github.com/containers/libpod/cmd/podman/cliconfig" + "github.com/containers/libpod/pkg/namespaces" + "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage" "github.com/containers/storage/pkg/idtools" "github.com/opencontainers/image-spec/specs-go/v1" @@ -131,11 +134,59 @@ func GetImageConfig(changes []string) (v1.ImageConfig, error) { } // ParseIDMapping takes idmappings and subuid and subgid maps and returns a storage mapping -func ParseIDMapping(UIDMapSlice, GIDMapSlice []string, subUIDMap, subGIDMap string) (*storage.IDMappingOptions, error) { +func ParseIDMapping(mode namespaces.UsernsMode, UIDMapSlice, GIDMapSlice []string, subUIDMap, subGIDMap string) (*storage.IDMappingOptions, error) { options := storage.IDMappingOptions{ HostUIDMapping: true, HostGIDMapping: true, } + + if mode.IsKeepID() { + if len(UIDMapSlice) > 0 || len(GIDMapSlice) > 0 { + return nil, errors.New("cannot specify custom mappings with --userns=keep-id") + } + if len(subUIDMap) > 0 || len(subGIDMap) > 0 { + return nil, errors.New("cannot specify subuidmap or subgidmap with --userns=keep-id") + } + if rootless.IsRootless() { + uid := rootless.GetRootlessUID() + gid := rootless.GetRootlessGID() + + username := os.Getenv("USER") + if username == "" { + user, err := ouser.LookupId(fmt.Sprintf("%d", uid)) + if err == nil { + username = user.Username + } + } + mappings, err := idtools.NewIDMappings(username, username) + if err != nil { + return nil, errors.Wrapf(err, "cannot find mappings for user %s", username) + } + maxUID, maxGID := 0, 0 + for _, u := range mappings.UIDs() { + maxUID += u.Size + } + for _, g := range mappings.GIDs() { + maxGID += g.Size + } + + options.UIDMap, options.GIDMap = nil, nil + + options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: 0, HostID: 1, Size: uid}) + options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: uid, HostID: 0, Size: 1}) + options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: uid + 1, HostID: uid + 1, Size: maxUID - uid}) + + options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: 0, HostID: 1, Size: gid}) + options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: gid, HostID: 0, Size: 1}) + options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: gid + 1, HostID: gid + 1, Size: maxGID - gid}) + + options.HostUIDMapping = false + options.HostGIDMapping = false + } + // Simply ignore the setting and do not setup an inner namespace for root as it is a no-op + return &options, nil + } + if subGIDMap == "" && subUIDMap != "" { subGIDMap = subUIDMap } diff --git a/pkg/util/utils_supported.go b/pkg/util/utils_supported.go index 8b98658c2..3d9140a23 100644 --- a/pkg/util/utils_supported.go +++ b/pkg/util/utils_supported.go @@ -82,3 +82,13 @@ func GetRootlessRuntimeDir() (string, error) { } return rootlessRuntimeDir, nil } + +// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for +// the pause process +func GetRootlessPauseProcessPidPath() (string, error) { + runtimeDir, err := GetRootlessRuntimeDir() + if err != nil { + return "", err + } + return filepath.Join(runtimeDir, "libpod", "pause.pid"), nil +} diff --git a/pkg/util/utils_windows.go b/pkg/util/utils_windows.go index b33733da9..3faa6f10c 100644 --- a/pkg/util/utils_windows.go +++ b/pkg/util/utils_windows.go @@ -15,3 +15,9 @@ func GetRootlessRuntimeDir() (string, error) { func IsCgroup2UnifiedMode() (bool, error) { return false, errors.New("this function is not implemented for windows") } + +// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for +// the pause process +func GetRootlessPauseProcessPidPath() (string, error) { + return "", errors.New("this function is not implemented for windows") +} diff --git a/pkg/varlinkapi/images.go b/pkg/varlinkapi/images.go index 20f82a1c6..fa1a0a109 100644 --- a/pkg/varlinkapi/images.go +++ b/pkg/varlinkapi/images.go @@ -188,7 +188,6 @@ func (i *LibpodAPI) BuildImage(call iopodman.VarlinkCall, config iopodman.BuildI RemoveIntermediateCtrs: config.RemoteIntermediateCtrs, ReportWriter: &output, RuntimeArgs: config.RuntimeArgs, - SignaturePolicyPath: config.SignaturePolicyPath, Squash: config.Squash, SystemContext: &systemContext, } @@ -311,10 +310,9 @@ func (i *LibpodAPI) HistoryImage(call iopodman.VarlinkCall, name string) error { } // PushImage pushes an local image to registry -func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVerify *bool, signaturePolicy, creds, certDir string, compress bool, format string, removeSignatures bool, signBy string) error { +func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, compress bool, format string, removeSignatures bool, signBy string) error { var ( - registryCreds *types.DockerAuthConfig - manifestType string + manifestType string ) newImage, err := i.Runtime.ImageRuntime().NewFromLocal(name) if err != nil { @@ -324,20 +322,7 @@ func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVe if tag != "" { destname = tag } - if creds != "" { - creds, err := util.ParseRegistryCreds(creds) - if err != nil { - return err - } - registryCreds = creds - } - dockerRegistryOptions := image.DockerRegistryOptions{ - DockerRegistryCreds: registryCreds, - DockerCertPath: certDir, - } - if tlsVerify != nil { - dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*tlsVerify) - } + dockerRegistryOptions := image.DockerRegistryOptions{} if format != "" { switch format { case "oci": //nolint @@ -362,7 +347,7 @@ func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVe output := bytes.NewBuffer([]byte{}) c := make(chan error) go func() { - err := newImage.PushImageToHeuristicDestination(getContext(), destname, manifestType, "", signaturePolicy, output, compress, so, &dockerRegistryOptions, nil) + err := newImage.PushImageToHeuristicDestination(getContext(), destname, manifestType, "", "", output, compress, so, &dockerRegistryOptions, nil) c <- err close(c) }() @@ -439,18 +424,14 @@ func (i *LibpodAPI) RemoveImage(call iopodman.VarlinkCall, name string, force bo // SearchImages searches all registries configured in /etc/containers/registries.conf for an image // Requires an image name and a search limit as int -func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit *int64, tlsVerify *bool, filter iopodman.ImageSearchFilter) error { +func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit *int64, filter iopodman.ImageSearchFilter) error { // Transform all arguments to proper types first argLimit := 0 - argTLSVerify := types.OptionalBoolUndefined argIsOfficial := types.OptionalBoolUndefined argIsAutomated := types.OptionalBoolUndefined if limit != nil { argLimit = int(*limit) } - if tlsVerify != nil { - argTLSVerify = types.NewOptionalBool(!*tlsVerify) - } if filter.Is_official != nil { argIsOfficial = types.NewOptionalBool(*filter.Is_official) } @@ -466,9 +447,8 @@ func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit } searchOptions := image.SearchOptions{ - Limit: argLimit, - Filter: sFilter, - InsecureSkipTLSVerify: argTLSVerify, + Limit: argLimit, + Filter: sFilter, } results, err := image.SearchImages(query, searchOptions) if err != nil { @@ -600,27 +580,11 @@ func (i *LibpodAPI) ExportImage(call iopodman.VarlinkCall, name, destination str } // PullImage pulls an image from a registry to the image store. -func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string, certDir, creds, signaturePolicy string, tlsVerify *bool) error { +func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string) error { var ( - registryCreds *types.DockerAuthConfig - imageID string + imageID string ) - if creds != "" { - creds, err := util.ParseRegistryCreds(creds) - if err != nil { - return err - } - registryCreds = creds - } - - dockerRegistryOptions := image.DockerRegistryOptions{ - DockerRegistryCreds: registryCreds, - DockerCertPath: certDir, - } - if tlsVerify != nil { - dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*tlsVerify) - } - + dockerRegistryOptions := image.DockerRegistryOptions{} so := image.SigningOptions{} if call.WantsMore() { @@ -634,14 +598,14 @@ func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string, certDir, c if err != nil { c <- errors.Wrapf(err, "error parsing %q", name) } - newImage, err := i.Runtime.ImageRuntime().LoadFromArchiveReference(getContext(), srcRef, signaturePolicy, output) + newImage, err := i.Runtime.ImageRuntime().LoadFromArchiveReference(getContext(), srcRef, "", output) if err != nil { c <- errors.Wrapf(err, "error pulling image from %q", name) } else { imageID = newImage[0].ID() } } else { - newImage, err := i.Runtime.ImageRuntime().New(getContext(), name, signaturePolicy, "", output, &dockerRegistryOptions, so, false, nil) + newImage, err := i.Runtime.ImageRuntime().New(getContext(), name, "", "", output, &dockerRegistryOptions, so, false, nil) if err != nil { c <- errors.Wrapf(err, "unable to pull %s", name) } else { @@ -709,18 +673,12 @@ func (i *LibpodAPI) ImageExists(call iopodman.VarlinkCall, name string) error { // ContainerRunlabel ... func (i *LibpodAPI) ContainerRunlabel(call iopodman.VarlinkCall, input iopodman.Runlabel) error { ctx := getContext() - dockerRegistryOptions := image.DockerRegistryOptions{ - DockerCertPath: input.CertDir, - } - if input.TlsVerify != nil { - dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*input.TlsVerify) - } - + dockerRegistryOptions := image.DockerRegistryOptions{} stdErr := os.Stderr stdOut := os.Stdout stdIn := os.Stdin - runLabel, imageName, err := shared.GetRunlabel(input.Label, input.Image, ctx, i.Runtime, input.Pull, input.Creds, dockerRegistryOptions, input.Authfile, input.SignaturePolicyPath, nil) + runLabel, imageName, err := shared.GetRunlabel(input.Label, input.Image, ctx, i.Runtime, input.Pull, "", dockerRegistryOptions, input.Authfile, "", nil) if err != nil { return call.ReplyErrorOccurred(err.Error()) } |