aboutsummaryrefslogtreecommitdiff
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/adapter/containers_remote.go2
-rw-r--r--pkg/adapter/runtime_remote.go41
-rw-r--r--pkg/namespaces/namespaces.go7
-rw-r--r--pkg/registries/registries.go6
-rw-r--r--pkg/rootless/rootless_linux.c428
-rw-r--r--pkg/rootless/rootless_linux.go77
-rw-r--r--pkg/rootless/rootless_unsupported.go9
-rw-r--r--pkg/spec/storage.go36
-rw-r--r--pkg/util/utils.go53
-rw-r--r--pkg/util/utils_supported.go10
-rw-r--r--pkg/util/utils_windows.go6
-rw-r--r--pkg/varlinkapi/images.go70
12 files changed, 579 insertions, 166 deletions
diff --git a/pkg/adapter/containers_remote.go b/pkg/adapter/containers_remote.go
index 63b0f9d2f..c34495b3d 100644
--- a/pkg/adapter/containers_remote.go
+++ b/pkg/adapter/containers_remote.go
@@ -561,7 +561,7 @@ func (r *LocalRuntime) attach(ctx context.Context, stdin, stdout *os.File, cid s
if err != nil {
return nil, err
}
- resize := make(chan remotecommand.TerminalSize)
+ resize := make(chan remotecommand.TerminalSize, 5)
haveTerminal := terminal.IsTerminal(int(os.Stdin.Fd()))
// Check if we are attached to a terminal. If we are, generate resize
diff --git a/pkg/adapter/runtime_remote.go b/pkg/adapter/runtime_remote.go
index e86287462..e0c0898bd 100644
--- a/pkg/adapter/runtime_remote.go
+++ b/pkg/adapter/runtime_remote.go
@@ -182,10 +182,7 @@ func (r *LocalRuntime) NewImageFromLocal(name string) (*ContainerImage, error) {
// LoadFromArchiveReference creates an image from a local archive
func (r *LocalRuntime) LoadFromArchiveReference(ctx context.Context, srcRef types.ImageReference, signaturePolicyPath string, writer io.Writer) ([]*ContainerImage, error) {
var iid string
- // TODO We need to find a way to leak certDir, creds, and the tlsverify into this function, normally this would
- // come from cli options but we don't want want those in here either.
- tlsverify := true
- reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, srcRef.DockerReference().String(), "", "", signaturePolicyPath, &tlsverify)
+ reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, srcRef.DockerReference().String())
if err != nil {
return nil, err
}
@@ -217,21 +214,7 @@ func (r *LocalRuntime) New(ctx context.Context, name, signaturePolicyPath, authf
if label != nil {
return nil, errors.New("the remote client function does not support checking a remote image for a label")
}
- var (
- tlsVerify bool
- tlsVerifyPtr *bool
- )
- if dockeroptions.DockerInsecureSkipTLSVerify == types.OptionalBoolFalse {
- tlsVerify = true
- tlsVerifyPtr = &tlsVerify
-
- }
- if dockeroptions.DockerInsecureSkipTLSVerify == types.OptionalBoolTrue {
- tlsVerify = false
- tlsVerifyPtr = &tlsVerify
- }
-
- reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, name, dockeroptions.DockerCertPath, "", signaturePolicyPath, tlsVerifyPtr)
+ reply, err := iopodman.PullImage().Send(r.Conn, varlink.More, name)
if err != nil {
return nil, err
}
@@ -429,9 +412,8 @@ func (r *LocalRuntime) Build(ctx context.Context, c *cliconfig.BuildValues, opti
Quiet: options.Quiet,
RemoteIntermediateCtrs: options.RemoveIntermediateCtrs,
// ReportWriter:
- RuntimeArgs: options.RuntimeArgs,
- SignaturePolicyPath: options.SignaturePolicyPath,
- Squash: options.Squash,
+ RuntimeArgs: options.RuntimeArgs,
+ Squash: options.Squash,
}
// tar the file
outputFile, err := ioutil.TempFile("", "varlink_tar_send")
@@ -570,20 +552,7 @@ func (r *LocalRuntime) RemoveVolumes(ctx context.Context, c *cliconfig.VolumeRmV
func (r *LocalRuntime) Push(ctx context.Context, srcName, destination, manifestMIMEType, authfile, signaturePolicyPath string, writer io.Writer, forceCompress bool, signingOptions image.SigningOptions, dockerRegistryOptions *image.DockerRegistryOptions, additionalDockerArchiveTags []reference.NamedTagged) error {
- var (
- tls *bool
- tlsVerify bool
- )
- if dockerRegistryOptions.DockerInsecureSkipTLSVerify == types.OptionalBoolTrue {
- tlsVerify = false
- tls = &tlsVerify
- }
- if dockerRegistryOptions.DockerInsecureSkipTLSVerify == types.OptionalBoolFalse {
- tlsVerify = true
- tls = &tlsVerify
- }
-
- reply, err := iopodman.PushImage().Send(r.Conn, varlink.More, srcName, destination, tls, signaturePolicyPath, "", dockerRegistryOptions.DockerCertPath, forceCompress, manifestMIMEType, signingOptions.RemoveSignatures, signingOptions.SignBy)
+ reply, err := iopodman.PushImage().Send(r.Conn, varlink.More, srcName, destination, forceCompress, manifestMIMEType, signingOptions.RemoveSignatures, signingOptions.SignBy)
if err != nil {
return err
}
diff --git a/pkg/namespaces/namespaces.go b/pkg/namespaces/namespaces.go
index fde6118af..ec9276344 100644
--- a/pkg/namespaces/namespaces.go
+++ b/pkg/namespaces/namespaces.go
@@ -12,6 +12,11 @@ func (n UsernsMode) IsHost() bool {
return n == "host"
}
+// IsKeepID indicates whether container uses a mapping where the (uid, gid) on the host is lept inside of the namespace.
+func (n UsernsMode) IsKeepID() bool {
+ return n == "keep-id"
+}
+
// IsPrivate indicates whether the container uses the a private userns.
func (n UsernsMode) IsPrivate() bool {
return !(n.IsHost())
@@ -21,7 +26,7 @@ func (n UsernsMode) IsPrivate() bool {
func (n UsernsMode) Valid() bool {
parts := strings.Split(string(n), ":")
switch mode := parts[0]; mode {
- case "", "host":
+ case "", "host", "keep-id":
default:
return false
}
diff --git a/pkg/registries/registries.go b/pkg/registries/registries.go
index fdb7f33c5..5c4ecd020 100644
--- a/pkg/registries/registries.go
+++ b/pkg/registries/registries.go
@@ -51,7 +51,7 @@ func GetRegistries() ([]string, error) {
}
for _, reg := range registries {
if reg.Search {
- searchRegistries = append(searchRegistries, reg.URL)
+ searchRegistries = append(searchRegistries, reg.Location)
}
}
return searchRegistries, nil
@@ -66,7 +66,7 @@ func GetBlockedRegistries() ([]string, error) {
}
for _, reg := range registries {
if reg.Blocked {
- blockedRegistries = append(blockedRegistries, reg.URL)
+ blockedRegistries = append(blockedRegistries, reg.Location)
}
}
return blockedRegistries, nil
@@ -81,7 +81,7 @@ func GetInsecureRegistries() ([]string, error) {
}
for _, reg := range registries {
if reg.Insecure {
- insecureRegistries = append(insecureRegistries, reg.URL)
+ insecureRegistries = append(insecureRegistries, reg.Location)
}
}
return insecureRegistries, nil
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 1d32b1adb..098ca7830 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -17,38 +17,30 @@
#include <sys/prctl.h>
#include <dirent.h>
#include <sys/select.h>
+#include <stdio.h>
+
+#ifndef RENAME_NOREPLACE
+# define RENAME_NOREPLACE (1 << 0)
+
+int renameat2 (int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags)
+{
+# ifdef __NR_renameat2
+ return (int) syscall (__NR_renameat2, olddirfd, oldpath, newdirfd, newpath, flags);
+# else
+ /* no way to implement it atomically. */
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+#endif
static const char *_max_user_namespaces = "/proc/sys/user/max_user_namespaces";
static const char *_unprivileged_user_namespaces = "/proc/sys/kernel/unprivileged_userns_clone";
static int open_files_max_fd;
fd_set open_files_set;
-
-static void __attribute__((constructor)) init()
-{
- DIR *d;
-
- /* Store how many FDs were open before the Go runtime kicked in. */
- d = opendir ("/proc/self/fd");
- if (d)
- {
- struct dirent *ent;
-
- FD_ZERO (&open_files_set);
- for (ent = readdir (d); ent; ent = readdir (d))
- {
- int fd = atoi (ent->d_name);
- if (fd != dirfd (d))
- {
- if (fd > open_files_max_fd)
- open_files_max_fd = fd;
- FD_SET (fd, &open_files_set);
- }
- }
- closedir (d);
- }
-}
-
+static uid_t rootless_uid_init;
+static gid_t rootless_gid_init;
static int
syscall_setresuid (uid_t ruid, uid_t euid, uid_t suid)
@@ -62,14 +54,24 @@ syscall_setresgid (gid_t rgid, gid_t egid, gid_t sgid)
return (int) syscall (__NR_setresgid, rgid, egid, sgid);
}
-static int
-syscall_clone (unsigned long flags, void *child_stack)
+uid_t
+rootless_uid ()
{
-#if defined(__s390__) || defined(__CRIS__)
- return (int) syscall (__NR_clone, child_stack, flags);
-#else
- return (int) syscall (__NR_clone, flags, child_stack);
-#endif
+ return rootless_uid_init;
+}
+
+uid_t
+rootless_gid ()
+{
+ return rootless_gid_init;
+}
+
+static void
+do_pause ()
+{
+ prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL);
+ while (1)
+ pause ();
}
static char **
@@ -84,7 +86,10 @@ get_cmd_line_args (pid_t pid)
int i, argc = 0;
char **argv;
- sprintf (path, "/proc/%d/cmdline", pid);
+ if (pid)
+ sprintf (path, "/proc/%d/cmdline", pid);
+ else
+ strcpy (path, "/proc/self/cmdline");
fd = open (path, O_RDONLY);
if (fd < 0)
return NULL;
@@ -99,7 +104,10 @@ get_cmd_line_args (pid_t pid)
ret = read (fd, buffer + used, allocated - used);
while (ret < 0 && errno == EINTR);
if (ret < 0)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
if (ret == 0)
break;
@@ -109,11 +117,12 @@ get_cmd_line_args (pid_t pid)
{
allocated += 512;
char *tmp = realloc (buffer, allocated);
- if (buffer == NULL) {
- free(buffer);
- return NULL;
- }
- buffer=tmp;
+ if (buffer == NULL)
+ {
+ free (buffer);
+ return NULL;
+ }
+ buffer = tmp;
}
}
close (fd);
@@ -122,11 +131,17 @@ get_cmd_line_args (pid_t pid)
if (buffer[i] == '\0')
argc++;
if (argc == 0)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
argv = malloc (sizeof (char *) * (argc + 1));
if (argv == NULL)
- return NULL;
+ {
+ free (buffer);
+ return NULL;
+ }
argc = 0;
argv[argc++] = buffer;
@@ -139,11 +154,303 @@ get_cmd_line_args (pid_t pid)
return argv;
}
+static bool
+can_use_shortcut ()
+{
+ int argc;
+ char **argv;
+ bool ret = true;
+
+#ifdef DISABLE_JOIN_SHORTCUT
+ return false;
+#endif
+
+ argv = get_cmd_line_args (0);
+ if (argv == NULL)
+ return NULL;
+
+ for (argc = 0; argv[argc]; argc++)
+ {
+ if (argc == 0 || argv[argc][0] == '-')
+ continue;
+
+ if (strcmp (argv[argc], "mount") == 0
+ || strcmp (argv[argc], "search") == 0
+ || strcmp (argv[argc], "system") == 0)
+ {
+ ret = false;
+ break;
+ }
+ }
+
+ free (argv[0]);
+ free (argv);
+ return ret;
+}
+
+static void __attribute__((constructor)) init()
+{
+ const char *xdg_runtime_dir;
+ const char *pause;
+ DIR *d;
+
+ pause = getenv ("_PODMAN_PAUSE");
+ if (pause && pause[0])
+ {
+ do_pause ();
+ _exit (EXIT_FAILURE);
+ }
+
+ /* Store how many FDs were open before the Go runtime kicked in. */
+ d = opendir ("/proc/self/fd");
+ if (d)
+ {
+ struct dirent *ent;
+
+ FD_ZERO (&open_files_set);
+ for (ent = readdir (d); ent; ent = readdir (d))
+ {
+ int fd = atoi (ent->d_name);
+ if (fd != dirfd (d))
+ {
+ if (fd > open_files_max_fd)
+ open_files_max_fd = fd;
+ FD_SET (fd, &open_files_set);
+ }
+ }
+ closedir (d);
+ }
+
+ /* Shortcut. If we are able to join the pause pid file, do it now so we don't
+ need to re-exec. */
+ xdg_runtime_dir = getenv ("XDG_RUNTIME_DIR");
+ if (xdg_runtime_dir && xdg_runtime_dir[0] && can_use_shortcut ())
+ {
+ int r;
+ int fd;
+ long pid;
+ char buf[12];
+ uid_t uid;
+ gid_t gid;
+ char path[PATH_MAX];
+ const char *const suffix = "/libpod/pause.pid";
+ char *cwd = getcwd (NULL, 0);
+
+ if (cwd == NULL)
+ {
+ fprintf (stderr, "error getting current working directory: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
+ if (strlen (xdg_runtime_dir) >= PATH_MAX - strlen (suffix))
+ {
+ fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %s", strerror (ENAMETOOLONG));
+ exit (EXIT_FAILURE);
+ }
+
+ sprintf (path, "%s%s", xdg_runtime_dir, suffix);
+ fd = open (path, O_RDONLY);
+ if (fd < 0)
+ {
+ free (cwd);
+ return;
+ }
+
+ r = read (fd, buf, sizeof (buf));
+ close (fd);
+ if (r < 0)
+ {
+ free (cwd);
+ return;
+ }
+ pid = strtol (buf, NULL, 10);
+ if (pid == LONG_MAX)
+ {
+ free (cwd);
+ return;
+ }
+
+ uid = geteuid ();
+ gid = getegid ();
+
+ sprintf (path, "/proc/%d/ns/user", pid);
+ fd = open (path, O_RDONLY);
+ if (fd < 0 || setns (fd, 0) < 0)
+ {
+ free (cwd);
+ return;
+ }
+ close (fd);
+
+ /* Errors here cannot be ignored as we already joined a ns. */
+ sprintf (path, "/proc/%d/ns/mnt", pid);
+ fd = open (path, O_RDONLY);
+ if (fd < 0)
+ {
+ fprintf (stderr, "cannot open %s: %s", path, strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+
+ r = setns (fd, 0);
+ if (r < 0)
+ {
+ fprintf (stderr, "cannot join mount namespace for %d: %s", pid, strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+ close (fd);
+
+ if (syscall_setresgid (0, 0, 0) < 0)
+ {
+ fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
+ if (syscall_setresuid (0, 0, 0) < 0)
+ {
+ fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
+ if (chdir (cwd) < 0)
+ {
+ fprintf (stderr, "cannot chdir: %s\n", strerror (errno));
+ _exit (EXIT_FAILURE);
+ }
+
+ free (cwd);
+ rootless_uid_init = uid;
+ rootless_gid_init = gid;
+ }
+}
+
+static int
+syscall_clone (unsigned long flags, void *child_stack)
+{
+#if defined(__s390__) || defined(__CRIS__)
+ return (int) syscall (__NR_clone, child_stack, flags);
+#else
+ return (int) syscall (__NR_clone, flags, child_stack);
+#endif
+}
+
+static int
+create_pause_process (const char *pause_pid_file_path, char **argv)
+{
+ int r, p[2];
+
+ if (pipe (p) < 0)
+ _exit (EXIT_FAILURE);
+
+ r = fork ();
+ if (r < 0)
+ _exit (EXIT_FAILURE);
+
+ if (r)
+ {
+ char b;
+
+ close (p[1]);
+ /* Block until we write the pid file. */
+ do
+ r = read (p[0], &b, 1);
+ while (r < 0 && errno == EINTR);
+ close (p[0]);
+
+ return r == 1 && b == '0' ? 0 : -1;
+ }
+ else
+ {
+ int fd;
+ pid_t pid;
+
+ close (p[0]);
+
+ setsid ();
+ pid = fork ();
+ if (r < 0)
+ _exit (EXIT_FAILURE);
+
+ if (pid)
+ {
+ char pid_str[12];
+ char *tmp_file_path = NULL;
+
+ sprintf (pid_str, "%d", pid);
+
+ asprintf (&tmp_file_path, "%s.XXXXXX", pause_pid_file_path);
+ if (tmp_file_path == NULL)
+ {
+ kill (pid, SIGKILL);
+ _exit (EXIT_FAILURE);
+ }
+
+ fd = mkstemp (tmp_file_path);
+ if (fd < 0)
+ {
+ kill (pid, SIGKILL);
+ _exit (EXIT_FAILURE);
+ }
+
+ do
+ r = write (fd, pid_str, strlen (pid_str));
+ while (r < 0 && errno == EINTR);
+ if (r < 0)
+ {
+ kill (pid, SIGKILL);
+ _exit (EXIT_FAILURE);
+ }
+ close (fd);
+
+ /* There can be another process at this point trying to configure the user namespace and the pause
+ process, do not override the pid file if it already exists. */
+ if (renameat2 (AT_FDCWD, tmp_file_path, AT_FDCWD, pause_pid_file_path, RENAME_NOREPLACE) < 0)
+ {
+ unlink (tmp_file_path);
+ kill (pid, SIGKILL);
+ _exit (EXIT_FAILURE);
+ }
+
+ do
+ r = write (p[1], "0", 1);
+ while (r < 0 && errno == EINTR);
+ close (p[1]);
+
+ _exit (EXIT_SUCCESS);
+ }
+ else
+ {
+ int null;
+
+ close (p[1]);
+
+ null = open ("/dev/null", O_RDWR);
+ if (null >= 0)
+ {
+ dup2 (null, 0);
+ dup2 (null, 1);
+ dup2 (null, 2);
+ close (null);
+ }
+
+ for (fd = 3; fd < open_files_max_fd + 16; fd++)
+ close (fd);
+
+ setenv ("_PODMAN_PAUSE", "1", 1);
+ execlp (argv[0], NULL);
+
+ /* If the execve fails, then do the pause here. */
+ do_pause ();
+ _exit (EXIT_FAILURE);
+ }
+ }
+}
+
int
-reexec_userns_join (int userns, int mountns)
+reexec_userns_join (int userns, int mountns, char *pause_pid_file_path)
{
pid_t ppid = getpid ();
char uid[16];
+ char gid[16];
char **argv;
int pid;
char *cwd = getcwd (NULL, 0);
@@ -155,6 +462,7 @@ reexec_userns_join (int userns, int mountns)
}
sprintf (uid, "%d", geteuid ());
+ sprintf (gid, "%d", getegid ());
argv = get_cmd_line_args (ppid);
if (argv == NULL)
@@ -181,6 +489,7 @@ reexec_userns_join (int userns, int mountns)
setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
+ setenv ("_CONTAINERS_ROOTLESS_GID", gid, 1);
if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0)
{
@@ -200,7 +509,7 @@ reexec_userns_join (int userns, int mountns)
fprintf (stderr, "cannot setns: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
- close (userns);
+ close (mountns);
if (syscall_setresgid (0, 0, 0) < 0)
{
@@ -221,6 +530,12 @@ reexec_userns_join (int userns, int mountns)
}
free (cwd);
+ if (pause_pid_file_path && pause_pid_file_path[0] != '\0')
+ {
+ /* We ignore errors here as we didn't create the namespace anyway. */
+ create_pause_process (pause_pid_file_path, argv);
+ }
+
execvp (argv[0], argv);
_exit (EXIT_FAILURE);
@@ -246,7 +561,7 @@ check_proc_sys_userns_file (const char *path)
}
int
-reexec_in_user_namespace (int ready)
+reexec_in_user_namespace (int ready, char *pause_pid_file_path)
{
int ret;
pid_t pid;
@@ -254,6 +569,7 @@ reexec_in_user_namespace (int ready)
pid_t ppid = getpid ();
char **argv;
char uid[16];
+ char gid[16];
char *listen_fds = NULL;
char *listen_pid = NULL;
bool do_socket_activation = false;
@@ -275,6 +591,7 @@ reexec_in_user_namespace (int ready)
}
sprintf (uid, "%d", geteuid ());
+ sprintf (gid, "%d", getegid ());
pid = syscall_clone (CLONE_NEWUSER|CLONE_NEWNS|SIGCHLD, NULL);
if (pid < 0)
@@ -319,6 +636,7 @@ reexec_in_user_namespace (int ready)
setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
+ setenv ("_CONTAINERS_ROOTLESS_GID", gid, 1);
do
ret = read (ready, &b, 1) < 0;
@@ -328,29 +646,45 @@ reexec_in_user_namespace (int ready)
fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno));
_exit (EXIT_FAILURE);
}
- close (ready);
- if (b != '1')
+ if (b != '0')
_exit (EXIT_FAILURE);
if (syscall_setresgid (0, 0, 0) < 0)
{
fprintf (stderr, "cannot setresgid: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
if (syscall_setresuid (0, 0, 0) < 0)
{
fprintf (stderr, "cannot setresuid: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
if (chdir (cwd) < 0)
{
fprintf (stderr, "cannot chdir: %s\n", strerror (errno));
+ write (ready, "1", 1);
_exit (EXIT_FAILURE);
}
free (cwd);
+ if (pause_pid_file_path && pause_pid_file_path[0] != '\0')
+ {
+ if (create_pause_process (pause_pid_file_path, argv) < 0)
+ {
+ write (ready, "2", 1);
+ _exit (EXIT_FAILURE);
+ }
+ }
+
+ do
+ ret = write (ready, "0", 1) < 0;
+ while (ret < 0 && errno == EINTR);
+ close (ready);
+
execvp (argv[0], argv);
_exit (EXIT_FAILURE);
diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go
index 2c99f41a4..9132c0fe5 100644
--- a/pkg/rootless/rootless_linux.go
+++ b/pkg/rootless/rootless_linux.go
@@ -22,9 +22,13 @@ import (
)
/*
-extern int reexec_in_user_namespace(int ready);
+#cgo remoteclient CFLAGS: -DDISABLE_JOIN_SHORTCUT
+#include <stdlib.h>
+extern uid_t rootless_uid();
+extern uid_t rootless_gid();
+extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid);
-extern int reexec_userns_join(int userns, int mountns);
+extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
*/
import "C"
@@ -45,6 +49,14 @@ var (
// IsRootless tells us if we are running in rootless mode
func IsRootless() bool {
isRootlessOnce.Do(func() {
+ rootlessUIDInit := int(C.rootless_uid())
+ rootlessGIDInit := int(C.rootless_gid())
+ if rootlessUIDInit != 0 {
+ // This happens if we joined the user+mount namespace as part of
+ os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done")
+ os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit))
+ os.Setenv("_CONTAINERS_ROOTLESS_GID", fmt.Sprintf("%d", rootlessGIDInit))
+ }
isRootless = os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != ""
})
return isRootless
@@ -60,6 +72,23 @@ func GetRootlessUID() int {
return os.Geteuid()
}
+// GetRootlessGID returns the GID of the user in the parent userNS
+func GetRootlessGID() int {
+ gidEnv := os.Getenv("_CONTAINERS_ROOTLESS_GID")
+ if gidEnv != "" {
+ u, _ := strconv.Atoi(gidEnv)
+ return u
+ }
+
+ /* If the _CONTAINERS_ROOTLESS_UID is set, assume the gid==uid. */
+ uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
+ if uidEnv != "" {
+ u, _ := strconv.Atoi(uidEnv)
+ return u
+ }
+ return os.Getegid()
+}
+
func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) error {
path, err := exec.LookPath(tool)
if err != nil {
@@ -168,11 +197,14 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) {
// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process.
-func JoinUserAndMountNS(pid uint) (bool, int, error) {
+func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
return false, -1, nil
}
+ cPausePid := C.CString(pausePid)
+ defer C.free(unsafe.Pointer(cPausePid))
+
userNS, err := os.Open(fmt.Sprintf("/proc/%d/ns/user", pid))
if err != nil {
return false, -1, err
@@ -189,7 +221,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) {
if err != nil {
return false, -1, err
}
- pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()))
+ pidC := C.reexec_userns_join(C.int(fd.Fd()), C.int(mountNS.Fd()), cPausePid)
if int(pidC) < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
}
@@ -206,7 +238,7 @@ func JoinUserAndMountNS(pid uint) (bool, int, error) {
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
-func BecomeRootInUserNS() (bool, int, error) {
+func BecomeRootInUserNS(pausePid string) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser()
@@ -214,18 +246,23 @@ func BecomeRootInUserNS() (bool, int, error) {
return false, 0, nil
}
+ cPausePid := C.CString(pausePid)
+ defer C.free(unsafe.Pointer(cPausePid))
+
runtime.LockOSThread()
defer runtime.UnlockOSThread()
- r, w, err := os.Pipe()
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
if err != nil {
return false, -1, err
}
+ r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child")
+
defer r.Close()
defer w.Close()
defer w.Write([]byte("0"))
- pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
+ pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pid := int(pidC)
if pid < 0 {
return false, -1, errors.Errorf("cannot re-exec process")
@@ -280,11 +317,35 @@ func BecomeRootInUserNS() (bool, int, error) {
}
}
- _, err = w.Write([]byte("1"))
+ _, err = w.Write([]byte("0"))
if err != nil {
return false, -1, errors.Wrapf(err, "write to sync pipe")
}
+ b := make([]byte, 1, 1)
+ _, err = w.Read(b)
+ if err != nil {
+ return false, -1, errors.Wrapf(err, "read from sync pipe")
+ }
+
+ if b[0] == '2' {
+ // We have lost the race for writing the PID file, as probably another
+ // process created a namespace and wrote the PID.
+ // Try to join it.
+ data, err := ioutil.ReadFile(pausePid)
+ if err == nil {
+ pid, err := strconv.ParseUint(string(data), 10, 0)
+ if err == nil {
+ return JoinUserAndMountNS(uint(pid), "")
+ }
+ }
+ return false, -1, errors.Wrapf(err, "error setting up the process")
+ }
+
+ if b[0] != '0' {
+ return false, -1, errors.Wrapf(err, "error setting up the process")
+ }
+
c := make(chan os.Signal, 1)
signals := []os.Signal{}
diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go
index 47b5dd7cc..221baff97 100644
--- a/pkg/rootless/rootless_unsupported.go
+++ b/pkg/rootless/rootless_unsupported.go
@@ -15,7 +15,7 @@ func IsRootless() bool {
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process. It is a convenience function for BecomeRootInUserNSWithOpts with a default configuration.
-func BecomeRootInUserNS() (bool, int, error) {
+func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
@@ -24,10 +24,15 @@ func GetRootlessUID() int {
return -1
}
+// GetRootlessGID returns the GID of the user in the parent userNS
+func GetRootlessGID() int {
+ return -1
+}
+
// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
// namespace of the specified PID without looking up its parent. Useful to join directly
// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts
// with a default configuration.
-func JoinUserAndMountNS(pid uint) (bool, int, error) {
+func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}
diff --git a/pkg/spec/storage.go b/pkg/spec/storage.go
index 55148b606..dcc149b55 100644
--- a/pkg/spec/storage.go
+++ b/pkg/spec/storage.go
@@ -135,6 +135,29 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount,
unifiedMounts[initMount.Destination] = initMount
}
+ // Before superceding, we need to find volume mounts which conflict with
+ // named volumes, and vice versa.
+ // We'll delete the conflicts here as we supercede.
+ for dest := range unifiedMounts {
+ if _, ok := baseVolumes[dest]; ok {
+ delete(baseVolumes, dest)
+ }
+ }
+ for dest := range unifiedVolumes {
+ if _, ok := baseMounts[dest]; ok {
+ delete(baseMounts, dest)
+ }
+ }
+
+ // Supercede volumes-from/image volumes with unified volumes from above.
+ // This is an unconditional replacement.
+ for dest, mount := range unifiedMounts {
+ baseMounts[dest] = mount
+ }
+ for dest, volume := range unifiedVolumes {
+ baseVolumes[dest] = volume
+ }
+
// If requested, add tmpfs filesystems for read-only containers.
// Need to keep track of which we created, so we don't modify options
// for them later...
@@ -146,14 +169,14 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount,
if config.ReadOnlyRootfs && config.ReadOnlyTmpfs {
options := []string{"rw", "rprivate", "nosuid", "nodev", "tmpcopyup", "size=65536k"}
for dest := range readonlyTmpfs {
- if _, ok := unifiedMounts[dest]; ok {
+ if _, ok := baseMounts[dest]; ok {
continue
}
localOpts := options
if dest == "/run" {
localOpts = append(localOpts, "noexec")
}
- unifiedMounts[dest] = spec.Mount{
+ baseMounts[dest] = spec.Mount{
Destination: dest,
Type: "tmpfs",
Source: "tmpfs",
@@ -163,15 +186,6 @@ func (config *CreateConfig) parseVolumes(runtime *libpod.Runtime) ([]spec.Mount,
}
}
- // Supercede volumes-from/image volumes with unified volumes from above.
- // This is an unconditional replacement.
- for dest, mount := range unifiedMounts {
- baseMounts[dest] = mount
- }
- for dest, volume := range unifiedVolumes {
- baseVolumes[dest] = volume
- }
-
// Check for conflicts between named volumes and mounts
for dest := range baseMounts {
if _, ok := baseVolumes[dest]; ok {
diff --git a/pkg/util/utils.go b/pkg/util/utils.go
index 2a52e5129..a074f276c 100644
--- a/pkg/util/utils.go
+++ b/pkg/util/utils.go
@@ -3,6 +3,7 @@ package util
import (
"fmt"
"os"
+ ouser "os/user"
"path/filepath"
"strings"
"sync"
@@ -11,6 +12,8 @@ import (
"github.com/BurntSushi/toml"
"github.com/containers/image/types"
"github.com/containers/libpod/cmd/podman/cliconfig"
+ "github.com/containers/libpod/pkg/namespaces"
+ "github.com/containers/libpod/pkg/rootless"
"github.com/containers/storage"
"github.com/containers/storage/pkg/idtools"
"github.com/opencontainers/image-spec/specs-go/v1"
@@ -131,11 +134,59 @@ func GetImageConfig(changes []string) (v1.ImageConfig, error) {
}
// ParseIDMapping takes idmappings and subuid and subgid maps and returns a storage mapping
-func ParseIDMapping(UIDMapSlice, GIDMapSlice []string, subUIDMap, subGIDMap string) (*storage.IDMappingOptions, error) {
+func ParseIDMapping(mode namespaces.UsernsMode, UIDMapSlice, GIDMapSlice []string, subUIDMap, subGIDMap string) (*storage.IDMappingOptions, error) {
options := storage.IDMappingOptions{
HostUIDMapping: true,
HostGIDMapping: true,
}
+
+ if mode.IsKeepID() {
+ if len(UIDMapSlice) > 0 || len(GIDMapSlice) > 0 {
+ return nil, errors.New("cannot specify custom mappings with --userns=keep-id")
+ }
+ if len(subUIDMap) > 0 || len(subGIDMap) > 0 {
+ return nil, errors.New("cannot specify subuidmap or subgidmap with --userns=keep-id")
+ }
+ if rootless.IsRootless() {
+ uid := rootless.GetRootlessUID()
+ gid := rootless.GetRootlessGID()
+
+ username := os.Getenv("USER")
+ if username == "" {
+ user, err := ouser.LookupId(fmt.Sprintf("%d", uid))
+ if err == nil {
+ username = user.Username
+ }
+ }
+ mappings, err := idtools.NewIDMappings(username, username)
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot find mappings for user %s", username)
+ }
+ maxUID, maxGID := 0, 0
+ for _, u := range mappings.UIDs() {
+ maxUID += u.Size
+ }
+ for _, g := range mappings.GIDs() {
+ maxGID += g.Size
+ }
+
+ options.UIDMap, options.GIDMap = nil, nil
+
+ options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: 0, HostID: 1, Size: uid})
+ options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: uid, HostID: 0, Size: 1})
+ options.UIDMap = append(options.UIDMap, idtools.IDMap{ContainerID: uid + 1, HostID: uid + 1, Size: maxUID - uid})
+
+ options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: 0, HostID: 1, Size: gid})
+ options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: gid, HostID: 0, Size: 1})
+ options.GIDMap = append(options.GIDMap, idtools.IDMap{ContainerID: gid + 1, HostID: gid + 1, Size: maxGID - gid})
+
+ options.HostUIDMapping = false
+ options.HostGIDMapping = false
+ }
+ // Simply ignore the setting and do not setup an inner namespace for root as it is a no-op
+ return &options, nil
+ }
+
if subGIDMap == "" && subUIDMap != "" {
subGIDMap = subUIDMap
}
diff --git a/pkg/util/utils_supported.go b/pkg/util/utils_supported.go
index 8b98658c2..3d9140a23 100644
--- a/pkg/util/utils_supported.go
+++ b/pkg/util/utils_supported.go
@@ -82,3 +82,13 @@ func GetRootlessRuntimeDir() (string, error) {
}
return rootlessRuntimeDir, nil
}
+
+// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for
+// the pause process
+func GetRootlessPauseProcessPidPath() (string, error) {
+ runtimeDir, err := GetRootlessRuntimeDir()
+ if err != nil {
+ return "", err
+ }
+ return filepath.Join(runtimeDir, "libpod", "pause.pid"), nil
+}
diff --git a/pkg/util/utils_windows.go b/pkg/util/utils_windows.go
index b33733da9..3faa6f10c 100644
--- a/pkg/util/utils_windows.go
+++ b/pkg/util/utils_windows.go
@@ -15,3 +15,9 @@ func GetRootlessRuntimeDir() (string, error) {
func IsCgroup2UnifiedMode() (bool, error) {
return false, errors.New("this function is not implemented for windows")
}
+
+// GetRootlessPauseProcessPidPath returns the path to the file that holds the pid for
+// the pause process
+func GetRootlessPauseProcessPidPath() (string, error) {
+ return "", errors.New("this function is not implemented for windows")
+}
diff --git a/pkg/varlinkapi/images.go b/pkg/varlinkapi/images.go
index 20f82a1c6..fa1a0a109 100644
--- a/pkg/varlinkapi/images.go
+++ b/pkg/varlinkapi/images.go
@@ -188,7 +188,6 @@ func (i *LibpodAPI) BuildImage(call iopodman.VarlinkCall, config iopodman.BuildI
RemoveIntermediateCtrs: config.RemoteIntermediateCtrs,
ReportWriter: &output,
RuntimeArgs: config.RuntimeArgs,
- SignaturePolicyPath: config.SignaturePolicyPath,
Squash: config.Squash,
SystemContext: &systemContext,
}
@@ -311,10 +310,9 @@ func (i *LibpodAPI) HistoryImage(call iopodman.VarlinkCall, name string) error {
}
// PushImage pushes an local image to registry
-func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVerify *bool, signaturePolicy, creds, certDir string, compress bool, format string, removeSignatures bool, signBy string) error {
+func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, compress bool, format string, removeSignatures bool, signBy string) error {
var (
- registryCreds *types.DockerAuthConfig
- manifestType string
+ manifestType string
)
newImage, err := i.Runtime.ImageRuntime().NewFromLocal(name)
if err != nil {
@@ -324,20 +322,7 @@ func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVe
if tag != "" {
destname = tag
}
- if creds != "" {
- creds, err := util.ParseRegistryCreds(creds)
- if err != nil {
- return err
- }
- registryCreds = creds
- }
- dockerRegistryOptions := image.DockerRegistryOptions{
- DockerRegistryCreds: registryCreds,
- DockerCertPath: certDir,
- }
- if tlsVerify != nil {
- dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*tlsVerify)
- }
+ dockerRegistryOptions := image.DockerRegistryOptions{}
if format != "" {
switch format {
case "oci": //nolint
@@ -362,7 +347,7 @@ func (i *LibpodAPI) PushImage(call iopodman.VarlinkCall, name, tag string, tlsVe
output := bytes.NewBuffer([]byte{})
c := make(chan error)
go func() {
- err := newImage.PushImageToHeuristicDestination(getContext(), destname, manifestType, "", signaturePolicy, output, compress, so, &dockerRegistryOptions, nil)
+ err := newImage.PushImageToHeuristicDestination(getContext(), destname, manifestType, "", "", output, compress, so, &dockerRegistryOptions, nil)
c <- err
close(c)
}()
@@ -439,18 +424,14 @@ func (i *LibpodAPI) RemoveImage(call iopodman.VarlinkCall, name string, force bo
// SearchImages searches all registries configured in /etc/containers/registries.conf for an image
// Requires an image name and a search limit as int
-func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit *int64, tlsVerify *bool, filter iopodman.ImageSearchFilter) error {
+func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit *int64, filter iopodman.ImageSearchFilter) error {
// Transform all arguments to proper types first
argLimit := 0
- argTLSVerify := types.OptionalBoolUndefined
argIsOfficial := types.OptionalBoolUndefined
argIsAutomated := types.OptionalBoolUndefined
if limit != nil {
argLimit = int(*limit)
}
- if tlsVerify != nil {
- argTLSVerify = types.NewOptionalBool(!*tlsVerify)
- }
if filter.Is_official != nil {
argIsOfficial = types.NewOptionalBool(*filter.Is_official)
}
@@ -466,9 +447,8 @@ func (i *LibpodAPI) SearchImages(call iopodman.VarlinkCall, query string, limit
}
searchOptions := image.SearchOptions{
- Limit: argLimit,
- Filter: sFilter,
- InsecureSkipTLSVerify: argTLSVerify,
+ Limit: argLimit,
+ Filter: sFilter,
}
results, err := image.SearchImages(query, searchOptions)
if err != nil {
@@ -600,27 +580,11 @@ func (i *LibpodAPI) ExportImage(call iopodman.VarlinkCall, name, destination str
}
// PullImage pulls an image from a registry to the image store.
-func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string, certDir, creds, signaturePolicy string, tlsVerify *bool) error {
+func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string) error {
var (
- registryCreds *types.DockerAuthConfig
- imageID string
+ imageID string
)
- if creds != "" {
- creds, err := util.ParseRegistryCreds(creds)
- if err != nil {
- return err
- }
- registryCreds = creds
- }
-
- dockerRegistryOptions := image.DockerRegistryOptions{
- DockerRegistryCreds: registryCreds,
- DockerCertPath: certDir,
- }
- if tlsVerify != nil {
- dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*tlsVerify)
- }
-
+ dockerRegistryOptions := image.DockerRegistryOptions{}
so := image.SigningOptions{}
if call.WantsMore() {
@@ -634,14 +598,14 @@ func (i *LibpodAPI) PullImage(call iopodman.VarlinkCall, name string, certDir, c
if err != nil {
c <- errors.Wrapf(err, "error parsing %q", name)
}
- newImage, err := i.Runtime.ImageRuntime().LoadFromArchiveReference(getContext(), srcRef, signaturePolicy, output)
+ newImage, err := i.Runtime.ImageRuntime().LoadFromArchiveReference(getContext(), srcRef, "", output)
if err != nil {
c <- errors.Wrapf(err, "error pulling image from %q", name)
} else {
imageID = newImage[0].ID()
}
} else {
- newImage, err := i.Runtime.ImageRuntime().New(getContext(), name, signaturePolicy, "", output, &dockerRegistryOptions, so, false, nil)
+ newImage, err := i.Runtime.ImageRuntime().New(getContext(), name, "", "", output, &dockerRegistryOptions, so, false, nil)
if err != nil {
c <- errors.Wrapf(err, "unable to pull %s", name)
} else {
@@ -709,18 +673,12 @@ func (i *LibpodAPI) ImageExists(call iopodman.VarlinkCall, name string) error {
// ContainerRunlabel ...
func (i *LibpodAPI) ContainerRunlabel(call iopodman.VarlinkCall, input iopodman.Runlabel) error {
ctx := getContext()
- dockerRegistryOptions := image.DockerRegistryOptions{
- DockerCertPath: input.CertDir,
- }
- if input.TlsVerify != nil {
- dockerRegistryOptions.DockerInsecureSkipTLSVerify = types.NewOptionalBool(!*input.TlsVerify)
- }
-
+ dockerRegistryOptions := image.DockerRegistryOptions{}
stdErr := os.Stderr
stdOut := os.Stdout
stdIn := os.Stdin
- runLabel, imageName, err := shared.GetRunlabel(input.Label, input.Image, ctx, i.Runtime, input.Pull, input.Creds, dockerRegistryOptions, input.Authfile, input.SignaturePolicyPath, nil)
+ runLabel, imageName, err := shared.GetRunlabel(input.Label, input.Image, ctx, i.Runtime, input.Pull, "", dockerRegistryOptions, input.Authfile, "", nil)
if err != nil {
return call.ReplyErrorOccurred(err.Error())
}