diff options
29 files changed, 367 insertions, 181 deletions
diff --git a/cmd/podman/containers/checkpoint.go b/cmd/podman/containers/checkpoint.go index d92bc3e5e..e8dd25978 100644 --- a/cmd/podman/containers/checkpoint.go +++ b/cmd/podman/containers/checkpoint.go @@ -55,6 +55,7 @@ func init() { flags.BoolVarP(&checkpointOptions.Keep, "keep", "k", false, "Keep all temporary checkpoint files") flags.BoolVarP(&checkpointOptions.LeaveRunning, "leave-running", "R", false, "Leave the container running after writing checkpoint to disk") flags.BoolVar(&checkpointOptions.TCPEstablished, "tcp-established", false, "Checkpoint a container with established TCP connections") + flags.BoolVar(&checkpointOptions.FileLocks, "file-locks", false, "Checkpoint a container with file locks") flags.BoolVarP(&checkpointOptions.All, "all", "a", false, "Checkpoint all running containers") exportFlagName := "export" diff --git a/cmd/podman/containers/restore.go b/cmd/podman/containers/restore.go index 4ac14001a..cf0ad5f80 100644 --- a/cmd/podman/containers/restore.go +++ b/cmd/podman/containers/restore.go @@ -53,6 +53,7 @@ func init() { flags.BoolVarP(&restoreOptions.All, "all", "a", false, "Restore all checkpointed containers") flags.BoolVarP(&restoreOptions.Keep, "keep", "k", false, "Keep all temporary checkpoint files") flags.BoolVar(&restoreOptions.TCPEstablished, "tcp-established", false, "Restore a container with established TCP connections") + flags.BoolVar(&restoreOptions.FileLocks, "file-locks", false, "Restore a container with file locks") importFlagName := "import" flags.StringVarP(&restoreOptions.Import, importFlagName, "i", "", "Restore from exported checkpoint archive (tar.gz)") diff --git a/contrib/fedora-minimal/Dockerfile b/contrib/fedora-minimal/Dockerfile deleted file mode 100644 index a051b3204..000000000 --- a/contrib/fedora-minimal/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM registry.fedoraproject.org/fedora-minimal:latest diff --git a/contrib/fedora-minimal/README.md b/contrib/fedora-minimal/README.md deleted file mode 100644 index 52bf94b53..000000000 --- a/contrib/fedora-minimal/README.md +++ /dev/null @@ -1,4 +0,0 @@ -This dockerfile exists so that the container image can be "mirrored" -onto quay.io automatically, so automated testing can be more resilient. - -https://quay.io/repository/libpod/fedora-minimal?tab=builds diff --git a/docs/source/markdown/podman-container-checkpoint.1.md b/docs/source/markdown/podman-container-checkpoint.1.md index 1faa40a94..200920ca9 100644 --- a/docs/source/markdown/podman-container-checkpoint.1.md +++ b/docs/source/markdown/podman-container-checkpoint.1.md @@ -110,6 +110,14 @@ restore. Defaults to not checkpointing *containers* with established TCP connections.\ The default is **false**. +#### **--file-locks** + +Checkpoint a *container* with file locks. If an application running in the container +is using file locks, this OPTION is required during checkpoint and restore. Otherwise +checkpointing *containers* with file locks is expected to fail. If file locks are not +used, this option is ignored.\ +The default is **false**. + #### **--with-previous** Check out the *container* with previous criu image files in pre-dump. It only works on `runc 1.0-rc3` or `higher`.\ diff --git a/docs/source/markdown/podman-container-restore.1.md b/docs/source/markdown/podman-container-restore.1.md index 962770ea0..10477fc77 100644 --- a/docs/source/markdown/podman-container-restore.1.md +++ b/docs/source/markdown/podman-container-restore.1.md @@ -81,6 +81,7 @@ to import a checkpointed *container* from another host.\ Import a pre-checkpoint tar.gz file which was exported by Podman. This option must be used with **-i** or **--import**. It only works on `runc 1.0-rc3` or `higher`. +*IMPORTANT: This OPTION is not supported on the remote client.* #### **--name**, **-n**=*name* @@ -142,6 +143,14 @@ option is ignored. Defaults to not restoring *containers* with established TCP connections.\ The default is **false**. +#### **--file-locks** + +Restore a *container* with file locks. This option is required to +restore file locks from a checkpoint image. If the checkpoint image +does not contain file locks, this option is ignored. Defaults to not +restoring file locks.\ +The default is **false**. + ## EXAMPLE Restores the container "mywebserver". ``` diff --git a/docs/source/markdown/podman-create.1.md b/docs/source/markdown/podman-create.1.md index 811d16880..b58fd1e18 100644 --- a/docs/source/markdown/podman-create.1.md +++ b/docs/source/markdown/podman-create.1.md @@ -606,7 +606,9 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and · ro, readonly: true or false (default). - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + + · idmap: true or false (default). If specified, create an idmapped mount to the target user namespace in the container. Options specific to image: @@ -622,7 +624,9 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and . relabel: shared, private. - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + · idmap: true or false (default). If specified, create an idmapped mount to the target user namespace in the container. + + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. Options specific to tmpfs: @@ -636,7 +640,7 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and · notmpcopyup: Disable copying files from the image to the tmpfs. - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. Options specific to devpts: diff --git a/docs/source/markdown/podman-run.1.md b/docs/source/markdown/podman-run.1.md index 3d908444b..90c456544 100644 --- a/docs/source/markdown/podman-run.1.md +++ b/docs/source/markdown/podman-run.1.md @@ -633,7 +633,9 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and · ro, readonly: true or false (default). - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + + · idmap: true or false (default). If specified, create an idmapped mount to the target user namespace in the container. Options specific to image: @@ -649,7 +651,9 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and . relabel: shared, private. - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + · idmap: true or false (default). If specified, create an idmapped mount to the target user namespace in the container. + + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. Options specific to tmpfs: @@ -663,7 +667,7 @@ Current supported mount TYPEs are **bind**, **volume**, **image**, **tmpfs** and · notmpcopyup: Disable copying files from the image to the tmpfs. - . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. + . U, chown: true or false (default). Change recursively the owner and group of the source volume based on the UID and GID of the container. Options specific to devpts: diff --git a/libpod/container_api.go b/libpod/container_api.go index a41bb03df..7ae9f497c 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -798,6 +798,9 @@ type ContainerCheckpointOptions struct { // how much time each component in the stack requires to // checkpoint a container. PrintStats bool + // FileLocks tells the API to checkpoint/restore a container + // with file-locks + FileLocks bool } // Checkpoint checkpoints a container diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index 9be600bb4..314a74427 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -4,6 +4,7 @@ package libpod import ( "crypto/rand" + "crypto/sha1" "fmt" "io/ioutil" "net" @@ -400,10 +401,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { return nil, nil } var rootlessNetNS *RootlessNetNS - runDir, err := util.GetRuntimeDir() - if err != nil { - return nil, err - } + runDir := r.config.Engine.TmpDir lfile := filepath.Join(runDir, "rootless-netns.lock") lock, err := lockfile.GetLockfile(lfile) @@ -429,7 +427,15 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { if err != nil { return nil, err } - path := filepath.Join(nsDir, rootlessNetNsName) + + // create a hash from the static dir + // the cleanup will check if there are running containers + // if you run a several libpod instances with different root/runroot directories this check will fail + // we want one netns for each libpod static dir so we use the hash to prevent name collisions + hash := sha1.Sum([]byte(r.config.Engine.StaticDir)) + netnsName := fmt.Sprintf("%s-%x", rootlessNetNsName, hash[:10]) + + path := filepath.Join(nsDir, netnsName) ns, err := ns.GetNS(path) if err != nil { if !new { @@ -437,8 +443,8 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) { return nil, errors.Wrap(err, "error getting rootless network namespace") } // create a new namespace - logrus.Debug("creating rootless network namespace") - ns, err = netns.NewNSWithName(rootlessNetNsName) + logrus.Debugf("creating rootless network namespace with name %q", netnsName) + ns, err = netns.NewNSWithName(netnsName) if err != nil { return nil, errors.Wrap(err, "error creating rootless network namespace") } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 533a0d78b..e007d0b92 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -794,6 +794,9 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container if options.TCPEstablished { args = append(args, "--tcp-established") } + if options.FileLocks { + args = append(args, "--file-locks") + } if !options.PreCheckPoint && options.KeepRunning { args = append(args, "--leave-running") } @@ -1101,6 +1104,9 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if restoreOptions.TCPEstablished { args = append(args, "--runtime-opt", "--tcp-established") } + if restoreOptions.FileLocks { + args = append(args, "--runtime-opt", "--file-locks") + } if restoreOptions.Pod != "" { mountLabel := ctr.config.MountLabel processLabel := ctr.config.ProcessLabel diff --git a/pkg/api/handlers/libpod/containers.go b/pkg/api/handlers/libpod/containers.go index b0cec2b1f..d5da22a91 100644 --- a/pkg/api/handlers/libpod/containers.go +++ b/pkg/api/handlers/libpod/containers.go @@ -219,6 +219,8 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { Export bool `schema:"export"` IgnoreRootFS bool `schema:"ignoreRootFS"` PrintStats bool `schema:"printStats"` + PreCheckpoint bool `schema:"preCheckpoint"` + WithPrevious bool `schema:"withPrevious"` }{ // override any golang type defaults } @@ -242,6 +244,8 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { TCPEstablished: query.TCPEstablished, IgnoreRootFS: query.IgnoreRootFS, PrintStats: query.PrintStats, + PreCheckPoint: query.PreCheckpoint, + WithPrevious: query.WithPrevious, } if query.Export { diff --git a/pkg/bindings/containers/types.go b/pkg/bindings/containers/types.go index 4bbb4a62b..81a53a549 100644 --- a/pkg/bindings/containers/types.go +++ b/pkg/bindings/containers/types.go @@ -51,6 +51,9 @@ type CheckpointOptions struct { LeaveRunning *bool TCPEstablished *bool PrintStats *bool + PreCheckpoint *bool + WithPrevious *bool + FileLocks *bool } //go:generate go run ../generator/generator.go RestoreOptions @@ -67,6 +70,7 @@ type RestoreOptions struct { Pod *string PrintStats *bool PublishPorts []string + FileLocks *bool } //go:generate go run ../generator/generator.go CreateOptions diff --git a/pkg/bindings/containers/types_checkpoint_options.go b/pkg/bindings/containers/types_checkpoint_options.go index b606922e0..391748d76 100644 --- a/pkg/bindings/containers/types_checkpoint_options.go +++ b/pkg/bindings/containers/types_checkpoint_options.go @@ -106,3 +106,48 @@ func (o *CheckpointOptions) GetPrintStats() bool { } return *o.PrintStats } + +// WithPreCheckpoint set field PreCheckpoint to given value +func (o *CheckpointOptions) WithPreCheckpoint(value bool) *CheckpointOptions { + o.PreCheckpoint = &value + return o +} + +// GetPreCheckpoint returns value of field PreCheckpoint +func (o *CheckpointOptions) GetPreCheckpoint() bool { + if o.PreCheckpoint == nil { + var z bool + return z + } + return *o.PreCheckpoint +} + +// WithWithPrevious set field WithPrevious to given value +func (o *CheckpointOptions) WithWithPrevious(value bool) *CheckpointOptions { + o.WithPrevious = &value + return o +} + +// GetWithPrevious returns value of field WithPrevious +func (o *CheckpointOptions) GetWithPrevious() bool { + if o.WithPrevious == nil { + var z bool + return z + } + return *o.WithPrevious +} + +// WithFileLocks set field FileLocks to given value +func (o *CheckpointOptions) WithFileLocks(value bool) *CheckpointOptions { + o.FileLocks = &value + return o +} + +// GetFileLocks returns value of field FileLocks +func (o *CheckpointOptions) GetFileLocks() bool { + if o.FileLocks == nil { + var z bool + return z + } + return *o.FileLocks +} diff --git a/pkg/bindings/containers/types_restore_options.go b/pkg/bindings/containers/types_restore_options.go index 8817b834b..7af2bba32 100644 --- a/pkg/bindings/containers/types_restore_options.go +++ b/pkg/bindings/containers/types_restore_options.go @@ -181,3 +181,18 @@ func (o *RestoreOptions) GetPublishPorts() []string { } return o.PublishPorts } + +// WithFileLocks set field FileLocks to given value +func (o *RestoreOptions) WithFileLocks(value bool) *RestoreOptions { + o.FileLocks = &value + return o +} + +// GetFileLocks returns value of field FileLocks +func (o *RestoreOptions) GetFileLocks() bool { + if o.FileLocks == nil { + var z bool + return z + } + return *o.FileLocks +} diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 1a4019bb1..1677c067f 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -191,6 +191,7 @@ type CheckpointOptions struct { WithPrevious bool Compression archive.Compression PrintStats bool + FileLocks bool } type CheckpointReport struct { @@ -215,6 +216,7 @@ type RestoreOptions struct { PublishPorts []string Pod string PrintStats bool + FileLocks bool } type RestoreReport struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 69c628669..e04c7a38a 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -516,6 +516,7 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ WithPrevious: options.WithPrevious, Compression: options.Compression, PrintStats: options.PrintStats, + FileLocks: options.FileLocks, } if options.All { diff --git a/pkg/domain/infra/tunnel/containers.go b/pkg/domain/infra/tunnel/containers.go index 152e3c302..2127f8749 100644 --- a/pkg/domain/infra/tunnel/containers.go +++ b/pkg/domain/infra/tunnel/containers.go @@ -303,12 +303,15 @@ func (ic *ContainerEngine) ContainerExport(ctx context.Context, nameOrID string, func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds []string, opts entities.CheckpointOptions) ([]*entities.CheckpointReport, error) { options := new(containers.CheckpointOptions) + options.WithFileLocks(opts.FileLocks) options.WithIgnoreRootfs(opts.IgnoreRootFS) options.WithKeep(opts.Keep) options.WithExport(opts.Export) options.WithTCPEstablished(opts.TCPEstablished) options.WithPrintStats(opts.PrintStats) + options.WithPreCheckpoint(opts.PreCheckPoint) options.WithLeaveRunning(opts.LeaveRunning) + options.WithWithPrevious(opts.WithPrevious) var ( err error @@ -345,7 +348,12 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ } func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []string, opts entities.RestoreOptions) ([]*entities.RestoreReport, error) { + if opts.ImportPrevious != "" { + return nil, fmt.Errorf("--import-previous is not supported on the remote client") + } + options := new(containers.RestoreOptions) + options.WithFileLocks(opts.FileLocks) options.WithIgnoreRootfs(opts.IgnoreRootFS) options.WithIgnoreVolumes(opts.IgnoreVolumes) options.WithIgnoreStaticIP(opts.IgnoreStaticIP) diff --git a/pkg/machine/ignition.go b/pkg/machine/ignition.go index 42d729458..e19940b22 100644 --- a/pkg/machine/ignition.go +++ b/pkg/machine/ignition.go @@ -81,7 +81,7 @@ func NewIgnitionFile(ign DynamicIgnition) error { // so a listening host knows it can being interacting with it ready := `[Unit] Requires=dev-virtio\\x2dports-%s.device -After=remove-moby.service +After=remove-moby.service sshd.socket sshd.service OnFailure=emergency.target OnFailureJobMode=isolate [Service] diff --git a/pkg/machine/qemu/options_darwin_arm64.go b/pkg/machine/qemu/options_darwin_arm64.go index 43cd3d69d..727a275d2 100644 --- a/pkg/machine/qemu/options_darwin_arm64.go +++ b/pkg/machine/qemu/options_darwin_arm64.go @@ -24,7 +24,7 @@ func (v *MachineVM) addArchOptions() []string { func (v *MachineVM) prepare() error { ovmfDir := getOvmfDir(v.ImagePath, v.Name) - cmd := []string{"dd", "if=/dev/zero", "conv=sync", "bs=1m", "count=64", "of=" + ovmfDir} + cmd := []string{"/bin/dd", "if=/dev/zero", "conv=sync", "bs=1m", "count=64", "of=" + ovmfDir} return exec.Command(cmd[0], cmd[1:]...).Run() } diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 6ce4b1e29..e71d5d999 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -19,6 +19,33 @@ #include <sys/select.h> #include <stdio.h> +#define cleanup_free __attribute__ ((cleanup (cleanup_freep))) +#define cleanup_close __attribute__ ((cleanup (cleanup_closep))) +#define cleanup_dir __attribute__ ((cleanup (cleanup_dirp))) + +static inline void +cleanup_freep (void *p) +{ + void **pp = (void **) p; + free (*pp); +} + +static inline void +cleanup_closep (void *p) +{ + int *pp = p; + if (*pp >= 0) + TEMP_FAILURE_RETRY (close (*pp)); +} + +static inline void +cleanup_dirp (DIR **p) +{ + DIR *dir = *p; + if (dir) + closedir (dir); +} + int rename_noreplace (int olddirfd, const char *oldpath, int newdirfd, const char *newpath) { int ret; @@ -106,6 +133,11 @@ do_pause () for (i = 0; sig[i]; i++) sigaction (sig[i], &act, NULL); + /* Attempt to execv catatonit to keep the pause process alive. */ + execl ("/usr/libexec/podman/catatonit", "catatonit", "-P", NULL); + execl ("/usr/bin/catatonit", "catatonit", "-P", NULL); + /* and if the catatonit executable could not be found, fallback here... */ + prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL); while (1) pause (); @@ -114,8 +146,8 @@ do_pause () static char ** get_cmd_line_args () { - int fd; - char *buffer; + cleanup_free char *buffer = NULL; + cleanup_close int fd = -1; size_t allocated; size_t used = 0; int ret; @@ -134,10 +166,7 @@ get_cmd_line_args () { ret = TEMP_FAILURE_RETRY (read (fd, buffer + used, allocated - used)); if (ret < 0) - { - free (buffer); - return NULL; - } + return NULL; if (ret == 0) break; @@ -148,30 +177,21 @@ get_cmd_line_args () allocated += 512; char *tmp = realloc (buffer, allocated); if (tmp == NULL) - { - free (buffer); - return NULL; - } + return NULL; buffer = tmp; } } - close (fd); for (i = 0; i < used; i++) if (buffer[i] == '\0') argc++; if (argc == 0) - { - free (buffer); - return NULL; - } + return NULL; argv = malloc (sizeof (char *) * (argc + 1)); if (argv == NULL) - { - free (buffer); - return NULL; - } + return NULL; + argc = 0; argv[argc++] = buffer; @@ -181,15 +201,19 @@ get_cmd_line_args () argv[argc] = NULL; + /* Move ownership. */ + buffer = NULL; + return argv; } static bool can_use_shortcut () { - int argc; - char **argv; + cleanup_free char **argv = NULL; + cleanup_free char *argv0 = NULL; bool ret = true; + int argc; #ifdef DISABLE_JOIN_SHORTCUT return false; @@ -199,12 +223,10 @@ can_use_shortcut () if (argv == NULL) return false; + argv0 = argv[0]; + if (strstr (argv[0], "podman") == NULL) - { - free (argv[0]); - free (argv); - return false; - } + return false; for (argc = 0; argv[argc]; argc++) { @@ -229,11 +251,25 @@ can_use_shortcut () } } - free (argv[0]); - free (argv); return ret; } +static int +open_namespace (int pid_to_join, const char *ns_file) +{ + char ns_path[PATH_MAX]; + int ret; + + ret = snprintf (ns_path, PATH_MAX, "/proc/%d/ns/%s", pid_to_join, ns_file); + if (ret == PATH_MAX) + { + fprintf (stderr, "internal error: namespace path too long\n"); + return -1; + } + + return open (ns_path, O_CLOEXEC | O_RDONLY); +} + int is_fd_inherited(int fd) { @@ -250,8 +286,7 @@ static void __attribute__((constructor)) init() const char *listen_pid; const char *listen_fds; const char *listen_fdnames; - - DIR *d; + cleanup_dir DIR *d = NULL; pause = getenv ("_PODMAN_PAUSE"); if (pause && pause[0]) @@ -299,7 +334,6 @@ static void __attribute__((constructor)) init() FD_SET (fd % FD_SETSIZE, &(open_files_set[fd / FD_SETSIZE])); } - closedir (d); } listen_pid = getenv("LISTEN_PID"); @@ -317,7 +351,7 @@ static void __attribute__((constructor)) init() if (saved_systemd_listen_pid == NULL || saved_systemd_listen_fds == NULL) { - fprintf (stderr, "save socket listen environments error: %s\n", strerror (errno)); + fprintf (stderr, "save socket listen environments error: %m\n"); _exit (EXIT_FAILURE); } } @@ -327,73 +361,70 @@ static void __attribute__((constructor)) init() xdg_runtime_dir = getenv ("XDG_RUNTIME_DIR"); if (geteuid () != 0 && xdg_runtime_dir && xdg_runtime_dir[0] && can_use_shortcut ()) { - int r; - int fd; + cleanup_free char *cwd = NULL; + cleanup_close int userns_fd = -1; + cleanup_close int mntns_fd = -1; + cleanup_close int fd = -1; long pid; char buf[12]; uid_t uid; gid_t gid; char path[PATH_MAX]; const char *const suffix = "/libpod/tmp/pause.pid"; - char *cwd = getcwd (NULL, 0); char uid_fmt[16]; char gid_fmt[16]; size_t len; + int r; + cwd = getcwd (NULL, 0); if (cwd == NULL) { - fprintf (stderr, "error getting current working directory: %s\n", strerror (errno)); + fprintf (stderr, "error getting current working directory: %m\n"); _exit (EXIT_FAILURE); } len = snprintf (path, PATH_MAX, "%s%s", xdg_runtime_dir, suffix); if (len >= PATH_MAX) { - fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %s", strerror (ENAMETOOLONG)); + errno = ENAMETOOLONG; + fprintf (stderr, "invalid value for XDG_RUNTIME_DIR: %m"); exit (EXIT_FAILURE); } fd = open (path, O_RDONLY); if (fd < 0) - { - free (cwd); - return; - } + return; r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof (buf) - 1)); - close (fd); + if (r < 0) - { - free (cwd); - return; - } + return; buf[r] = '\0'; pid = strtol (buf, NULL, 10); if (pid == LONG_MAX) - { - free (cwd); - return; - } + return; uid = geteuid (); gid = getegid (); - sprintf (path, "/proc/%ld/ns/user", pid); - fd = open (path, O_RDONLY); - if (fd < 0 || setns (fd, 0) < 0) - { - free (cwd); - return; - } - close (fd); + userns_fd = open_namespace (pid, "user"); + if (userns_fd < 0) + return; - /* Errors here cannot be ignored as we already joined a ns. */ - sprintf (path, "/proc/%ld/ns/mnt", pid); - fd = open (path, O_RDONLY); - if (fd < 0) + mntns_fd = open_namespace (pid, "mnt"); + if (mntns_fd < 0) + return; + + if (setns (userns_fd, 0) < 0) + return; + + /* The user namespace was joined, after this point errors are + not recoverable anymore. */ + + if (setns (mntns_fd, 0) < 0) { - fprintf (stderr, "cannot open %s: %s", path, strerror (errno)); + fprintf (stderr, "cannot join mount namespace for %ld: %m", pid); exit (EXIT_FAILURE); } @@ -404,33 +435,24 @@ static void __attribute__((constructor)) init() setenv ("_CONTAINERS_ROOTLESS_UID", uid_fmt, 1); setenv ("_CONTAINERS_ROOTLESS_GID", gid_fmt, 1); - r = setns (fd, 0); - if (r < 0) - { - fprintf (stderr, "cannot join mount namespace for %ld: %s", pid, strerror (errno)); - exit (EXIT_FAILURE); - } - close (fd); - if (syscall_setresgid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresgid: %m\n"); _exit (EXIT_FAILURE); } if (syscall_setresuid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresuid: %m\n"); _exit (EXIT_FAILURE); } if (chdir (cwd) < 0) { - fprintf (stderr, "cannot chdir to %s: %s\n", cwd, strerror (errno)); + fprintf (stderr, "cannot chdir to %s: %m\n", cwd); _exit (EXIT_FAILURE); } - free (cwd); rootless_uid_init = uid; rootless_gid_init = gid; } @@ -529,7 +551,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv) fd = mkstemp (tmp_file_path); if (fd < 0) { - fprintf (stderr, "error creating temporary file: %s\n", strerror (errno)); + fprintf (stderr, "error creating temporary file: %m\n"); kill (pid, SIGKILL); _exit (EXIT_FAILURE); } @@ -537,7 +559,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv) r = TEMP_FAILURE_RETRY (write (fd, pid_str, strlen (pid_str))); if (r < 0) { - fprintf (stderr, "cannot write to file descriptor: %s\n", strerror (errno)); + fprintf (stderr, "cannot write to file descriptor: %m\n"); kill (pid, SIGKILL); _exit (EXIT_FAILURE); } @@ -555,7 +577,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv) r = TEMP_FAILURE_RETRY (write (p[1], "0", 1)); if (r < 0) { - fprintf (stderr, "cannot write to pipe: %s\n", strerror (errno)); + fprintf (stderr, "cannot write to pipe: %m\n"); _exit (EXIT_FAILURE); } close (p[1]); @@ -590,22 +612,6 @@ create_pause_process (const char *pause_pid_file_path, char **argv) } } -static int -open_namespace (int pid_to_join, const char *ns_file) -{ - char ns_path[PATH_MAX]; - int ret; - - ret = snprintf (ns_path, PATH_MAX, "/proc/%d/ns/%s", pid_to_join, ns_file); - if (ret == PATH_MAX) - { - fprintf (stderr, "internal error: namespace path too long\n"); - return -1; - } - - return open (ns_path, O_CLOEXEC | O_RDONLY); -} - static void join_namespace_or_die (const char *name, int ns_fd) { @@ -619,18 +625,20 @@ join_namespace_or_die (const char *name, int ns_fd) int reexec_userns_join (int pid_to_join, char *pause_pid_file_path) { + cleanup_close int userns_fd = -1; + cleanup_close int mntns_fd = -1; + cleanup_free char *cwd = NULL; char uid[16]; char gid[16]; - char **argv; + cleanup_free char *argv0 = NULL; + cleanup_free char **argv = NULL; int pid; - int mnt_ns = -1; - int user_ns = -1; - char *cwd = getcwd (NULL, 0); sigset_t sigset, oldsigset; + cwd = getcwd (NULL, 0); if (cwd == NULL) { - fprintf (stderr, "error getting current working directory: %s\n", strerror (errno)); + fprintf (stderr, "error getting current working directory: %m\n"); _exit (EXIT_FAILURE); } @@ -640,32 +648,27 @@ reexec_userns_join (int pid_to_join, char *pause_pid_file_path) argv = get_cmd_line_args (); if (argv == NULL) { - fprintf (stderr, "cannot read argv: %s\n", strerror (errno)); + fprintf (stderr, "cannot read argv: %m\n"); _exit (EXIT_FAILURE); } - user_ns = open_namespace (pid_to_join, "user"); - if (user_ns < 0) - return user_ns; - mnt_ns = open_namespace (pid_to_join, "mnt"); - if (mnt_ns < 0) - { - close (user_ns); - return mnt_ns; - } + argv0 = argv[0]; + + userns_fd = open_namespace (pid_to_join, "user"); + if (userns_fd < 0) + return userns_fd; + mntns_fd = open_namespace (pid_to_join, "mnt"); + if (mntns_fd < 0) + return mntns_fd; pid = fork (); if (pid < 0) - fprintf (stderr, "cannot fork: %s\n", strerror (errno)); + fprintf (stderr, "cannot fork: %m\n"); if (pid) { int f; - /* We passed down these fds, close them. */ - close (user_ns); - close (mnt_ns); - for (f = 3; f <= open_files_max_fd; f++) if (is_fd_inherited (f)) close (f); @@ -681,22 +684,22 @@ reexec_userns_join (int pid_to_join, char *pause_pid_file_path) if (sigfillset (&sigset) < 0) { - fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno)); + fprintf (stderr, "cannot fill sigset: %m\n"); _exit (EXIT_FAILURE); } if (sigdelset (&sigset, SIGCHLD) < 0) { - fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno)); + fprintf (stderr, "cannot sigdelset(SIGCHLD): %m\n"); _exit (EXIT_FAILURE); } if (sigdelset (&sigset, SIGTERM) < 0) { - fprintf (stderr, "cannot sigdelset(SIGTERM): %s\n", strerror (errno)); + fprintf (stderr, "cannot sigdelset(SIGTERM): %m\n"); _exit (EXIT_FAILURE); } if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0) { - fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + fprintf (stderr, "cannot block signals: %m\n"); _exit (EXIT_FAILURE); } @@ -717,33 +720,30 @@ reexec_userns_join (int pid_to_join, char *pause_pid_file_path) if (prctl (PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0) < 0) { - fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %s\n", strerror (errno)); + fprintf (stderr, "cannot prctl(PR_SET_PDEATHSIG): %m\n"); _exit (EXIT_FAILURE); } - join_namespace_or_die ("user", user_ns); - join_namespace_or_die ("mnt", mnt_ns); - close (user_ns); - close (mnt_ns); + join_namespace_or_die ("user", userns_fd); + join_namespace_or_die ("mnt", mntns_fd); if (syscall_setresgid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresgid: %m\n"); _exit (EXIT_FAILURE); } if (syscall_setresuid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresuid: %m\n"); _exit (EXIT_FAILURE); } if (chdir (cwd) < 0) { - fprintf (stderr, "cannot chdir to %s: %s\n", cwd, strerror (errno)); + fprintf (stderr, "cannot chdir to %s: %m\n", cwd); _exit (EXIT_FAILURE); } - free (cwd); if (pause_pid_file_path && pause_pid_file_path[0] != '\0') { @@ -752,7 +752,7 @@ reexec_userns_join (int pid_to_join, char *pause_pid_file_path) } if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0) { - fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + fprintf (stderr, "cannot block signals: %m\n"); _exit (EXIT_FAILURE); } @@ -784,7 +784,7 @@ static int copy_file_to_fd (const char *file_to_read, int outfd) { char buf[512]; - int fd; + cleanup_close int fd = -1; fd = open (file_to_read, O_RDONLY); if (fd < 0) @@ -796,10 +796,7 @@ copy_file_to_fd (const char *file_to_read, int outfd) r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf)); if (r < 0) - { - close (fd); - return r; - } + return r; if (r == 0) break; @@ -808,43 +805,40 @@ copy_file_to_fd (const char *file_to_read, int outfd) { w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t)); if (w < 0) - { - close (fd); - return w; - } + return w; t += w; } } - close (fd); return 0; } int reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd) { + cleanup_free char **argv = NULL; + cleanup_free char *argv0 = NULL; + cleanup_free char *cwd = NULL; + sigset_t sigset, oldsigset; int ret; pid_t pid; char b; - char **argv; char uid[16]; char gid[16]; - char *cwd = getcwd (NULL, 0); - sigset_t sigset, oldsigset; + cwd = getcwd (NULL, 0); if (cwd == NULL) { - fprintf (stderr, "error getting current working directory: %s\n", strerror (errno)); + fprintf (stderr, "error getting current working directory: %m\n"); _exit (EXIT_FAILURE); } - sprintf (uid, "%d", geteuid ()); sprintf (gid, "%d", getegid ()); pid = syscall_clone (CLONE_NEWUSER|CLONE_NEWNS|SIGCHLD, NULL); if (pid < 0) { - fprintf (stderr, "cannot clone: %s\n", strerror (errno)); + fprintf (stderr, "cannot clone: %m\n"); check_proc_sys_userns_file (_max_user_namespaces); check_proc_sys_userns_file (_unprivileged_user_namespaces); } @@ -872,32 +866,34 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re if (sigfillset (&sigset) < 0) { - fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno)); + fprintf (stderr, "cannot fill sigset: %m\n"); _exit (EXIT_FAILURE); } if (sigdelset (&sigset, SIGCHLD) < 0) { - fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno)); + fprintf (stderr, "cannot sigdelset(SIGCHLD): %m\n"); _exit (EXIT_FAILURE); } if (sigdelset (&sigset, SIGTERM) < 0) { - fprintf (stderr, "cannot sigdelset(SIGTERM): %s\n", strerror (errno)); + fprintf (stderr, "cannot sigdelset(SIGTERM): %m\n"); _exit (EXIT_FAILURE); } if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0) { - fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + fprintf (stderr, "cannot block signals: %m\n"); _exit (EXIT_FAILURE); } argv = get_cmd_line_args (); if (argv == NULL) { - fprintf (stderr, "cannot read argv: %s\n", strerror (errno)); + fprintf (stderr, "cannot read argv: %m\n"); _exit (EXIT_FAILURE); } + argv0 = argv[0]; + if (do_socket_activation) { char s[32]; @@ -916,7 +912,7 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re ret = TEMP_FAILURE_RETRY (read (ready, &b, 1)); if (ret < 0) { - fprintf (stderr, "cannot read from sync pipe: %s\n", strerror (errno)); + fprintf (stderr, "cannot read from sync pipe: %m\n"); _exit (EXIT_FAILURE); } if (ret != 1 || b != '0') @@ -924,25 +920,24 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re if (syscall_setresgid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresgid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresgid: %m\n"); TEMP_FAILURE_RETRY (write (ready, "1", 1)); _exit (EXIT_FAILURE); } if (syscall_setresuid (0, 0, 0) < 0) { - fprintf (stderr, "cannot setresuid: %s\n", strerror (errno)); + fprintf (stderr, "cannot setresuid: %m\n"); TEMP_FAILURE_RETRY (write (ready, "1", 1)); _exit (EXIT_FAILURE); } if (chdir (cwd) < 0) { - fprintf (stderr, "cannot chdir to %s: %s\n", cwd, strerror (errno)); + fprintf (stderr, "cannot chdir to %s: %m\n", cwd); TEMP_FAILURE_RETRY (write (ready, "1", 1)); _exit (EXIT_FAILURE); } - free (cwd); if (pause_pid_file_path && pause_pid_file_path[0] != '\0') { @@ -956,14 +951,14 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re ret = TEMP_FAILURE_RETRY (write (ready, "0", 1)); if (ret < 0) { - fprintf (stderr, "cannot write to ready pipe: %s\n", strerror (errno)); - _exit (EXIT_FAILURE); + fprintf (stderr, "cannot write to ready pipe: %m\n"); + _exit (EXIT_FAILURE); } close (ready); if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0) { - fprintf (stderr, "cannot block signals: %s\n", strerror (errno)); + fprintf (stderr, "cannot block signals: %m\n"); _exit (EXIT_FAILURE); } diff --git a/pkg/specgenutil/volumes.go b/pkg/specgenutil/volumes.go index 184bfadf8..8ff770f9c 100644 --- a/pkg/specgenutil/volumes.go +++ b/pkg/specgenutil/volumes.go @@ -355,6 +355,8 @@ func getBindMount(args []string) (spec.Mount, error) { newMount.Options = append(newMount.Options, "U") } setOwnership = true + case "idmap": + newMount.Options = append(newMount.Options, "idmap") case "consistency": // Often used on MACs and mistakenly on Linux platforms. // Since Docker ignores this option so shall we. diff --git a/pkg/util/filters.go b/pkg/util/filters.go index e252c1ddf..5af868873 100644 --- a/pkg/util/filters.go +++ b/pkg/util/filters.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "net/http" + "regexp" "strings" "time" @@ -94,6 +95,28 @@ func PrepareFilters(r *http.Request) (*map[string][]string, error) { return &filterMap, nil } +func wildCardToRegexp(pattern string) string { + var result strings.Builder + for i, literal := range strings.Split(pattern, "*") { + // Replace * with .* + if i > 0 { + result.WriteString(".*") + } + // Quote any regular expression meta characters in the + // literal text. + result.WriteString(regexp.QuoteMeta(literal)) + } + return result.String() +} + +func matchPattern(pattern string, value string) bool { + if strings.Contains(pattern, "*") { + result, _ := regexp.MatchString(wildCardToRegexp(pattern), value) + return result + } + return false +} + // MatchLabelFilters matches labels and returns true if they are valid func MatchLabelFilters(filterValues []string, labels map[string]string) bool { outer: @@ -106,7 +129,7 @@ outer: filterValue = "" } for labelKey, labelValue := range labels { - if labelKey == filterKey && (filterValue == "" || labelValue == filterValue) { + if ((labelKey == filterKey) || matchPattern(filterKey, labelKey)) && (filterValue == "" || labelValue == filterValue) { continue outer } } diff --git a/pkg/util/mountOpts.go b/pkg/util/mountOpts.go index f13dc94ec..959763dba 100644 --- a/pkg/util/mountOpts.go +++ b/pkg/util/mountOpts.go @@ -33,6 +33,7 @@ func ProcessOptions(options []string, isTmpfs bool, sourcePath string) ([]string // Some options have parameters - size, mode splitOpt := strings.SplitN(opt, "=", 2) switch splitOpt[0] { + case "idmap": case "O": if len(options) > 1 { return nil, errors.Wrapf(ErrDupeMntOption, "'O' option can not be used with other options") diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go index 37935fd38..4963b04fc 100644 --- a/test/e2e/checkpoint_test.go +++ b/test/e2e/checkpoint_test.go @@ -899,6 +899,7 @@ var _ = Describe("Podman checkpoint", func() { }) It("podman checkpoint container with --pre-checkpoint and export (migration)", func() { + SkipIfRemote("--import-previous is not yet supported on the remote client") if !strings.Contains(podmanTest.OCIRuntime, "runc") { Skip("Test only works on runc 1.0-rc3 or higher.") } @@ -1339,4 +1340,41 @@ var _ = Describe("Podman checkpoint", func() { Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) }) + It("podman checkpoint and restore container with --file-locks", func() { + if !strings.Contains(podmanTest.OCIRuntime, "runc") { + // TODO: Enable test for crun when this feature has been released + // https://github.com/containers/crun/pull/783 + Skip("FIXME: requires crun >= 1.4") + } + localRunString := getRunString([]string{"--name", "test_name", ALPINE, "flock", "test.lock", "sleep", "100"}) + session := podmanTest.Podman(localRunString) + session.WaitWithDefaultTimeout() + Expect(session).Should(Exit(0)) + + // Checkpoint is expected to fail without --file-locks + result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"}) + result.WaitWithDefaultTimeout() + Expect(result).Should(Exit(125)) + Expect(result.ErrorToString()).To(ContainSubstring("criu failed")) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + + // Checkpoint is expected to succeed with --file-locks + result = podmanTest.Podman([]string{"container", "checkpoint", "--file-locks", "test_name"}) + result.WaitWithDefaultTimeout() + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) + + result = podmanTest.Podman([]string{"container", "restore", "--file-locks", "test_name"}) + result.WaitWithDefaultTimeout() + + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + + result = podmanTest.Podman([]string{"rm", "-t", "0", "-f", "test_name"}) + result.WaitWithDefaultTimeout() + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + }) }) diff --git a/test/e2e/config.go b/test/e2e/config.go index 2552595ad..9c810575b 100644 --- a/test/e2e/config.go +++ b/test/e2e/config.go @@ -2,7 +2,7 @@ package integration var ( redis = "quay.io/libpod/redis:alpine" - fedoraMinimal = "quay.io/libpod/fedora-minimal:latest" + fedoraMinimal = "registry.fedoraproject.org/fedora-minimal:34" ALPINE = "quay.io/libpod/alpine:latest" ALPINELISTTAG = "quay.io/libpod/alpine:3.10.2" ALPINELISTDIGEST = "quay.io/libpod/alpine@sha256:fa93b01658e3a5a1686dc3ae55f170d8de487006fb53a28efcd12ab0710a2e5f" diff --git a/test/e2e/images_test.go b/test/e2e/images_test.go index 56af64f04..b07e287ac 100644 --- a/test/e2e/images_test.go +++ b/test/e2e/images_test.go @@ -144,7 +144,7 @@ var _ = Describe("Podman images", func() { result := podmanTest.Podman([]string{"images", "-q", "-f", "reference=quay.io*"}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(0)) - Expect(len(result.OutputToStringArray())).To(Equal(8)) + Expect(len(result.OutputToStringArray())).To(Equal(7)) retalpine := podmanTest.Podman([]string{"images", "-f", "reference=a*pine"}) retalpine.WaitWithDefaultTimeout() diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go index d6d729d3a..05cb986c6 100644 --- a/test/e2e/run_test.go +++ b/test/e2e/run_test.go @@ -1516,7 +1516,7 @@ USER mail`, BB) }) It("podman run --privileged and --group-add", func() { - groupName := "kvm" + groupName := "mail" session := podmanTest.Podman([]string{"run", "-t", "-i", "--group-add", groupName, "--privileged", fedoraMinimal, "groups"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) diff --git a/test/e2e/volume_ls_test.go b/test/e2e/volume_ls_test.go index 0dd1a2b7c..6c4b22fa5 100644 --- a/test/e2e/volume_ls_test.go +++ b/test/e2e/volume_ls_test.go @@ -45,6 +45,17 @@ var _ = Describe("Podman volume ls", func() { Expect(len(session.OutputToStringArray())).To(Equal(2)) }) + It("podman ls volume filter with a key pattern", func() { + session := podmanTest.Podman([]string{"volume", "create", "--label", "helloworld=world", "myvol2"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(Exit(0)) + + session = podmanTest.Podman([]string{"volume", "ls", "--filter", "label=hello*"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(Exit(0)) + Expect(len(session.OutputToStringArray())).To(Equal(2)) + }) + It("podman ls volume with JSON format", func() { session := podmanTest.Podman([]string{"volume", "create", "myvol"}) session.WaitWithDefaultTimeout() |