From 92dce3e2febc752434c08574cbb394545c7fef47 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 8 Jul 2021 15:24:31 +0000 Subject: Prepare CRIU version check to work with multiple versions The upcoming commit to support checkpointing out of Pods requires CRIU 3.16. This changes the CRIU version check to support checking for different versions. Signed-off-by: Adrian Reber --- libpod/container_internal_linux.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'libpod') diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index b69ad4105..e002f9a22 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1021,9 +1021,9 @@ func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { return nil } -func (c *Container) checkpointRestoreSupported() error { - if !criu.CheckForCriu() { - return errors.Errorf("checkpoint/restore requires at least CRIU %d", criu.MinCriuVersion) +func (c *Container) checkpointRestoreSupported(version int) error { + if !criu.CheckForCriu(version) { + return errors.Errorf("checkpoint/restore requires at least CRIU %d", version) } if !c.ociRuntime.SupportsCheckpoint() { return errors.Errorf("configured runtime does not support checkpoint/restore") @@ -1032,7 +1032,7 @@ func (c *Container) checkpointRestoreSupported() error { } func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { - if err := c.checkpointRestoreSupported(); err != nil { + if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { return err } @@ -1136,7 +1136,7 @@ func (c *Container) importPreCheckpoint(input string) error { } func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) { - if err := c.checkpointRestoreSupported(); err != nil { + if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { return err } -- cgit v1.2.3-54-g00ecf From 
eb94467780eab06a452586c9751fc4f571d9e089 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 12 Jul 2021 11:43:45 +0000 Subject: Support checkpoint/restore with pods This adds support to checkpoint containers out of pods and restore containers into pods. It is only possible to restore a container into a pod if it has been checkpointed out of a pod. It is also not possible to restore a non pod container into a pod. The main reason this does not work is the PID namespace. If a non pod container is being restored in a pod with a shared PID namespace, at least one process in the restored container uses PID 1 which is already in use by the infrastructure container. If someone tries to restore a container that was checkpointed out of a pod with a shared PID namespace into an environment without a shared PID namespace, it will also fail because the resulting PID namespace will not have a PID 1. Signed-off-by: Adrian Reber --- cmd/podman/containers/restore.go | 6 ++ docs/source/markdown/podman-container-restore.1.md | 11 +- libpod/container_api.go | 16 ++- libpod/container_internal_linux.go | 112 ++++++++++++++++++++- libpod/oci_conmon_linux.go | 24 +++++ pkg/bindings/containers/types.go | 1 + pkg/bindings/containers/types_restore_options.go | 16 +++ pkg/checkpoint/checkpoint_restore.go | 96 ++++++++++++++++++ pkg/checkpoint/crutils/checkpoint_restore_utils.go | 11 ++ pkg/criu/criu.go | 4 + pkg/domain/entities/containers.go | 1 + pkg/domain/infra/abi/containers.go | 1 + 12 files changed, 294 insertions(+), 5 deletions(-) (limited to 'libpod') diff --git a/cmd/podman/containers/restore.go b/cmd/podman/containers/restore.go index b908ea493..3b6f74efa 100644 --- a/cmd/podman/containers/restore.go +++ b/cmd/podman/containers/restore.go @@ -71,6 +71,9 @@ func init() { ) _ = restoreCommand.RegisterFlagCompletionFunc("publish", completion.AutocompleteNone) + flags.StringVar(&restoreOptions.Pod, "pod", "", "Restore container into existing Pod (only works with --import)") + _ = restoreCommand.RegisterFlagCompletionFunc("pod", 
common.AutocompletePodsRunning) + validate.AddLatestFlag(restoreCommand, &restoreOptions.Latest) } @@ -91,6 +94,9 @@ func restore(cmd *cobra.Command, args []string) error { if restoreOptions.Import == "" && restoreOptions.Name != "" { return errors.Errorf("--name can only be used with --import") } + if restoreOptions.Import == "" && restoreOptions.Pod != "" { + return errors.Errorf("--pod can only be used with --import") + } if restoreOptions.Name != "" && restoreOptions.TCPEstablished { return errors.Errorf("--tcp-established cannot be used with --name") } diff --git a/docs/source/markdown/podman-container-restore.1.md b/docs/source/markdown/podman-container-restore.1.md index 36eb650e5..856008cc0 100644 --- a/docs/source/markdown/podman-container-restore.1.md +++ b/docs/source/markdown/podman-container-restore.1.md @@ -93,6 +93,15 @@ be used once and the restored *container* will have another IP address. This als that **--name, -n** cannot be used in combination with **--tcp-established**.\ *IMPORTANT: This OPTION is only available in combination with **--import, -i**.* +#### **--pod**=*name* + +Restore a container into the pod *name*. The destination pod for this restore +has to have the same namespaces shared as the pod this container was checkpointed +from (see **[podman pod create --share](podman-pod-create.1.md#--share)**). +*IMPORTANT: This OPTION is only available in combination with **--import, -i**.* + +This option requires at least CRIU 3.16. 
+ #### **--publish**, **-p**=*port* Replaces the ports that the *container* publishes, as configured during the @@ -128,7 +137,7 @@ $ podman run --rm -p 2345:80 -d webserver ``` ## SEE ALSO -**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)** +**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)** ## HISTORY September 2018, Originally compiled by Adrian Reber diff --git a/libpod/container_api.go b/libpod/container_api.go index 390bba7bb..ddc5aa684 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -780,6 +780,16 @@ type ContainerCheckpointOptions struct { // Compression tells the API which compression to use for // the exported checkpoint archive. Compression archive.Compression + // If Pod is set the container should be restored into the + // given Pod. If Pod is empty it is a restore without a Pod. + // Restoring a non Pod container into a Pod or a Pod container + // without a Pod is theoretically possible, but will + // probably not work if a PID namespace is shared. + // A shared PID namespace means that a Pod container has PID 1 + // in the infrastructure container, but without the infrastructure + // container no PID 1 will be in the namespace and that is not + // possible. 
+ Pod string } // Checkpoint checkpoints a container @@ -811,7 +821,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO // Restore restores a container func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error { - logrus.Debugf("Trying to restore container %s", c.ID()) + if options.Pod == "" { + logrus.Debugf("Trying to restore container %s", c.ID()) + } else { + logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod) + } if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index e002f9a22..bff64aa95 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -901,8 +901,27 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr } func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { - if len(c.Dependencies()) > 0 { - return errors.Errorf("Cannot export checkpoints of containers with dependencies") + if len(c.Dependencies()) == 1 { + // Check if the dependency is an infra container. If it is we can checkpoint + // the container out of the Pod. 
+ if c.config.Pod == "" { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + + pod, err := c.runtime.state.Pod(c.config.Pod) + if err != nil { + return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), c.config.Pod) + } + infraID, err := pod.InfraContainerID() + if err != nil { + return errors.Wrapf(err, "cannot retrieve infra container ID for pod %s", c.config.Pod) + } + if c.Dependencies()[0] != infraID { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + } + if len(c.Dependencies()) > 1 { + return errors.Errorf("cannot export checkpoints of containers with dependencies") } logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile) @@ -1136,10 +1155,20 @@ func (c *Container) importPreCheckpoint(input string) error { } func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) { - if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { + minCriuVersion := func() int { + if options.Pod == "" { + return criu.MinCriuVersion + } + return criu.PodCriuVersion + }() + if err := c.checkpointRestoreSupported(minCriuVersion); err != nil { return err } + if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) { + return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path()) + } + if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) } @@ -1247,6 +1276,83 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } } + if options.Pod != "" { + // Running in a Pod means that we have to change all namespace settings to + // the ones from the infrastructure container. 
+ pod, err := c.runtime.LookupPod(options.Pod) + if err != nil { + return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod) + } + + infraContainer.lock.Lock() + if err := infraContainer.syncContainer(); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID()) + } + if infraContainer.state.State != define.ContainerStateRunning { + if err := infraContainer.initAndStart(ctx); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID()) + } + } + infraContainer.lock.Unlock() + + if c.config.IPCNsCtr != "" { + nsPath, err := infraContainer.namespacePath(IPCNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil { + return err + } + } + + if c.config.NetNsCtr != "" { + nsPath, err := infraContainer.namespacePath(NetNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil { + return err + } + } + + if c.config.PIDNsCtr != "" { + nsPath, err := infraContainer.namespacePath(PIDNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil { + return err + } + } + + if c.config.UTSNsCtr != "" { + nsPath, err := infraContainer.namespacePath(UTSNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod) + } + if err := 
g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil { + return err + } + } + + if c.config.CgroupNsCtr != "" { + nsPath, err := infraContainer.namespacePath(CgroupNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil { + return err + } + } + } + if err := c.makeBindMounts(); err != nil { return err } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 2914bd1a1..846d3815a 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -1064,6 +1064,30 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if restoreOptions.TCPEstablished { args = append(args, "--runtime-opt", "--tcp-established") } + if restoreOptions.Pod != "" { + mountLabel := ctr.config.MountLabel + processLabel := ctr.config.ProcessLabel + if mountLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-mount-context=%s", + mountLabel, + ), + ) + } + if processLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-profile=selinux:%s", + processLabel, + ), + ) + } + } } logrus.WithFields(logrus.Fields{ diff --git a/pkg/bindings/containers/types.go b/pkg/bindings/containers/types.go index 1058c7a48..b74a938d1 100644 --- a/pkg/bindings/containers/types.go +++ b/pkg/bindings/containers/types.go @@ -62,6 +62,7 @@ type RestoreOptions struct { Keep *bool Name *string TCPEstablished *bool + Pod *string } //go:generate go run ../generator/generator.go CreateOptions diff --git a/pkg/bindings/containers/types_restore_options.go b/pkg/bindings/containers/types_restore_options.go index ea6c810a2..820a7696f 100644 --- a/pkg/bindings/containers/types_restore_options.go +++ b/pkg/bindings/containers/types_restore_options.go @@ -131,3 +131,19 @@ func (o *RestoreOptions) GetTCPEstablished() bool { } return 
*o.TCPEstablished } + +// WithPod +func (o *RestoreOptions) WithPod(value string) *RestoreOptions { + v := &value + o.Pod = v + return o +} + +// GetPod +func (o *RestoreOptions) GetPod() string { + var pod string + if o.Pod == nil { + return pod + } + return *o.Pod +} diff --git a/pkg/checkpoint/checkpoint_restore.go b/pkg/checkpoint/checkpoint_restore.go index 0d45cab5f..9fdf04933 100644 --- a/pkg/checkpoint/checkpoint_restore.go +++ b/pkg/checkpoint/checkpoint_restore.go @@ -9,6 +9,9 @@ import ( "github.com/containers/common/libimage" "github.com/containers/common/pkg/config" "github.com/containers/podman/v3/libpod" + ann "github.com/containers/podman/v3/pkg/annotations" + "github.com/containers/podman/v3/pkg/checkpoint/crutils" + "github.com/containers/podman/v3/pkg/criu" "github.com/containers/podman/v3/pkg/domain/entities" "github.com/containers/podman/v3/pkg/errorhandling" "github.com/containers/podman/v3/pkg/specgen/generate" @@ -68,6 +71,14 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt return nil, err } + if ctrConfig.Pod != "" && restoreOptions.Pod == "" { + return nil, errors.New("cannot restore pod container without --pod") + } + + if ctrConfig.Pod == "" && restoreOptions.Pod != "" { + return nil, errors.New("cannot restore non pod container into pod") + } + // This should not happen as checkpoints with these options are not exported. 
if len(ctrConfig.Dependencies) > 0 { return nil, errors.Errorf("Cannot import checkpoints of containers with dependencies") @@ -96,6 +107,91 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt newName = true } + if restoreOptions.Pod != "" { + // Restoring into a Pod requires much newer versions of CRIU + if !criu.CheckForCriu(criu.PodCriuVersion) { + return nil, errors.Errorf("restoring containers into pods requires at least CRIU %d", criu.PodCriuVersion) + } + // The runtime also has to support it + if !crutils.CRRuntimeSupportsPodCheckpointRestore(runtime.GetOCIRuntimePath()) { + return nil, errors.Errorf("runtime %s does not support pod restore", runtime.GetOCIRuntimePath()) + } + // Restoring into an existing Pod + ctrConfig.Pod = restoreOptions.Pod + + // According to podman pod create a pod can share the following namespaces: + // cgroup, ipc, net, pid, uts + // Let's make sure we are restoring into a pod with the same shared namespaces. + pod, err := runtime.LookupPod(ctrConfig.Pod) + if err != nil { + return nil, errors.Wrapf(err, "pod %q cannot be retrieved", ctrConfig.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve infra container from pod %q", ctrConfig.Pod) + } + + // If a namespace was shared (!= "") it needs to be set to the new infrastructure container. + // If the infrastructure container does not share the same namespaces as the to be restored + // container we abort. 
+ if ctrConfig.IPCNsCtr != "" { + if !pod.SharesIPC() { + return nil, errors.Errorf("pod %s does not share the IPC namespace", ctrConfig.Pod) + } + ctrConfig.IPCNsCtr = infraContainer.ID() + } + + if ctrConfig.NetNsCtr != "" { + if !pod.SharesNet() { + return nil, errors.Errorf("pod %s does not share the network namespace", ctrConfig.Pod) + } + ctrConfig.NetNsCtr = infraContainer.ID() + } + + if ctrConfig.PIDNsCtr != "" { + if !pod.SharesPID() { + return nil, errors.Errorf("pod %s does not share the PID namespace", ctrConfig.Pod) + } + ctrConfig.PIDNsCtr = infraContainer.ID() + } + + if ctrConfig.UTSNsCtr != "" { + if !pod.SharesUTS() { + return nil, errors.Errorf("pod %s does not share the UTS namespace", ctrConfig.Pod) + } + ctrConfig.UTSNsCtr = infraContainer.ID() + } + + if ctrConfig.CgroupNsCtr != "" { + if !pod.SharesCgroup() { + return nil, errors.Errorf("pod %s does not share the cgroup namespace", ctrConfig.Pod) + } + ctrConfig.CgroupNsCtr = infraContainer.ID() + } + + // Change SELinux labels to infrastructure container labels + ctrConfig.MountLabel = infraContainer.MountLabel() + ctrConfig.ProcessLabel = infraContainer.ProcessLabel() + + // Fix parent cgroup + cgroupPath, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve cgroup path from pod %q", ctrConfig.Pod) + } + ctrConfig.CgroupParent = cgroupPath + + oldPodID := dumpSpec.Annotations[ann.SandboxID] + // Fix up SandboxID in the annotations + dumpSpec.Annotations[ann.SandboxID] = ctrConfig.Pod + // Fix up CreateCommand + for i, c := range ctrConfig.CreateCommand { + if c == oldPodID { + ctrConfig.CreateCommand[i] = ctrConfig.Pod + } + } + } + if len(restoreOptions.PublishPorts) > 0 { ports, _, _, err := generate.ParsePortMapping(restoreOptions.PublishPorts) if err != nil { diff --git a/pkg/checkpoint/crutils/checkpoint_restore_utils.go b/pkg/checkpoint/crutils/checkpoint_restore_utils.go index 53ff55865..3b77368bb 100644 --- 
a/pkg/checkpoint/crutils/checkpoint_restore_utils.go +++ b/pkg/checkpoint/crutils/checkpoint_restore_utils.go @@ -1,6 +1,7 @@ package crutils import ( + "bytes" "io" "os" "os/exec" @@ -189,3 +190,13 @@ func CRRuntimeSupportsCheckpointRestore(runtimePath string) bool { } return false } + +// CRRuntimeSupportsPodCheckpointRestore tests if the runtime at 'runtimePath' +// supports restoring into existing Pods. The runtime needs to support +// the CRIU option --lsm-mount-context and the existence of this is checked +// by this function. In addition it is necessary to at least have CRIU 3.16. +func CRRuntimeSupportsPodCheckpointRestore(runtimePath string) bool { + cmd := exec.Command(runtimePath, "restore", "--lsm-mount-context") + out, _ := cmd.CombinedOutput() + return bytes.Contains(out, []byte("flag needs an argument")) +} diff --git a/pkg/criu/criu.go b/pkg/criu/criu.go index 0b5d2c45e..2a6805979 100644 --- a/pkg/criu/criu.go +++ b/pkg/criu/criu.go @@ -7,6 +7,10 @@ import ( // MinCriuVersion for Podman at least CRIU 3.11 is required const MinCriuVersion = 31100 +// PodCriuVersion is the version of CRIU needed for +// checkpointing and restoring containers out of and into Pods. +const PodCriuVersion = 31600 + // CheckForCriu uses CRIU's go bindings to check if the CRIU // binary exists and if it at least the version Podman needs. 
func CheckForCriu(version int) bool { diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 4d85941cd..6d84a4ecb 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -207,6 +207,7 @@ type RestoreOptions struct { TCPEstablished bool ImportPrevious string PublishPorts []specgen.PortMapping + Pod string } type RestoreReport struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 2c89fc66b..2003879b8 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -529,6 +529,7 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st IgnoreStaticIP: options.IgnoreStaticIP, IgnoreStaticMAC: options.IgnoreStaticMAC, ImportPrevious: options.ImportPrevious, + Pod: options.Pod, } filterFuncs := []libpod.ContainerFilter{ -- cgit v1.2.3-54-g00ecf