diff options
author | Adrian Reber <areber@redhat.com> | 2021-07-12 11:43:45 +0000 |
---|---|---|
committer | Adrian Reber <adrian@lisas.de> | 2021-07-27 16:10:44 +0200 |
commit | eb94467780eab06a452586c9751fc4f571d9e089 (patch) | |
tree | 7fd60bf5369ae2da6c4661ba2c20e80267abf0c5 | |
parent | 3375cbb198c74e895624eada148edff514b64d35 (diff) | |
download | podman-eb94467780eab06a452586c9751fc4f571d9e089.tar.gz podman-eb94467780eab06a452586c9751fc4f571d9e089.tar.bz2 podman-eb94467780eab06a452586c9751fc4f571d9e089.zip |
Support checkpoint/restore with pods
This adds support for checkpointing containers out of pods and restoring
containers into pods.
It is only possible to restore a container into a pod if it has been
checkpointed out of a pod. It is also not possible to restore a non-pod
container into a pod.
The main reason this does not work is the PID namespace. If a non-pod
container is being restored in a pod with a shared PID namespace, at
least one process in the restored container uses PID 1, which is already
in use by the infrastructure container. If someone tries to restore a
container that was checkpointed from a pod with a shared PID namespace
into a setup without a shared PID namespace, it will also fail because
the resulting PID namespace will not have a PID 1.
Signed-off-by: Adrian Reber <areber@redhat.com>
-rw-r--r-- | cmd/podman/containers/restore.go | 6 | ||||
-rw-r--r-- | docs/source/markdown/podman-container-restore.1.md | 11 | ||||
-rw-r--r-- | libpod/container_api.go | 16 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 112 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 24 | ||||
-rw-r--r-- | pkg/bindings/containers/types.go | 1 | ||||
-rw-r--r-- | pkg/bindings/containers/types_restore_options.go | 16 | ||||
-rw-r--r-- | pkg/checkpoint/checkpoint_restore.go | 96 | ||||
-rw-r--r-- | pkg/checkpoint/crutils/checkpoint_restore_utils.go | 11 | ||||
-rw-r--r-- | pkg/criu/criu.go | 4 | ||||
-rw-r--r-- | pkg/domain/entities/containers.go | 1 | ||||
-rw-r--r-- | pkg/domain/infra/abi/containers.go | 1 |
12 files changed, 294 insertions, 5 deletions
diff --git a/cmd/podman/containers/restore.go b/cmd/podman/containers/restore.go index b908ea493..3b6f74efa 100644 --- a/cmd/podman/containers/restore.go +++ b/cmd/podman/containers/restore.go @@ -71,6 +71,9 @@ func init() { ) _ = restoreCommand.RegisterFlagCompletionFunc("publish", completion.AutocompleteNone) + flags.StringVar(&restoreOptions.Pod, "pod", "", "Restore container into existing Pod (only works with --import)") + _ = restoreCommand.RegisterFlagCompletionFunc("pod", common.AutocompletePodsRunning) + validate.AddLatestFlag(restoreCommand, &restoreOptions.Latest) } @@ -91,6 +94,9 @@ func restore(cmd *cobra.Command, args []string) error { if restoreOptions.Import == "" && restoreOptions.Name != "" { return errors.Errorf("--name can only be used with --import") } + if restoreOptions.Import == "" && restoreOptions.Pod != "" { + return errors.Errorf("--pod can only be used with --import") + } if restoreOptions.Name != "" && restoreOptions.TCPEstablished { return errors.Errorf("--tcp-established cannot be used with --name") } diff --git a/docs/source/markdown/podman-container-restore.1.md b/docs/source/markdown/podman-container-restore.1.md index 36eb650e5..856008cc0 100644 --- a/docs/source/markdown/podman-container-restore.1.md +++ b/docs/source/markdown/podman-container-restore.1.md @@ -93,6 +93,15 @@ be used once and the restored *container* will have another IP address. This als that **--name, -n** cannot be used in combination with **--tcp-established**.\ *IMPORTANT: This OPTION is only available in combination with **--import, -i**.* +#### **--pod**=*name* + +Restore a container into the pod *name*. The destination pod for this restore +has to have the same namespaces shared as the pod this container was checkpointed +from (see **[podman pod create --share](podman-pod-create.1.md#--share)**). +*IMPORTANT: This OPTION is only available in combination with **--import, -i**.* + +This option requires at least CRIU 3.16. 
+ #### **--publish**, **-p**=*port* Replaces the ports that the *container* publishes, as configured during the @@ -128,7 +137,7 @@ $ podman run --rm -p 2345:80 -d webserver ``` ## SEE ALSO -**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)** +**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)** ## HISTORY September 2018, Originally compiled by Adrian Reber <areber@redhat.com> diff --git a/libpod/container_api.go b/libpod/container_api.go index 390bba7bb..ddc5aa684 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -780,6 +780,16 @@ type ContainerCheckpointOptions struct { // Compression tells the API which compression to use for // the exported checkpoint archive. Compression archive.Compression + // If Pod is set the container should be restored into the + // given Pod. If Pod is empty it is a restore without a Pod. + // Restoring a non Pod container into a Pod or a Pod container + // without a Pod is theoretically possible, but will + // probably not work if a PID namespace is shared. + // A shared PID namespace means that a Pod container has PID 1 + // in the infrastructure container, but without the infrastructure + // container no PID 1 will be in the namespace and that is not + // possible. 
+ Pod string } // Checkpoint checkpoints a container @@ -811,7 +821,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO // Restore restores a container func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error { - logrus.Debugf("Trying to restore container %s", c.ID()) + if options.Pod == "" { + logrus.Debugf("Trying to restore container %s", c.ID()) + } else { + logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod) + } if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index e002f9a22..bff64aa95 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -901,8 +901,27 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr } func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error { - if len(c.Dependencies()) > 0 { - return errors.Errorf("Cannot export checkpoints of containers with dependencies") + if len(c.Dependencies()) == 1 { + // Check if the dependency is an infra container. If it is we can checkpoint + // the container out of the Pod. 
+ if c.config.Pod == "" { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + + pod, err := c.runtime.state.Pod(c.config.Pod) + if err != nil { + return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), c.config.Pod) + } + infraID, err := pod.InfraContainerID() + if err != nil { + return errors.Wrapf(err, "cannot retrieve infra container ID for pod %s", c.config.Pod) + } + if c.Dependencies()[0] != infraID { + return errors.Errorf("cannot export checkpoints of containers with dependencies") + } + } + if len(c.Dependencies()) > 1 { + return errors.Errorf("cannot export checkpoints of containers with dependencies") } logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile) @@ -1136,10 +1155,20 @@ func (c *Container) importPreCheckpoint(input string) error { } func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) { - if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { + minCriuVersion := func() int { + if options.Pod == "" { + return criu.MinCriuVersion + } + return criu.PodCriuVersion + }() + if err := c.checkpointRestoreSupported(minCriuVersion); err != nil { return err } + if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) { + return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path()) + } + if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) } @@ -1247,6 +1276,83 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } } + if options.Pod != "" { + // Running in a Pod means that we have to change all namespace settings to + // the ones from the infrastructure container. 
+ pod, err := c.runtime.LookupPod(options.Pod) + if err != nil { + return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod) + } + + infraContainer.lock.Lock() + if err := infraContainer.syncContainer(); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID()) + } + if infraContainer.state.State != define.ContainerStateRunning { + if err := infraContainer.initAndStart(ctx); err != nil { + infraContainer.lock.Unlock() + return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID()) + } + } + infraContainer.lock.Unlock() + + if c.config.IPCNsCtr != "" { + nsPath, err := infraContainer.namespacePath(IPCNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil { + return err + } + } + + if c.config.NetNsCtr != "" { + nsPath, err := infraContainer.namespacePath(NetNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil { + return err + } + } + + if c.config.PIDNsCtr != "" { + nsPath, err := infraContainer.namespacePath(PIDNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil { + return err + } + } + + if c.config.UTSNsCtr != "" { + nsPath, err := infraContainer.namespacePath(UTSNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod) + } + if err := 
g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil { + return err + } + } + + if c.config.CgroupNsCtr != "" { + nsPath, err := infraContainer.namespacePath(CgroupNS) + if err != nil { + return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil { + return err + } + } + } + if err := c.makeBindMounts(); err != nil { return err } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 2914bd1a1..846d3815a 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -1064,6 +1064,30 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if restoreOptions.TCPEstablished { args = append(args, "--runtime-opt", "--tcp-established") } + if restoreOptions.Pod != "" { + mountLabel := ctr.config.MountLabel + processLabel := ctr.config.ProcessLabel + if mountLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-mount-context=%s", + mountLabel, + ), + ) + } + if processLabel != "" { + args = append( + args, + "--runtime-opt", + fmt.Sprintf( + "--lsm-profile=selinux:%s", + processLabel, + ), + ) + } + } } logrus.WithFields(logrus.Fields{ diff --git a/pkg/bindings/containers/types.go b/pkg/bindings/containers/types.go index 1058c7a48..b74a938d1 100644 --- a/pkg/bindings/containers/types.go +++ b/pkg/bindings/containers/types.go @@ -62,6 +62,7 @@ type RestoreOptions struct { Keep *bool Name *string TCPEstablished *bool + Pod *string } //go:generate go run ../generator/generator.go CreateOptions diff --git a/pkg/bindings/containers/types_restore_options.go b/pkg/bindings/containers/types_restore_options.go index ea6c810a2..820a7696f 100644 --- a/pkg/bindings/containers/types_restore_options.go +++ b/pkg/bindings/containers/types_restore_options.go @@ -131,3 +131,19 @@ func (o *RestoreOptions) GetTCPEstablished() bool { } return 
*o.TCPEstablished } + +// WithPod +func (o *RestoreOptions) WithPod(value string) *RestoreOptions { + v := &value + o.Pod = v + return o +} + +// GetPod +func (o *RestoreOptions) GetPod() string { + var pod string + if o.Pod == nil { + return pod + } + return *o.Pod +} diff --git a/pkg/checkpoint/checkpoint_restore.go b/pkg/checkpoint/checkpoint_restore.go index 0d45cab5f..9fdf04933 100644 --- a/pkg/checkpoint/checkpoint_restore.go +++ b/pkg/checkpoint/checkpoint_restore.go @@ -9,6 +9,9 @@ import ( "github.com/containers/common/libimage" "github.com/containers/common/pkg/config" "github.com/containers/podman/v3/libpod" + ann "github.com/containers/podman/v3/pkg/annotations" + "github.com/containers/podman/v3/pkg/checkpoint/crutils" + "github.com/containers/podman/v3/pkg/criu" "github.com/containers/podman/v3/pkg/domain/entities" "github.com/containers/podman/v3/pkg/errorhandling" "github.com/containers/podman/v3/pkg/specgen/generate" @@ -68,6 +71,14 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt return nil, err } + if ctrConfig.Pod != "" && restoreOptions.Pod == "" { + return nil, errors.New("cannot restore pod container without --pod") + } + + if ctrConfig.Pod == "" && restoreOptions.Pod != "" { + return nil, errors.New("cannot restore non pod container into pod") + } + // This should not happen as checkpoints with these options are not exported. 
if len(ctrConfig.Dependencies) > 0 { return nil, errors.Errorf("Cannot import checkpoints of containers with dependencies") @@ -96,6 +107,91 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt newName = true } + if restoreOptions.Pod != "" { + // Restoring into a Pod requires much newer versions of CRIU + if !criu.CheckForCriu(criu.PodCriuVersion) { + return nil, errors.Errorf("restoring containers into pods requires at least CRIU %d", criu.PodCriuVersion) + } + // The runtime also has to support it + if !crutils.CRRuntimeSupportsPodCheckpointRestore(runtime.GetOCIRuntimePath()) { + return nil, errors.Errorf("runtime %s does not support pod restore", runtime.GetOCIRuntimePath()) + } + // Restoring into an existing Pod + ctrConfig.Pod = restoreOptions.Pod + + // According to podman pod create a pod can share the following namespaces: + // cgroup, ipc, net, pid, uts + // Let's make sure we a restoring into a pod with the same shared namespaces. + pod, err := runtime.LookupPod(ctrConfig.Pod) + if err != nil { + return nil, errors.Wrapf(err, "pod %q cannot be retrieved", ctrConfig.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve infra container from pod %q", ctrConfig.Pod) + } + + // If a namespaces was shared (!= "") it needs to be set to the new infrastructure container + // If the infrastructure container does not share the same namespaces as the to be restored + // container we abort. 
+ if ctrConfig.IPCNsCtr != "" { + if !pod.SharesIPC() { + return nil, errors.Errorf("pod %s does not share the IPC namespace", ctrConfig.Pod) + } + ctrConfig.IPCNsCtr = infraContainer.ID() + } + + if ctrConfig.NetNsCtr != "" { + if !pod.SharesNet() { + return nil, errors.Errorf("pod %s does not share the network namespace", ctrConfig.Pod) + } + ctrConfig.NetNsCtr = infraContainer.ID() + } + + if ctrConfig.PIDNsCtr != "" { + if !pod.SharesPID() { + return nil, errors.Errorf("pod %s does not share the PID namespace", ctrConfig.Pod) + } + ctrConfig.PIDNsCtr = infraContainer.ID() + } + + if ctrConfig.UTSNsCtr != "" { + if !pod.SharesUTS() { + return nil, errors.Errorf("pod %s does not share the UTS namespace", ctrConfig.Pod) + } + ctrConfig.UTSNsCtr = infraContainer.ID() + } + + if ctrConfig.CgroupNsCtr != "" { + if !pod.SharesCgroup() { + return nil, errors.Errorf("pod %s does not share the cgroup namespace", ctrConfig.Pod) + } + ctrConfig.CgroupNsCtr = infraContainer.ID() + } + + // Change SELinux labels to infrastructure container labels + ctrConfig.MountLabel = infraContainer.MountLabel() + ctrConfig.ProcessLabel = infraContainer.ProcessLabel() + + // Fix parent cgroup + cgroupPath, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve cgroup path from pod %q", ctrConfig.Pod) + } + ctrConfig.CgroupParent = cgroupPath + + oldPodID := dumpSpec.Annotations[ann.SandboxID] + // Fix up SandboxID in the annotations + dumpSpec.Annotations[ann.SandboxID] = ctrConfig.Pod + // Fix up CreateCommand + for i, c := range ctrConfig.CreateCommand { + if c == oldPodID { + ctrConfig.CreateCommand[i] = ctrConfig.Pod + } + } + } + if len(restoreOptions.PublishPorts) > 0 { ports, _, _, err := generate.ParsePortMapping(restoreOptions.PublishPorts) if err != nil { diff --git a/pkg/checkpoint/crutils/checkpoint_restore_utils.go b/pkg/checkpoint/crutils/checkpoint_restore_utils.go index 53ff55865..3b77368bb 100644 --- 
a/pkg/checkpoint/crutils/checkpoint_restore_utils.go +++ b/pkg/checkpoint/crutils/checkpoint_restore_utils.go @@ -1,6 +1,7 @@ package crutils import ( + "bytes" "io" "os" "os/exec" @@ -189,3 +190,13 @@ func CRRuntimeSupportsCheckpointRestore(runtimePath string) bool { } return false } + +// CRRuntimeSupportsCheckpointRestore tests if the runtime at 'runtimePath' +// supports restoring into existing Pods. The runtime needs to support +// the CRIU option --lsm-mount-context and the existence of this is checked +// by this function. In addition it is necessary to at least have CRIU 3.16. +func CRRuntimeSupportsPodCheckpointRestore(runtimePath string) bool { + cmd := exec.Command(runtimePath, "restore", "--lsm-mount-context") + out, _ := cmd.CombinedOutput() + return bytes.Contains(out, []byte("flag needs an argument")) +} diff --git a/pkg/criu/criu.go b/pkg/criu/criu.go index 0b5d2c45e..2a6805979 100644 --- a/pkg/criu/criu.go +++ b/pkg/criu/criu.go @@ -7,6 +7,10 @@ import ( // MinCriuVersion for Podman at least CRIU 3.11 is required const MinCriuVersion = 31100 +// PodCriuVersion is the version of CRIU needed for +// checkpointing and restoring containers out of and into Pods. +const PodCriuVersion = 31600 + // CheckForCriu uses CRIU's go bindings to check if the CRIU // binary exists and if it at least the version Podman needs. 
func CheckForCriu(version int) bool { diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 4d85941cd..6d84a4ecb 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -207,6 +207,7 @@ type RestoreOptions struct { TCPEstablished bool ImportPrevious string PublishPorts []specgen.PortMapping + Pod string } type RestoreReport struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 2c89fc66b..2003879b8 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -529,6 +529,7 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st IgnoreStaticIP: options.IgnoreStaticIP, IgnoreStaticMAC: options.IgnoreStaticMAC, ImportPrevious: options.ImportPrevious, + Pod: options.Pod, } filterFuncs := []libpod.ContainerFilter{ |