aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Reber <areber@redhat.com>2021-07-12 11:43:45 +0000
committerAdrian Reber <adrian@lisas.de>2021-07-27 16:10:44 +0200
commiteb94467780eab06a452586c9751fc4f571d9e089 (patch)
tree7fd60bf5369ae2da6c4661ba2c20e80267abf0c5
parent3375cbb198c74e895624eada148edff514b64d35 (diff)
downloadpodman-eb94467780eab06a452586c9751fc4f571d9e089.tar.gz
podman-eb94467780eab06a452586c9751fc4f571d9e089.tar.bz2
podman-eb94467780eab06a452586c9751fc4f571d9e089.zip
Support checkpoint/restore with pods
This adds support to checkpoint containers out of pods and restore container into pods. It is only possible to restore a container into a pod if it has been checkpointed out of pod. It is also not possible to restore a non pod container into a pod. The main reason this does not work is the PID namespace. If a non pod container is being restored in a pod with a shared PID namespace, at least one process in the restored container uses PID 1 which is already in use by the infrastructure container. If someone tries to restore container from a pod with a shared PID namespace without a shared PID namespace it will also fail because the resulting PID namespace will not have a PID 1. Signed-off-by: Adrian Reber <areber@redhat.com>
-rw-r--r--cmd/podman/containers/restore.go6
-rw-r--r--docs/source/markdown/podman-container-restore.1.md11
-rw-r--r--libpod/container_api.go16
-rw-r--r--libpod/container_internal_linux.go112
-rw-r--r--libpod/oci_conmon_linux.go24
-rw-r--r--pkg/bindings/containers/types.go1
-rw-r--r--pkg/bindings/containers/types_restore_options.go16
-rw-r--r--pkg/checkpoint/checkpoint_restore.go96
-rw-r--r--pkg/checkpoint/crutils/checkpoint_restore_utils.go11
-rw-r--r--pkg/criu/criu.go4
-rw-r--r--pkg/domain/entities/containers.go1
-rw-r--r--pkg/domain/infra/abi/containers.go1
12 files changed, 294 insertions, 5 deletions
diff --git a/cmd/podman/containers/restore.go b/cmd/podman/containers/restore.go
index b908ea493..3b6f74efa 100644
--- a/cmd/podman/containers/restore.go
+++ b/cmd/podman/containers/restore.go
@@ -71,6 +71,9 @@ func init() {
)
_ = restoreCommand.RegisterFlagCompletionFunc("publish", completion.AutocompleteNone)
+ flags.StringVar(&restoreOptions.Pod, "pod", "", "Restore container into existing Pod (only works with --import)")
+ _ = restoreCommand.RegisterFlagCompletionFunc("pod", common.AutocompletePodsRunning)
+
validate.AddLatestFlag(restoreCommand, &restoreOptions.Latest)
}
@@ -91,6 +94,9 @@ func restore(cmd *cobra.Command, args []string) error {
if restoreOptions.Import == "" && restoreOptions.Name != "" {
return errors.Errorf("--name can only be used with --import")
}
+ if restoreOptions.Import == "" && restoreOptions.Pod != "" {
+ return errors.Errorf("--pod can only be used with --import")
+ }
if restoreOptions.Name != "" && restoreOptions.TCPEstablished {
return errors.Errorf("--tcp-established cannot be used with --name")
}
diff --git a/docs/source/markdown/podman-container-restore.1.md b/docs/source/markdown/podman-container-restore.1.md
index 36eb650e5..856008cc0 100644
--- a/docs/source/markdown/podman-container-restore.1.md
+++ b/docs/source/markdown/podman-container-restore.1.md
@@ -93,6 +93,15 @@ be used once and the restored *container* will have another IP address. This als
that **--name, -n** cannot be used in combination with **--tcp-established**.\
*IMPORTANT: This OPTION is only available in combination with **--import, -i**.*
+#### **--pod**=*name*
+
+Restore a container into the pod *name*. The destination pod for this restore
+has to have the same namespaces shared as the pod this container was checkpointed
+from (see **[podman pod create --share](podman-pod-create.1.md#--share)**).
+*IMPORTANT: This OPTION is only available in combination with **--import, -i**.*
+
+This option requires at least CRIU 3.16.
+
#### **--publish**, **-p**=*port*
Replaces the ports that the *container* publishes, as configured during the
@@ -128,7 +137,7 @@ $ podman run --rm -p 2345:80 -d webserver
```
## SEE ALSO
-**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**
+**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)**
## HISTORY
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 390bba7bb..ddc5aa684 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -780,6 +780,16 @@ type ContainerCheckpointOptions struct {
// Compression tells the API which compression to use for
// the exported checkpoint archive.
Compression archive.Compression
+ // If Pod is set the container should be restored into the
+ // given Pod. If Pod is empty it is a restore without a Pod.
+ // Restoring a non Pod container into a Pod or a Pod container
+ // without a Pod is theoretically possible, but will
+ // probably not work if a PID namespace is shared.
+ // A shared PID namespace means that a Pod container has PID 1
+ // in the infrastructure container, but without the infrastructure
+ // container no PID 1 will be in the namespace and that is not
+ // possible.
+ Pod string
}
// Checkpoint checkpoints a container
@@ -811,7 +821,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO
// Restore restores a container
func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error {
- logrus.Debugf("Trying to restore container %s", c.ID())
+ if options.Pod == "" {
+ logrus.Debugf("Trying to restore container %s", c.ID())
+ } else {
+ logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod)
+ }
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index e002f9a22..bff64aa95 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -901,8 +901,27 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
}
func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
- if len(c.Dependencies()) > 0 {
- return errors.Errorf("Cannot export checkpoints of containers with dependencies")
+ if len(c.Dependencies()) == 1 {
+ // Check if the dependency is an infra container. If it is we can checkpoint
+ // the container out of the Pod.
+ if c.config.Pod == "" {
+ return errors.Errorf("cannot export checkpoints of containers with dependencies")
+ }
+
+ pod, err := c.runtime.state.Pod(c.config.Pod)
+ if err != nil {
+ return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), c.config.Pod)
+ }
+ infraID, err := pod.InfraContainerID()
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve infra container ID for pod %s", c.config.Pod)
+ }
+ if c.Dependencies()[0] != infraID {
+ return errors.Errorf("cannot export checkpoints of containers with dependencies")
+ }
+ }
+ if len(c.Dependencies()) > 1 {
+ return errors.Errorf("cannot export checkpoints of containers with dependencies")
}
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
@@ -1136,10 +1155,20 @@ func (c *Container) importPreCheckpoint(input string) error {
}
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) {
- if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
+ minCriuVersion := func() int {
+ if options.Pod == "" {
+ return criu.MinCriuVersion
+ }
+ return criu.PodCriuVersion
+ }()
+ if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
return err
}
+ if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
+ return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
+ }
+
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
}
@@ -1247,6 +1276,83 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}
}
+ if options.Pod != "" {
+ // Running in a Pod means that we have to change all namespace settings to
+ // the ones from the infrastructure container.
+ pod, err := c.runtime.LookupPod(options.Pod)
+ if err != nil {
+ return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod)
+ }
+
+ infraContainer, err := pod.InfraContainer()
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod)
+ }
+
+ infraContainer.lock.Lock()
+ if err := infraContainer.syncContainer(); err != nil {
+ infraContainer.lock.Unlock()
+ return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID())
+ }
+ if infraContainer.state.State != define.ContainerStateRunning {
+ if err := infraContainer.initAndStart(ctx); err != nil {
+ infraContainer.lock.Unlock()
+ return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID())
+ }
+ }
+ infraContainer.lock.Unlock()
+
+ if c.config.IPCNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(IPCNS)
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
+ return err
+ }
+ }
+
+ if c.config.NetNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(NetNS)
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
+ return err
+ }
+ }
+
+ if c.config.PIDNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(PIDNS)
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
+ return err
+ }
+ }
+
+ if c.config.UTSNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(UTSNS)
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
+ return err
+ }
+ }
+
+ if c.config.CgroupNsCtr != "" {
+ nsPath, err := infraContainer.namespacePath(CgroupNS)
+ if err != nil {
+ return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod)
+ }
+ if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
+ return err
+ }
+ }
+ }
+
if err := c.makeBindMounts(); err != nil {
return err
}
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 2914bd1a1..846d3815a 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1064,6 +1064,30 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
if restoreOptions.TCPEstablished {
args = append(args, "--runtime-opt", "--tcp-established")
}
+ if restoreOptions.Pod != "" {
+ mountLabel := ctr.config.MountLabel
+ processLabel := ctr.config.ProcessLabel
+ if mountLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-mount-context=%s",
+ mountLabel,
+ ),
+ )
+ }
+ if processLabel != "" {
+ args = append(
+ args,
+ "--runtime-opt",
+ fmt.Sprintf(
+ "--lsm-profile=selinux:%s",
+ processLabel,
+ ),
+ )
+ }
+ }
}
logrus.WithFields(logrus.Fields{
diff --git a/pkg/bindings/containers/types.go b/pkg/bindings/containers/types.go
index 1058c7a48..b74a938d1 100644
--- a/pkg/bindings/containers/types.go
+++ b/pkg/bindings/containers/types.go
@@ -62,6 +62,7 @@ type RestoreOptions struct {
Keep *bool
Name *string
TCPEstablished *bool
+ Pod *string
}
//go:generate go run ../generator/generator.go CreateOptions
diff --git a/pkg/bindings/containers/types_restore_options.go b/pkg/bindings/containers/types_restore_options.go
index ea6c810a2..820a7696f 100644
--- a/pkg/bindings/containers/types_restore_options.go
+++ b/pkg/bindings/containers/types_restore_options.go
@@ -131,3 +131,19 @@ func (o *RestoreOptions) GetTCPEstablished() bool {
}
return *o.TCPEstablished
}
+
+// WithPod
+func (o *RestoreOptions) WithPod(value string) *RestoreOptions {
+ v := &value
+ o.Pod = v
+ return o
+}
+
+// GetPod
+func (o *RestoreOptions) GetPod() string {
+ var pod string
+ if o.Pod == nil {
+ return pod
+ }
+ return *o.Pod
+}
diff --git a/pkg/checkpoint/checkpoint_restore.go b/pkg/checkpoint/checkpoint_restore.go
index 0d45cab5f..9fdf04933 100644
--- a/pkg/checkpoint/checkpoint_restore.go
+++ b/pkg/checkpoint/checkpoint_restore.go
@@ -9,6 +9,9 @@ import (
"github.com/containers/common/libimage"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v3/libpod"
+ ann "github.com/containers/podman/v3/pkg/annotations"
+ "github.com/containers/podman/v3/pkg/checkpoint/crutils"
+ "github.com/containers/podman/v3/pkg/criu"
"github.com/containers/podman/v3/pkg/domain/entities"
"github.com/containers/podman/v3/pkg/errorhandling"
"github.com/containers/podman/v3/pkg/specgen/generate"
@@ -68,6 +71,14 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt
return nil, err
}
+ if ctrConfig.Pod != "" && restoreOptions.Pod == "" {
+ return nil, errors.New("cannot restore pod container without --pod")
+ }
+
+ if ctrConfig.Pod == "" && restoreOptions.Pod != "" {
+ return nil, errors.New("cannot restore non pod container into pod")
+ }
+
// This should not happen as checkpoints with these options are not exported.
if len(ctrConfig.Dependencies) > 0 {
return nil, errors.Errorf("Cannot import checkpoints of containers with dependencies")
@@ -96,6 +107,91 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt
newName = true
}
+ if restoreOptions.Pod != "" {
+ // Restoring into a Pod requires much newer versions of CRIU
+ if !criu.CheckForCriu(criu.PodCriuVersion) {
+ return nil, errors.Errorf("restoring containers into pods requires at least CRIU %d", criu.PodCriuVersion)
+ }
+ // The runtime also has to support it
+ if !crutils.CRRuntimeSupportsPodCheckpointRestore(runtime.GetOCIRuntimePath()) {
+ return nil, errors.Errorf("runtime %s does not support pod restore", runtime.GetOCIRuntimePath())
+ }
+ // Restoring into an existing Pod
+ ctrConfig.Pod = restoreOptions.Pod
+
+ // According to podman pod create a pod can share the following namespaces:
+ // cgroup, ipc, net, pid, uts
+ // Let's make sure we a restoring into a pod with the same shared namespaces.
+ pod, err := runtime.LookupPod(ctrConfig.Pod)
+ if err != nil {
+ return nil, errors.Wrapf(err, "pod %q cannot be retrieved", ctrConfig.Pod)
+ }
+
+ infraContainer, err := pod.InfraContainer()
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot retrieve infra container from pod %q", ctrConfig.Pod)
+ }
+
+ // If a namespaces was shared (!= "") it needs to be set to the new infrastructure container
+ // If the infrastructure container does not share the same namespaces as the to be restored
+ // container we abort.
+ if ctrConfig.IPCNsCtr != "" {
+ if !pod.SharesIPC() {
+ return nil, errors.Errorf("pod %s does not share the IPC namespace", ctrConfig.Pod)
+ }
+ ctrConfig.IPCNsCtr = infraContainer.ID()
+ }
+
+ if ctrConfig.NetNsCtr != "" {
+ if !pod.SharesNet() {
+ return nil, errors.Errorf("pod %s does not share the network namespace", ctrConfig.Pod)
+ }
+ ctrConfig.NetNsCtr = infraContainer.ID()
+ }
+
+ if ctrConfig.PIDNsCtr != "" {
+ if !pod.SharesPID() {
+ return nil, errors.Errorf("pod %s does not share the PID namespace", ctrConfig.Pod)
+ }
+ ctrConfig.PIDNsCtr = infraContainer.ID()
+ }
+
+ if ctrConfig.UTSNsCtr != "" {
+ if !pod.SharesUTS() {
+ return nil, errors.Errorf("pod %s does not share the UTS namespace", ctrConfig.Pod)
+ }
+ ctrConfig.UTSNsCtr = infraContainer.ID()
+ }
+
+ if ctrConfig.CgroupNsCtr != "" {
+ if !pod.SharesCgroup() {
+ return nil, errors.Errorf("pod %s does not share the cgroup namespace", ctrConfig.Pod)
+ }
+ ctrConfig.CgroupNsCtr = infraContainer.ID()
+ }
+
+ // Change SELinux labels to infrastructure container labels
+ ctrConfig.MountLabel = infraContainer.MountLabel()
+ ctrConfig.ProcessLabel = infraContainer.ProcessLabel()
+
+ // Fix parent cgroup
+ cgroupPath, err := pod.CgroupPath()
+ if err != nil {
+ return nil, errors.Wrapf(err, "cannot retrieve cgroup path from pod %q", ctrConfig.Pod)
+ }
+ ctrConfig.CgroupParent = cgroupPath
+
+ oldPodID := dumpSpec.Annotations[ann.SandboxID]
+ // Fix up SandboxID in the annotations
+ dumpSpec.Annotations[ann.SandboxID] = ctrConfig.Pod
+ // Fix up CreateCommand
+ for i, c := range ctrConfig.CreateCommand {
+ if c == oldPodID {
+ ctrConfig.CreateCommand[i] = ctrConfig.Pod
+ }
+ }
+ }
+
if len(restoreOptions.PublishPorts) > 0 {
ports, _, _, err := generate.ParsePortMapping(restoreOptions.PublishPorts)
if err != nil {
diff --git a/pkg/checkpoint/crutils/checkpoint_restore_utils.go b/pkg/checkpoint/crutils/checkpoint_restore_utils.go
index 53ff55865..3b77368bb 100644
--- a/pkg/checkpoint/crutils/checkpoint_restore_utils.go
+++ b/pkg/checkpoint/crutils/checkpoint_restore_utils.go
@@ -1,6 +1,7 @@
package crutils
import (
+ "bytes"
"io"
"os"
"os/exec"
@@ -189,3 +190,13 @@ func CRRuntimeSupportsCheckpointRestore(runtimePath string) bool {
}
return false
}
+
+// CRRuntimeSupportsCheckpointRestore tests if the runtime at 'runtimePath'
+// supports restoring into existing Pods. The runtime needs to support
+// the CRIU option --lsm-mount-context and the existence of this is checked
+// by this function. In addition it is necessary to at least have CRIU 3.16.
+func CRRuntimeSupportsPodCheckpointRestore(runtimePath string) bool {
+ cmd := exec.Command(runtimePath, "restore", "--lsm-mount-context")
+ out, _ := cmd.CombinedOutput()
+ return bytes.Contains(out, []byte("flag needs an argument"))
+}
diff --git a/pkg/criu/criu.go b/pkg/criu/criu.go
index 0b5d2c45e..2a6805979 100644
--- a/pkg/criu/criu.go
+++ b/pkg/criu/criu.go
@@ -7,6 +7,10 @@ import (
// MinCriuVersion for Podman at least CRIU 3.11 is required
const MinCriuVersion = 31100
+// PodCriuVersion is the version of CRIU needed for
+// checkpointing and restoring containers out of and into Pods.
+const PodCriuVersion = 31600
+
// CheckForCriu uses CRIU's go bindings to check if the CRIU
// binary exists and if it at least the version Podman needs.
func CheckForCriu(version int) bool {
diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go
index 4d85941cd..6d84a4ecb 100644
--- a/pkg/domain/entities/containers.go
+++ b/pkg/domain/entities/containers.go
@@ -207,6 +207,7 @@ type RestoreOptions struct {
TCPEstablished bool
ImportPrevious string
PublishPorts []specgen.PortMapping
+ Pod string
}
type RestoreReport struct {
diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go
index 2c89fc66b..2003879b8 100644
--- a/pkg/domain/infra/abi/containers.go
+++ b/pkg/domain/infra/abi/containers.go
@@ -529,6 +529,7 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st
IgnoreStaticIP: options.IgnoreStaticIP,
IgnoreStaticMAC: options.IgnoreStaticMAC,
ImportPrevious: options.ImportPrevious,
+ Pod: options.Pod,
}
filterFuncs := []libpod.ContainerFilter{