From eb94467780eab06a452586c9751fc4f571d9e089 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 12 Jul 2021 11:43:45 +0000 Subject: Support checkpoint/restore with pods This adds support to checkpoint containers out of pods and restore container into pods. It is only possible to restore a container into a pod if it has been checkpointed out of pod. It is also not possible to restore a non pod container into a pod. The main reason this does not work is the PID namespace. If a non pod container is being restored in a pod with a shared PID namespace, at least one process in the restored container uses PID 1 which is already in use by the infrastructure container. If someone tries to restore container from a pod with a shared PID namespace without a shared PID namespace it will also fail because the resulting PID namespace will not have a PID 1. Signed-off-by: Adrian Reber --- pkg/checkpoint/checkpoint_restore.go | 96 ++++++++++++++++++++++ pkg/checkpoint/crutils/checkpoint_restore_utils.go | 11 +++ 2 files changed, 107 insertions(+) (limited to 'pkg/checkpoint') diff --git a/pkg/checkpoint/checkpoint_restore.go b/pkg/checkpoint/checkpoint_restore.go index 0d45cab5f..9fdf04933 100644 --- a/pkg/checkpoint/checkpoint_restore.go +++ b/pkg/checkpoint/checkpoint_restore.go @@ -9,6 +9,9 @@ import ( "github.com/containers/common/libimage" "github.com/containers/common/pkg/config" "github.com/containers/podman/v3/libpod" + ann "github.com/containers/podman/v3/pkg/annotations" + "github.com/containers/podman/v3/pkg/checkpoint/crutils" + "github.com/containers/podman/v3/pkg/criu" "github.com/containers/podman/v3/pkg/domain/entities" "github.com/containers/podman/v3/pkg/errorhandling" "github.com/containers/podman/v3/pkg/specgen/generate" @@ -68,6 +71,14 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt return nil, err } + if ctrConfig.Pod != "" && restoreOptions.Pod == "" { + return nil, errors.New("cannot restore pod container without --pod") + } + + if ctrConfig.Pod == "" && restoreOptions.Pod != "" { + return nil, errors.New("cannot restore non pod container into pod") + } + // This should not happen as checkpoints with these options are not exported. if len(ctrConfig.Dependencies) > 0 { return nil, errors.Errorf("Cannot import checkpoints of containers with dependencies") @@ -96,6 +107,91 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt newName = true } + if restoreOptions.Pod != "" { + // Restoring into a Pod requires much newer versions of CRIU + if !criu.CheckForCriu(criu.PodCriuVersion) { + return nil, errors.Errorf("restoring containers into pods requires at least CRIU %d", criu.PodCriuVersion) + } + // The runtime also has to support it + if !crutils.CRRuntimeSupportsPodCheckpointRestore(runtime.GetOCIRuntimePath()) { + return nil, errors.Errorf("runtime %s does not support pod restore", runtime.GetOCIRuntimePath()) + } + // Restoring into an existing Pod + ctrConfig.Pod = restoreOptions.Pod + + // According to podman pod create a pod can share the following namespaces: + // cgroup, ipc, net, pid, uts + // Let's make sure we a restoring into a pod with the same shared namespaces. + pod, err := runtime.LookupPod(ctrConfig.Pod) + if err != nil { + return nil, errors.Wrapf(err, "pod %q cannot be retrieved", ctrConfig.Pod) + } + + infraContainer, err := pod.InfraContainer() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve infra container from pod %q", ctrConfig.Pod) + } + + // If a namespaces was shared (!= "") it needs to be set to the new infrastructure container + // If the infrastructure container does not share the same namespaces as the to be restored + // container we abort. + if ctrConfig.IPCNsCtr != "" { + if !pod.SharesIPC() { + return nil, errors.Errorf("pod %s does not share the IPC namespace", ctrConfig.Pod) + } + ctrConfig.IPCNsCtr = infraContainer.ID() + } + + if ctrConfig.NetNsCtr != "" { + if !pod.SharesNet() { + return nil, errors.Errorf("pod %s does not share the network namespace", ctrConfig.Pod) + } + ctrConfig.NetNsCtr = infraContainer.ID() + } + + if ctrConfig.PIDNsCtr != "" { + if !pod.SharesPID() { + return nil, errors.Errorf("pod %s does not share the PID namespace", ctrConfig.Pod) + } + ctrConfig.PIDNsCtr = infraContainer.ID() + } + + if ctrConfig.UTSNsCtr != "" { + if !pod.SharesUTS() { + return nil, errors.Errorf("pod %s does not share the UTS namespace", ctrConfig.Pod) + } + ctrConfig.UTSNsCtr = infraContainer.ID() + } + + if ctrConfig.CgroupNsCtr != "" { + if !pod.SharesCgroup() { + return nil, errors.Errorf("pod %s does not share the cgroup namespace", ctrConfig.Pod) + } + ctrConfig.CgroupNsCtr = infraContainer.ID() + } + + // Change SELinux labels to infrastructure container labels + ctrConfig.MountLabel = infraContainer.MountLabel() + ctrConfig.ProcessLabel = infraContainer.ProcessLabel() + + // Fix parent cgroup + cgroupPath, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "cannot retrieve cgroup path from pod %q", ctrConfig.Pod) + } + ctrConfig.CgroupParent = cgroupPath + + oldPodID := dumpSpec.Annotations[ann.SandboxID] + // Fix up SandboxID in the annotations + dumpSpec.Annotations[ann.SandboxID] = ctrConfig.Pod + // Fix up CreateCommand + for i, c := range ctrConfig.CreateCommand { + if c == oldPodID { + ctrConfig.CreateCommand[i] = ctrConfig.Pod + } + } + } + if len(restoreOptions.PublishPorts) > 0 { ports, _, _, err := generate.ParsePortMapping(restoreOptions.PublishPorts) if err != nil { diff --git a/pkg/checkpoint/crutils/checkpoint_restore_utils.go b/pkg/checkpoint/crutils/checkpoint_restore_utils.go index 53ff55865..3b77368bb 100644 --- a/pkg/checkpoint/crutils/checkpoint_restore_utils.go +++ b/pkg/checkpoint/crutils/checkpoint_restore_utils.go @@ -1,6 +1,7 @@ package crutils import ( + "bytes" "io" "os" "os/exec" @@ -189,3 +190,13 @@ func CRRuntimeSupportsCheckpointRestore(runtimePath string) bool { } return false } + +// CRRuntimeSupportsCheckpointRestore tests if the runtime at 'runtimePath' +// supports restoring into existing Pods. The runtime needs to support +// the CRIU option --lsm-mount-context and the existence of this is checked +// by this function. In addition it is necessary to at least have CRIU 3.16. +func CRRuntimeSupportsPodCheckpointRestore(runtimePath string) bool { + cmd := exec.Command(runtimePath, "restore", "--lsm-mount-context") + out, _ := cmd.CombinedOutput() + return bytes.Contains(out, []byte("flag needs an argument")) +} -- cgit v1.2.3-54-g00ecf