From 0028578b432d207e1e5b313c76e587eae275bdac Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 6 Feb 2019 19:22:46 +0000 Subject: Added support to migrate containers This commit adds an option to the checkpoint command to export a checkpoint into a tar.gz file as well as importing a checkpoint tar.gz file during restore. With all checkpoint artifacts in one file it is possible to easily transfer a checkpoint and thus enabling container migration in Podman. With the following steps it is possible to migrate a running container from one system (source) to another (destination). Source system: * podman container checkpoint -l -e /tmp/checkpoint.tar.gz * scp /tmp/checkpoint.tar.gz destination:/tmp Destination system: * podman pull 'container-image-as-on-source-system' * podman container restore -i /tmp/checkpoint.tar.gz The exported tar.gz file contains the checkpoint image as created by CRIU and a few additional JSON files describing the state of the checkpointed container. Now the container is running on the destination system with the same state just as during checkpointing. If the container is kept running on the source system with the checkpoint flag '-R', the result will be that the same container is running on two different hosts. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 2 + cmd/podman/cliconfig/config.go | 2 + cmd/podman/restore.go | 14 ++++-- libpod/container_api.go | 7 +++ libpod/container_internal.go | 2 +- libpod/container_internal_linux.go | 44 +++++++++++++++-- libpod/runtime_ctr.go | 60 +++++++++++++++++++---- pkg/adapter/checkpoint_restore.go | 99 ++++++++++++++++++++++++++++++++++++++ pkg/adapter/containers.go | 6 ++- pkg/adapter/containers_remote.go | 10 +++- 10 files changed, 225 insertions(+), 21 deletions(-) create mode 100644 pkg/adapter/checkpoint_restore.go diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index 234d683bb..86bc8b973 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -46,6 +46,7 @@ func init() { flags.BoolVar(&checkpointCommand.TcpEstablished, "tcp-established", false, "Checkpoint a container with established TCP connections") flags.BoolVarP(&checkpointCommand.All, "all", "a", false, "Checkpoint all running containers") flags.BoolVarP(&checkpointCommand.Latest, "latest", "l", false, "Act on the latest container podman is aware of") + flags.StringVarP(&checkpointCommand.Export, "export", "e", "", "Export the checkpoint image to a tar.gz") markFlagHiddenForRemoteClient("latest", flags) } @@ -64,6 +65,7 @@ func checkpointCmd(c *cliconfig.CheckpointValues) error { Keep: c.Keep, KeepRunning: c.LeaveRunning, TCPEstablished: c.TcpEstablished, + TargetFile: c.Export, } return runtime.Checkpoint(c, options) } diff --git a/cmd/podman/cliconfig/config.go b/cmd/podman/cliconfig/config.go index aaa4513d8..d742830ee 100644 --- a/cmd/podman/cliconfig/config.go +++ b/cmd/podman/cliconfig/config.go @@ -89,6 +89,7 @@ type CheckpointValues struct { TcpEstablished bool All bool Latest bool + Export string } type CommitValues struct { @@ -426,6 +427,7 @@ type RestoreValues struct { Keep bool Latest bool TcpEstablished bool + Import string } type RmValues struct { diff --git a/cmd/podman/restore.go b/cmd/podman/restore.go index 36ae16183..828ae682f 100644 --- a/cmd/podman/restore.go +++ b/cmd/podman/restore.go @@ -24,10 +24,10 @@ var ( restoreCommand.InputArgs = args restoreCommand.GlobalFlags = MainGlobalOpts restoreCommand.Remote = remoteclient - return restoreCmd(&restoreCommand) + return restoreCmd(&restoreCommand, cmd) }, Args: func(cmd *cobra.Command, args []string) error { - return checkAllAndLatest(cmd, args, false) + return checkAllAndLatest(cmd, args, true) }, Example: `podman container restore ctrID podman container restore --latest @@ -44,11 +44,12 @@ func init() { flags.BoolVarP(&restoreCommand.Keep, "keep", "k", false, "Keep all temporary checkpoint files") flags.BoolVarP(&restoreCommand.Latest, "latest", "l", false, "Act on the latest container podman is aware of") flags.BoolVar(&restoreCommand.TcpEstablished, "tcp-established", false, "Restore a container with established TCP connections") + flags.StringVarP(&restoreCommand.Import, "import", "i", "", "Restore from exported checkpoint archive (tar.gz)") markFlagHiddenForRemoteClient("latest", flags) } -func restoreCmd(c *cliconfig.RestoreValues) error { +func restoreCmd(c *cliconfig.RestoreValues, cmd *cobra.Command) error { if rootless.IsRootless() { return errors.New("restoring a container requires root") } @@ -62,6 +63,11 @@ func restoreCmd(c *cliconfig.RestoreValues) error { options := libpod.ContainerCheckpointOptions{ Keep: c.Keep, TCPEstablished: c.TcpEstablished, + TargetFile: c.Import, } - return runtime.Restore(c, options) + + if (c.Import != "") && (c.All || c.Latest) { + return errors.Errorf("Cannot use --import and --all or --latest at the same time") + } + return runtime.Restore(getContext(), c, options) } diff --git a/libpod/container_api.go b/libpod/container_api.go index b8b60a6e2..72c4d7f37 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -825,6 +825,13 @@ type ContainerCheckpointOptions struct { // Checkpoint checkpoints a container func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { logrus.Debugf("Trying to checkpoint container %s", c.ID()) + + if options.TargetFile != "" { + if err := c.prepareCheckpointExport(); err != nil { + return err + } + } + if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 42f56e82f..c0b5e4302 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1346,7 +1346,7 @@ func (c *Container) appendStringToRundir(destFile, output string) (string, error return filepath.Join(c.state.RunDir, destFile), nil } -// Save OCI spec to disk, replacing any existing specs for the container +// saveSpec saves the OCI spec to disk, replacing any existing specs for the container func (c *Container) saveSpec(spec *spec.Spec) error { // If the OCI spec already exists, we need to replace it // Cannot guarantee some things, e.g. network namespaces, have the same diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 3c02e3ae0..d471d4191 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -499,6 +499,9 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr } func (c *Container) exportCheckpoint(dest string) (err error) { + if (len(c.config.NamedVolumes) > 0) || (len(c.Dependencies()) > 0) { + return errors.Errorf("Cannot export checkpoints of containers with named volumes or dependencies") + } logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), dest) input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{ Compression: archive.Gzip, @@ -587,6 +590,12 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO return err } + if options.TargetFile != "" { + if err = c.exportCheckpoint(options.TargetFile); err != nil { + return err + } + } + logrus.Debugf("Checkpointed container %s", c.ID()) if !options.KeepRunning { @@ -653,6 +662,12 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) } + if options.TargetFile != "" { + if err = c.importCheckpoint(options.TargetFile); err != nil { + return err + } + } + // Let's try to stat() CRIU's inventory file. If it does not exist, it makes // no sense to try a restore. This is a minimal check if a checkpoint exist. if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) { @@ -710,23 +725,44 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti return err } + // Restoring from an import means that we are doing migration + if options.TargetFile != "" { + g.SetRootPath(c.state.Mountpoint) + } + // We want to have the same network namespace as before. if c.config.CreateNetNS { g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path()) } - // Save the OCI spec to disk - if err := c.saveSpec(g.Spec()); err != nil { + if err := c.makeBindMounts(); err != nil { return err } - if err := c.makeBindMounts(); err != nil { - return err + if options.TargetFile != "" { + for dstPath, srcPath := range c.state.BindMounts { + newMount := spec.Mount{ + Type: "bind", + Source: srcPath, + Destination: dstPath, + Options: []string{"bind", "private"}, + } + if c.IsReadOnly() && dstPath != "/dev/shm" { + newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") + } + if !MountExists(g.Mounts(), dstPath) { + g.AddMount(newMount) + } + } } // Cleanup for a working restore. c.removeConmonFiles() + // Save the OCI spec to disk + if err := c.saveSpec(g.Spec()); err != nil { + return err + } if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil { return err } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 0c8d3edab..9d0bbf7e8 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -14,6 +14,7 @@ import ( "github.com/containers/storage" "github.com/containers/storage/pkg/stringid" spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" opentracing "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -34,7 +35,7 @@ type CtrCreateOption func(*Container) error // A true return will include the container, a false return will exclude it. type ContainerFilter func(*Container) bool -// NewContainer creates a new container from a given OCI config +// NewContainer creates a new container from a given OCI config. func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ...CtrCreateOption) (c *Container, err error) { r.lock.Lock() defer r.lock.Unlock() @@ -44,20 +45,38 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. return r.newContainer(ctx, rSpec, options...) } -func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ...CtrCreateOption) (c *Container, err error) { - span, _ := opentracing.StartSpanFromContext(ctx, "newContainer") - span.SetTag("type", "runtime") - defer span.Finish() +// RestoreContainer re-creates a container from an imported checkpoint +func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config *ContainerConfig) (c *Container, err error) { + r.lock.Lock() + defer r.lock.Unlock() + if !r.valid { + return nil, ErrRuntimeStopped + } + ctr, err := r.initContainerVariables(rSpec, config) + if err != nil { + return nil, errors.Wrapf(err, "error initializing container variables") + } + return r.setupContainer(ctx, ctr, true) +} + +func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConfig) (c *Container, err error) { if rSpec == nil { return nil, errors.Wrapf(ErrInvalidArg, "must provide a valid runtime spec to create container") } - ctr := new(Container) ctr.config = new(ContainerConfig) ctr.state = new(ContainerState) - ctr.config.ID = stringid.GenerateNonCryptoID() + if config == nil { + ctr.config.ID = stringid.GenerateNonCryptoID() + ctr.config.ShmSize = DefaultShmSize + } else { + // This is a restore from an imported checkpoint + if err := JSONDeepCopy(config, ctr.config); err != nil { + return nil, errors.Wrapf(err, "error copying container config for restore") + } + } ctr.config.Spec = new(spec.Spec) if err := JSONDeepCopy(rSpec, ctr.config.Spec); err != nil { @@ -65,8 +84,6 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. } ctr.config.CreatedTime = time.Now() - ctr.config.ShmSize = DefaultShmSize - ctr.state.BindMounts = make(map[string]string) ctr.config.StopTimeout = CtrRemoveTimeout @@ -80,12 +97,29 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. } ctr.runtime = r + + return ctr, nil +} + +func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ...CtrCreateOption) (c *Container, err error) { + span, _ := opentracing.StartSpanFromContext(ctx, "newContainer") + span.SetTag("type", "runtime") + defer span.Finish() + + ctr, err := r.initContainerVariables(rSpec, nil) + if err != nil { + return nil, errors.Wrapf(err, "error initializing container variables") + } + for _, option := range options { if err := option(ctr); err != nil { return nil, errors.Wrapf(err, "error running container create option") } } + return r.setupContainer(ctx, ctr, false) +} +func (r *Runtime) setupContainer(ctx context.Context, ctr *Container, restore bool) (c *Container, err error) { // Allocate a lock for the container lock, err := r.lockManager.AllocateLock() if err != nil { @@ -154,6 +188,14 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } + if restore { + // Remove information about /dev/shm mount + // for new container from imported checkpoint + g := generate.Generator{Config: ctr.config.Spec} + g.RemoveMount("/dev/shm") + ctr.config.ShmDir = "" + } + // Set up storage for the container if err := ctr.setupStorage(ctx); err != nil { return nil, err diff --git a/pkg/adapter/checkpoint_restore.go b/pkg/adapter/checkpoint_restore.go new file mode 100644 index 000000000..9df1704ea --- /dev/null +++ b/pkg/adapter/checkpoint_restore.go @@ -0,0 +1,99 @@ +// +build !remoteclient + +package adapter + +import ( + "context" + "github.com/containers/libpod/libpod" + "github.com/containers/storage/pkg/archive" + jsoniter "github.com/json-iterator/go" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "io/ioutil" + "os" + "path/filepath" +) + +// Prefixing the checkpoint/restore related functions with 'cr' + +// crImportFromJSON imports the JSON files stored in the exported +// checkpoint tarball +func crImportFromJSON(filePath string, v interface{}) error { + jsonFile, err := os.Open(filePath) + if err != nil { + return errors.Wrapf(err, "Failed to open container definition %s for restore", filePath) + } + defer jsonFile.Close() + + content, err := ioutil.ReadAll(jsonFile) + if err != nil { + return errors.Wrapf(err, "Failed to read container definition %s for restore", filePath) + } + json := jsoniter.ConfigCompatibleWithStandardLibrary + if err = json.Unmarshal([]byte(content), v); err != nil { + return errors.Wrapf(err, "Failed to unmarshal container definition %s for restore", filePath) + } + + return nil +} + +// crImportCheckpoint it the function which imports the information +// from checkpoint tarball and re-creates the container from that information +func crImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, input string) ([]*libpod.Container, error) { + // First get the container definition from the + // tarball to a temporary directory + archiveFile, err := os.Open(input) + if err != nil { + return nil, errors.Wrapf(err, "Failed to open checkpoint archive %s for import", input) + } + defer archiveFile.Close() + options := &archive.TarOptions{ + // Here we only need the files config.dump and spec.dump + ExcludePatterns: []string{ + "checkpoint", + "artifacts", + "ctr.log", + "network.status", + }, + } + dir, err := ioutil.TempDir("", "checkpoint") + if err != nil { + return nil, err + } + defer os.RemoveAll(dir) + err = archive.Untar(archiveFile, dir, options) + if err != nil { + return nil, errors.Wrapf(err, "Unpacking of checkpoint archive %s failed", input) + } + + // Load spec.dump from temporary directory + spec := new(spec.Spec) + if err := crImportFromJSON(filepath.Join(dir, "spec.dump"), spec); err != nil { + return nil, err + } + + // Load config.dump from temporary directory + config := new(libpod.ContainerConfig) + if err = crImportFromJSON(filepath.Join(dir, "config.dump"), config); err != nil { + return nil, err + } + + // This should not happen as checkpoints with these options are not exported. + if (len(config.Dependencies) > 0) || (len(config.NamedVolumes) > 0) { + return nil, errors.Errorf("Cannot import checkpoints of containers with named volumes or dependencies") + } + + // Now create a new container from the just loaded information + container, err := runtime.RestoreContainer(ctx, spec, config) + if err != nil { + return nil, err + } + + var containers []*libpod.Container + if container == nil { + return nil, nil + } + + containers = append(containers, container) + return containers, nil +} diff --git a/pkg/adapter/containers.go b/pkg/adapter/containers.go index 34ee70d3d..b7f4c272b 100644 --- a/pkg/adapter/containers.go +++ b/pkg/adapter/containers.go @@ -526,7 +526,7 @@ func (r *LocalRuntime) Checkpoint(c *cliconfig.CheckpointValues, options libpod. } // Restore one or more containers -func (r *LocalRuntime) Restore(c *cliconfig.RestoreValues, options libpod.ContainerCheckpointOptions) error { +func (r *LocalRuntime) Restore(ctx context.Context, c *cliconfig.RestoreValues, options libpod.ContainerCheckpointOptions) error { var ( containers []*libpod.Container err, lastError error @@ -538,7 +538,9 @@ func (r *LocalRuntime) Restore(c *cliconfig.RestoreValues, options libpod.Contai return state == libpod.ContainerStateExited }) - if c.All { + if c.Import != "" { + containers, err = crImportCheckpoint(ctx, r.Runtime, c.Import) + } else if c.All { containers, err = r.GetContainers(filterFuncs...) } else { containers, err = shortcuts.GetContainersByContext(false, c.Latest, c.InputArgs, r.Runtime) diff --git a/pkg/adapter/containers_remote.go b/pkg/adapter/containers_remote.go index bc6a9cfcd..776fcbb70 100644 --- a/pkg/adapter/containers_remote.go +++ b/pkg/adapter/containers_remote.go @@ -664,6 +664,10 @@ func (r *LocalRuntime) Attach(ctx context.Context, c *cliconfig.AttachValues) er // Checkpoint one or more containers func (r *LocalRuntime) Checkpoint(c *cliconfig.CheckpointValues, options libpod.ContainerCheckpointOptions) error { + if c.Export != "" { + return errors.New("the remote client does not support exporting checkpoints") + } + var lastError error ids, err := iopodman.GetContainersByContext().Call(r.Conn, c.All, c.Latest, c.InputArgs) if err != nil { @@ -699,7 +703,11 @@ func (r *LocalRuntime) Checkpoint(c *cliconfig.CheckpointValues, options libpod. } // Restore one or more containers -func (r *LocalRuntime) Restore(c *cliconfig.RestoreValues, options libpod.ContainerCheckpointOptions) error { +func (r *LocalRuntime) Restore(ctx context.Context, c *cliconfig.RestoreValues, options libpod.ContainerCheckpointOptions) error { + if c.Import != "" { + return errors.New("the remote client does not support importing checkpoints") + } + var lastError error ids, err := iopodman.GetContainersByContext().Call(r.Conn, c.All, c.Latest, c.InputArgs) if err != nil { -- cgit v1.2.3-54-g00ecf