diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2018-10-04 12:34:49 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-04 12:34:49 -0700 |
commit | 06a959f74ab4f23d5a789d03de4b2b73a3d53dc6 (patch) | |
tree | 42e0437cd91aae4b53cd769401d7becd2309feb6 | |
parent | 3c31e176c7dfce3c86a45ff4750f740a5f8f9321 (diff) | |
parent | dc987af0b0146ec5fd2026ca8db403806c3425df (diff) | |
download | podman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.tar.gz podman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.tar.bz2 podman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.zip |
Merge pull request #469 from adrianreber/master
Add support to checkpoint/restore containers
-rw-r--r-- | Dockerfile | 12 | ||||
-rw-r--r-- | cmd/podman/checkpoint.go | 73 | ||||
-rw-r--r-- | cmd/podman/container.go | 2 | ||||
-rw-r--r-- | cmd/podman/restore.go | 73 | ||||
-rw-r--r-- | completions/bash/podman | 42 | ||||
-rw-r--r-- | docs/podman-container-checkpoint.1.md | 30 | ||||
-rw-r--r-- | docs/podman-container-restore.1.md | 37 | ||||
-rw-r--r-- | docs/podman-container.1.md | 2 | ||||
-rw-r--r-- | docs/tutorials/podman_tutorial.md | 22 | ||||
-rw-r--r-- | libpod/container_api.go | 30 | ||||
-rw-r--r-- | libpod/container_internal.go | 7 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 158 | ||||
-rw-r--r-- | libpod/container_internal_unsupported.go | 8 | ||||
-rw-r--r-- | libpod/oci.go | 18 | ||||
-rw-r--r-- | libpod/oci_linux.go | 6 | ||||
-rw-r--r-- | libpod/oci_unsupported.go | 2 | ||||
-rw-r--r-- | test/e2e/checkpoint_test.go | 129 | ||||
-rw-r--r-- | test/e2e/libpod_suite_test.go | 37 |
18 files changed, 682 insertions, 6 deletions
diff --git a/Dockerfile b/Dockerfile index 749c5edb9..2c43cb046 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,8 @@ RUN apt-get update && apt-get install -y \ libaio-dev \ libcap-dev \ libfuse-dev \ + libnet-dev \ + libnl-3-dev \ libostree-dev \ libprotobuf-dev \ libprotobuf-c0-dev \ @@ -110,6 +112,16 @@ RUN set -x \ && go get -u github.com/mailru/easyjson/... \ && install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/ +# Install criu +ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9 +RUN set -x \ + && cd /tmp \ + && git clone https://github.com/checkpoint-restore/criu.git \ + && cd criu \ + && make \ + && install -D -m 755 criu/criu /usr/sbin/ \ + && rm -rf /tmp/criu + # Install cni config #RUN make install.cni RUN mkdir -p /etc/cni/net.d/ diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go new file mode 100644 index 000000000..cbbbcd740 --- /dev/null +++ b/cmd/podman/checkpoint.go @@ -0,0 +1,73 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/containers/libpod/cmd/podman/libpodruntime" + "github.com/containers/libpod/pkg/rootless" + "github.com/pkg/errors" + "github.com/urfave/cli" +) + +var ( + checkpointDescription = ` + podman container checkpoint + + Checkpoints one or more running containers. The container name or ID can be used. +` + checkpointFlags = []cli.Flag{ + cli.BoolFlag{ + Name: "keep, k", + Usage: "keep all temporary checkpoint files", + }, + } + checkpointCommand = cli.Command{ + Name: "checkpoint", + Usage: "Checkpoints one or more containers", + Description: checkpointDescription, + Flags: checkpointFlags, + Action: checkpointCmd, + ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]", + } +) + +func checkpointCmd(c *cli.Context) error { + if rootless.IsRootless() { + return errors.New("checkpointing a container requires root") + } + + runtime, err := libpodruntime.GetRuntime(c) + if err != nil { + return errors.Wrapf(err, "could not get runtime") + } + defer runtime.Shutdown(false) + + keep := c.Bool("keep") + args := c.Args() + if len(args) < 1 { + return errors.Errorf("you must provide at least one container name or id") + } + + var lastError error + for _, arg := range args { + ctr, err := runtime.LookupContainer(arg) + if err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "error looking up container %q", arg) + continue + } + if err = ctr.Checkpoint(context.TODO(), keep); err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "failed to checkpoint container %v", ctr.ID()) + } else { + fmt.Println(ctr.ID()) + } + } + return lastError +} diff --git a/cmd/podman/container.go b/cmd/podman/container.go index 82c1c824d..ff634278f 100644 --- a/cmd/podman/container.go +++ b/cmd/podman/container.go @@ -7,6 +7,7 @@ import ( var ( subCommands = []cli.Command{ attachCommand, + checkpointCommand, cleanupCommand, commitCommand, createCommand, @@ -23,6 +24,7 @@ var ( // pruneCommand, refreshCommand, restartCommand, + restoreCommand, rmCommand, runCommand, runlabelCommand, diff --git a/cmd/podman/restore.go b/cmd/podman/restore.go new file mode 100644 index 000000000..43ef87ca2 --- /dev/null +++ b/cmd/podman/restore.go @@ -0,0 +1,73 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/containers/libpod/cmd/podman/libpodruntime" + "github.com/containers/libpod/pkg/rootless" + "github.com/pkg/errors" + "github.com/urfave/cli" +) + +var ( + restoreDescription = ` + podman container restore + + Restores a container from a checkpoint. The container name or ID can be used. +` + restoreFlags = []cli.Flag{ + cli.BoolFlag{ + Name: "keep, k", + Usage: "keep all temporary checkpoint files", + }, + } + restoreCommand = cli.Command{ + Name: "restore", + Usage: "Restores one or more containers from a checkpoint", + Description: restoreDescription, + Flags: restoreFlags, + Action: restoreCmd, + ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]", + } +) + +func restoreCmd(c *cli.Context) error { + if rootless.IsRootless() { + return errors.New("restoring a container requires root") + } + + runtime, err := libpodruntime.GetRuntime(c) + if err != nil { + return errors.Wrapf(err, "could not get runtime") + } + defer runtime.Shutdown(false) + + keep := c.Bool("keep") + args := c.Args() + if len(args) < 1 { + return errors.Errorf("you must provide at least one container name or id") + } + + var lastError error + for _, arg := range args { + ctr, err := runtime.LookupContainer(arg) + if err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "error looking up container %q", arg) + continue + } + if err = ctr.Restore(context.TODO(), keep); err != nil { + if lastError != nil { + fmt.Fprintln(os.Stderr, lastError) + } + lastError = errors.Wrapf(err, "failed to restore container %v", ctr.ID()) + } else { + fmt.Println(ctr.ID()) + } + } + return lastError +} diff --git a/completions/bash/podman b/completions/bash/podman index f63bf4469..604a25f5d 100644 --- a/completions/bash/podman +++ b/completions/bash/podman @@ -87,6 +87,10 @@ __podman_complete_containers_all() { __podman_complete_containers "$@" --all } +__podman_complete_containers_created() { + __podman_complete_containers "$@" --all --filter status=created +} + __podman_complete_containers_running() { __podman_complete_containers "$@" --filter status=running } @@ -710,6 +714,24 @@ _podman_container_attach() { _podman_attach } +_podman_container_checkpoint() { + local options_with_args=" + --help -h + " + local boolean_options=" + --keep + -k + " + case "$cur" in + -*) + COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur")) + ;; + *) + __podman_complete_containers_running + ;; + esac +} + _podman_container_commit() { _podman_commit } @@ -770,6 +792,24 @@ _podman_container_restart() { _podman_restart } +_podman_container_restore() { + local options_with_args=" + --help -h + " + local boolean_options=" + --keep + -k + " + case "$cur" in + -*) + COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur")) + ;; + *) + __podman_complete_containers_created + ;; + esac +} + _podman_container_rm() { _podman_rm } @@ -817,6 +857,7 @@ _podman_container() { " subcommands=" attach + checkpoint commit create diff @@ -831,6 +872,7 @@ _podman_container() { port refresh restart + restore rm run start diff --git a/docs/podman-container-checkpoint.1.md b/docs/podman-container-checkpoint.1.md new file mode 100644 index 000000000..4906e0e12 --- /dev/null +++ b/docs/podman-container-checkpoint.1.md @@ -0,0 +1,30 @@ +% podman-container-checkpoint(1) + +## NAME +podman\-container\-checkpoint - Checkpoints one or more running containers + +## SYNOPSIS +**podman container checkpoint** [*options*] *container* ... + +## DESCRIPTION +Checkpoints all the processes in one or more containers. You may use container IDs or names as input. + +## OPTIONS +**-k**, **--keep** + +Keep all temporary log and statistics files created by CRIU during checkpointing. These files +are not deleted if checkpointing fails for further debugging. If checkpointing succeeds these +files are theoretically not needed, but if these files are needed Podman can keep the files +for further analysis. + +## EXAMPLE + +podman container checkpoint mywebserver + +podman container checkpoint 860a4b23 + +## SEE ALSO +podman(1), podman-container-restore(1) + +## HISTORY +September 2018, Originally compiled by Adrian Reber <areber@redhat.com> diff --git a/docs/podman-container-restore.1.md b/docs/podman-container-restore.1.md new file mode 100644 index 000000000..6360bccb0 --- /dev/null +++ b/docs/podman-container-restore.1.md @@ -0,0 +1,37 @@ +% podman-container-restore(1) + +## NAME +podman\-container\-restore - Restores one or more running containers + +## SYNOPSIS +**podman container restore** [*options*] *container* ... + +## DESCRIPTION +Restores a container from a checkpoint. You may use container IDs or names as input. + +## OPTIONS +**-k**, **--keep** + +Keep all temporary log and statistics files created by CRIU during +checkpointing as well as restoring. These files are not deleted if restoring +fails for further debugging. If restoring succeeds these files are +theoretically not needed, but if these files are needed Podman can keep the +files for further analysis. This includes the checkpoint directory with all +files created during checkpointing. The size required by the checkpoint +directory is roughly the same as the amount of memory required by the +processes in the checkpointed container. + +Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used +again. + +## EXAMPLE + +podman container restore mywebserver + +podman container restore 860a4b23 + +## SEE ALSO +podman(1), podman-container-checkpoint(1) + +## HISTORY +September 2018, Originally compiled by Adrian Reber <areber@redhat.com> diff --git a/docs/podman-container.1.md b/docs/podman-container.1.md index bbc325823..eac3343d5 100644 --- a/docs/podman-container.1.md +++ b/docs/podman-container.1.md @@ -14,6 +14,7 @@ The container command allows you to manage containers | Command | Man Page | Description | | ------- | --------------------------------------------------- | ---------------------------------------------------------------------------- | | attach | [podman-attach(1)](podman-attach.1.md) | Attach to a running container. | +| checkpoint | [podman-container-checkpoint(1)](podman-container-checkpoint.1.md) | Checkpoints one or more containers. | | cleanup | [podman-container-cleanup(1)](podman-container-cleanup.1.md) | Cleanup containers network and mountpoints. | | commit | [podman-commit(1)](podman-commit.1.md) | Create new image based on the changed container. | | create | [podman-create(1)](podman-create.1.md) | Create a new container. | @@ -29,6 +30,7 @@ The container command allows you to manage containers | port | [podman-port(1)](podman-port.1.md) | List port mappings for the container. | | refresh | [podman-refresh(1)](podman-container-refresh.1.md) | Refresh the state of all containers | | restart | [podman-restart(1)](podman-restart.1.md) | Restart one or more containers. | +| restore | [podman-container-restore(1)](podman-container-restore.1.md) | Restores one or more containers from a checkpoint. | | rm | [podman-rm(1)](podman-rm.1.md) | Remove one or more containers. | | run | [podman-run(1)](podman-run.1.md) | Run a command in a container. | | start | [podman-start(1)](podman-start.1.md) | Starts one or more containers. | diff --git a/docs/tutorials/podman_tutorial.md b/docs/tutorials/podman_tutorial.md index a866b8eed..152d65a59 100644 --- a/docs/tutorials/podman_tutorial.md +++ b/docs/tutorials/podman_tutorial.md @@ -157,6 +157,28 @@ $ sudo podman top <container_id> 101 31889 31873 0 09:21 ? 00:00:00 nginx: worker process ``` +### Checkpointing the container +Checkpointing a container stops the container while writing the state of all processes in the container to disk. +With this a container can later be restored and continue running at exactly the same point in time as the +checkpoint. This capability requires CRIU 3.11 or later installed on the system. +To checkpoint the container use: +```console +$ sudo podman container checkpoint <container_id> +``` + +### Restoring the container +Restoring a container is only possible for a previously checkpointed container. The restored container will +continue to run at exactly the same point in time it was checkpointed. +To restore the container use: +```console +$ sudo podman container restore <container_id> +``` + +After being restored, the container will answer requests again as it did before checkpointing. +```console +# curl http://<IP_address>:8080 +``` + ### Stopping the container To stop the httpd container: ```console diff --git a/libpod/container_api.go b/libpod/container_api.go index 192ccd347..93becb80d 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -832,3 +832,33 @@ func (c *Container) Refresh(ctx context.Context) error { return nil } + +// Checkpoint checkpoints a container +func (c *Container) Checkpoint(ctx context.Context, keep bool) error { + logrus.Debugf("Trying to checkpoint container %s", c) + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return err + } + } + + return c.checkpoint(ctx, keep) +} + +// Restore restores a container +func (c *Container) Restore(ctx context.Context, keep bool) (err error) { + logrus.Debugf("Trying to restore container %s", c) + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + + if err := c.syncContainer(); err != nil { + return err + } + } + + return c.restore(ctx, keep) +} diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 033426817..c925f070b 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -129,6 +129,11 @@ func (c *Container) ControlSocketPath() string { return filepath.Join(c.bundlePath(), "ctl") } +// CheckpointPath returns the path to the directory containing the checkpoint +func (c *Container) CheckpointPath() string { + return filepath.Join(c.bundlePath(), "checkpoint") +} + // AttachSocketPath retrieves the path of the container's attach socket func (c *Container) AttachSocketPath() string { return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach") @@ -523,7 +528,7 @@ func (c *Container) init(ctx context.Context) error { } // With the spec complete, do an OCI create - if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil { + if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil { return err } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index b77beaf64..0353124dd 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -4,12 +4,18 @@ package libpod import ( "context" + "encoding/json" "fmt" + "io/ioutil" + "net" + "os" "path" + "path/filepath" "strings" "syscall" "time" + cnitypes "github.com/containernetworking/cni/pkg/types/current" crioAnnotations "github.com/containers/libpod/pkg/annotations" "github.com/containers/libpod/pkg/chrootuser" "github.com/containers/libpod/pkg/rootless" @@ -307,3 +313,155 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr return nil } + +func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { + + if c.state.State != ContainerStateRunning { + return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) + } + if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { + return err + } + + // Save network.status. This is needed to restore the container with + // the same IP. Currently limited to one IP address in a container + // with one interface. + formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ") + if err != nil { + return err + } + if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil { + return err + } + + logrus.Debugf("Checkpointed container %s", c.ID()) + + c.state.State = ContainerStateStopped + + // Cleanup Storage and Network + if err := c.cleanup(ctx); err != nil { + return err + } + + if !keep { + // Remove log file + os.Remove(filepath.Join(c.bundlePath(), "dump.log")) + // Remove statistic file + os.Remove(filepath.Join(c.bundlePath(), "stats-dump")) + } + + return c.save() +} + +func (c *Container) restore(ctx context.Context, keep bool) (err error) { + + if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) { + return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) + } + + // Let's try to stat() CRIU's inventory file. If it does not exist, it makes + // no sense to try a restore. This is a minimal check if a checkpoint exist. + if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) { + return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore") + } + + // Read network configuration from checkpoint + // Currently only one interface with one IP is supported. + networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status")) + if err == nil { + // The file with the network.status does exist. Let's restore the + // container with the same IP address as during checkpointing. + defer networkStatusFile.Close() + var networkStatus []*cnitypes.Result + networkJSON, err := ioutil.ReadAll(networkStatusFile) + if err != nil { + return err + } + json.Unmarshal(networkJSON, &networkStatus) + // Take the first IP address + var IP net.IP + if len(networkStatus) > 0 { + if len(networkStatus[0].IPs) > 0 { + IP = networkStatus[0].IPs[0].Address.IP + } + } + if IP != nil { + env := fmt.Sprintf("IP=%s", IP) + // Tell CNI which IP address we want. + os.Setenv("CNI_ARGS", env) + logrus.Debugf("Restoring container with %s", env) + } + } + + if err := c.prepare(); err != nil { + return err + } + defer func() { + if err != nil { + if err2 := c.cleanup(ctx); err2 != nil { + logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) + } + } + }() + + // TODO: use existing way to request static IPs, once it is merged in ocicni + // https://github.com/cri-o/ocicni/pull/23/ + + // CNI_ARGS was used to request a certain IP address. Unconditionally remove it. + os.Unsetenv("CNI_ARGS") + + // Read config + jsonPath := filepath.Join(c.bundlePath(), "config.json") + logrus.Debugf("generate.NewFromFile at %v", jsonPath) + g, err := generate.NewFromFile(jsonPath) + if err != nil { + logrus.Debugf("generate.NewFromFile failed with %v", err) + return err + } + + // We want to have the same network namespace as before. + if c.config.CreateNetNS { + g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path()) + } + + // Save the OCI spec to disk + if err := c.saveSpec(g.Spec()); err != nil { + return err + } + + if err := c.makeBindMounts(); err != nil { + return err + } + + // Cleanup for a working restore. + c.removeConmonFiles() + + if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil { + return err + } + + logrus.Debugf("Restored container %s", c.ID()) + + c.state.State = ContainerStateRunning + + if !keep { + // Delete all checkpoint related files. At this point, in theory, all files + // should exist. Still ignoring errors for now as the container should be + // restored and running. Not erroring out just because some cleanup operation + // failed. Starting with the checkpoint directory + err = os.RemoveAll(c.CheckpointPath()) + if err != nil { + logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err) + } + cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"} + for _, delete := range cleanup { + file := filepath.Join(c.bundlePath(), delete) + err = os.Remove(file) + if err != nil { + logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) + } + } + } + + return c.save() +} diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go index 45b54efab..eed0449a9 100644 --- a/libpod/container_internal_unsupported.go +++ b/libpod/container_internal_unsupported.go @@ -27,3 +27,11 @@ func (c *Container) cleanupNetwork() error { func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { return nil, ErrNotImplemented } + +func (c *Container) checkpoint(ctx context.Context, keep bool) error { + return ErrNotImplemented +} + +func (c *Container) restore(ctx context.Context, keep bool) error { + return ErrNotImplemented +} diff --git a/libpod/oci.go b/libpod/oci.go index e5db06540..cf2b76ab0 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) { return files, nil } -func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) { +func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { var stderrBuf bytes.Buffer runtimeDir, err := GetRootlessRuntimeDir() @@ -289,6 +289,10 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (er args = append(args, "--syslog") } + if restoreContainer { + args = append(args, "--restore", ctr.CheckpointPath()) + } + logrus.WithFields(logrus.Fields{ "args": args, }).Debugf("running conmon: %s", r.conmonPath) @@ -766,3 +770,15 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { return nil } + +// checkpointContainer checkpoints the given container +func (r *OCIRuntime) checkpointContainer(ctr *Container) error { + // imagePath is used by CRIU to store the actual checkpoint files + imagePath := ctr.CheckpointPath() + // workPath will be used to store dump.log and stats-dump + workPath := ctr.bundlePath() + logrus.Debugf("Writing checkpoint to %s", imagePath) + logrus.Debugf("Writing checkpoint logs to %s", workPath) + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint", + "--image-path", imagePath, "--work-path", workPath, ctr.ID()) +} diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 210ba57d1..0447670b3 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -63,10 +63,10 @@ func newPipe() (parent *os.File, child *os.File, err error) { // CreateContainer creates a container in the OCI runtime // TODO terminal support for container // Presently just ignoring conmon opts related to it -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) { +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { if ctr.state.UserNSRoot == "" { // no need of an intermediate mount ns - return r.createOCIContainer(ctr, cgroupParent) + return r.createOCIContainer(ctr, cgroupParent, restoreContainer) } var wg sync.WaitGroup wg.Add(1) @@ -103,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err e if err != nil { return } - err = r.createOCIContainer(ctr, cgroupParent) + err = r.createOCIContainer(ctr, cgroupParent, restoreContainer) }() wg.Wait() diff --git a/libpod/oci_unsupported.go b/libpod/oci_unsupported.go index 8cb4994d3..b133eb402 100644 --- a/libpod/oci_unsupported.go +++ b/libpod/oci_unsupported.go @@ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) { return nil, nil, ErrNotImplemented } -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) { +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { return ErrNotImplemented } diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go new file mode 100644 index 000000000..6c5d891a0 --- /dev/null +++ b/test/e2e/checkpoint_test.go @@ -0,0 +1,129 @@ +package integration + +import ( + "fmt" + "os" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var _ = Describe("Podman checkpoint", func() { + var ( + tempdir string + err error + podmanTest PodmanTest + ) + + BeforeEach(func() { + tempdir, err = CreateTempDirInTempDir() + if err != nil { + os.Exit(1) + } + podmanTest = PodmanCreate(tempdir) + podmanTest.RestoreAllArtifacts() + // At least CRIU 3.11 is needed + skip, err := podmanTest.isCriuAtLeast(31100) + if err != nil || skip { + Skip("CRIU missing or too old.") + } + }) + + AfterEach(func() { + podmanTest.Cleanup() + f := CurrentGinkgoTestDescription() + timedResult := fmt.Sprintf("Test: %s completed in %f seconds", f.TestText, f.Duration.Seconds()) + GinkgoWriter.Write([]byte(timedResult)) + }) + + It("podman checkpoint bogus container", func() { + session := podmanTest.Podman([]string{"container", "checkpoint", "foobar"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + }) + + It("podman restore bogus container", func() { + session := podmanTest.Podman([]string{"container", "restore", "foobar"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + }) + + It("podman checkpoint a running container by id", func() { + // CRIU does not work with seccomp correctly on RHEL7 + session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + cid := session.OutputToString() + + result := podmanTest.Podman([]string{"container", "checkpoint", cid}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) + + result = podmanTest.Podman([]string{"container", "restore", cid}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + }) + + It("podman checkpoint a running container by name", func() { + session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "test_name", "-d", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + + result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) + + result = podmanTest.Podman([]string{"container", "restore", "test_name"}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + }) + + It("podman pause a checkpointed container by id", func() { + session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Equal(0)) + cid := session.OutputToString() + + result := podmanTest.Podman([]string{"container", "checkpoint", cid}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) + + result = podmanTest.Podman([]string{"pause", cid}) + result.WaitWithDefaultTimeout() + + Expect(result.ExitCode()).To(Equal(125)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) + + result = podmanTest.Podman([]string{"container", "restore", cid}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + + result = podmanTest.Podman([]string{"rm", cid}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(125)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) + + result = podmanTest.Podman([]string{"rm", "-f", cid}) + result.WaitWithDefaultTimeout() + Expect(result.ExitCode()).To(Equal(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + + }) +}) diff --git a/test/e2e/libpod_suite_test.go b/test/e2e/libpod_suite_test.go index d521632d7..a032b0e88 100644 --- a/test/e2e/libpod_suite_test.go +++ b/test/e2e/libpod_suite_test.go @@ -2,6 +2,7 @@ package integration import ( "bufio" + "bytes" "context" "encoding/json" "fmt" @@ -64,6 +65,7 @@ type PodmanTest struct { TempDir string CgroupManager string Host HostOS + CriuBinary string } // HostOS is a simple struct for the test os @@ -164,6 +166,7 @@ func PodmanCreate(tempDir string) PodmanTest { runCBinary = "/usr/bin/runc" } + criuBinary := "/usr/sbin/criu" CNIConfigDir := "/etc/cni/net.d" p := PodmanTest{ @@ -179,6 +182,7 @@ func PodmanCreate(tempDir string) PodmanTest { TempDir: tempDir, CgroupManager: cgroupManager, Host: host, + CriuBinary: criuBinary, } // Setup registries.conf ENV variable @@ -678,6 +682,39 @@ func (p *PodmanTest) setRegistriesConfigEnv(b []byte) { ioutil.WriteFile(outfile, b, 0644) } +func (p *PodmanTest) isCriuAtLeast(version int) (bool, error) { + cmd := exec.Command(p.CriuBinary, "-V") + var out bytes.Buffer + cmd.Stdout = &out + err := cmd.Run() + if err != nil { + return false, err + } + + var x int + var y int + var z int + + fmt.Sscanf(out.String(), "Version: %d.%d.%d", &x, &y, &z) + + if strings.Contains(out.String(), "GitID") { + // If CRIU is built from git it contains a git ID. + // If that is the case, increase minor by one as this + // could mean we are running a development version. + y = y + 1 + } + + parsed_version := x*10000 + y*100 + z + + fmt.Println(parsed_version) + + if parsed_version >= version { + return false, nil + } else { + return true, nil + } +} + func resetRegistriesConfigEnv() { os.Setenv("REGISTRIES_CONFIG_PATH", "") } |