summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2018-10-04 12:34:49 -0700
committerGitHub <noreply@github.com>2018-10-04 12:34:49 -0700
commit06a959f74ab4f23d5a789d03de4b2b73a3d53dc6 (patch)
tree42e0437cd91aae4b53cd769401d7becd2309feb6
parent3c31e176c7dfce3c86a45ff4750f740a5f8f9321 (diff)
parentdc987af0b0146ec5fd2026ca8db403806c3425df (diff)
downloadpodman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.tar.gz
podman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.tar.bz2
podman-06a959f74ab4f23d5a789d03de4b2b73a3d53dc6.zip
Merge pull request #469 from adrianreber/master
Add support to checkpoint/restore containers
-rw-r--r--Dockerfile12
-rw-r--r--cmd/podman/checkpoint.go73
-rw-r--r--cmd/podman/container.go2
-rw-r--r--cmd/podman/restore.go73
-rw-r--r--completions/bash/podman42
-rw-r--r--docs/podman-container-checkpoint.1.md30
-rw-r--r--docs/podman-container-restore.1.md37
-rw-r--r--docs/podman-container.1.md2
-rw-r--r--docs/tutorials/podman_tutorial.md22
-rw-r--r--libpod/container_api.go30
-rw-r--r--libpod/container_internal.go7
-rw-r--r--libpod/container_internal_linux.go158
-rw-r--r--libpod/container_internal_unsupported.go8
-rw-r--r--libpod/oci.go18
-rw-r--r--libpod/oci_linux.go6
-rw-r--r--libpod/oci_unsupported.go2
-rw-r--r--test/e2e/checkpoint_test.go129
-rw-r--r--test/e2e/libpod_suite_test.go37
18 files changed, 682 insertions, 6 deletions
diff --git a/Dockerfile b/Dockerfile
index 749c5edb9..2c43cb046 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,8 @@ RUN apt-get update && apt-get install -y \
libaio-dev \
libcap-dev \
libfuse-dev \
+ libnet-dev \
+ libnl-3-dev \
libostree-dev \
libprotobuf-dev \
libprotobuf-c0-dev \
@@ -110,6 +112,16 @@ RUN set -x \
&& go get -u github.com/mailru/easyjson/... \
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
+# Install criu
+# The build is pinned to CRIU_COMMIT so that image rebuilds are reproducible;
+# without the checkout the tip of the default branch would be built instead.
+ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9
+RUN set -x \
+ && cd /tmp \
+ && git clone https://github.com/checkpoint-restore/criu.git \
+ && cd criu \
+ && git checkout -q "$CRIU_COMMIT" \
+ && make \
+ && install -D -m 755 criu/criu /usr/sbin/ \
+ && rm -rf /tmp/criu
+
# Install cni config
#RUN make install.cni
RUN mkdir -p /etc/cni/net.d/
diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go
new file mode 100644
index 000000000..cbbbcd740
--- /dev/null
+++ b/cmd/podman/checkpoint.go
@@ -0,0 +1,73 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "os"
+
+ "github.com/containers/libpod/cmd/podman/libpodruntime"
+ "github.com/containers/libpod/pkg/rootless"
+ "github.com/pkg/errors"
+ "github.com/urfave/cli"
+)
+
+var (
+ // checkpointDescription is the long help text attached to the
+ // "checkpoint" subcommand.
+ checkpointDescription = `
+ podman container checkpoint
+
+ Checkpoints one or more running containers. The container name or ID can be used.
+`
+ // checkpointFlags lists the flags accepted by the subcommand; the only
+ // one is the boolean --keep/-k, read by checkpointCmd via c.Bool("keep").
+ checkpointFlags = []cli.Flag{
+ cli.BoolFlag{
+ Name: "keep, k",
+ Usage: "keep all temporary checkpoint files",
+ },
+ }
+ // checkpointCommand is the cli.Command definition that dispatches to
+ // checkpointCmd.
+ checkpointCommand = cli.Command{
+ Name: "checkpoint",
+ Usage: "Checkpoints one or more containers",
+ Description: checkpointDescription,
+ Flags: checkpointFlags,
+ Action: checkpointCmd,
+ ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
+ }
+)
+
+// checkpointCmd implements "podman container checkpoint". It refuses to run
+// rootless, requires at least one container name or ID, and checkpoints each
+// given container in turn. The ID of every successfully checkpointed
+// container is printed to stdout. When several containers fail, all failures
+// but the last are printed to stderr and the last one is returned.
+func checkpointCmd(c *cli.Context) error {
+	if rootless.IsRootless() {
+		return errors.New("checkpointing a container requires root")
+	}
+
+	runtime, err := libpodruntime.GetRuntime(c)
+	if err != nil {
+		return errors.Wrapf(err, "could not get runtime")
+	}
+	defer runtime.Shutdown(false)
+
+	keepFiles := c.Bool("keep")
+	names := c.Args()
+	if len(names) < 1 {
+		return errors.Errorf("you must provide at least one container name or id")
+	}
+
+	var lastError error
+	// remember records the newest failure; the failure it replaces (if any)
+	// is flushed to stderr first so that no error is lost.
+	remember := func(failure error) {
+		if lastError != nil {
+			fmt.Fprintln(os.Stderr, lastError)
+		}
+		lastError = failure
+	}
+
+	for _, name := range names {
+		ctr, lookupErr := runtime.LookupContainer(name)
+		if lookupErr != nil {
+			remember(errors.Wrapf(lookupErr, "error looking up container %q", name))
+			continue
+		}
+		if cpErr := ctr.Checkpoint(context.TODO(), keepFiles); cpErr != nil {
+			remember(errors.Wrapf(cpErr, "failed to checkpoint container %v", ctr.ID()))
+			continue
+		}
+		fmt.Println(ctr.ID())
+	}
+	return lastError
+}
diff --git a/cmd/podman/container.go b/cmd/podman/container.go
index 82c1c824d..ff634278f 100644
--- a/cmd/podman/container.go
+++ b/cmd/podman/container.go
@@ -7,6 +7,7 @@ import (
var (
subCommands = []cli.Command{
attachCommand,
+ checkpointCommand,
cleanupCommand,
commitCommand,
createCommand,
@@ -23,6 +24,7 @@ var (
// pruneCommand,
refreshCommand,
restartCommand,
+ restoreCommand,
rmCommand,
runCommand,
runlabelCommand,
diff --git a/cmd/podman/restore.go b/cmd/podman/restore.go
new file mode 100644
index 000000000..43ef87ca2
--- /dev/null
+++ b/cmd/podman/restore.go
@@ -0,0 +1,73 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "os"
+
+ "github.com/containers/libpod/cmd/podman/libpodruntime"
+ "github.com/containers/libpod/pkg/rootless"
+ "github.com/pkg/errors"
+ "github.com/urfave/cli"
+)
+
+var (
+ // restoreDescription is the long help text attached to the "restore"
+ // subcommand.
+ restoreDescription = `
+ podman container restore
+
+ Restores a container from a checkpoint. The container name or ID can be used.
+`
+ // restoreFlags lists the flags accepted by the subcommand; the only one
+ // is the boolean --keep/-k, read by restoreCmd via c.Bool("keep").
+ restoreFlags = []cli.Flag{
+ cli.BoolFlag{
+ Name: "keep, k",
+ Usage: "keep all temporary checkpoint files",
+ },
+ }
+ // restoreCommand is the cli.Command definition that dispatches to
+ // restoreCmd.
+ restoreCommand = cli.Command{
+ Name: "restore",
+ Usage: "Restores one or more containers from a checkpoint",
+ Description: restoreDescription,
+ Flags: restoreFlags,
+ Action: restoreCmd,
+ ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
+ }
+)
+
+// restoreCmd implements "podman container restore". It refuses to run
+// rootless, requires at least one container name or ID, and restores each
+// given container in turn. The ID of every successfully restored container
+// is printed to stdout. When several containers fail, all failures but the
+// last are printed to stderr and the last one is returned.
+func restoreCmd(c *cli.Context) error {
+	if rootless.IsRootless() {
+		return errors.New("restoring a container requires root")
+	}
+
+	runtime, err := libpodruntime.GetRuntime(c)
+	if err != nil {
+		return errors.Wrapf(err, "could not get runtime")
+	}
+	defer runtime.Shutdown(false)
+
+	keepFiles := c.Bool("keep")
+	names := c.Args()
+	if len(names) < 1 {
+		return errors.Errorf("you must provide at least one container name or id")
+	}
+
+	var lastError error
+	// remember records the newest failure; the failure it replaces (if any)
+	// is flushed to stderr first so that no error is lost.
+	remember := func(failure error) {
+		if lastError != nil {
+			fmt.Fprintln(os.Stderr, lastError)
+		}
+		lastError = failure
+	}
+
+	for _, name := range names {
+		ctr, lookupErr := runtime.LookupContainer(name)
+		if lookupErr != nil {
+			remember(errors.Wrapf(lookupErr, "error looking up container %q", name))
+			continue
+		}
+		if restoreErr := ctr.Restore(context.TODO(), keepFiles); restoreErr != nil {
+			remember(errors.Wrapf(restoreErr, "failed to restore container %v", ctr.ID()))
+			continue
+		}
+		fmt.Println(ctr.ID())
+	}
+	return lastError
+}
diff --git a/completions/bash/podman b/completions/bash/podman
index f63bf4469..604a25f5d 100644
--- a/completions/bash/podman
+++ b/completions/bash/podman
@@ -87,6 +87,10 @@ __podman_complete_containers_all() {
__podman_complete_containers "$@" --all
}
+# Complete container names/IDs, restricted via "--all --filter status=created".
+# Any arguments are forwarded to __podman_complete_containers.
+__podman_complete_containers_created() {
+ __podman_complete_containers "$@" --all --filter status=created
+}
+
__podman_complete_containers_running() {
__podman_complete_containers "$@" --filter status=running
}
@@ -710,6 +714,24 @@ _podman_container_attach() {
_podman_attach
}
+# Completion for "podman container checkpoint": words starting with "-" are
+# completed from the option lists below, anything else from the running
+# containers.
+_podman_container_checkpoint() {
+ local options_with_args="
+ --help -h
+ "
+ local boolean_options="
+ --keep
+ -k
+ "
+ case "$cur" in
+ -*)
+ COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
+ ;;
+ *)
+ __podman_complete_containers_running
+ ;;
+ esac
+}
+
_podman_container_commit() {
_podman_commit
}
@@ -770,6 +792,24 @@ _podman_container_restart() {
_podman_restart
}
+# Completion for "podman container restore": words starting with "-" are
+# completed from the option lists below, anything else from the containers
+# matched by __podman_complete_containers_created.
+_podman_container_restore() {
+ local options_with_args="
+ --help -h
+ "
+ local boolean_options="
+ --keep
+ -k
+ "
+ case "$cur" in
+ -*)
+ COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
+ ;;
+ *)
+ __podman_complete_containers_created
+ ;;
+ esac
+}
+
_podman_container_rm() {
_podman_rm
}
@@ -817,6 +857,7 @@ _podman_container() {
"
subcommands="
attach
+ checkpoint
commit
create
diff
@@ -831,6 +872,7 @@ _podman_container() {
port
refresh
restart
+ restore
rm
run
start
diff --git a/docs/podman-container-checkpoint.1.md b/docs/podman-container-checkpoint.1.md
new file mode 100644
index 000000000..4906e0e12
--- /dev/null
+++ b/docs/podman-container-checkpoint.1.md
@@ -0,0 +1,30 @@
+% podman-container-checkpoint(1)
+
+## NAME
+podman\-container\-checkpoint - Checkpoints one or more running containers
+
+## SYNOPSIS
+**podman container checkpoint** [*options*] *container* ...
+
+## DESCRIPTION
+Checkpoints all the processes in one or more containers. You may use container IDs or names as input.
+
+## OPTIONS
+**-k**, **--keep**
+
+Keep all temporary log and statistics files created by CRIU during checkpointing. If
+checkpointing fails, these files are not deleted so that they can be used for debugging. If
+checkpointing succeeds, these files are normally not needed, but with this option Podman keeps
+them for further analysis.
+
+## EXAMPLE
+
+podman container checkpoint mywebserver
+
+podman container checkpoint 860a4b23
+
+## SEE ALSO
+podman(1), podman-container-restore(1)
+
+## HISTORY
+September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
diff --git a/docs/podman-container-restore.1.md b/docs/podman-container-restore.1.md
new file mode 100644
index 000000000..6360bccb0
--- /dev/null
+++ b/docs/podman-container-restore.1.md
@@ -0,0 +1,37 @@
+% podman-container-restore(1)
+
+## NAME
+podman\-container\-restore - Restores one or more containers from a checkpoint
+
+## SYNOPSIS
+**podman container restore** [*options*] *container* ...
+
+## DESCRIPTION
+Restores a container from a checkpoint. You may use container IDs or names as input.
+
+## OPTIONS
+**-k**, **--keep**
+
+Keep all temporary log and statistics files created by CRIU during
+checkpointing as well as restoring. These files are not deleted if restoring
+fails for further debugging. If restoring succeeds these files are
+theoretically not needed, but if these files are needed Podman can keep the
+files for further analysis. This includes the checkpoint directory with all
+files created during checkpointing. The size required by the checkpoint
+directory is roughly the same as the amount of memory required by the
+processes in the checkpointed container.
+
+Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used
+again.
+
+## EXAMPLE
+
+podman container restore mywebserver
+
+podman container restore 860a4b23
+
+## SEE ALSO
+podman(1), podman-container-checkpoint(1)
+
+## HISTORY
+September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
diff --git a/docs/podman-container.1.md b/docs/podman-container.1.md
index bbc325823..eac3343d5 100644
--- a/docs/podman-container.1.md
+++ b/docs/podman-container.1.md
@@ -14,6 +14,7 @@ The container command allows you to manage containers
| Command | Man Page | Description |
| ------- | --------------------------------------------------- | ---------------------------------------------------------------------------- |
| attach | [podman-attach(1)](podman-attach.1.md) | Attach to a running container. |
+| checkpoint | [podman-container-checkpoint(1)](podman-container-checkpoint.1.md) | Checkpoints one or more containers. |
| cleanup | [podman-container-cleanup(1)](podman-container-cleanup.1.md) | Cleanup containers network and mountpoints. |
| commit | [podman-commit(1)](podman-commit.1.md) | Create new image based on the changed container. |
| create | [podman-create(1)](podman-create.1.md) | Create a new container. |
@@ -29,6 +30,7 @@ The container command allows you to manage containers
| port | [podman-port(1)](podman-port.1.md) | List port mappings for the container. |
| refresh | [podman-refresh(1)](podman-container-refresh.1.md) | Refresh the state of all containers |
| restart | [podman-restart(1)](podman-restart.1.md) | Restart one or more containers. |
+| restore | [podman-container-restore(1)](podman-container-restore.1.md) | Restores one or more containers from a checkpoint. |
| rm | [podman-rm(1)](podman-rm.1.md) | Remove one or more containers. |
| run | [podman-run(1)](podman-run.1.md) | Run a command in a container. |
| start | [podman-start(1)](podman-start.1.md) | Starts one or more containers. |
diff --git a/docs/tutorials/podman_tutorial.md b/docs/tutorials/podman_tutorial.md
index a866b8eed..152d65a59 100644
--- a/docs/tutorials/podman_tutorial.md
+++ b/docs/tutorials/podman_tutorial.md
@@ -157,6 +157,28 @@ $ sudo podman top <container_id>
101 31889 31873 0 09:21 ? 00:00:00 nginx: worker process
```
+### Checkpointing the container
+Checkpointing a container stops the container while writing the state of all processes in the container to disk.
+With this a container can later be restored and continue running at exactly the same point in time as the
+checkpoint. This capability requires CRIU 3.11 or later installed on the system.
+To checkpoint the container use:
+```console
+$ sudo podman container checkpoint <container_id>
+```
+
+### Restoring the container
+Restoring a container is only possible for a previously checkpointed container. The restored container will
+continue to run at exactly the same point in time it was checkpointed.
+To restore the container use:
+```console
+$ sudo podman container restore <container_id>
+```
+
+After being restored, the container will answer requests again as it did before checkpointing.
+```console
+# curl http://<IP_address>:8080
+```
+
### Stopping the container
To stop the httpd container:
```console
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 192ccd347..93becb80d 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -832,3 +832,33 @@ func (c *Container) Refresh(ctx context.Context) error {
return nil
}
+
+// Checkpoint checkpoints a container.
+//
+// Unless the container is batched, the container lock is taken for the
+// duration of the call and the container state is synced first. keep is
+// forwarded to the platform-specific checkpoint implementation and controls
+// whether temporary checkpoint log/statistics files are kept.
+func (c *Container) Checkpoint(ctx context.Context, keep bool) error {
+	// Log the container ID, matching the other checkpoint/restore log
+	// messages, instead of formatting the whole *Container with %s.
+	logrus.Debugf("Trying to checkpoint container %s", c.ID())
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+
+		if err := c.syncContainer(); err != nil {
+			return err
+		}
+	}
+
+	return c.checkpoint(ctx, keep)
+}
+
+// Restore restores a container from a checkpoint.
+//
+// Unless the container is batched, the container lock is taken for the
+// duration of the call and the container state is synced first. keep is
+// forwarded to the platform-specific restore implementation and controls
+// whether the checkpoint files are kept after the restore.
+func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
+	// Log the container ID, matching the other checkpoint/restore log
+	// messages, instead of formatting the whole *Container with %s.
+	logrus.Debugf("Trying to restore container %s", c.ID())
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+
+		if err := c.syncContainer(); err != nil {
+			return err
+		}
+	}
+
+	return c.restore(ctx, keep)
+}
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 033426817..c925f070b 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -129,6 +129,11 @@ func (c *Container) ControlSocketPath() string {
return filepath.Join(c.bundlePath(), "ctl")
}
+// CheckpointPath reports where this container's checkpoint lives: the
+// "checkpoint" directory directly below the container's bundle path.
+func (c *Container) CheckpointPath() string {
+	bundle := c.bundlePath()
+	return filepath.Join(bundle, "checkpoint")
+}
+
// AttachSocketPath retrieves the path of the container's attach socket
func (c *Container) AttachSocketPath() string {
return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach")
@@ -523,7 +528,7 @@ func (c *Container) init(ctx context.Context) error {
}
// With the spec complete, do an OCI create
- if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
+ if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil {
return err
}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index b77beaf64..0353124dd 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -4,12 +4,18 @@ package libpod
import (
"context"
+ "encoding/json"
"fmt"
+ "io/ioutil"
+ "net"
+ "os"
"path"
+ "path/filepath"
"strings"
"syscall"
"time"
+ cnitypes "github.com/containernetworking/cni/pkg/types/current"
crioAnnotations "github.com/containers/libpod/pkg/annotations"
"github.com/containers/libpod/pkg/chrootuser"
"github.com/containers/libpod/pkg/rootless"
@@ -307,3 +313,155 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
return nil
}
+
+// checkpoint dumps a running container through the OCI runtime, stores the
+// container's network status next to the checkpoint, marks the container as
+// stopped, cleans it up and persists the new state. Unless keep is set, the
+// dump log and statistics files are removed from the bundle directory
+// afterwards.
+func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
+	if c.state.State != ContainerStateRunning {
+		return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
+	}
+
+	if dumpErr := c.runtime.ociRuntime.checkpointContainer(c); dumpErr != nil {
+		return dumpErr
+	}
+
+	// Persist the network status so that a later restore can request the
+	// same IP address. Only one interface with one IP address is supported
+	// for now.
+	statusJSON, marshalErr := json.MarshalIndent(c.state.NetworkStatus, "", " ")
+	if marshalErr != nil {
+		return marshalErr
+	}
+	statusPath := filepath.Join(c.bundlePath(), "network.status")
+	if writeErr := ioutil.WriteFile(statusPath, statusJSON, 0644); writeErr != nil {
+		return writeErr
+	}
+
+	logrus.Debugf("Checkpointed container %s", c.ID())
+
+	c.state.State = ContainerStateStopped
+
+	// Release storage and network
+	if cleanupErr := c.cleanup(ctx); cleanupErr != nil {
+		return cleanupErr
+	}
+
+	if !keep {
+		// Best-effort removal of the log file and the statistics file
+		for _, leftover := range []string{"dump.log", "stats-dump"} {
+			os.Remove(filepath.Join(c.bundlePath(), leftover))
+		}
+	}
+
+	return c.save()
+}
+
+// restore restarts a container from the checkpoint stored in
+// CheckpointPath().
+//
+// The container must be in the configured or exited state and CRIU's
+// inventory.img must exist in the checkpoint directory. If a network.status
+// file was saved in the bundle directory, its first IP address is requested
+// again through the CNI_ARGS environment variable. The OCI spec is reloaded
+// from config.json, pointed at the container's network namespace when one is
+// configured, and the container is created through the OCI runtime in restore
+// mode. On success the state is set to running and saved.
+//
+// Unless keep is set, the checkpoint directory and the checkpoint/restore log
+// and statistics files are removed after a successful restore; removal
+// failures are only logged.
+//
+// If anything fails after c.prepare() succeeded, c.cleanup() is run before
+// the error is returned.
+func (c *Container) restore(ctx context.Context, keep bool) (err error) {
+
+	if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) {
+		return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
+	}
+
+	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
+	// no sense to try a restore. This is a minimal check if a checkpoint exist.
+	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
+		return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
+	}
+
+	// Read network configuration from checkpoint
+	// Currently only one interface with one IP is supported.
+	// A network.status file that cannot be opened is not an error: the
+	// container is then restored without requesting a specific IP address.
+	if networkStatusFile, openErr := os.Open(filepath.Join(c.bundlePath(), "network.status")); openErr == nil {
+		// The file with the network.status does exist. Let's restore the
+		// container with the same IP address as during checkpointing.
+		defer networkStatusFile.Close()
+		var networkStatus []*cnitypes.Result
+		networkJSON, err := ioutil.ReadAll(networkStatusFile)
+		if err != nil {
+			return err
+		}
+		// A file that exists but cannot be parsed is reported instead of
+		// silently restoring the container with a different IP address.
+		if err := json.Unmarshal(networkJSON, &networkStatus); err != nil {
+			return errors.Wrapf(err, "error parsing network status of container %s", c.ID())
+		}
+		// Take the first IP address
+		var IP net.IP
+		if len(networkStatus) > 0 {
+			if len(networkStatus[0].IPs) > 0 {
+				IP = networkStatus[0].IPs[0].Address.IP
+			}
+		}
+		if IP != nil {
+			env := fmt.Sprintf("IP=%s", IP)
+			// Tell CNI which IP address we want.
+			os.Setenv("CNI_ARGS", env)
+			logrus.Debugf("Restoring container with %s", env)
+		}
+	}
+
+	prepareErr := c.prepare()
+
+	// TODO: use existing way to request static IPs, once it is merged in ocicni
+	// https://github.com/cri-o/ocicni/pull/23/
+
+	// CNI_ARGS was used to request a certain IP address. Unconditionally remove
+	// it, also when prepare() failed, so that it does not stay behind in the
+	// process environment.
+	os.Unsetenv("CNI_ARGS")
+
+	if prepareErr != nil {
+		return prepareErr
+	}
+	defer func() {
+		if err != nil {
+			if err2 := c.cleanup(ctx); err2 != nil {
+				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
+			}
+		}
+	}()
+
+	// Read config
+	jsonPath := filepath.Join(c.bundlePath(), "config.json")
+	logrus.Debugf("generate.NewFromFile at %v", jsonPath)
+	g, err := generate.NewFromFile(jsonPath)
+	if err != nil {
+		logrus.Debugf("generate.NewFromFile failed with %v", err)
+		return err
+	}
+
+	// We want to have the same network namespace as before.
+	if c.config.CreateNetNS {
+		g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
+	}
+
+	// Save the OCI spec to disk
+	if err := c.saveSpec(g.Spec()); err != nil {
+		return err
+	}
+
+	if err := c.makeBindMounts(); err != nil {
+		return err
+	}
+
+	// Cleanup for a working restore.
+	c.removeConmonFiles()
+
+	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil {
+		return err
+	}
+
+	logrus.Debugf("Restored container %s", c.ID())
+
+	c.state.State = ContainerStateRunning
+
+	if !keep {
+		// Delete all checkpoint related files. At this point, in theory, all files
+		// should exist. Still ignoring errors for now as the container should be
+		// restored and running. Not erroring out just because some cleanup operation
+		// failed. Starting with the checkpoint directory.
+		// Removal errors live in their own variable so that they can never leak
+		// into the named return value.
+		if rmErr := os.RemoveAll(c.CheckpointPath()); rmErr != nil {
+			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), rmErr)
+		}
+		cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"}
+		for _, name := range cleanup {
+			file := filepath.Join(c.bundlePath(), name)
+			if rmErr := os.Remove(file); rmErr != nil {
+				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, rmErr)
+			}
+		}
+	}
+
+	return c.save()
+}
diff --git a/libpod/container_internal_unsupported.go b/libpod/container_internal_unsupported.go
index 45b54efab..eed0449a9 100644
--- a/libpod/container_internal_unsupported.go
+++ b/libpod/container_internal_unsupported.go
@@ -27,3 +27,11 @@ func (c *Container) cleanupNetwork() error {
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return nil, ErrNotImplemented
}
+
+// checkpoint is the stub for builds without checkpoint support; it always
+// returns ErrNotImplemented.
+func (c *Container) checkpoint(ctx context.Context, keep bool) error {
+ return ErrNotImplemented
+}
+
+// restore is the stub for builds without restore support; it always returns
+// ErrNotImplemented.
+func (c *Container) restore(ctx context.Context, keep bool) error {
+ return ErrNotImplemented
+}
diff --git a/libpod/oci.go b/libpod/oci.go
index e5db06540..cf2b76ab0 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
return files, nil
}
-func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) {
+func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
var stderrBuf bytes.Buffer
runtimeDir, err := GetRootlessRuntimeDir()
@@ -289,6 +289,10 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (er
args = append(args, "--syslog")
}
+ if restoreContainer {
+ args = append(args, "--restore", ctr.CheckpointPath())
+ }
+
logrus.WithFields(logrus.Fields{
"args": args,
}).Debugf("running conmon: %s", r.conmonPath)
@@ -766,3 +770,15 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
return nil
}
+
+// checkpointContainer runs the OCI runtime's "checkpoint" command for ctr,
+// wired to this process's stdin, stdout and stderr. The checkpoint image is
+// written to ctr.CheckpointPath() and the work files to the bundle directory.
+func (r *OCIRuntime) checkpointContainer(ctr *Container) error {
+	// Directory in which CRIU stores the actual checkpoint files
+	imageDir := ctr.CheckpointPath()
+	// Directory that receives dump.log and stats-dump
+	workDir := ctr.bundlePath()
+
+	logrus.Debugf("Writing checkpoint to %s", imageDir)
+	logrus.Debugf("Writing checkpoint logs to %s", workDir)
+
+	err := utils.ExecCmdWithStdStreams(
+		os.Stdin, os.Stdout, os.Stderr, nil,
+		r.path, "checkpoint",
+		"--image-path", imageDir,
+		"--work-path", workDir,
+		ctr.ID(),
+	)
+	return err
+}
diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go
index 210ba57d1..0447670b3 100644
--- a/libpod/oci_linux.go
+++ b/libpod/oci_linux.go
@@ -63,10 +63,10 @@ func newPipe() (parent *os.File, child *os.File, err error) {
// CreateContainer creates a container in the OCI runtime
// TODO terminal support for container
// Presently just ignoring conmon opts related to it
-func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
+func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
if ctr.state.UserNSRoot == "" {
// no need of an intermediate mount ns
- return r.createOCIContainer(ctr, cgroupParent)
+ return r.createOCIContainer(ctr, cgroupParent, restoreContainer)
}
var wg sync.WaitGroup
wg.Add(1)
@@ -103,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err e
if err != nil {
return
}
- err = r.createOCIContainer(ctr, cgroupParent)
+ err = r.createOCIContainer(ctr, cgroupParent, restoreContainer)
}()
wg.Wait()
diff --git a/libpod/oci_unsupported.go b/libpod/oci_unsupported.go
index 8cb4994d3..b133eb402 100644
--- a/libpod/oci_unsupported.go
+++ b/libpod/oci_unsupported.go
@@ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) {
return nil, nil, ErrNotImplemented
}
-func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
+func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
return ErrNotImplemented
}
diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go
new file mode 100644
index 000000000..6c5d891a0
--- /dev/null
+++ b/test/e2e/checkpoint_test.go
@@ -0,0 +1,129 @@
+package integration
+
+import (
+ "fmt"
+ "os"
+
+ . "github.com/onsi/ginkgo"
+ . "github.com/onsi/gomega"
+)
+
+// End-to-end tests for "podman container checkpoint" and
+// "podman container restore".
+var _ = Describe("Podman checkpoint", func() {
+ var (
+ tempdir string
+ err error
+ podmanTest PodmanTest
+ )
+
+ // Every spec gets a fresh temp dir and PodmanTest; the spec is skipped
+ // when the CRIU version check fails or reports that CRIU is too old.
+ BeforeEach(func() {
+ tempdir, err = CreateTempDirInTempDir()
+ if err != nil {
+ os.Exit(1)
+ }
+ podmanTest = PodmanCreate(tempdir)
+ podmanTest.RestoreAllArtifacts()
+ // At least CRIU 3.11 is needed
+ skip, err := podmanTest.isCriuAtLeast(31100)
+ if err != nil || skip {
+ Skip("CRIU missing or too old.")
+ }
+ })
+
+ // Clean up and write the spec's duration to the ginkgo writer.
+ AfterEach(func() {
+ podmanTest.Cleanup()
+ f := CurrentGinkgoTestDescription()
+ timedResult := fmt.Sprintf("Test: %s completed in %f seconds", f.TestText, f.Duration.Seconds())
+ GinkgoWriter.Write([]byte(timedResult))
+ })
+
+ // Checkpointing a container that does not exist must fail.
+ It("podman checkpoint bogus container", func() {
+ session := podmanTest.Podman([]string{"container", "checkpoint", "foobar"})
+ session.WaitWithDefaultTimeout()
+ Expect(session.ExitCode()).To(Not(Equal(0)))
+ })
+
+ // Restoring a container that does not exist must fail.
+ It("podman restore bogus container", func() {
+ session := podmanTest.Podman([]string{"container", "restore", "foobar"})
+ session.WaitWithDefaultTimeout()
+ Expect(session.ExitCode()).To(Not(Equal(0)))
+ })
+
+ // Checkpoint and restore round trip, addressing the container by ID.
+ It("podman checkpoint a running container by id", func() {
+ // CRIU does not work with seccomp correctly on RHEL7
+ session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
+ session.WaitWithDefaultTimeout()
+ Expect(session.ExitCode()).To(Equal(0))
+ cid := session.OutputToString()
+
+ result := podmanTest.Podman([]string{"container", "checkpoint", cid})
+ result.WaitWithDefaultTimeout()
+
+ // After the checkpoint nothing is running and the status is "Exited".
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
+
+ result = podmanTest.Podman([]string{"container", "restore", cid})
+ result.WaitWithDefaultTimeout()
+
+ // After the restore the container is running again.
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
+ })
+
+ // Same round trip, addressing the container by name.
+ It("podman checkpoint a running container by name", func() {
+ session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "test_name", "-d", ALPINE, "top"})
+ session.WaitWithDefaultTimeout()
+ Expect(session.ExitCode()).To(Equal(0))
+
+ result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"})
+ result.WaitWithDefaultTimeout()
+
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
+
+ result = podmanTest.Podman([]string{"container", "restore", "test_name"})
+ result.WaitWithDefaultTimeout()
+
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
+ })
+
+ // A checkpointed container cannot be paused; once restored it cannot be
+ // removed without -f but can be removed with -f.
+ It("podman pause a checkpointed container by id", func() {
+ session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
+ session.WaitWithDefaultTimeout()
+ Expect(session.ExitCode()).To(Equal(0))
+ cid := session.OutputToString()
+
+ result := podmanTest.Podman([]string{"container", "checkpoint", cid})
+ result.WaitWithDefaultTimeout()
+
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
+
+ result = podmanTest.Podman([]string{"pause", cid})
+ result.WaitWithDefaultTimeout()
+
+ // Pausing is expected to fail with exit code 125 and leave the state untouched.
+ Expect(result.ExitCode()).To(Equal(125))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
+ Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
+
+ result = podmanTest.Podman([]string{"container", "restore", cid})
+ result.WaitWithDefaultTimeout()
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
+
+ // Plain rm of the restored (running) container is expected to fail with 125.
+ result = podmanTest.Podman([]string{"rm", cid})
+ result.WaitWithDefaultTimeout()
+ Expect(result.ExitCode()).To(Equal(125))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
+
+ result = podmanTest.Podman([]string{"rm", "-f", cid})
+ result.WaitWithDefaultTimeout()
+ Expect(result.ExitCode()).To(Equal(0))
+ Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
+
+ })
+})
diff --git a/test/e2e/libpod_suite_test.go b/test/e2e/libpod_suite_test.go
index d521632d7..a032b0e88 100644
--- a/test/e2e/libpod_suite_test.go
+++ b/test/e2e/libpod_suite_test.go
@@ -2,6 +2,7 @@ package integration
import (
"bufio"
+ "bytes"
"context"
"encoding/json"
"fmt"
@@ -64,6 +65,7 @@ type PodmanTest struct {
TempDir string
CgroupManager string
Host HostOS
+ CriuBinary string
}
// HostOS is a simple struct for the test os
@@ -164,6 +166,7 @@ func PodmanCreate(tempDir string) PodmanTest {
runCBinary = "/usr/bin/runc"
}
+ criuBinary := "/usr/sbin/criu"
CNIConfigDir := "/etc/cni/net.d"
p := PodmanTest{
@@ -179,6 +182,7 @@ func PodmanCreate(tempDir string) PodmanTest {
TempDir: tempDir,
CgroupManager: cgroupManager,
Host: host,
+ CriuBinary: criuBinary,
}
// Setup registries.conf ENV variable
@@ -678,6 +682,39 @@ func (p *PodmanTest) setRegistriesConfigEnv(b []byte) {
ioutil.WriteFile(outfile, b, 0644)
}
+// isCriuAtLeast runs "<CriuBinary> -V" and compares the reported version with
+// version, which is encoded as major*10000 + minor*100 + patch (31100 stands
+// for CRIU 3.11.0).
+//
+// NOTE: despite the name, the boolean result is a "too old" flag: it is false
+// when the installed CRIU is at least version and true when it is older.
+// The caller in this test suite treats true as "skip the test", so this
+// contract is kept as is.
+//
+// An error is returned when the CRIU binary cannot be executed.
+func (p *PodmanTest) isCriuAtLeast(version int) (bool, error) {
+	cmd := exec.Command(p.CriuBinary, "-V")
+	var out bytes.Buffer
+	cmd.Stdout = &out
+	if err := cmd.Run(); err != nil {
+		return false, err
+	}
+	output := out.String()
+
+	var major, minor, patch int
+	// The Sscanf result is ignored: components that cannot be parsed keep
+	// their zero value.
+	fmt.Sscanf(output, "Version: %d.%d.%d", &major, &minor, &patch)
+
+	if strings.Contains(output, "GitID") {
+		// If CRIU is built from git it contains a git ID.
+		// If that is the case, increase minor by one as this
+		// could mean we are running a development version.
+		minor++
+	}
+
+	parsedVersion := major*10000 + minor*100 + patch
+
+	return parsedVersion < version, nil
+}
+
func resetRegistriesConfigEnv() {
os.Setenv("REGISTRIES_CONFIG_PATH", "")
}