From ff47a4c2d5485fc49f937f3ce0c4e2fd6bdb1956 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 20 Nov 2018 14:08:08 +0000 Subject: Use a struct to pass options to Checkpoint() For upcoming changes to the Checkpoint() functions this commit switches checkpoint options from a boolean to a struct, so that additional options can be passed easily to Checkpoint() without changing the function parameters all the time. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'cmd/podman/checkpoint.go') diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index bf280920d..49e2aec63 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -50,7 +50,9 @@ func checkpointCmd(c *cli.Context) error { } defer runtime.Shutdown(false) - keep := c.Bool("keep") + options := libpod.ContainerCheckpointOptions{ + Keep: c.Bool("keep"), + } if err := checkAllAndLatest(c); err != nil { return err @@ -59,7 +61,7 @@ func checkpointCmd(c *cli.Context) error { containers, lastError := getAllOrLatestContainers(c, runtime, libpod.ContainerStateRunning, "running") for _, ctr := range containers { - if err = ctr.Checkpoint(context.TODO(), keep); err != nil { + if err = ctr.Checkpoint(context.TODO(), options); err != nil { if lastError != nil { fmt.Fprintln(os.Stderr, lastError) } -- cgit v1.2.3-54-g00ecf From b0572d622974837c2221ed1f01a2ab982f078370 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 20 Nov 2018 15:34:15 +0000 Subject: Added option to keep containers running after checkpointing CRIU supports to leave processes running after checkpointing: -R|--leave-running leave tasks in running state after checkpoint runc also support to leave containers running after checkpointing: --leave-running leave the process running after checkpointing With this commit the support to leave a container running after checkpointing is brought to Podman: --leave-running, -R leave the container running after writing checkpoint to disk Now it is possible to checkpoint a container at some point in time without stopping the container. This can be used to rollback the container to an early state: $ podman run --tmpfs /tmp --name podman-criu-test -d docker://docker.io/yovfiatbeb/podman-criu-test $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 3 $ podman container checkpoint -R -l $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 4 $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 5 $ podman stop -l $ podman container restore -l $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 4 So after checkpointing the container kept running and was stopped after some time. Restoring this container will restore the state right at the checkpoint. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 7 ++++++- libpod/container_api.go | 3 ++- libpod/container_internal_linux.go | 12 +++++++----- libpod/oci.go | 15 ++++++++++++--- 4 files changed, 27 insertions(+), 10 deletions(-) (limited to 'cmd/podman/checkpoint.go') diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index 49e2aec63..ddfd12bc3 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -23,6 +23,10 @@ var ( Name: "keep, k", Usage: "keep all temporary checkpoint files", }, + cli.BoolFlag{ + Name: "leave-running, R", + Usage: "leave the container running after writing checkpoint to disk", + }, cli.BoolFlag{ Name: "all, a", Usage: "checkpoint all running containers", @@ -51,7 +55,8 @@ func checkpointCmd(c *cli.Context) error { defer runtime.Shutdown(false) options := libpod.ContainerCheckpointOptions{ - Keep: c.Bool("keep"), + Keep: c.Bool("keep"), + KeepRunning: c.Bool("leave-running"), } if err := checkAllAndLatest(c); err != nil { diff --git a/libpod/container_api.go b/libpod/container_api.go index 9f5436b14..df6b6e962 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -833,7 +833,8 @@ func (c *Container) Refresh(ctx context.Context) error { // ContainerCheckpointOptions is a struct used to pass the parameters // for checkpointing to corresponding functions type ContainerCheckpointOptions struct { - Keep bool + Keep bool + KeepRunning bool } // Checkpoint checkpoints a container diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 003e8284a..e6071945d 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -440,7 +440,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if c.state.State != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } - if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { + if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil { return err } @@ -457,11 +457,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO logrus.Debugf("Checkpointed container %s", c.ID()) - c.state.State = ContainerStateStopped + if !options.KeepRunning { + c.state.State = ContainerStateStopped - // Cleanup Storage and Network - if err := c.cleanup(ctx); err != nil { - return err + // Cleanup Storage and Network + if err := c.cleanup(ctx); err != nil { + return err + } } if !options.Keep { diff --git a/libpod/oci.go b/libpod/oci.go index 71da830b5..8ee2c948f 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -844,13 +844,22 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { } // checkpointContainer checkpoints the given container -func (r *OCIRuntime) checkpointContainer(ctr *Container) error { +func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckpointOptions) error { // imagePath is used by CRIU to store the actual checkpoint files imagePath := ctr.CheckpointPath() // workPath will be used to store dump.log and stats-dump workPath := ctr.bundlePath() logrus.Debugf("Writing checkpoint to %s", imagePath) logrus.Debugf("Writing checkpoint logs to %s", workPath) - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint", - "--image-path", imagePath, "--work-path", workPath, ctr.ID()) + args := []string{} + args = append(args, "checkpoint") + args = append(args, "--image-path") + args = append(args, imagePath) + args = append(args, "--work-path") + args = append(args, workPath) + if options.KeepRunning { + args = append(args, "--leave-running") + } + args = append(args, ctr.ID()) + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) } -- cgit v1.2.3-54-g00ecf From 03c88a3debf77780bdad2382d4c01ccedc6d27a5 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 21 Nov 2018 14:40:43 +0000 Subject: Added tcp-established to checkpoint/restore CRIU can checkpoint and restore processes/containers with established TCP connections if the correct option is specified. To implement checkpoint and restore with support for established TCP connections with Podman this commit adds the necessary options to runc during checkpoint and also tells conmon during restore to use 'runc restore' with '--tcp-established'. For this Podman feature to work a corresponding conmon change is required. Example: $ podman run --tmpfs /tmp --name podman-criu-test -d docker://docker.io/yovfiatbeb/podman-criu-test $ nc `podman inspect -l | jq -r '.[0].NetworkSettings.IPAddress'` 8080 GET /examples/servlets/servlet/HelloWorldExample Connection: keep-alive 1 GET /examples/servlets/servlet/HelloWorldExample Connection: keep-alive 2 $ # Using HTTP keep-alive multiple requests are send to the server in the container $ # Different terminal: $ podman container checkpoint -l criu failed: type NOTIFY errno 0 $ # Looking at the log file would show errors because of established TCP connections $ podman container checkpoint -l --tcp-established $ # This works now and after the restore the same connection as above can be used for requests $ podman container restore -l --tcp-established The restore would fail without '--tcp-established' as the checkpoint image contains established TCP connections. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 9 +++++++-- cmd/podman/restore.go | 7 ++++++- libpod/container_api.go | 8 +++++++- libpod/oci.go | 6 ++++++ 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'cmd/podman/checkpoint.go') diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index ddfd12bc3..824c97662 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -27,6 +27,10 @@ var ( Name: "leave-running, R", Usage: "leave the container running after writing checkpoint to disk", }, + cli.BoolFlag{ + Name: "tcp-established", + Usage: "checkpoint a container with established TCP connections", + }, cli.BoolFlag{ Name: "all, a", Usage: "checkpoint all running containers", @@ -55,8 +59,9 @@ func checkpointCmd(c *cli.Context) error { defer runtime.Shutdown(false) options := libpod.ContainerCheckpointOptions{ - Keep: c.Bool("keep"), - KeepRunning: c.Bool("leave-running"), + Keep: c.Bool("keep"), + KeepRunning: c.Bool("leave-running"), + TCPEstablished: c.Bool("tcp-established"), } if err := checkAllAndLatest(c); err != nil { diff --git a/cmd/podman/restore.go b/cmd/podman/restore.go index 6383ebf0b..afdbc36e0 100644 --- a/cmd/podman/restore.go +++ b/cmd/podman/restore.go @@ -26,6 +26,10 @@ var ( // restore --all would make more sense if there would be // dedicated state for container which are checkpointed. // TODO: add ContainerStateCheckpointed + cli.BoolFlag{ + Name: "tcp-established", + Usage: "checkpoint a container with established TCP connections", + }, cli.BoolFlag{ Name: "all, a", Usage: "restore all checkpointed containers", @@ -54,7 +58,8 @@ func restoreCmd(c *cli.Context) error { defer runtime.Shutdown(false) options := libpod.ContainerCheckpointOptions{ - Keep: c.Bool("keep"), + Keep: c.Bool("keep"), + TCPEstablished: c.Bool("tcp-established"), } if err := checkAllAndLatest(c); err != nil { diff --git a/libpod/container_api.go b/libpod/container_api.go index 396f06c20..aee87cc04 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -833,8 +833,14 @@ func (c *Container) Refresh(ctx context.Context) error { // ContainerCheckpointOptions is a struct used to pass the parameters // for checkpointing (and restoring) to the corresponding functions type ContainerCheckpointOptions struct { - Keep bool + // Keep tells the API to not delete checkpoint artifacts + Keep bool + // KeepRunning tells the API to keep the container running + // after writing the checkpoint to disk KeepRunning bool + // TCPEstablished tells the API to checkpoint a container + // even if it contains established TCP connections + TCPEstablished bool } // Checkpoint checkpoints a container diff --git a/libpod/oci.go b/libpod/oci.go index 6aedc5662..5a041b7d6 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -291,6 +291,9 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res if restoreOptions != nil { args = append(args, "--restore", ctr.CheckpointPath()) + if restoreOptions.TCPEstablished { + args = append(args, "--restore-arg", "--tcp-established") + } } logrus.WithFields(logrus.Fields{ @@ -862,6 +865,9 @@ func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckp if options.KeepRunning { args = append(args, "--leave-running") } + if options.TCPEstablished { + args = append(args, "--tcp-established") + } args = append(args, ctr.ID()) return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) } -- cgit v1.2.3-54-g00ecf