From 6202e8102b9728f89c2ab7bb504306a2bd007878 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 9 Nov 2021 09:15:50 +0000 Subject: Added optional container checkpointing statistics This adds the parameter '--print-stats' to 'podman container checkpoint'. With '--print-stats' Podman will measure how long Podman itself, the OCI runtime and CRIU requires to create a checkpoint and print out these information. CRIU already creates checkpointing statistics which are just read in addition to the added measurements. In contrast to just printing out the ID of the checkpointed container, Podman will now print out JSON: # podman container checkpoint --latest --print-stats { "podman_checkpoint_duration": 360749, "container_statistics": [ { "Id": "25244244bf2efbef30fb6857ddea8cb2e5489f07eb6659e20dda117f0c466808", "runtime_checkpoint_duration": 177222, "criu_statistics": { "freezing_time": 100657, "frozen_time": 60700, "memdump_time": 8162, "memwrite_time": 4224, "pages_scanned": 20561, "pages_written": 2129 } } ] } The output contains 'podman_checkpoint_duration' which contains the number of microseconds Podman required to create the checkpoint. The output also includes 'runtime_checkpoint_duration' which is the time the runtime needed to checkpoint that specific container. Each container also includes 'criu_statistics' which displays the timing information collected by CRIU. Signed-off-by: Adrian Reber --- cmd/podman/containers/checkpoint.go | 34 ++++++++++++++++++++++- libpod/container_api.go | 16 ++++++++--- libpod/container_internal_linux.go | 52 +++++++++++++++++++++++++++-------- libpod/define/checkpoint_restore.go | 32 +++++++++++++++++++++ libpod/oci.go | 6 ++-- libpod/oci_conmon_linux.go | 21 ++++++++++---- libpod/oci_missing.go | 4 +-- pkg/api/handlers/libpod/containers.go | 14 ++++++++-- pkg/api/server/register_containers.go | 4 +++ pkg/domain/entities/containers.go | 7 +++-- pkg/domain/infra/abi/containers.go | 9 ++++-- 11 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 libpod/define/checkpoint_restore.go diff --git a/cmd/podman/containers/checkpoint.go b/cmd/podman/containers/checkpoint.go index 4fa72d520..d92bc3e5e 100644 --- a/cmd/podman/containers/checkpoint.go +++ b/cmd/podman/containers/checkpoint.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/containers/common/pkg/completion" "github.com/containers/podman/v3/cmd/podman/common" @@ -40,6 +41,11 @@ var ( var checkpointOptions entities.CheckpointOptions +type checkpointStatistics struct { + PodmanDuration int64 `json:"podman_checkpoint_duration"` + ContainerStatistics []*entities.CheckpointReport `json:"container_statistics"` +} + func init() { registry.Commands = append(registry.Commands, registry.CliCommand{ Command: checkpointCommand, @@ -63,11 +69,19 @@ func init() { flags.StringP("compress", "c", "zstd", "Select compression algorithm (gzip, none, zstd) for checkpoint archive.") _ = checkpointCommand.RegisterFlagCompletionFunc("compress", common.AutocompleteCheckpointCompressType) + flags.BoolVar( + &checkpointOptions.PrintStats, + "print-stats", + false, + "Display checkpoint statistics", + ) + validate.AddLatestFlag(checkpointCommand, &checkpointOptions.Latest) } func checkpoint(cmd *cobra.Command, args []string) error { var errs utils.OutputErrors + podmanStart := time.Now() if cmd.Flags().Changed("compress") { if checkpointOptions.Export == "" { return errors.Errorf("--compress can only be used with --export") @@ -102,12 +116,30 @@ func checkpoint(cmd *cobra.Command, args []string) error { if err != nil { return err } + podmanFinished := time.Now() + + var statistics checkpointStatistics + for _, r := range responses { if r.Err == nil { - fmt.Println(r.Id) + if checkpointOptions.PrintStats { + statistics.ContainerStatistics = append(statistics.ContainerStatistics, r) + } else { + fmt.Println(r.Id) + } } else { errs = append(errs, r.Err) } } + + if checkpointOptions.PrintStats { + statistics.PodmanDuration = podmanFinished.Sub(podmanStart).Microseconds() + j, err := json.MarshalIndent(statistics, "", " ") + if err != nil { + return err + } + fmt.Println(string(j)) + } + return errs.PrintErrors() } diff --git a/libpod/container_api.go b/libpod/container_api.go index 38223316e..b0ea7c0de 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -794,21 +794,29 @@ type ContainerCheckpointOptions struct { // container no PID 1 will be in the namespace and that is not // possible. Pod string + // PrintStats tells the API to fill out the statistics about + // how much time each component in the stack requires to + // checkpoint a container. + PrintStats bool } // Checkpoint checkpoints a container -func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time +// the runtime needs to checkpoint the container) are only set if +// options.PrintStats is set to true. Not setting options.PrintStats to true +// will return nil and 0. +func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { logrus.Debugf("Trying to checkpoint container %s", c.ID()) if options.TargetFile != "" { if err := c.prepareCheckpointExport(); err != nil { - return err + return nil, 0, err } } if options.WithPrevious { if err := c.canWithPrevious(); err != nil { - return err + return nil, 0, err } } @@ -817,7 +825,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return err + return nil, 0, err } } return c.checkpoint(ctx, options) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 91453574e..2778d2cde 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1129,25 +1129,26 @@ func (c *Container) checkpointRestoreSupported(version int) error { return nil } -func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { - return err + return nil, 0, err } if c.state.State != define.ContainerStateRunning { - return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) + return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } if c.AutoRemove() && options.TargetFile == "" { - return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") + return nil, 0, errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") } if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil { - return err + return nil, 0, err } - if err := c.ociRuntime.CheckpointContainer(c, options); err != nil { - return err + runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options) + if err != nil { + return nil, 0, err } // Save network.status. This is needed to restore the container with @@ -1155,7 +1156,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // with one interface. // FIXME: will this break something? if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil { - return err + return nil, 0, err } defer c.newContainerEvent(events.Checkpoint) @@ -1165,13 +1166,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if options.WithPrevious { os.Remove(path.Join(c.CheckpointPath(), "parent")) if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil { - return err + return nil, 0, err } } if options.TargetFile != "" { if err := c.exportCheckpoint(options); err != nil { - return err + return nil, 0, err } } @@ -1183,8 +1184,35 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // Cleanup Storage and Network if err := c.cleanup(ctx); err != nil { - return err + return nil, 0, err + } + } + + criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) { + if !options.PrintStats { + return nil, nil + } + statsDirectory, err := os.Open(c.bundlePath()) + if err != nil { + return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath()) + } + + dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory) + if err != nil { + return nil, errors.Wrap(err, "Displaying checkpointing statistics not possible") } + + return &define.CRIUCheckpointRestoreStatistics{ + FreezingTime: dumpStatistics.GetFreezingTime(), + FrozenTime: dumpStatistics.GetFrozenTime(), + MemdumpTime: dumpStatistics.GetMemdumpTime(), + MemwriteTime: dumpStatistics.GetMemwriteTime(), + PagesScanned: dumpStatistics.GetPagesScanned(), + PagesWritten: dumpStatistics.GetPagesWritten(), + }, nil + }() + if err != nil { + return nil, 0, err } if !options.Keep && !options.PreCheckPoint { @@ -1203,7 +1231,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO } c.state.FinishedTime = time.Now() - return c.save() + return criuStatistics, runtimeCheckpointDuration, c.save() } func (c *Container) importCheckpoint(input string) error { diff --git a/libpod/define/checkpoint_restore.go b/libpod/define/checkpoint_restore.go new file mode 100644 index 000000000..536bdde9a --- /dev/null +++ b/libpod/define/checkpoint_restore.go @@ -0,0 +1,32 @@ +package define + +// This contains values reported by CRIU during +// checkpointing or restoring. +// All names are the same as reported by CRIU. +type CRIUCheckpointRestoreStatistics struct { + // Checkpoint values + // Time required to freeze/pause/quiesce the processes + FreezingTime uint32 `json:"freezing_time,omitempty"` + // Time the processes are actually not running during checkpointing + FrozenTime uint32 `json:"frozen_time,omitempty"` + // Time required to extract memory pages from the processes + MemdumpTime uint32 `json:"memdump_time,omitempty"` + // Time required to write memory pages to disk + MemwriteTime uint32 `json:"memwrite_time,omitempty"` + // Number of memory pages CRIU analyzed + PagesScanned uint64 `json:"pages_scanned,omitempty"` + // Number of memory pages written + PagesWritten uint64 `json:"pages_written,omitempty"` + + // Restore values + // Number of pages compared during restore + PagesCompared uint64 `json:"pages_compared,omitempty"` + // Number of COW pages skipped during restore + PagesSkippedCow uint64 `json:"pages_skipped_cow,omitempty"` + // Time required to fork processes + ForkingTime uint32 `json:"forking_time,omitempty"` + // Time required to restore + RestoreTime uint32 `json:"restore_time,omitempty"` + // Number of memory pages restored + PagesRestored uint64 `json:"pages_restored,omitempty"` +} diff --git a/libpod/oci.go b/libpod/oci.go index c92d9a077..f78600210 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -101,8 +101,10 @@ type OCIRuntime interface { // CheckpointContainer checkpoints the given container. // Some OCI runtimes may not support this - if SupportsCheckpoint() // returns false, this is not implemented, and will always return an - // error. - CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error + // error. If CheckpointOptions.PrintStats is true the first return parameter + // contains the number of microseconds the runtime needed to checkpoint + // the given container. + CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) // CheckConmonRunning verifies that the given container's Conmon // instance is still running. Runtimes without Conmon, or systems where diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index db906fabb..305d73d75 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -760,9 +760,9 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize define.TerminalS } // CheckpointContainer checkpoints the given container. -func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { +func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { - return err + return 0, err } // imagePath is used by CRIU to store the actual checkpoint files imagePath := ctr.CheckpointPath() @@ -802,14 +802,25 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container } runtimeDir, err := util.GetRuntimeDir() if err != nil { - return err + return 0, err } if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil { - return errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") + return 0, errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") } args = append(args, ctr.ID()) logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointStarted := time.Now() + err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointDuration := func() int64 { + if options.PrintStats { + return time.Since(runtimeCheckpointStarted).Microseconds() + } + return 0 + }() + + return runtimeCheckpointDuration, err } func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index fcf2ffca8..c91146b9c 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -153,8 +153,8 @@ func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (boo } // CheckpointContainer is not available as the runtime is missing -func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { - return r.printError() +func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { + return 0, r.printError() } // CheckConmonRunning is not available as the runtime is missing diff --git a/pkg/api/handlers/libpod/containers.go b/pkg/api/handlers/libpod/containers.go index 343c0d0b3..d8312e9be 100644 --- a/pkg/api/handlers/libpod/containers.go +++ b/pkg/api/handlers/libpod/containers.go @@ -214,6 +214,7 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { TCPEstablished bool `schema:"tcpEstablished"` Export bool `schema:"export"` IgnoreRootFS bool `schema:"ignoreRootFS"` + PrintStats bool `schema:"printStats"` }{ // override any golang type defaults } @@ -248,11 +249,12 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { KeepRunning: query.LeaveRunning, TCPEstablished: query.TCPEstablished, IgnoreRootfs: query.IgnoreRootFS, + PrintStats: query.PrintStats, } if query.Export { options.TargetFile = targetFile } - err = ctr.Checkpoint(r.Context(), options) + criuStatistics, runtimeCheckpointDuration, err := ctr.Checkpoint(r.Context(), options) if err != nil { utils.InternalServerError(w, err) return @@ -267,7 +269,15 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { utils.WriteResponse(w, http.StatusOK, f) return } - utils.WriteResponse(w, http.StatusOK, entities.CheckpointReport{Id: ctr.ID()}) + utils.WriteResponse( + w, + http.StatusOK, + entities.CheckpointReport{ + Id: ctr.ID(), + RuntimeDuration: runtimeCheckpointDuration, + CRIUStatistics: criuStatistics, + }, + ) } func Restore(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/api/server/register_containers.go b/pkg/api/server/register_containers.go index c4919182b..e98098f97 100644 --- a/pkg/api/server/register_containers.go +++ b/pkg/api/server/register_containers.go @@ -1441,6 +1441,10 @@ func (s *APIServer) registerContainersHandlers(r *mux.Router) error { // name: ignoreRootFS // type: boolean // description: do not include root file-system changes when exporting + // - in: query + // name: printStats + // type: boolean + // description: add checkpoint statistics to the returned CheckpointReport // produces: // - application/json // responses: diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 869c616ea..bab663ef7 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -190,11 +190,14 @@ type CheckpointOptions struct { PreCheckPoint bool WithPrevious bool Compression archive.Compression + PrintStats bool } type CheckpointReport struct { - Err error - Id string //nolint + Err error `json:"-"` + Id string `json:"Id` //nolint + RuntimeDuration int64 `json:"runtime_checkpoint_duration"` + CRIUStatistics *define.CRIUCheckpointRestoreStatistics `json:"criu_statistics"` } type RestoreOptions struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index c30129001..c36bc5ccd 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -515,6 +515,7 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ PreCheckPoint: options.PreCheckPoint, WithPrevious: options.WithPrevious, Compression: options.Compression, + PrintStats: options.PrintStats, } if options.All { @@ -531,10 +532,12 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ } reports := make([]*entities.CheckpointReport, 0, len(cons)) for _, con := range cons { - err = con.Checkpoint(ctx, checkOpts) + criuStatistics, runtimeCheckpointDuration, err := con.Checkpoint(ctx, checkOpts) reports = append(reports, &entities.CheckpointReport{ - Err: err, - Id: con.ID(), + Err: err, + Id: con.ID(), + RuntimeDuration: runtimeCheckpointDuration, + CRIUStatistics: criuStatistics, }) } return reports, nil -- cgit v1.2.3-54-g00ecf From 80e56fa12b7bf7e19a768ce9a3beeeeb53f9b33d Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 9 Nov 2021 21:21:07 +0000 Subject: Added optional container restore statistics This adds the parameter '--print-stats' to 'podman container restore'. With '--print-stats' Podman will measure how long Podman itself, the OCI runtime and CRIU requires to restore a checkpoint and print out these information. CRIU already creates process restore statistics which are just read in addition to the added measurements. In contrast to just printing out the ID of the restored container, Podman will now print out JSON: # podman container restore --latest --print-stats { "podman_restore_duration": 305871, "container_statistics": [ { "Id": "47b02e1d474b5d5fe917825e91ac653efa757c91e5a81a368d771a78f6b5ed20", "runtime_restore_duration": 140614, "criu_statistics": { "forking_time": 5, "restore_time": 67672, "pages_restored": 14 } } ] } The output contains 'podman_restore_duration' which contains the number of microseconds Podman required to restore the checkpoint. The output also includes 'runtime_restore_duration' which is the time the runtime needed to restore that specific container. Each container also includes 'criu_statistics' which displays the timing information collected by CRIU. Signed-off-by: Adrian Reber --- cmd/podman/containers/restore.go | 34 ++++++++++- libpod/container_api.go | 8 ++- libpod/container_internal.go | 2 +- libpod/container_internal_linux.go | 107 +++++++++++++++++++++------------- libpod/oci.go | 5 +- libpod/oci_conmon_linux.go | 75 +++++++++++++++--------- libpod/oci_missing.go | 4 +- pkg/api/handlers/libpod/containers.go | 14 ++++- pkg/api/server/register_containers.go | 4 ++ pkg/domain/entities/containers.go | 7 ++- pkg/domain/infra/abi/containers.go | 9 ++- 11 files changed, 186 insertions(+), 83 deletions(-) diff --git a/cmd/podman/containers/restore.go b/cmd/podman/containers/restore.go index 05214f32c..217adc887 100644 --- a/cmd/podman/containers/restore.go +++ b/cmd/podman/containers/restore.go @@ -3,6 +3,7 @@ package containers import ( "context" "fmt" + "time" "github.com/containers/common/pkg/completion" "github.com/containers/podman/v3/cmd/podman/common" @@ -39,6 +40,11 @@ var ( var restoreOptions entities.RestoreOptions +type restoreStatistics struct { + PodmanDuration int64 `json:"podman_restore_duration"` + ContainerStatistics []*entities.RestoreReport `json:"container_statistics"` +} + func init() { registry.Commands = append(registry.Commands, registry.CliCommand{ Command: restoreCommand, @@ -75,11 +81,19 @@ func init() { flags.StringVar(&restoreOptions.Pod, "pod", "", "Restore container into existing Pod (only works with --import)") _ = restoreCommand.RegisterFlagCompletionFunc("pod", common.AutocompletePodsRunning) + flags.BoolVar( + &restoreOptions.PrintStats, + "print-stats", + false, + "Display restore statistics", + ) + validate.AddLatestFlag(restoreCommand, &restoreOptions.Latest) } func restore(cmd *cobra.Command, args []string) error { var errs utils.OutputErrors + podmanStart := time.Now() if rootless.IsRootless() { return errors.New("restoring a container requires root") } @@ -132,12 +146,30 @@ func restore(cmd *cobra.Command, args []string) error { if err != nil { return err } + podmanFinished := time.Now() + + var statistics restoreStatistics + for _, r := range responses { if r.Err == nil { - fmt.Println(r.Id) + if restoreOptions.PrintStats { + statistics.ContainerStatistics = append(statistics.ContainerStatistics, r) + } else { + fmt.Println(r.Id) + } } else { errs = append(errs, r.Err) } } + + if restoreOptions.PrintStats { + statistics.PodmanDuration = podmanFinished.Sub(podmanStart).Microseconds() + j, err := json.MarshalIndent(statistics, "", " ") + if err != nil { + return err + } + fmt.Println(string(j)) + } + return errs.PrintErrors() } diff --git a/libpod/container_api.go b/libpod/container_api.go index b0ea7c0de..a41bb03df 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -832,7 +832,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO } // Restore restores a container -func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error { +// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time +// the runtime needs to restore the container) are only set if +// options.PrintStats is set to true. Not setting options.PrintStats to true +// will return nil and 0. +func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { if options.Pod == "" { logrus.Debugf("Trying to restore container %s", c.ID()) } else { @@ -843,7 +847,7 @@ func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOpti defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return err + return nil, 0, err } } defer c.newContainerEvent(events.Restore) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index de23a4aeb..4bf15be86 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1089,7 +1089,7 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { } // With the spec complete, do an OCI create - if err := c.ociRuntime.CreateContainer(c, nil); err != nil { + if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil { // Fedora 31 is carrying a patch to display improved error // messages to better handle the V2 transition. This is NOT // upstream in any OCI runtime. diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 2778d2cde..4a15d495f 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1264,7 +1264,7 @@ func (c *Container) importPreCheckpoint(input string) error { return nil } -func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) { +func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) { minCriuVersion := func() int { if options.Pod == "" { return criu.MinCriuVersion @@ -1272,37 +1272,37 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti return criu.PodCriuVersion }() if err := c.checkpointRestoreSupported(minCriuVersion); err != nil { - return err + return nil, 0, err } if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) { - return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path()) + return nil, 0, errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path()) } if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) { - return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) + return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) } if options.ImportPrevious != "" { if err := c.importPreCheckpoint(options.ImportPrevious); err != nil { - return err + return nil, 0, err } } if options.TargetFile != "" { if err := c.importCheckpoint(options.TargetFile); err != nil { - return err + return nil, 0, err } } // Let's try to stat() CRIU's inventory file. If it does not exist, it makes // no sense to try a restore. This is a minimal check if a checkpoint exist. if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) { - return errors.Wrapf(err, "a complete checkpoint for this container cannot be found, cannot restore") + return nil, 0, errors.Wrapf(err, "a complete checkpoint for this container cannot be found, cannot restore") } if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil { - return err + return nil, 0, err } // If a container is restored multiple times from an exported checkpoint with @@ -1339,7 +1339,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti // container with the same networks settings as during checkpointing. aliases, err := c.GetAllNetworkAliases() if err != nil { - return err + return nil, 0, err } netOpts := make(map[string]types.PerNetworkOptions, len(netStatus)) for network, status := range netStatus { @@ -1364,7 +1364,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti if perNetOpts.InterfaceName == "" { eth, exists := c.state.NetInterfaceDescriptions.getInterfaceByName(network) if !exists { - return errors.Errorf("no network interface name for container %s on network %s", c.config.ID, network) + return nil, 0, errors.Errorf("no network interface name for container %s on network %s", c.config.ID, network) } perNetOpts.InterfaceName = eth } @@ -1382,7 +1382,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti }() if err := c.prepare(); err != nil { - return err + return nil, 0, err } // Read config @@ -1391,7 +1391,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti g, err := generate.NewFromFile(jsonPath) if err != nil { logrus.Debugf("generate.NewFromFile failed with %v", err) - return err + return nil, 0, err } // Restoring from an import means that we are doing migration @@ -1407,7 +1407,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil { - return err + return nil, 0, err } } @@ -1416,23 +1416,23 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti // the ones from the infrastructure container. pod, err := c.runtime.LookupPod(options.Pod) if err != nil { - return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod) + return nil, 0, errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod) } infraContainer, err := pod.InfraContainer() if err != nil { - return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod) } infraContainer.lock.Lock() if err := infraContainer.syncContainer(); err != nil { infraContainer.lock.Unlock() - return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID()) + return nil, 0, errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID()) } if infraContainer.state.State != define.ContainerStateRunning { if err := infraContainer.initAndStart(ctx); err != nil { infraContainer.lock.Unlock() - return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID()) + return nil, 0, errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID()) } } infraContainer.lock.Unlock() @@ -1440,56 +1440,56 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti if c.config.IPCNsCtr != "" { nsPath, err := infraContainer.namespacePath(IPCNS) if err != nil { - return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod) } if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil { - return err + return nil, 0, err } } if c.config.NetNsCtr != "" { nsPath, err := infraContainer.namespacePath(NetNS) if err != nil { - return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod) } if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil { - return err + return nil, 0, err } } if c.config.PIDNsCtr != "" { nsPath, err := infraContainer.namespacePath(PIDNS) if err != nil { - return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod) } if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil { - return err + return nil, 0, err } } if c.config.UTSNsCtr != "" { nsPath, err := infraContainer.namespacePath(UTSNS) if err != nil { - return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod) } if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil { - return err + return nil, 0, err } } if c.config.CgroupNsCtr != "" { nsPath, err := infraContainer.namespacePath(CgroupNS) if err != nil { - return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod) + return nil, 0, errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod) } if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil { - return err + return nil, 0, err } } } if err := c.makeBindMounts(); err != nil { - return err + return nil, 0, err } if options.TargetFile != "" { @@ -1511,12 +1511,12 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti // Cleanup for a working restore. if err := c.removeConmonFiles(); err != nil { - return err + return nil, 0, err } // Save the OCI spec to disk if err := c.saveSpec(g.Config); err != nil { - return err + return nil, 0, err } // When restoring from an imported archive, allow restoring the content of volumes. @@ -1527,24 +1527,24 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti volumeFile, err := os.Open(volumeFilePath) if err != nil { - return errors.Wrapf(err, "failed to open volume file %s", volumeFilePath) + return nil, 0, errors.Wrapf(err, "failed to open volume file %s", volumeFilePath) } defer volumeFile.Close() volume, err := c.runtime.GetVolume(v.Name) if err != nil { - return errors.Wrapf(err, "failed to retrieve volume %s", v.Name) + return nil, 0, errors.Wrapf(err, "failed to retrieve volume %s", v.Name) } mountPoint, err := volume.MountPoint() if err != nil { - return err + return nil, 0, err } if mountPoint == "" { - return errors.Wrapf(err, "unable to import volume %s as it is not mounted", volume.Name()) + return nil, 0, errors.Wrapf(err, "unable to import volume %s as it is not mounted", volume.Name()) } if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil { - return errors.Wrapf(err, "Failed to extract volume %s to %s", volumeFilePath, mountPoint) + return nil, 0, errors.Wrapf(err, "Failed to extract volume %s to %s", volumeFilePath, mountPoint) } } } @@ -1552,16 +1552,43 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti // Before actually restarting the container, apply the root file-system changes if !options.IgnoreRootfs { if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil { - return err + return nil, 0, err } if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil { - return err + return nil, 0, err } } - if err := c.ociRuntime.CreateContainer(c, &options); err != nil { - return err + runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options) + if err != nil { + return nil, 0, err + } + + criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) { + if !options.PrintStats { + return nil, nil + } + statsDirectory, err := os.Open(c.bundlePath()) + if err != nil { + return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath()) + } + + restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory) + if err != nil { + return nil, errors.Wrap(err, "Displaying restore statistics not possible") + } + + return &define.CRIUCheckpointRestoreStatistics{ + PagesCompared: restoreStatistics.GetPagesCompared(), + PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(), + ForkingTime: restoreStatistics.GetForkingTime(), + RestoreTime: restoreStatistics.GetRestoreTime(), + PagesRestored: restoreStatistics.GetPagesRestored(), + }, nil + }() + if err != nil { + return nil, 0, err } logrus.Debugf("Restored container %s", c.ID()) @@ -1600,7 +1627,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } } - return c.save() + return criuStatistics, runtimeRestoreDuration, c.save() } // Retrieves a container's "root" net namespace container dependency. diff --git a/libpod/oci.go b/libpod/oci.go index f78600210..f45c1a105 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -23,7 +23,10 @@ type OCIRuntime interface { Path() string // CreateContainer creates the container in the OCI runtime. - CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error + // The returned int64 contains the microseconds needed to restore + // the given container if it is a restore and if restoreOptions.PrintStats + // is true. In all other cases the returned int64 is 0. + CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) // UpdateContainerStatus updates the status of the given container. UpdateContainerStatus(ctr *Container) error // StartContainer starts the given container. diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 305d73d75..533a0d78b 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -183,35 +183,39 @@ func hasCurrentUserMapped(ctr *Container) bool { } // CreateContainer creates a container. -func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error { +func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { // always make the run dir accessible to the current user so that the PID files can be read without // being in the rootless user namespace. if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil { - return err + return 0, err } if !hasCurrentUserMapped(ctr) { for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} { if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { - return err + return 0, err } } // if we are running a non privileged container, be sure to umount some kernel paths so they are not // bind mounted inside the container at all. if !ctr.config.Privileged && !rootless.IsRootless() { - ch := make(chan error) + type result struct { + restoreDuration int64 + err error + } + ch := make(chan result) go func() { runtime.LockOSThread() - err := func() error { + restoreDuration, err := func() (int64, error) { fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) if err != nil { - return err + return 0, err } defer errorhandling.CloseQuiet(fd) // create a new mountns on the current thread if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { - return err + return 0, err } defer func() { if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil { @@ -224,12 +228,12 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta // changes are propagated to the host. err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "") if err != nil { - return errors.Wrapf(err, "cannot make /sys slave") + return 0, errors.Wrapf(err, "cannot make /sys slave") } mounts, err := pmount.GetMounts() if err != nil { - return err + return 0, err } for _, m := range mounts { if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") { @@ -237,15 +241,18 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta } err = unix.Unmount(m.Mountpoint, 0) if err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "cannot unmount %s", m.Mountpoint) + return 0, errors.Wrapf(err, "cannot unmount %s", m.Mountpoint) } } return r.createOCIContainer(ctr, restoreOptions) }() - ch <- err + ch <- result{ + restoreDuration: restoreDuration, + err: err, + } }() - err := <-ch - return err + r := <-ch + return r.restoreDuration, r.err } } return r.createOCIContainer(ctr, restoreOptions) @@ -995,23 +1002,23 @@ func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) { } // createOCIContainer generates this container's main conmon instance and prepares it for starting -func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error { +func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { var stderrBuf bytes.Buffer runtimeDir, err := util.GetRuntimeDir() if err != nil { - return err + return 0, err } parentSyncPipe, childSyncPipe, err := newPipe() if err != nil { - return errors.Wrapf(err, "error creating socket pair") + return 0, errors.Wrapf(err, "error creating socket pair") } defer errorhandling.CloseQuiet(parentSyncPipe) childStartPipe, parentStartPipe, err := newPipe() if err != nil { - return errors.Wrapf(err, "error creating socket pair for start pipe") + return 0, errors.Wrapf(err, "error creating socket pair for start pipe") } defer errorhandling.CloseQuiet(parentStartPipe) @@ -1023,12 +1030,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co logTag, err := r.getLogTag(ctr) if err != nil { - return err + return 0, err } if ctr.config.CgroupsMode == cgroupSplit { if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil { - return err + return 0, err } } @@ -1079,7 +1086,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co } else { fds, err := strconv.Atoi(val) if err != nil { - return fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err) + return 0, fmt.Errorf("converting LISTEN_FDS=%s: %w", val, err) } preserveFDs = uint(fds) } @@ -1160,7 +1167,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if r.reservePorts && !rootless.IsRootless() && !ctr.config.NetMode.IsSlirp4netns() { ports, err := bindPorts(ctr.config.PortMappings) if err != nil { - return err + return 0, err } filesToClose = append(filesToClose, ports...) @@ -1176,12 +1183,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if havePortMapping { ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe() if err != nil { - return errors.Wrapf(err, "failed to create rootless port sync pipe") + return 0, errors.Wrapf(err, "failed to create rootless port sync pipe") } } ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() if err != nil { - return errors.Wrapf(err, "failed to create rootless network sync pipe") + return 0, errors.Wrapf(err, "failed to create rootless network sync pipe") } } else { if ctr.rootlessSlirpSyncR != nil { @@ -1200,22 +1207,25 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncW) } } - + var runtimeRestoreStarted time.Time + if restoreOptions != nil { + runtimeRestoreStarted = time.Now() + } err = startCommandGivenSelinux(cmd, ctr) // regardless of whether we errored or not, we no longer need the children pipes childSyncPipe.Close() childStartPipe.Close() if err != nil { - return err + return 0, err } if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil { - return err + return 0, err } /* Wait for initial setup and fork, and reap child */ err = cmd.Wait() if err != nil { - return err + return 0, err } pid, err := readConmonPipeData(parentSyncPipe, ociLog) @@ -1223,7 +1233,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co if err2 := r.DeleteContainer(ctr); err2 != nil { logrus.Errorf("Removing container %s from runtime after creation failed", ctr.ID()) } - return err + return 0, err } ctr.state.PID = pid @@ -1249,13 +1259,20 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co } } + runtimeRestoreDuration := func() int64 { + if restoreOptions != nil && restoreOptions.PrintStats { + return time.Since(runtimeRestoreStarted).Microseconds() + } + return 0 + }() + // These fds were passed down to the runtime. Close them // and not interfere for _, f := range filesToClose { errorhandling.CloseQuiet(f) } - return nil + return runtimeRestoreDuration, nil } // configureConmonEnv gets the environment values to add to conmon's exec struct diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index c91146b9c..65ff818b4 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -66,8 +66,8 @@ func (r *MissingRuntime) Path() string { } // CreateContainer is not available as the runtime is missing -func (r *MissingRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error { - return r.printError() +func (r *MissingRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) { + return 0, r.printError() } // UpdateContainerStatus is not available as the runtime is missing diff --git a/pkg/api/handlers/libpod/containers.go b/pkg/api/handlers/libpod/containers.go index d8312e9be..3aeebc334 100644 --- a/pkg/api/handlers/libpod/containers.go +++ b/pkg/api/handlers/libpod/containers.go @@ -294,6 +294,7 @@ func Restore(w http.ResponseWriter, r *http.Request) { IgnoreVolumes bool `schema:"ignoreVolumes"` IgnoreStaticIP bool `schema:"ignoreStaticIP"` IgnoreStaticMAC bool `schema:"ignoreStaticMAC"` + PrintStats bool `schema:"printStats"` }{ // override any golang type defaults } @@ -329,17 +330,26 @@ func Restore(w http.ResponseWriter, r *http.Request) { IgnoreRootfs: query.IgnoreRootFS, IgnoreStaticIP: query.IgnoreStaticIP, IgnoreStaticMAC: query.IgnoreStaticMAC, + PrintStats: query.PrintStats, } if query.Import { options.TargetFile = targetFile options.Name = query.Name } - err = ctr.Restore(r.Context(), options) + criuStatistics, runtimeRestoreDuration, err := ctr.Restore(r.Context(), options) if err != nil { utils.InternalServerError(w, err) return } - utils.WriteResponse(w, http.StatusOK, entities.RestoreReport{Id: ctr.ID()}) + utils.WriteResponse( + w, + http.StatusOK, + entities.RestoreReport{ + Id: ctr.ID(), + RuntimeDuration: runtimeRestoreDuration, + CRIUStatistics: criuStatistics, + }, + ) } func InitContainer(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/api/server/register_containers.go b/pkg/api/server/register_containers.go index e98098f97..601e1251b 100644 --- a/pkg/api/server/register_containers.go +++ b/pkg/api/server/register_containers.go @@ -1499,6 +1499,10 @@ func (s *APIServer) registerContainersHandlers(r *mux.Router) error { // name: ignoreStaticMAC // type: boolean // description: ignore MAC address if set statically + // - in: query + // name: printStats + // type: boolean + // description: add restore statistics to the returned RestoreReport // produces: // - application/json // responses: diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index bab663ef7..8b7cd62d9 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -214,11 +214,14 @@ type RestoreOptions struct { ImportPrevious string PublishPorts []nettypes.PortMapping Pod string + PrintStats bool } type RestoreReport struct { - Err error - Id string //nolint + Err error `json:"-"` + Id string `json:"Id` //nolint + RuntimeDuration int64 `json:"runtime_restore_duration"` + CRIUStatistics *define.CRIUCheckpointRestoreStatistics `json:"criu_statistics"` } type ContainerCreateReport struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index c36bc5ccd..69c628669 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -560,6 +560,7 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st IgnoreStaticMAC: options.IgnoreStaticMAC, ImportPrevious: options.ImportPrevious, Pod: options.Pod, + PrintStats: options.PrintStats, } filterFuncs := []libpod.ContainerFilter{ @@ -582,10 +583,12 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st } reports := make([]*entities.RestoreReport, 0, len(cons)) for _, con := range cons { - err := con.Restore(ctx, restoreOptions) + criuStatistics, runtimeRestoreDuration, err := con.Restore(ctx, restoreOptions) reports = append(reports, &entities.RestoreReport{ - Err: err, - Id: con.ID(), + Err: err, + Id: con.ID(), + RuntimeDuration: runtimeRestoreDuration, + CRIUStatistics: criuStatistics, }) } return reports, nil -- cgit v1.2.3-54-g00ecf From 914f4c8905d102a5f345b445cc899fd85062093b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 10 Nov 2021 12:43:12 +0000 Subject: Update man pages for checkpoint/restore --print-stats This commit updates the man pages for checkpoint and restore to describe the '--print-stats' parameter. Signed-off-by: Adrian Reber --- .../markdown/podman-container-checkpoint.1.md | 36 +++++++++++++++++++++- docs/source/markdown/podman-container-restore.1.md | 26 +++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/docs/source/markdown/podman-container-checkpoint.1.md b/docs/source/markdown/podman-container-checkpoint.1.md index 56fd848ac..1faa40a94 100644 --- a/docs/source/markdown/podman-container-checkpoint.1.md +++ b/docs/source/markdown/podman-container-checkpoint.1.md @@ -68,6 +68,40 @@ Dump the *container's* memory information only, leaving the *container* running. operations will supersede prior dumps. It only works on `runc 1.0-rc3` or `higher`.\ The default is **false**. +#### **--print-stats** + +Print out statistics about checkpointing the container(s). The output is +rendered in a JSON array and contains information about how much time different +checkpoint operations required. Many of the checkpoint statistics are created +by CRIU and just passed through to Podman. The following information is provided +in the JSON array: + +- **podman_checkpoint_duration**: Overall time (in microseconds) needed to create + all checkpoints. + +- **runtime_checkpoint_duration**: Time (in microseconds) the container runtime + needed to create the checkpoint. + +- **freezing_time**: Time (in microseconds) CRIU needed to pause (freeze) all + processes in the container (measured by CRIU). + +- **frozen_time**: Time (in microseconds) all processes in the container were + paused (measured by CRIU). + +- **memdump_time**: Time (in microseconds) needed to extract all required memory + pages from all container processes (measured by CRIU). + +- **memwrite_time**: Time (in microseconds) needed to write all required memory + pages to the corresponding checkpoint image files (measured by CRIU). + +- **pages_scanned**: Number of memory pages scanned to determine if they need + to be checkpointed (measured by CRIU). + +- **pages_written**: Number of memory pages actually written to the checkpoint + image files (measured by CRIU). + +The default is **false**. + #### **--tcp-established** Checkpoint a *container* with established TCP connections. If the checkpoint @@ -106,7 +140,7 @@ Dump the container's memory information of the latest container into an archive ``` ## SEE ALSO -**[podman(1)](podman.1.md)**, **[podman-container-restore(1)](podman-container-restore.1.md)** +**[podman(1)](podman.1.md)**, **[podman-container-restore(1)](podman-container-restore.1.md)**, **criu(8)** ## HISTORY September 2018, Originally compiled by Adrian Reber diff --git a/docs/source/markdown/podman-container-restore.1.md b/docs/source/markdown/podman-container-restore.1.md index 856008cc0..962770ea0 100644 --- a/docs/source/markdown/podman-container-restore.1.md +++ b/docs/source/markdown/podman-container-restore.1.md @@ -102,6 +102,30 @@ from (see **[podman pod create --share](podman-pod-create.1.md#--share)**). This option requires at least CRIU 3.16. +#### **--print-stats** + +Print out statistics about restoring the container(s). The output is +rendered in a JSON array and contains information about how much time different +restore operations required. Many of the restore statistics are created +by CRIU and just passed through to Podman. The following information is provided +in the JSON array: + +- **podman_restore_duration**: Overall time (in microseconds) needed to restore + all checkpoints. + +- **runtime_restore_duration**: Time (in microseconds) the container runtime + needed to restore the checkpoint. + +- **forking_time**: Time (in microseconds) CRIU needed to create (fork) all + processes in the restored container (measured by CRIU). + +- **restore_time**: Time (in microseconds) CRIU needed to restore all processes + in the container (measured by CRIU). + +- **pages_restored**: Number of memory pages restored (measured by CRIU). + +The default is **false**. + #### **--publish**, **-p**=*port* Replaces the ports that the *container* publishes, as configured during the @@ -137,7 +161,7 @@ $ podman run --rm -p 2345:80 -d webserver ``` ## SEE ALSO -**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)** +**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)**, **criu(8)** ## HISTORY September 2018, Originally compiled by Adrian Reber -- cgit v1.2.3-54-g00ecf From d28b39a90d1b4733ff3144450a280afed8661924 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 10 Nov 2021 15:35:36 +0000 Subject: Added test for checkpoint/restore --print-stats Signed-off-by: Adrian Reber --- test/e2e/checkpoint_test.go | 95 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go index be6b782b5..6b294802d 100644 --- a/test/e2e/checkpoint_test.go +++ b/test/e2e/checkpoint_test.go @@ -1,6 +1,7 @@ package integration import ( + "encoding/json" "fmt" "net" "os" @@ -12,6 +13,7 @@ import ( "github.com/checkpoint-restore/go-criu/v5/stats" "github.com/containers/podman/v3/pkg/checkpoint/crutils" "github.com/containers/podman/v3/pkg/criu" + "github.com/containers/podman/v3/pkg/domain/entities" . "github.com/containers/podman/v3/test/utils" "github.com/containers/podman/v3/utils" . "github.com/onsi/ginkgo" @@ -1244,4 +1246,97 @@ var _ = Describe("Podman checkpoint", func() { // Remove exported checkpoint os.Remove(fileName) }) + + It("podman checkpoint and restore containers with --print-stats", func() { + session1 := podmanTest.Podman(getRunString([]string{redis})) + session1.WaitWithDefaultTimeout() + Expect(session1).Should(Exit(0)) + + session2 := podmanTest.Podman(getRunString([]string{redis, "top"})) + session2.WaitWithDefaultTimeout() + Expect(session2).Should(Exit(0)) + + result := podmanTest.Podman([]string{ + "container", + "checkpoint", + "-a", + "--print-stats", + }) + result.WaitWithDefaultTimeout() + + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + + type checkpointStatistics struct { + PodmanDuration int64 `json:"podman_checkpoint_duration"` + ContainerStatistics []*entities.CheckpointReport `json:"container_statistics"` + } + + cS := new(checkpointStatistics) + err := json.Unmarshal([]byte(result.OutputToString()), cS) + Expect(err).ShouldNot(HaveOccurred()) + + Expect(len(cS.ContainerStatistics)).To(Equal(2)) + Expect(cS.PodmanDuration).To(BeNumerically(">", cS.ContainerStatistics[0].RuntimeDuration)) + Expect(cS.PodmanDuration).To(BeNumerically(">", cS.ContainerStatistics[1].RuntimeDuration)) + Expect(cS.ContainerStatistics[0].RuntimeDuration).To( + BeNumerically(">", cS.ContainerStatistics[0].CRIUStatistics.FrozenTime), + ) + Expect(cS.ContainerStatistics[1].RuntimeDuration).To( + BeNumerically(">", cS.ContainerStatistics[1].CRIUStatistics.FrozenTime), + ) + + ps := podmanTest.Podman([]string{ + "ps", + "-q", + "--no-trunc", + }) + ps.WaitWithDefaultTimeout() + Expect(ps).Should(Exit(0)) + Expect(ps.LineInOutputContains(session1.OutputToString())).To(BeFalse()) + Expect(ps.LineInOutputContains(session2.OutputToString())).To(BeFalse()) + + result = podmanTest.Podman([]string{ + "container", + "restore", + "-a", + "--print-stats", + }) + result.WaitWithDefaultTimeout() + + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(2)) + Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) + Expect(podmanTest.GetContainerStatus()).To(Not(ContainSubstring("Exited"))) + + type restoreStatistics struct { + PodmanDuration int64 `json:"podman_restore_duration"` + ContainerStatistics []*entities.RestoreReport `json:"container_statistics"` + } + + rS := new(restoreStatistics) + err = json.Unmarshal([]byte(result.OutputToString()), rS) + Expect(err).ShouldNot(HaveOccurred()) + + Expect(len(cS.ContainerStatistics)).To(Equal(2)) + Expect(cS.PodmanDuration).To(BeNumerically(">", cS.ContainerStatistics[0].RuntimeDuration)) + Expect(cS.PodmanDuration).To(BeNumerically(">", cS.ContainerStatistics[1].RuntimeDuration)) + Expect(cS.ContainerStatistics[0].RuntimeDuration).To( + BeNumerically(">", cS.ContainerStatistics[0].CRIUStatistics.RestoreTime), + ) + Expect(cS.ContainerStatistics[1].RuntimeDuration).To( + BeNumerically(">", cS.ContainerStatistics[1].CRIUStatistics.RestoreTime), + ) + + result = podmanTest.Podman([]string{ + "rm", + "-t", + "0", + "-fa", + }) + result.WaitWithDefaultTimeout() + Expect(result).Should(Exit(0)) + Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) + }) + }) -- cgit v1.2.3-54-g00ecf