diff options
author | Adrian Reber <areber@redhat.com> | 2021-11-09 09:15:50 +0000 |
---|---|---|
committer | Adrian Reber <areber@redhat.com> | 2021-11-15 11:50:24 +0000 |
commit | 6202e8102b9728f89c2ab7bb504306a2bd007878 (patch) | |
tree | e5233ba6f948d62a59f2338b7f34827f6ff76eb2 /libpod | |
parent | cca6df428cb9ce187ae1341740ac1137c7a67a75 (diff) | |
download | podman-6202e8102b9728f89c2ab7bb504306a2bd007878.tar.gz podman-6202e8102b9728f89c2ab7bb504306a2bd007878.tar.bz2 podman-6202e8102b9728f89c2ab7bb504306a2bd007878.zip |
Added optional container checkpointing statistics
This adds the parameter '--print-stats' to 'podman container checkpoint'.
With '--print-stats' Podman will measure how long Podman itself, the OCI
runtime and CRIU requires to create a checkpoint and print out these
information. CRIU already creates checkpointing statistics which are
just read in addition to the added measurements. In contrast to just
printing out the ID of the checkpointed container, Podman will now print
out JSON:
# podman container checkpoint --latest --print-stats
{
"podman_checkpoint_duration": 360749,
"container_statistics": [
{
"Id": "25244244bf2efbef30fb6857ddea8cb2e5489f07eb6659e20dda117f0c466808",
"runtime_checkpoint_duration": 177222,
"criu_statistics": {
"freezing_time": 100657,
"frozen_time": 60700,
"memdump_time": 8162,
"memwrite_time": 4224,
"pages_scanned": 20561,
"pages_written": 2129
}
}
]
}
The output contains 'podman_checkpoint_duration' which contains the
number of microseconds Podman required to create the checkpoint. The
output also includes 'runtime_checkpoint_duration' which is the time
the runtime needed to checkpoint that specific container. Each container
also includes 'criu_statistics' which displays the timing information
collected by CRIU.
Signed-off-by: Adrian Reber <areber@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_api.go | 16 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 52 | ||||
-rw-r--r-- | libpod/define/checkpoint_restore.go | 32 | ||||
-rw-r--r-- | libpod/oci.go | 6 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 21 | ||||
-rw-r--r-- | libpod/oci_missing.go | 4 |
6 files changed, 106 insertions, 25 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 38223316e..b0ea7c0de 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -794,21 +794,29 @@ type ContainerCheckpointOptions struct { // container no PID 1 will be in the namespace and that is not // possible. Pod string + // PrintStats tells the API to fill out the statistics about + // how much time each component in the stack requires to + // checkpoint a container. + PrintStats bool } // Checkpoint checkpoints a container -func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time +// the runtime needs to checkpoint the container) are only set if +// options.PrintStats is set to true. Not setting options.PrintStats to true +// will return nil and 0. +func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { logrus.Debugf("Trying to checkpoint container %s", c.ID()) if options.TargetFile != "" { if err := c.prepareCheckpointExport(); err != nil { - return err + return nil, 0, err } } if options.WithPrevious { if err := c.canWithPrevious(); err != nil { - return err + return nil, 0, err } } @@ -817,7 +825,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return err + return nil, 0, err } } return c.checkpoint(ctx, options) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 91453574e..2778d2cde 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1129,25 +1129,26 @@ func (c *Container) checkpointRestoreSupported(version int) error { return nil } -func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { - return err + return nil, 0, err } if c.state.State != define.ContainerStateRunning { - return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) + return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } if c.AutoRemove() && options.TargetFile == "" { - return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") + return nil, 0, errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") } if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil { - return err + return nil, 0, err } - if err := c.ociRuntime.CheckpointContainer(c, options); err != nil { - return err + runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options) + if err != nil { + return nil, 0, err } // Save network.status. This is needed to restore the container with @@ -1155,7 +1156,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // with one interface. // FIXME: will this break something? if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil { - return err + return nil, 0, err } defer c.newContainerEvent(events.Checkpoint) @@ -1165,13 +1166,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if options.WithPrevious { os.Remove(path.Join(c.CheckpointPath(), "parent")) if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil { - return err + return nil, 0, err } } if options.TargetFile != "" { if err := c.exportCheckpoint(options); err != nil { - return err + return nil, 0, err } } @@ -1183,8 +1184,35 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // Cleanup Storage and Network if err := c.cleanup(ctx); err != nil { - return err + return nil, 0, err + } + } + + criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) { + if !options.PrintStats { + return nil, nil + } + statsDirectory, err := os.Open(c.bundlePath()) + if err != nil { + return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath()) + } + + dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory) + if err != nil { + return nil, errors.Wrap(err, "Displaying checkpointing statistics not possible") } + + return &define.CRIUCheckpointRestoreStatistics{ + FreezingTime: dumpStatistics.GetFreezingTime(), + FrozenTime: dumpStatistics.GetFrozenTime(), + MemdumpTime: dumpStatistics.GetMemdumpTime(), + MemwriteTime: dumpStatistics.GetMemwriteTime(), + PagesScanned: dumpStatistics.GetPagesScanned(), + PagesWritten: dumpStatistics.GetPagesWritten(), + }, nil + }() + if err != nil { + return nil, 0, err } if !options.Keep && !options.PreCheckPoint { @@ -1203,7 +1231,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO } c.state.FinishedTime = time.Now() - return c.save() + return criuStatistics, runtimeCheckpointDuration, c.save() } func (c *Container) importCheckpoint(input string) error { diff --git a/libpod/define/checkpoint_restore.go b/libpod/define/checkpoint_restore.go new file mode 100644 index 000000000..536bdde9a --- /dev/null +++ b/libpod/define/checkpoint_restore.go @@ -0,0 +1,32 @@ +package define + +// This contains values reported by CRIU during +// checkpointing or restoring. +// All names are the same as reported by CRIU. +type CRIUCheckpointRestoreStatistics struct { + // Checkpoint values + // Time required to freeze/pause/quiesce the processes + FreezingTime uint32 `json:"freezing_time,omitempty"` + // Time the processes are actually not running during checkpointing + FrozenTime uint32 `json:"frozen_time,omitempty"` + // Time required to extract memory pages from the processes + MemdumpTime uint32 `json:"memdump_time,omitempty"` + // Time required to write memory pages to disk + MemwriteTime uint32 `json:"memwrite_time,omitempty"` + // Number of memory pages CRIU analyzed + PagesScanned uint64 `json:"pages_scanned,omitempty"` + // Number of memory pages written + PagesWritten uint64 `json:"pages_written,omitempty"` + + // Restore values + // Number of pages compared during restore + PagesCompared uint64 `json:"pages_compared,omitempty"` + // Number of COW pages skipped during restore + PagesSkippedCow uint64 `json:"pages_skipped_cow,omitempty"` + // Time required to fork processes + ForkingTime uint32 `json:"forking_time,omitempty"` + // Time required to restore + RestoreTime uint32 `json:"restore_time,omitempty"` + // Number of memory pages restored + PagesRestored uint64 `json:"pages_restored,omitempty"` +} diff --git a/libpod/oci.go b/libpod/oci.go index c92d9a077..f78600210 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -101,8 +101,10 @@ type OCIRuntime interface { // CheckpointContainer checkpoints the given container. // Some OCI runtimes may not support this - if SupportsCheckpoint() // returns false, this is not implemented, and will always return an - // error. - CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error + // error. If CheckpointOptions.PrintStats is true the first return parameter + // contains the number of microseconds the runtime needed to checkpoint + // the given container. + CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) // CheckConmonRunning verifies that the given container's Conmon // instance is still running. Runtimes without Conmon, or systems where diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index db906fabb..305d73d75 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -760,9 +760,9 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize define.TerminalS } // CheckpointContainer checkpoints the given container. -func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { +func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { - return err + return 0, err } // imagePath is used by CRIU to store the actual checkpoint files imagePath := ctr.CheckpointPath() @@ -802,14 +802,25 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container } runtimeDir, err := util.GetRuntimeDir() if err != nil { - return err + return 0, err } if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil { - return errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") + return 0, errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") } args = append(args, ctr.ID()) logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointStarted := time.Now() + err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointDuration := func() int64 { + if options.PrintStats { + return time.Since(runtimeCheckpointStarted).Microseconds() + } + return 0 + }() + + return runtimeCheckpointDuration, err } func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index fcf2ffca8..c91146b9c 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -153,8 +153,8 @@ func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (boo } // CheckpointContainer is not available as the runtime is missing -func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { - return r.printError() +func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { + return 0, r.printError() } // CheckConmonRunning is not available as the runtime is missing |