diff options
-rw-r--r-- | cmd/podman/containers/checkpoint.go | 34 | ||||
-rw-r--r-- | libpod/container_api.go | 16 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 52 | ||||
-rw-r--r-- | libpod/define/checkpoint_restore.go | 32 | ||||
-rw-r--r-- | libpod/oci.go | 6 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 21 | ||||
-rw-r--r-- | libpod/oci_missing.go | 4 | ||||
-rw-r--r-- | pkg/api/handlers/libpod/containers.go | 14 | ||||
-rw-r--r-- | pkg/api/server/register_containers.go | 4 | ||||
-rw-r--r-- | pkg/domain/entities/containers.go | 7 | ||||
-rw-r--r-- | pkg/domain/infra/abi/containers.go | 9 |
11 files changed, 166 insertions, 33 deletions
diff --git a/cmd/podman/containers/checkpoint.go b/cmd/podman/containers/checkpoint.go index 4fa72d520..d92bc3e5e 100644 --- a/cmd/podman/containers/checkpoint.go +++ b/cmd/podman/containers/checkpoint.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/containers/common/pkg/completion" "github.com/containers/podman/v3/cmd/podman/common" @@ -40,6 +41,11 @@ var ( var checkpointOptions entities.CheckpointOptions +type checkpointStatistics struct { + PodmanDuration int64 `json:"podman_checkpoint_duration"` + ContainerStatistics []*entities.CheckpointReport `json:"container_statistics"` +} + func init() { registry.Commands = append(registry.Commands, registry.CliCommand{ Command: checkpointCommand, @@ -63,11 +69,19 @@ func init() { flags.StringP("compress", "c", "zstd", "Select compression algorithm (gzip, none, zstd) for checkpoint archive.") _ = checkpointCommand.RegisterFlagCompletionFunc("compress", common.AutocompleteCheckpointCompressType) + flags.BoolVar( + &checkpointOptions.PrintStats, + "print-stats", + false, + "Display checkpoint statistics", + ) + validate.AddLatestFlag(checkpointCommand, &checkpointOptions.Latest) } func checkpoint(cmd *cobra.Command, args []string) error { var errs utils.OutputErrors + podmanStart := time.Now() if cmd.Flags().Changed("compress") { if checkpointOptions.Export == "" { return errors.Errorf("--compress can only be used with --export") @@ -102,12 +116,30 @@ func checkpoint(cmd *cobra.Command, args []string) error { if err != nil { return err } + podmanFinished := time.Now() + + var statistics checkpointStatistics + for _, r := range responses { if r.Err == nil { - fmt.Println(r.Id) + if checkpointOptions.PrintStats { + statistics.ContainerStatistics = append(statistics.ContainerStatistics, r) + } else { + fmt.Println(r.Id) + } } else { errs = append(errs, r.Err) } } + + if checkpointOptions.PrintStats { + statistics.PodmanDuration = podmanFinished.Sub(podmanStart).Microseconds() + j, err := json.MarshalIndent(statistics, "", " ") + if err != nil { + return err + } + fmt.Println(string(j)) + } + return errs.PrintErrors() } diff --git a/libpod/container_api.go b/libpod/container_api.go index 38223316e..b0ea7c0de 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -794,21 +794,29 @@ type ContainerCheckpointOptions struct { // container no PID 1 will be in the namespace and that is not // possible. Pod string + // PrintStats tells the API to fill out the statistics about + // how much time each component in the stack requires to + // checkpoint a container. + PrintStats bool } // Checkpoint checkpoints a container -func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time +// the runtime needs to checkpoint the container) are only set if +// options.PrintStats is set to true. Not setting options.PrintStats to true +// will return nil and 0. +func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { logrus.Debugf("Trying to checkpoint container %s", c.ID()) if options.TargetFile != "" { if err := c.prepareCheckpointExport(); err != nil { - return err + return nil, 0, err } } if options.WithPrevious { if err := c.canWithPrevious(); err != nil { - return err + return nil, 0, err } } @@ -817,7 +825,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return err + return nil, 0, err } } return c.checkpoint(ctx, options) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 91453574e..2778d2cde 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1129,25 +1129,26 @@ func (c *Container) checkpointRestoreSupported(version int) error { return nil } -func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) { if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { - return err + return nil, 0, err } if c.state.State != define.ContainerStateRunning { - return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) + return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } if c.AutoRemove() && options.TargetFile == "" { - return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") + return nil, 0, errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") } if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil { - return err + return nil, 0, err } - if err := c.ociRuntime.CheckpointContainer(c, options); err != nil { - return err + runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options) + if err != nil { + return nil, 0, err } // Save network.status. This is needed to restore the container with @@ -1155,7 +1156,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // with one interface. // FIXME: will this break something? if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil { - return err + return nil, 0, err } defer c.newContainerEvent(events.Checkpoint) @@ -1165,13 +1166,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if options.WithPrevious { os.Remove(path.Join(c.CheckpointPath(), "parent")) if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil { - return err + return nil, 0, err } } if options.TargetFile != "" { if err := c.exportCheckpoint(options); err != nil { - return err + return nil, 0, err } } @@ -1183,8 +1184,35 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO // Cleanup Storage and Network if err := c.cleanup(ctx); err != nil { - return err + return nil, 0, err + } + } + + criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) { + if !options.PrintStats { + return nil, nil + } + statsDirectory, err := os.Open(c.bundlePath()) + if err != nil { + return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath()) + } + + dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory) + if err != nil { + return nil, errors.Wrap(err, "Displaying checkpointing statistics not possible") } + + return &define.CRIUCheckpointRestoreStatistics{ + FreezingTime: dumpStatistics.GetFreezingTime(), + FrozenTime: dumpStatistics.GetFrozenTime(), + MemdumpTime: dumpStatistics.GetMemdumpTime(), + MemwriteTime: dumpStatistics.GetMemwriteTime(), + PagesScanned: dumpStatistics.GetPagesScanned(), + PagesWritten: dumpStatistics.GetPagesWritten(), + }, nil + }() + if err != nil { + return nil, 0, err } if !options.Keep && !options.PreCheckPoint { @@ -1203,7 +1231,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO } c.state.FinishedTime = time.Now() - return c.save() + return criuStatistics, runtimeCheckpointDuration, c.save() } func (c *Container) importCheckpoint(input string) error { diff --git a/libpod/define/checkpoint_restore.go b/libpod/define/checkpoint_restore.go new file mode 100644 index 000000000..536bdde9a --- /dev/null +++ b/libpod/define/checkpoint_restore.go @@ -0,0 +1,32 @@ +package define + +// This contains values reported by CRIU during +// checkpointing or restoring. +// All names are the same as reported by CRIU. +type CRIUCheckpointRestoreStatistics struct { + // Checkpoint values + // Time required to freeze/pause/quiesce the processes + FreezingTime uint32 `json:"freezing_time,omitempty"` + // Time the processes are actually not running during checkpointing + FrozenTime uint32 `json:"frozen_time,omitempty"` + // Time required to extract memory pages from the processes + MemdumpTime uint32 `json:"memdump_time,omitempty"` + // Time required to write memory pages to disk + MemwriteTime uint32 `json:"memwrite_time,omitempty"` + // Number of memory pages CRIU analyzed + PagesScanned uint64 `json:"pages_scanned,omitempty"` + // Number of memory pages written + PagesWritten uint64 `json:"pages_written,omitempty"` + + // Restore values + // Number of pages compared during restore + PagesCompared uint64 `json:"pages_compared,omitempty"` + // Number of COW pages skipped during restore + PagesSkippedCow uint64 `json:"pages_skipped_cow,omitempty"` + // Time required to fork processes + ForkingTime uint32 `json:"forking_time,omitempty"` + // Time required to restore + RestoreTime uint32 `json:"restore_time,omitempty"` + // Number of memory pages restored + PagesRestored uint64 `json:"pages_restored,omitempty"` +} diff --git a/libpod/oci.go b/libpod/oci.go index c92d9a077..f78600210 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -101,8 +101,10 @@ type OCIRuntime interface { // CheckpointContainer checkpoints the given container. // Some OCI runtimes may not support this - if SupportsCheckpoint() // returns false, this is not implemented, and will always return an - // error. - CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error + // error. If CheckpointOptions.PrintStats is true the first return parameter + // contains the number of microseconds the runtime needed to checkpoint + // the given container. + CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) // CheckConmonRunning verifies that the given container's Conmon // instance is still running. Runtimes without Conmon, or systems where diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index db906fabb..305d73d75 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -760,9 +760,9 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize define.TerminalS } // CheckpointContainer checkpoints the given container. -func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { +func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { - return err + return 0, err } // imagePath is used by CRIU to store the actual checkpoint files imagePath := ctr.CheckpointPath() @@ -802,14 +802,25 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container } runtimeDir, err := util.GetRuntimeDir() if err != nil { - return err + return 0, err } if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil { - return errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") + return 0, errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") } args = append(args, ctr.ID()) logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointStarted := time.Now() + err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) + + runtimeCheckpointDuration := func() int64 { + if options.PrintStats { + return time.Since(runtimeCheckpointStarted).Microseconds() + } + return 0 + }() + + return runtimeCheckpointDuration, err } func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index fcf2ffca8..c91146b9c 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -153,8 +153,8 @@ func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (boo } // CheckpointContainer is not available as the runtime is missing -func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { - return r.printError() +func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) { + return 0, r.printError() } // CheckConmonRunning is not available as the runtime is missing diff --git a/pkg/api/handlers/libpod/containers.go b/pkg/api/handlers/libpod/containers.go index 343c0d0b3..d8312e9be 100644 --- a/pkg/api/handlers/libpod/containers.go +++ b/pkg/api/handlers/libpod/containers.go @@ -214,6 +214,7 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { TCPEstablished bool `schema:"tcpEstablished"` Export bool `schema:"export"` IgnoreRootFS bool `schema:"ignoreRootFS"` + PrintStats bool `schema:"printStats"` }{ // override any golang type defaults } @@ -248,11 +249,12 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { KeepRunning: query.LeaveRunning, TCPEstablished: query.TCPEstablished, IgnoreRootfs: query.IgnoreRootFS, + PrintStats: query.PrintStats, } if query.Export { options.TargetFile = targetFile } - err = ctr.Checkpoint(r.Context(), options) + criuStatistics, runtimeCheckpointDuration, err := ctr.Checkpoint(r.Context(), options) if err != nil { utils.InternalServerError(w, err) return @@ -267,7 +269,15 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) { utils.WriteResponse(w, http.StatusOK, f) return } - utils.WriteResponse(w, http.StatusOK, entities.CheckpointReport{Id: ctr.ID()}) + utils.WriteResponse( + w, + http.StatusOK, + entities.CheckpointReport{ + Id: ctr.ID(), + RuntimeDuration: runtimeCheckpointDuration, + CRIUStatistics: criuStatistics, + }, + ) } func Restore(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/api/server/register_containers.go b/pkg/api/server/register_containers.go index c4919182b..e98098f97 100644 --- a/pkg/api/server/register_containers.go +++ b/pkg/api/server/register_containers.go @@ -1441,6 +1441,10 @@ func (s *APIServer) registerContainersHandlers(r *mux.Router) error { // name: ignoreRootFS // type: boolean // description: do not include root file-system changes when exporting + // - in: query + // name: printStats + // type: boolean + // description: add checkpoint statistics to the returned CheckpointReport // produces: // - application/json // responses: diff --git a/pkg/domain/entities/containers.go b/pkg/domain/entities/containers.go index 869c616ea..bab663ef7 100644 --- a/pkg/domain/entities/containers.go +++ b/pkg/domain/entities/containers.go @@ -190,11 +190,14 @@ type CheckpointOptions struct { PreCheckPoint bool WithPrevious bool Compression archive.Compression + PrintStats bool } type CheckpointReport struct { - Err error - Id string //nolint + Err error `json:"-"` + Id string `json:"Id` //nolint + RuntimeDuration int64 `json:"runtime_checkpoint_duration"` + CRIUStatistics *define.CRIUCheckpointRestoreStatistics `json:"criu_statistics"` } type RestoreOptions struct { diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index c30129001..c36bc5ccd 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -515,6 +515,7 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ PreCheckPoint: options.PreCheckPoint, WithPrevious: options.WithPrevious, Compression: options.Compression, + PrintStats: options.PrintStats, } if options.All { @@ -531,10 +532,12 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [ } reports := make([]*entities.CheckpointReport, 0, len(cons)) for _, con := range cons { - err = con.Checkpoint(ctx, checkOpts) + criuStatistics, runtimeCheckpointDuration, err := con.Checkpoint(ctx, checkOpts) reports = append(reports, &entities.CheckpointReport{ - Err: err, - Id: con.ID(), + Err: err, + Id: con.ID(), + RuntimeDuration: runtimeCheckpointDuration, + CRIUStatistics: criuStatistics, }) } return reports, nil |