From bc3389e212dfc2709a1deefc16018e6fb25ed480 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 Dec 2021 16:37:02 +0000 Subject: Add more checkpoint/restore information to 'inspect' This adds the following information to the output of 'podman inspect': * CheckpointedAt - time the container was checkpointed Only set if the container has been checkpointed * RestoredAt - time the container was restored Only set if the container has been restored * CheckpointLog - path to the checkpoint log file (CRIU's dump.log) Only set if the log file exists (--keep) * RestoreLog - path to the restore log file (CRIU's restore.log) Only set if the log file exists (--keep) * CheckpointPath - path to the actual (CRIU) checkpoint files Only set if the checkpoint files exist (--keep) * Restored - set to true if the container has been restored Only set if the container has been restored Signed-off-by: Adrian Reber --- libpod/container.go | 9 +++++++++ libpod/container_inspect.go | 34 ++++++++++++++++++++-------------- libpod/container_internal.go | 12 ++++++++++++ libpod/container_internal_linux.go | 19 +++++++++++++++++++ libpod/define/container_inspect.go | 38 ++++++++++++++++++++++---------------- 5 files changed, 82 insertions(+), 30 deletions(-) diff --git a/libpod/container.go b/libpod/container.go index 1270f2112..c746f97c7 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -213,6 +213,15 @@ type ContainerState struct { // containerPlatformState holds platform-specific container state. containerPlatformState + + // Following checkpoint/restore related information is displayed + // if the container has been checkpointed or restored. 
+ CheckpointedTime time.Time `json:"checkpointedTime,omitempty"` + RestoredTime time.Time `json:"restoredTime,omitempty"` + CheckpointLog string `json:"checkpointLog,omitempty"` + CheckpointPath string `json:"checkpointPath,omitempty"` + RestoreLog string `json:"restoreLog,omitempty"` + Restored bool `json:"restored,omitempty"` } // ContainerNamedVolume is a named volume that will be mounted into the diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 83b643266..439328ea8 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -113,20 +113,26 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver Path: path, Args: args, State: &define.InspectContainerState{ - OciVersion: ctrSpec.Version, - Status: runtimeInfo.State.String(), - Running: runtimeInfo.State == define.ContainerStateRunning, - Paused: runtimeInfo.State == define.ContainerStatePaused, - OOMKilled: runtimeInfo.OOMKilled, - Dead: runtimeInfo.State.String() == "bad state", - Pid: runtimeInfo.PID, - ConmonPid: runtimeInfo.ConmonPID, - ExitCode: runtimeInfo.ExitCode, - Error: "", // can't get yet - StartedAt: runtimeInfo.StartedTime, - FinishedAt: runtimeInfo.FinishedTime, - Checkpointed: runtimeInfo.Checkpointed, - CgroupPath: cgroupPath, + OciVersion: ctrSpec.Version, + Status: runtimeInfo.State.String(), + Running: runtimeInfo.State == define.ContainerStateRunning, + Paused: runtimeInfo.State == define.ContainerStatePaused, + OOMKilled: runtimeInfo.OOMKilled, + Dead: runtimeInfo.State.String() == "bad state", + Pid: runtimeInfo.PID, + ConmonPid: runtimeInfo.ConmonPID, + ExitCode: runtimeInfo.ExitCode, + Error: "", // can't get yet + StartedAt: runtimeInfo.StartedTime, + FinishedAt: runtimeInfo.FinishedTime, + Checkpointed: runtimeInfo.Checkpointed, + CgroupPath: cgroupPath, + RestoredAt: runtimeInfo.RestoredTime, + CheckpointedAt: runtimeInfo.CheckpointedTime, + Restored: runtimeInfo.Restored, + CheckpointPath: 
runtimeInfo.CheckpointPath, + CheckpointLog: runtimeInfo.CheckpointLog, + RestoreLog: runtimeInfo.RestoreLog, }, Image: config.RootfsImageID, ImageName: config.RootfsImageName, diff --git a/libpod/container_internal.go b/libpod/container_internal.go index a68de3173..b7398523a 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -634,6 +634,12 @@ func resetState(state *ContainerState) { state.RestartPolicyMatch = false state.RestartCount = 0 state.Checkpointed = false + state.Restored = false + state.CheckpointedTime = time.Time{} + state.RestoredTime = time.Time{} + state.CheckpointPath = "" + state.CheckpointLog = "" + state.RestoreLog = "" } // Refresh refreshes the container's state after a restart. @@ -1111,6 +1117,12 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { } c.state.Checkpointed = false + c.state.Restored = false + c.state.CheckpointedTime = time.Time{} + c.state.RestoredTime = time.Time{} + c.state.CheckpointPath = "" + c.state.CheckpointLog = "" + c.state.RestoreLog = "" c.state.ExitCode = 0 c.state.Exited = false c.state.State = define.ContainerStateCreated diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index f4b629a83..9e6ae9f02 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -1134,6 +1134,10 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO return nil, 0, err } + // Setting CheckpointLog early in case there is a failure. 
+ c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log") + c.state.CheckpointPath = c.CheckpointPath() + runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options) if err != nil { return nil, 0, err @@ -1169,6 +1173,9 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if !options.KeepRunning && !options.PreCheckPoint { c.state.State = define.ContainerStateStopped c.state.Checkpointed = true + c.state.CheckpointedTime = time.Now() + c.state.Restored = false + c.state.RestoredTime = time.Time{} // Cleanup Storage and Network if err := c.cleanup(ctx); err != nil { @@ -1216,6 +1223,8 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO logrus.Debugf("Unable to remove file %s", file) } } + // The file has been deleted. Do not mention it. + c.state.CheckpointLog = "" } c.state.FinishedTime = time.Now() @@ -1293,6 +1302,10 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti return nil, 0, err } + // Setting RestoreLog early in case there is a failure. + c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log") + c.state.CheckpointPath = c.CheckpointPath() + // Read network configuration from checkpoint var netStatus map[string]types.StatusBlock _, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile) @@ -1559,6 +1572,9 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti c.state.State = define.ContainerStateRunning c.state.Checkpointed = false + c.state.Restored = true + c.state.CheckpointedTime = time.Time{} + c.state.RestoredTime = time.Now() if !options.Keep { // Delete all checkpoint related files. 
At this point, in theory, all files @@ -1569,6 +1585,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti if err != nil { logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err) } + c.state.CheckpointPath = "" err = os.RemoveAll(c.PreCheckPointPath()) if err != nil { logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err) @@ -1589,6 +1606,8 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) } } + c.state.CheckpointLog = "" + c.state.RestoreLog = "" } return criuStatistics, runtimeRestoreDuration, c.save() diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 677b39218..a4d9bcf4f 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -189,22 +189,28 @@ type InspectMount struct { // Docker, but here we see more fields that are unused (nonsensical in the // context of Libpod). 
type InspectContainerState struct { - OciVersion string `json:"OciVersion"` - Status string `json:"Status"` - Running bool `json:"Running"` - Paused bool `json:"Paused"` - Restarting bool `json:"Restarting"` // TODO - OOMKilled bool `json:"OOMKilled"` - Dead bool `json:"Dead"` - Pid int `json:"Pid"` - ConmonPid int `json:"ConmonPid,omitempty"` - ExitCode int32 `json:"ExitCode"` - Error string `json:"Error"` // TODO - StartedAt time.Time `json:"StartedAt"` - FinishedAt time.Time `json:"FinishedAt"` - Health HealthCheckResults `json:"Health,omitempty"` - Checkpointed bool `json:"Checkpointed,omitempty"` - CgroupPath string `json:"CgroupPath,omitempty"` + OciVersion string `json:"OciVersion"` + Status string `json:"Status"` + Running bool `json:"Running"` + Paused bool `json:"Paused"` + Restarting bool `json:"Restarting"` // TODO + OOMKilled bool `json:"OOMKilled"` + Dead bool `json:"Dead"` + Pid int `json:"Pid"` + ConmonPid int `json:"ConmonPid,omitempty"` + ExitCode int32 `json:"ExitCode"` + Error string `json:"Error"` // TODO + StartedAt time.Time `json:"StartedAt"` + FinishedAt time.Time `json:"FinishedAt"` + Health HealthCheckResults `json:"Health,omitempty"` + Checkpointed bool `json:"Checkpointed,omitempty"` + CgroupPath string `json:"CgroupPath,omitempty"` + CheckpointedAt time.Time `json:"CheckpointedAt,omitempty"` + RestoredAt time.Time `json:"RestoredAt,omitempty"` + CheckpointLog string `json:"CheckpointLog,omitempty"` + CheckpointPath string `json:"CheckpointPath,omitempty"` + RestoreLog string `json:"RestoreLog,omitempty"` + Restored bool `json:"Restored,omitempty"` } // Healthcheck returns the HealthCheckResults. This is used for old podman compat -- cgit v1.2.3-54-g00ecf