diff options
-rw-r--r-- | libpod/container_api.go | 2 | ||||
-rw-r--r-- | libpod/container_internal.go | 90 | ||||
-rw-r--r-- | libpod/oci.go | 58 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 2 |
4 files changed, 118 insertions, 34 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 30c67eb2a..25c1fb5f9 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -685,7 +685,7 @@ func (c *Container) Sync() error { (c.state.State != ContainerStateConfigured) { oldState := c.state.State // TODO: optionally replace this with a stat for the exit file - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil { return err } // Only save back to DB if state changed diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 867d539be..db1cf6c99 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -13,8 +13,10 @@ import ( "strconv" "strings" "syscall" + "time" "github.com/containers/buildah/imagebuildah" + "github.com/containers/libpod/pkg/ctime" "github.com/containers/libpod/pkg/hooks" "github.com/containers/libpod/pkg/hooks/exec" "github.com/containers/libpod/pkg/lookup" @@ -31,6 +33,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/text/language" + kwait "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -147,6 +150,77 @@ func (c *Container) execPidPath(sessionID string) string { return filepath.Join(c.state.RunDir, "exec_pid_"+sessionID) } +// exitFilePath gets the path to the container's exit file +func (c *Container) exitFilePath() string { + return filepath.Join(c.runtime.ociRuntime.exitsDir, c.ID()) +} + +// Wait for the container's exit file to appear. +// When it does, update our state based on it. +func (c *Container) waitForExitFileAndSync() error { + exitFile := c.exitFilePath() + + err := kwait.ExponentialBackoff( + kwait.Backoff{ + Duration: 500 * time.Millisecond, + Factor: 1.2, + Steps: 6, + }, + func() (bool, error) { + _, err := os.Stat(exitFile) + if err != nil { + // wait longer + return false, nil + } + return true, nil + }) + if err != nil { + // Exit file did not appear + // Reset our state + c.state.ExitCode = -1 + c.state.FinishedTime = time.Now() + c.state.State = ContainerStateStopped + + if err2 := c.save(); err2 != nil { + logrus.Errorf("Error saving container %s state: %v", c.ID(), err2) + } + + return err + } + + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { + return err + } + + return c.save() +} + +// Handle the container exit file. +// The exit file is used to supply container exit time and exit code. +// This assumes the exit file already exists. +func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { + c.state.FinishedTime = ctime.Created(fi) + statusCodeStr, err := ioutil.ReadFile(exitFile) + if err != nil { + return errors.Wrapf(err, "failed to read exit file for container %s", c.ID()) + } + statusCode, err := strconv.Atoi(string(statusCodeStr)) + if err != nil { + return errors.Wrapf(err, "error converting exit status code for container %s to int", + c.ID()) + } + c.state.ExitCode = int32(statusCode) + + oomFilePath := filepath.Join(c.bundlePath(), "oom") + if _, err = os.Stat(oomFilePath); err == nil { + c.state.OOMKilled = true + } + + c.state.Exited = true + + return nil +} + // Sync this container with on-disk state and runtime status // Should only be called with container lock held // This function should suffice to ensure a container's state is accurate and @@ -162,7 +236,7 @@ func (c *Container) syncContainer() error { (c.state.State != ContainerStateExited) { oldState := c.state.State // TODO: optionally replace this with a stat for the exit file - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { return err } // Only save back to DB if state changed @@ -660,7 +734,10 @@ func (c *Container) start() error { } logrus.Debugf("Started container %s", c.ID()) - c.state.State = ContainerStateRunning + // We need to pick up full container state, including PID. + if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil { + return err + } return c.save() } @@ -673,13 +750,8 @@ func (c *Container) stop(timeout uint) error { return err } - // Sync the container's state to pick up return code - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { - return err - } - - // Container should clean itself up - return c.save() + // Wait until we have an exit file, and sync once we do + return c.waitForExitFileAndSync() } // Internal, non-locking function to pause a container diff --git a/libpod/oci.go b/libpod/oci.go index ca8f967c4..b390b1967 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -11,12 +11,10 @@ import ( "os/exec" "path/filepath" "runtime" - "strconv" "strings" "syscall" "time" - "github.com/containers/libpod/pkg/ctime" "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/coreos/go-systemd/activation" @@ -451,17 +449,47 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res // updateContainerStatus retrieves the current status of the container from the // runtime. It updates the container's state but does not save it. -func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { - state := new(spec.State) +// If useRunc is false, we will not directly hit runc to see the container's +// status, but will instead only check for the existance of the conmon exit file +// and update state to stopped if it exists. +func (r *OCIRuntime) updateContainerStatus(ctr *Container, useRunc bool) error { + exitFile := ctr.exitFilePath() runtimeDir, err := util.GetRootlessRuntimeDir() if err != nil { return err } + // If not using runc, we don't need to do most of this. + if !useRunc { + // If the container's not running, nothing to do. + if ctr.state.State != ContainerStateRunning { + return nil + } + + // Check for the exit file conmon makes + info, err := os.Stat(exitFile) + if err != nil { + if os.IsNotExist(err) { + // Container is still running, no error + return nil + } + + return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID()) + } + + // Alright, it exists. Transition to Stopped state. + ctr.state.State = ContainerStateStopped + + // Read the exit file to get our stopped time and exit code. + return ctr.handleExitFile(exitFile, info) + } + // Store old state so we know if we were already stopped oldState := ctr.state.State + state := new(spec.State) + cmd := exec.Command(r.path, "state", ctr.ID()) cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) outPipe, err := cmd.StdoutPipe() @@ -514,7 +542,6 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { // Only grab exit status if we were not already stopped // If we were, it should already be in the database if ctr.state.State == ContainerStateStopped && oldState != ContainerStateStopped { - exitFile := filepath.Join(r.exitsDir, ctr.ID()) var fi os.FileInfo err = kwait.ExponentialBackoff( kwait.Backoff{ @@ -538,24 +565,7 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { return nil } - ctr.state.FinishedTime = ctime.Created(fi) - statusCodeStr, err := ioutil.ReadFile(exitFile) - if err != nil { - return errors.Wrapf(err, "failed to read exit file for container %s", ctr.ID()) - } - statusCode, err := strconv.Atoi(string(statusCodeStr)) - if err != nil { - return errors.Wrapf(err, "error converting exit status code for container %s to int", - ctr.ID()) - } - ctr.state.ExitCode = int32(statusCode) - - oomFilePath := filepath.Join(ctr.bundlePath(), "oom") - if _, err = os.Stat(oomFilePath); err == nil { - ctr.state.OOMKilled = true - } - - ctr.state.Exited = true + return ctr.handleExitFile(exitFile, fi) } return nil @@ -601,6 +611,8 @@ func (r *OCIRuntime) killContainer(ctr *Container, signal uint) error { // Does not set finished time for container, assumes you will run updateStatus // after to pull the exit code func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { + logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID) + // Ping the container to see if it's alive // If it's not, it's already stopped, return err := unix.Kill(ctr.state.PID, 0) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index b63726f29..09dc7c48b 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -256,7 +256,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool) } // Need to update container state to make sure we know it's stopped - if err := c.syncContainer(); err != nil { + if err := c.waitForExitFileAndSync(); err != nil { return err } } else if !(c.state.State == ContainerStateConfigured || |