diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2018-11-07 09:36:03 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-11-07 09:36:03 -0800 |
commit | ae031378619ba78bcd22a3c2ee9b276506c1dbd3 (patch) | |
tree | cd35959d810a754f9c300764561edaa944c84b26 | |
parent | 1e1aaac97d795b5c94f3c714d3d240d48c5efb97 (diff) | |
parent | 536af1f68944c68956e705d589e11cd52875273a (diff) | |
download | podman-ae031378619ba78bcd22a3c2ee9b276506c1dbd3.tar.gz podman-ae031378619ba78bcd22a3c2ee9b276506c1dbd3.tar.bz2 podman-ae031378619ba78bcd22a3c2ee9b276506c1dbd3.zip |
Merge pull request #1689 from mheon/add_runc_timeout
Do not call out to runc for sync
-rw-r--r-- | libpod/container_api.go | 2 | ||||
-rw-r--r-- | libpod/container_internal.go | 85 | ||||
-rw-r--r-- | libpod/oci.go | 61 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 2 |
4 files changed, 117 insertions, 33 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 30c67eb2a..25c1fb5f9 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -685,7 +685,7 @@ func (c *Container) Sync() error { (c.state.State != ContainerStateConfigured) { oldState := c.state.State // TODO: optionally replace this with a stat for the exit file - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil { return err } // Only save back to DB if state changed diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 9bdf4b8fc..b523c89e3 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -13,8 +13,10 @@ import ( "strconv" "strings" "syscall" + "time" "github.com/containers/buildah/imagebuildah" + "github.com/containers/libpod/pkg/ctime" "github.com/containers/libpod/pkg/hooks" "github.com/containers/libpod/pkg/hooks/exec" "github.com/containers/libpod/pkg/lookup" @@ -31,6 +33,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/text/language" + kwait "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -147,6 +150,77 @@ func (c *Container) execPidPath(sessionID string) string { return filepath.Join(c.state.RunDir, "exec_pid_"+sessionID) } +// exitFilePath gets the path to the container's exit file +func (c *Container) exitFilePath() string { + return filepath.Join(c.runtime.ociRuntime.exitsDir, c.ID()) +} + +// Wait for the container's exit file to appear. +// When it does, update our state based on it. +func (c *Container) waitForExitFileAndSync() error { + exitFile := c.exitFilePath() + + err := kwait.ExponentialBackoff( + kwait.Backoff{ + Duration: 500 * time.Millisecond, + Factor: 1.2, + Steps: 6, + }, + func() (bool, error) { + _, err := os.Stat(exitFile) + if err != nil { + // wait longer + return false, nil + } + return true, nil + }) + if err != nil { + // Exit file did not appear + // Reset our state + c.state.ExitCode = -1 + c.state.FinishedTime = time.Now() + c.state.State = ContainerStateStopped + + if err2 := c.save(); err2 != nil { + logrus.Errorf("Error saving container %s state: %v", c.ID(), err2) + } + + return err + } + + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { + return err + } + + return c.save() +} + +// Handle the container exit file. +// The exit file is used to supply container exit time and exit code. +// This assumes the exit file already exists. +func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { + c.state.FinishedTime = ctime.Created(fi) + statusCodeStr, err := ioutil.ReadFile(exitFile) + if err != nil { + return errors.Wrapf(err, "failed to read exit file for container %s", c.ID()) + } + statusCode, err := strconv.Atoi(string(statusCodeStr)) + if err != nil { + return errors.Wrapf(err, "error converting exit status code (%q) for container %s to int", + c.ID(), statusCodeStr) + } + c.state.ExitCode = int32(statusCode) + + oomFilePath := filepath.Join(c.bundlePath(), "oom") + if _, err = os.Stat(oomFilePath); err == nil { + c.state.OOMKilled = true + } + + c.state.Exited = true + + return nil +} + // Sync this container with on-disk state and runtime status // Should only be called with container lock held // This function should suffice to ensure a container's state is accurate and @@ -162,7 +236,7 @@ func (c *Container) syncContainer() error { (c.state.State != ContainerStateExited) { oldState := c.state.State // TODO: optionally replace this with a stat for the exit file - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { return err } // Only save back to DB if state changed @@ -673,13 +747,8 @@ func (c *Container) stop(timeout uint) error { return err } - // Sync the container's state to pick up return code - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { - return err - } - - // Container should clean itself up - return nil + // Wait until we have an exit file, and sync once we do + return c.waitForExitFileAndSync() } // Internal, non-locking function to pause a container diff --git a/libpod/oci.go b/libpod/oci.go index ca8f967c4..233bacfbb 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -11,12 +11,10 @@ import ( "os/exec" "path/filepath" "runtime" - "strconv" "strings" "syscall" "time" - "github.com/containers/libpod/pkg/ctime" "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/coreos/go-systemd/activation" @@ -443,6 +441,7 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res } return errors.Wrapf(ErrInternal, "container create failed") } + ctr.state.PID = ss.si.Pid case <-time.After(ContainerCreateTimeout): return errors.Wrapf(ErrInternal, "container creation timeout") } @@ -451,17 +450,47 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res // updateContainerStatus retrieves the current status of the container from the // runtime. It updates the container's state but does not save it. -func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { - state := new(spec.State) +// If useRunc is false, we will not directly hit runc to see the container's +// status, but will instead only check for the existence of the conmon exit file +// and update state to stopped if it exists. +func (r *OCIRuntime) updateContainerStatus(ctr *Container, useRunc bool) error { + exitFile := ctr.exitFilePath() runtimeDir, err := util.GetRootlessRuntimeDir() if err != nil { return err } + // If not using runc, we don't need to do most of this. + if !useRunc { + // If the container's not running, nothing to do. + if ctr.state.State != ContainerStateRunning { + return nil + } + + // Check for the exit file conmon makes + info, err := os.Stat(exitFile) + if err != nil { + if os.IsNotExist(err) { + // Container is still running, no error + return nil + } + + return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID()) + } + + // Alright, it exists. Transition to Stopped state. + ctr.state.State = ContainerStateStopped + + // Read the exit file to get our stopped time and exit code. + return ctr.handleExitFile(exitFile, info) + } + // Store old state so we know if we were already stopped oldState := ctr.state.State + state := new(spec.State) + cmd := exec.Command(r.path, "state", ctr.ID()) cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) outPipe, err := cmd.StdoutPipe() @@ -480,6 +509,8 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { } if strings.Contains(string(out), "does not exist") { ctr.removeConmonFiles() + ctr.state.ExitCode = -1 + ctr.state.FinishedTime = time.Now() ctr.state.State = ContainerStateExited return nil } @@ -514,7 +545,6 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { // Only grab exit status if we were not already stopped // If we were, it should already be in the database if ctr.state.State == ContainerStateStopped && oldState != ContainerStateStopped { - exitFile := filepath.Join(r.exitsDir, ctr.ID()) var fi os.FileInfo err = kwait.ExponentialBackoff( kwait.Backoff{ @@ -538,24 +568,7 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error { return nil } - ctr.state.FinishedTime = ctime.Created(fi) - statusCodeStr, err := ioutil.ReadFile(exitFile) - if err != nil { - return errors.Wrapf(err, "failed to read exit file for container %s", ctr.ID()) - } - statusCode, err := strconv.Atoi(string(statusCodeStr)) - if err != nil { - return errors.Wrapf(err, "error converting exit status code for container %s to int", - ctr.ID()) - } - ctr.state.ExitCode = int32(statusCode) - - oomFilePath := filepath.Join(ctr.bundlePath(), "oom") - if _, err = os.Stat(oomFilePath); err == nil { - ctr.state.OOMKilled = true - } - - ctr.state.Exited = true + return ctr.handleExitFile(exitFile, fi) } return nil @@ -601,6 +614,8 @@ func (r *OCIRuntime) killContainer(ctr *Container, signal uint) error { // Does not set finished time for container, assumes you will run updateStatus // after to pull the exit code func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { + logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID) + // Ping the container to see if it's alive // If it's not, it's already stopped, return err := unix.Kill(ctr.state.PID, 0) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index b63726f29..09dc7c48b 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -256,7 +256,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool) } // Need to update container state to make sure we know it's stopped - if err := c.syncContainer(); err != nil { + if err := c.waitForExitFileAndSync(); err != nil { return err } } else if !(c.state.State == ContainerStateConfigured || |