diff options
author | Matthew Heon <matthew.heon@gmail.com> | 2018-10-19 16:02:14 -0400 |
---|---|---|
committer | Matthew Heon <matthew.heon@gmail.com> | 2018-11-07 11:36:01 -0500 |
commit | 140f87c474f5a075d5833335593895491b83dab6 (patch) | |
tree | 93fa572f876c58bd2e083a64bb8a8529b11e3ce9 /libpod/container_internal.go | |
parent | f714ee4fb14085c326ee5d56432781a01ffabbb0 (diff) | |
download | podman-140f87c474f5a075d5833335593895491b83dab6.tar.gz podman-140f87c474f5a075d5833335593895491b83dab6.tar.bz2 podman-140f87c474f5a075d5833335593895491b83dab6.zip |
EXPERIMENTAL: Do not call out to runc for sync
When syncing container state, we normally call out to runc to see
the container's status. This does have significant performance
implications, though, and we've seen issues with large amounts of
runc processes being spawned.
This patch attempts to use stat calls on the container exit file
created by Conmon instead to sync state. This massively decreases
the cost of calling updateContainer (it has gone from an
almost-unconditional fork/exec of runc to a single stat call that
can be avoided in most states).
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'libpod/container_internal.go')
-rw-r--r-- | libpod/container_internal.go | 90 |
1 files changed, 81 insertions, 9 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 867d539be..db1cf6c99 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -13,8 +13,10 @@ import ( "strconv" "strings" "syscall" + "time" "github.com/containers/buildah/imagebuildah" + "github.com/containers/libpod/pkg/ctime" "github.com/containers/libpod/pkg/hooks" "github.com/containers/libpod/pkg/hooks/exec" "github.com/containers/libpod/pkg/lookup" @@ -31,6 +33,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/text/language" + kwait "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -147,6 +150,77 @@ func (c *Container) execPidPath(sessionID string) string { return filepath.Join(c.state.RunDir, "exec_pid_"+sessionID) } +// exitFilePath gets the path to the container's exit file +func (c *Container) exitFilePath() string { + return filepath.Join(c.runtime.ociRuntime.exitsDir, c.ID()) +} + +// Wait for the container's exit file to appear. +// When it does, update our state based on it. +func (c *Container) waitForExitFileAndSync() error { + exitFile := c.exitFilePath() + + err := kwait.ExponentialBackoff( + kwait.Backoff{ + Duration: 500 * time.Millisecond, + Factor: 1.2, + Steps: 6, + }, + func() (bool, error) { + _, err := os.Stat(exitFile) + if err != nil { + // wait longer + return false, nil + } + return true, nil + }) + if err != nil { + // Exit file did not appear + // Reset our state + c.state.ExitCode = -1 + c.state.FinishedTime = time.Now() + c.state.State = ContainerStateStopped + + if err2 := c.save(); err2 != nil { + logrus.Errorf("Error saving container %s state: %v", c.ID(), err2) + } + + return err + } + + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { + return err + } + + return c.save() +} + +// Handle the container exit file. +// The exit file is used to supply container exit time and exit code. +// This assumes the exit file already exists. +func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { + c.state.FinishedTime = ctime.Created(fi) + statusCodeStr, err := ioutil.ReadFile(exitFile) + if err != nil { + return errors.Wrapf(err, "failed to read exit file for container %s", c.ID()) + } + statusCode, err := strconv.Atoi(string(statusCodeStr)) + if err != nil { + return errors.Wrapf(err, "error converting exit status code for container %s to int", + c.ID()) + } + c.state.ExitCode = int32(statusCode) + + oomFilePath := filepath.Join(c.bundlePath(), "oom") + if _, err = os.Stat(oomFilePath); err == nil { + c.state.OOMKilled = true + } + + c.state.Exited = true + + return nil +} + // Sync this container with on-disk state and runtime status // Should only be called with container lock held // This function should suffice to ensure a container's state is accurate and @@ -162,7 +236,7 @@ func (c *Container) syncContainer() error { (c.state.State != ContainerStateExited) { oldState := c.state.State // TODO: optionally replace this with a stat for the exit file - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { + if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil { return err } // Only save back to DB if state changed @@ -660,7 +734,10 @@ func (c *Container) start() error { } logrus.Debugf("Started container %s", c.ID()) - c.state.State = ContainerStateRunning + // We need to pick up full container state, including PID. + if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil { + return err + } return c.save() } @@ -673,13 +750,8 @@ func (c *Container) stop(timeout uint) error { return err } - // Sync the container's state to pick up return code - if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil { - return err - } - - // Container should clean itself up - return c.save() + // Wait until we have an exit file, and sync once we do + return c.waitForExitFileAndSync() } // Internal, non-locking function to pause a container |