From bd44fd5c815fc750fd6b60899328564bee74e6e5 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Thu, 12 Dec 2019 16:19:36 -0500 Subject: Reap exec sessions on cleanup and removal We currently rely on exec sessions being removed from the state by the Exec() API itself, on detecting the session stopping. This is not a reliable method, though. The Podman frontend for exec could be killed before the session ended, or another Podman process could be holding the lock and prevent update (most notable in `run --rm`, when a container with an active exec session is stopped). To resolve this, add a function to reap active exec sessions from the state, and use it on cleanup (to clear sessions after the container stops) and remove (to do the same when --rm is passed). This is a bit more complicated than it ought to be because Kata and company exist, and we can't guarantee the exec session has a PID on the host, so we have to plumb this through to the OCI runtime. Fixes #4666 Signed-off-by: Matthew Heon --- libpod/container_api.go | 7 ++++++- libpod/container_internal.go | 40 ++++++++++++++++++++++++++++++++++++++++ libpod/oci.go | 6 +++--- libpod/oci_conmon_linux.go | 22 ++++++++++++++++++++++ libpod/oci_missing.go | 5 +++++ 5 files changed, 76 insertions(+), 4 deletions(-) (limited to 'libpod') diff --git a/libpod/container_api.go b/libpod/container_api.go index 153a1d628..5168dbc68 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -594,7 +594,12 @@ func (c *Container) Cleanup(ctx context.Context) error { // If we didn't restart, we perform a normal cleanup - // Check if we have active exec sessions + // Reap exec sessions first. + if err := c.reapExecSessions(); err != nil { + return err + } + + // Check if we have active exec sessions after reaping. if len(c.state.ExecSessions) != 0 { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID()) } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 1e8a8a580..37801162a 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1749,6 +1749,11 @@ func (c *Container) checkReadyForRemoval() error { return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String()) } + // Reap exec sessions + if err := c.reapExecSessions(); err != nil { + return err + } + if len(c.state.ExecSessions) != 0 { return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID()) } @@ -1855,3 +1860,38 @@ func (c *Container) checkExitFile() error { // Read the exit file to get our stopped time and exit code. return c.handleExitFile(exitFile, info) } + +// Reap dead exec sessions +func (c *Container) reapExecSessions() error { + // Instead of saving once per iteration, use a defer to do it once at + // the end. + var lastErr error + needSave := false + for id := range c.state.ExecSessions { + alive, err := c.ociRuntime.ExecUpdateStatus(c, id) + if err != nil { + if lastErr != nil { + logrus.Errorf("Error reaping exec sessions for container %s: %v", c.ID(), lastErr) + } + lastErr = err + continue + } + if !alive { + // Clean up lingering files and remove the exec session + if err := c.ociRuntime.ExecContainerCleanup(c, id); err != nil { + return errors.Wrapf(err, "error cleaning up container %s exec session %s files", c.ID(), id) + } + delete(c.state.ExecSessions, id) + needSave = true + } + } + if needSave { + if err := c.save(); err != nil { + if lastErr != nil { + logrus.Errorf("Error reaping exec sessions for container %s: %v", c.ID(), lastErr) + } + lastErr = err + } + } + return lastErr +} diff --git a/libpod/oci.go b/libpod/oci.go index 9e761788e..05a2f37db 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -23,9 +23,6 @@ type OCIRuntime interface { // CreateContainer creates the container in the OCI runtime. CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error // UpdateContainerStatus updates the status of the given container. - // It includes a switch for whether to perform a hard query of the - // runtime. If unset, the exit file (if supported by the implementation) - // will be used. UpdateContainerStatus(ctr *Container) error // StartContainer starts the given container. StartContainer(ctr *Container) error @@ -59,6 +56,9 @@ type OCIRuntime interface { // If timeout is 0, SIGKILL will be sent immediately, and SIGTERM will // be omitted. ExecStopContainer(ctr *Container, sessionID string, timeout uint) error + // ExecUpdateStatus checks the status of a given exec session. + // Returns true if the session is still running, or false if it exited. + ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) // ExecContainerCleanup cleans up after an exec session exits. // It removes any files left by the exec session that are no longer // needed, including the attach socket. diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 026b13129..37aa71cbb 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -687,6 +687,28 @@ func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, t return nil } +// ExecUpdateStatus checks if the given exec session is still running. +func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) { + session, ok := ctr.state.ExecSessions[sessionID] + if !ok { + // TODO This should probably be a separate error + return false, errors.Wrapf(define.ErrInvalidArg, "no exec session with ID %s found in container %s", sessionID, ctr.ID()) + } + + logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID) + + // Is the session dead? + // Ping the PID with signal 0 to see if it still exists. + if err := unix.Kill(session.PID, 0); err != nil { + if err == unix.ESRCH { + return false, nil + } + return false, errors.Wrapf(err, "error pinging container %s exec session %s PID %d with signal 0", ctr.ID(), sessionID, session.PID) + } + + return true, nil +} + // ExecCleanupContainer cleans up files created when a command is run via // ExecContainer. This includes the attach socket for the exec session. func (r *ConmonOCIRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index d4524cd34..0faa1805b 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -120,6 +120,11 @@ func (r *MissingRuntime) ExecStopContainer(ctr *Container, sessionID string, tim return r.printError() } +// ExecUpdateStatus is not available as the runtime is missing. +func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) { + return false, r.printError() +} + // ExecContainerCleanup is not available as the runtime is missing func (r *MissingRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error { return r.printError() -- cgit v1.2.3-54-g00ecf