summaryrefslogtreecommitdiff
path: root/libpod
diff options
context:
space:
mode:
authorMatthew Heon <mheon@redhat.com>2022-03-22 14:01:44 -0400
committerMatthew Heon <mheon@redhat.com>2022-03-23 09:33:40 -0400
commit5b2597d5232627b5560bf628276ec67b35aba4bf (patch)
tree1bda31be4159f08bdb1a83936c07e8534e172130 /libpod
parentc840f64e419813f389d88e16294ce916aaf31957 (diff)
downloadpodman-5b2597d5232627b5560bf628276ec67b35aba4bf.tar.gz
podman-5b2597d5232627b5560bf628276ec67b35aba4bf.tar.bz2
podman-5b2597d5232627b5560bf628276ec67b35aba4bf.zip
Fix a potential race around the exec cleanup process
Every exec session run attached will, on exit, do two things: it will signal the associated `podman exec` that it is finished (to allow Podman to collect the exit code and exit), and spawn a cleanup process to clean up the exec session (in case the `podman exec` process died, we still need to clean up). If an exec session is created that exits almost instantly, but generates a large amount of output (e.g. prints thousands of lines), the cleanup process can potentially execute before `podman exec` has a chance to read the exit code, resulting in errors. Handle this by detecting if the cleanup process has already removed the exec session before handling the error from reading the exec exit code. [NO NEW TESTS NEEDED] I have no idea how to test this in CI. Fixes #13227 Signed-off-by: Matthew Heon <mheon@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r--libpod/container_exec.go56
1 files changed, 47 insertions, 9 deletions
diff --git a/libpod/container_exec.go b/libpod/container_exec.go
index d1c190905..140267f28 100644
--- a/libpod/container_exec.go
+++ b/libpod/container_exec.go
@@ -341,22 +341,60 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS
}
lastErr = tmpErr
- exitCode, err := c.readExecExitCode(session.ID())
- if err != nil {
+ exitCode, exitCodeErr := c.readExecExitCode(session.ID())
+
+ // Lock again.
+ // Important: we must lock and sync *before* the above error is handled.
+ // We need info from the database to handle the error.
+ if !c.batched {
+ c.lock.Lock()
+ }
+ // We can't reuse the old exec session (things may have changed from
+ // other use, the container was unlocked).
+ // So re-sync and get a fresh copy.
+ // If we can't do this, no point in continuing, any attempt to save
+ // would write garbage to the DB.
+ if err := c.syncContainer(); err != nil {
+ if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
+ // We can't save status, but since the container has
+ // been entirely removed, we don't have to; exit cleanly
+ return lastErr
+ }
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}
- lastErr = err
- }
+ return errors.Wrapf(err, "error syncing container %s state to update exec session %s", c.ID(), sessionID)
+ }
+
+ // Now handle the error from readExecExitCode above.
+ if exitCodeErr != nil {
+ newSess, ok := c.state.ExecSessions[sessionID]
+ if !ok {
+ // The exec session was removed entirely, probably by
+ // the cleanup process. When it did so, it should have
+ // written an event with the exit code.
+ // Given that, there's nothing more we can do.
+ logrus.Infof("Container %s exec session %s already removed", c.ID(), session.ID())
+ return lastErr
+ }
- logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)
+ if newSess.State == define.ExecStateStopped {
+ // Exec session already cleaned up.
+ // Exit code should be recorded, so it's OK if we were
+ // not able to read it.
+ logrus.Infof("Container %s exec session %s already cleaned up", c.ID(), session.ID())
+ return lastErr
+ }
- // Lock again
- if !c.batched {
- c.lock.Lock()
+ if lastErr != nil {
+ logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
+ }
+ lastErr = exitCodeErr
}
- if err := writeExecExitCode(c, session.ID(), exitCode); err != nil {
+ logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)
+
+ if err := justWriteExecExitCode(c, session.ID(), exitCode); err != nil {
if lastErr != nil {
logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
}