Merge pull request #13600 from mheon/exec_cleanup_race

Fix a potential race around the exec cleanup process
author: OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> 2022-03-23 19:38:31 +0100
committer: GitHub <noreply@github.com> 2022-03-23 19:38:31 +0100
commit: a1e2897e92ff4b79a57d6ebbecd7c70de23b8977 (patch)
tree: d9987aba75d998966a051cc0535d89bd125f9c81
parent: f049cba47c31d31a4a8ed9a9180f0e847be3411c (diff)
parent: 5b2597d5232627b5560bf628276ec67b35aba4bf (diff)
download: podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.tar.gz podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.tar.bz2 podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.zip
1 file changed, 47 insertions, 9 deletions
diff --git a/libpod/container_exec.go b/libpod/container_exec.go
index d1c190905..140267f28 100644
--- a/libpod/container_exec.go
+++ b/libpod/container_exec.go
@@ -341,22 +341,60 @@ func (c *Container) ExecStartAndAttach(sessionID string, streams *define.AttachS
 	}
 	lastErr = tmpErr
 
-	exitCode, err := c.readExecExitCode(session.ID())
-	if err != nil {
+	exitCode, exitCodeErr := c.readExecExitCode(session.ID())
+
+	// Lock again.
+	// Important: we must lock and sync *before* the above error is handled.
+	// We need info from the database to handle the error.
+	if !c.batched {
+		c.lock.Lock()
+	}
+	// We can't reuse the old exec session (things may have changed from
+	// other use, the container was unlocked).
+	// So re-sync and get a fresh copy.
+	// If we can't do this, no point in continuing, any attempt to save
+	// would write garbage to the DB.
+	if err := c.syncContainer(); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
+			// We can't save status, but since the container has
+			// been entirely removed, we don't have to; exit cleanly
+			return lastErr
+		}
 		if lastErr != nil {
 			logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
 		}
-		lastErr = err
-	}
+		return errors.Wrapf(err, "error syncing container %s state to update exec session %s", c.ID(), sessionID)
+	}
+
+	// Now handle the error from readExecExitCode above.
+	if exitCodeErr != nil {
+		newSess, ok := c.state.ExecSessions[sessionID]
+		if !ok {
+			// The exec session was removed entirely, probably by
+			// the cleanup process. When it did so, it should have
+			// written an event with the exit code.
+			// Given that, there's nothing more we can do.
+			logrus.Infof("Container %s exec session %s already removed", c.ID(), session.ID())
+			return lastErr
+		}
 
-	logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)
+		if newSess.State == define.ExecStateStopped {
+			// Exec session already cleaned up.
+			// Exit code should be recorded, so it's OK if we were
+			// not able to read it.
+			logrus.Infof("Container %s exec session %s already cleaned up", c.ID(), session.ID())
+			return lastErr
+		}
 
-	// Lock again
-	if !c.batched {
-		c.lock.Lock()
+		if lastErr != nil {
+			logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
+		}
+		lastErr = exitCodeErr
 	}
 
-	if err := writeExecExitCode(c, session.ID(), exitCode); err != nil {
+	logrus.Debugf("Container %s exec session %s completed with exit code %d", c.ID(), session.ID(), exitCode)
+
+	if err := justWriteExecExitCode(c, session.ID(), exitCode); err != nil {
 		if lastErr != nil {
 			logrus.Errorf("Container %s exec session %s error: %v", c.ID(), session.ID(), lastErr)
 		}
author	OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>	2022-03-23 19:38:31 +0100
committer	GitHub <noreply@github.com>	2022-03-23 19:38:31 +0100
commit	a1e2897e92ff4b79a57d6ebbecd7c70de23b8977 (patch)
tree	d9987aba75d998966a051cc0535d89bd125f9c81
parent	f049cba47c31d31a4a8ed9a9180f0e847be3411c (diff)
parent	5b2597d5232627b5560bf628276ec67b35aba4bf (diff)
download	podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.tar.gz podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.tar.bz2 podman-a1e2897e92ff4b79a57d6ebbecd7c70de23b8977.zip