From fad6e1d3ef334076f180ab8a31ae7fa7a309e096 Mon Sep 17 00:00:00 2001
From: Matthew Heon <mheon@redhat.com>
Date: Wed, 26 May 2021 15:16:35 -0400
Subject: Ensure that container still exists when removing

After #8906, there is a potential race condition in container
removal of running containers with `--rm`. Running containers
must first be stopped, which was changed to unlock the container
to allow commands like `podman ps` to continue to run while
stopping; however, this also means that the cleanup process can
potentially run before we re-lock and remove the container out from
under us, resulting in error messages from `podman rm`. The end
result is unchanged - the container is still cleanly removed - but
the `podman rm` command will seem to have failed.

Work around this by pinging the database after we stop the
container to make sure it still exists. If it doesn't, our job is
done and we can exit cleanly.

Signed-off-by: Matthew Heon <mheon@redhat.com>
---
 libpod/runtime_ctr.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 4e4b2a8ab..6c69d1b72 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -581,6 +581,15 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 		if err := c.stop(c.StopTimeout()); err != nil && errors.Cause(err) != define.ErrConmonDead {
 			return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
 		}
+
+		// We unlocked as part of stop() above - there's a chance someone
+		// else got in and removed the container before we reacquired the
+		// lock.
+		// Do a quick ping of the database to check if the container
+		// still exists.
+		if ok, _ := r.state.HasContainer(c.ID()); !ok {
+			return nil
+		}
 	}
 
 	// Remove all active exec sessions
-- 
cgit v1.2.3-54-g00ecf