From e5ac28f3b968661e5c2603880a5c4576d590f3dd Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 23 Feb 2021 13:02:35 +0100 Subject: container removal: handle already removed containers Since commit d54478d8eaec, a container's lock is released before attempting to stop it via the OCI runtime. This opened the window for various kinds of race conditions. One of them led to #9479 where the removal+cleanup sequences of a `run --rm` session overlapped with `rm -af`. Make both execution paths more robust by handling the case of an already removed container. Fixes: #9479 Signed-off-by: Valentin Rothberg --- libpod/container_exec.go | 16 ++++++++++------ pkg/domain/infra/abi/containers.go | 12 +++++++++--- test/system/055-rm.bats | 7 +++++++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 7b1d797bb..8d63ef90f 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -954,18 +954,22 @@ func (c *Container) removeAllExecSessions() error { } // Delete all exec sessions if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil { - if lastErr != nil { - logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + if errors.Cause(err) != define.ErrCtrRemoved { + if lastErr != nil { + logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + } + lastErr = err } - lastErr = err } c.state.ExecSessions = nil c.state.LegacyExecSessions = nil if err := c.save(); err != nil { - if lastErr != nil { - logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + if errors.Cause(err) != define.ErrCtrRemoved { + if lastErr != nil { + logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + } + lastErr = err } - lastErr = err } return lastErr diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 938a5a92b..4790bd58c 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -319,12 +319,18 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string, errMap, err := parallelctr.ContainerOp(ctx, ctrs, func(c *libpod.Container) error { err := ic.Libpod.RemoveContainer(ctx, c, options.Force, options.Volumes) - if err != nil { - if options.Ignore && errors.Cause(err) == define.ErrNoSuchCtr { + if err == nil { + return nil + } + logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error()) + switch errors.Cause(err) { + case define.ErrNoSuchCtr: + if options.Ignore { logrus.Debugf("Ignoring error (--allow-missing): %v", err) return nil } - logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error()) + case define.ErrCtrRemoved: + return nil } return err }) diff --git a/test/system/055-rm.bats b/test/system/055-rm.bats index 0107114b5..a5770f20f 100644 --- a/test/system/055-rm.bats +++ b/test/system/055-rm.bats @@ -51,6 +51,13 @@ load helpers run_podman rm $rand $external_cid } +@test "podman rm <-> run --rm race" { + # A container's lock is released before attempting to stop it. This opens + # the window for race conditions that led to #9479. + run_podman run --rm -d $IMAGE sleep infinity + run_podman rm -af +} + # I'm sorry! This test takes 13 seconds. There's not much I can do about it, # please know that I think it's justified: podman 1.5.0 had a strange bug # in with exit status was not preserved on some code paths with 'rm -f' -- cgit v1.2.3-54-g00ecf