diff options
author | Valentin Rothberg <rothberg@redhat.com> | 2021-02-23 13:02:35 +0100 |
---|---|---|
committer | Valentin Rothberg <rothberg@redhat.com> | 2021-02-23 13:02:35 +0100 |
commit | e5ac28f3b968661e5c2603880a5c4576d590f3dd (patch) | |
tree | 378d34ca1cb862ea65deff2292085198a39770da | |
parent | 96fc9d983e0fc5bae48c3cec3acce86cdb6e1059 (diff) | |
download | podman-e5ac28f3b968661e5c2603880a5c4576d590f3dd.tar.gz podman-e5ac28f3b968661e5c2603880a5c4576d590f3dd.tar.bz2 podman-e5ac28f3b968661e5c2603880a5c4576d590f3dd.zip |
container removal: handle already removed containers
Since commit d54478d8eaec, a container's lock is released before
attempting to stop it via the OCI runtime. This opened the window
for various kinds of race conditions. One of them led to #9479 where
the removal+cleanup sequences of a `run --rm` session overlapped with
`rm -af`. Make both execution paths more robust by handling the case of
an already removed container.
Fixes: #9479
Signed-off-by: Valentin Rothberg <rothberg@redhat.com>
-rw-r--r-- | libpod/container_exec.go | 16 | ||||
-rw-r--r-- | pkg/domain/infra/abi/containers.go | 12 | ||||
-rw-r--r-- | test/system/055-rm.bats | 7 |
3 files changed, 26 insertions, 9 deletions
diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 7b1d797bb..8d63ef90f 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -954,18 +954,22 @@ func (c *Container) removeAllExecSessions() error { } // Delete all exec sessions if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil { - if lastErr != nil { - logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + if errors.Cause(err) != define.ErrCtrRemoved { + if lastErr != nil { + logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + } + lastErr = err } - lastErr = err } c.state.ExecSessions = nil c.state.LegacyExecSessions = nil if err := c.save(); err != nil { - if lastErr != nil { - logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + if errors.Cause(err) != define.ErrCtrRemoved { + if lastErr != nil { + logrus.Errorf("Error stopping container %s exec sessions: %v", c.ID(), lastErr) + } + lastErr = err } - lastErr = err } return lastErr diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 938a5a92b..4790bd58c 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -319,12 +319,18 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string, errMap, err := parallelctr.ContainerOp(ctx, ctrs, func(c *libpod.Container) error { err := ic.Libpod.RemoveContainer(ctx, c, options.Force, options.Volumes) - if err != nil { - if options.Ignore && errors.Cause(err) == define.ErrNoSuchCtr { + if err == nil { + return nil + } + logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error()) + switch errors.Cause(err) { + case define.ErrNoSuchCtr: + if options.Ignore { logrus.Debugf("Ignoring error (--allow-missing): %v", err) return nil } - logrus.Debugf("Failed to remove container %s: %s", c.ID(), err.Error()) + case define.ErrCtrRemoved: + return nil } return err }) diff --git a/test/system/055-rm.bats b/test/system/055-rm.bats index 0107114b5..a5770f20f 100644 --- a/test/system/055-rm.bats +++ b/test/system/055-rm.bats @@ -51,6 +51,13 @@ load helpers run_podman rm $rand $external_cid } +@test "podman rm <-> run --rm race" { + # A container's lock is released before attempting to stop it. This opens + # the window for race conditions that led to #9479. + run_podman run --rm -d $IMAGE sleep infinity + run_podman rm -af +} + # I'm sorry! This test takes 13 seconds. There's not much I can do about it, # please know that I think it's justified: podman 1.5.0 had a strange bug # in with exit status was not preserved on some code paths with 'rm -f' |