diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2021-01-14 13:37:16 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-14 13:37:16 -0500 |
commit | a1b49749af97c5a3b6256b5aa0f53897257bc838 (patch) | |
tree | 3aa0413a7fabc7faff920018b66dae54bc86120e | |
parent | e0211a14fc58657821a2af92d09f115470a38c36 (diff) | |
parent | d54478d8eaec9481d482942b87065af36995d39a (diff) | |
download | podman-a1b49749af97c5a3b6256b5aa0f53897257bc838.tar.gz podman-a1b49749af97c5a3b6256b5aa0f53897257bc838.tar.bz2 podman-a1b49749af97c5a3b6256b5aa0f53897257bc838.zip |
Merge pull request #8906 from vrothberg/fix-8501
container stop: release lock before calling the runtime
-rw-r--r-- | libpod/container_api.go | 10 | ||||
-rw-r--r-- | libpod/container_internal.go | 43 | ||||
-rw-r--r-- | libpod/define/containerstate.go | 5 | ||||
-rw-r--r-- | libpod/rootless_cni_linux.go | 2 | ||||
-rw-r--r-- | test/system/050-stop.bats | 28 |
5 files changed, 84 insertions, 4 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 87ff764e3..0d62a2dd7 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -210,7 +210,13 @@ func (c *Container) Kill(signal uint) error { } // TODO: Is killing a paused container OK? - if c.state.State != define.ContainerStateRunning { + switch c.state.State { + case define.ContainerStateRunning, define.ContainerStateStopping: + // Note that killing containers in "stopping" state is okay. + // In that state, the Podman is waiting for the runtime to + // stop the container and if that is taking too long, a user + // may have decided to kill the container after all. + default: return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String()) } @@ -539,7 +545,7 @@ func (c *Container) Cleanup(ctx context.Context) error { } // Check if state is good - if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) { + if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) { return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID()) } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index f4cdb749f..6d4d3ade0 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -764,7 +764,7 @@ func (c *Container) isStopped() (bool, error) { return true, err } - return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused), nil + return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), nil } // save container state to the database @@ -1290,10 +1290,49 @@ func (c *Container) stop(timeout uint) error { return err } + // Set the container state to "stopping" and unlock the container + // before handing it over to conmon to unblock other commands. #8501 + // demonstrates nicely that a high stop timeout will block even simple + // commands such as `podman ps` from progressing if the container lock + // is held when busy-waiting for the container to be stopped. + c.state.State = define.ContainerStateStopping + if err := c.save(); err != nil { + return errors.Wrapf(err, "error saving container %s state before stopping", c.ID()) + } + if !c.batched { + c.lock.Unlock() + } + if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil { return err } + if !c.batched { + c.lock.Lock() + if err := c.syncContainer(); err != nil { + switch errors.Cause(err) { + // If the container has already been removed (e.g., via + // the cleanup process), there's nothing left to do. + case define.ErrNoSuchCtr, define.ErrCtrRemoved: + return nil + default: + return err + } + } + } + + // Since we're now subject to a race condition with other processes who + // may have altered the state (and other data), let's check if the + // state has changed. If so, we should return immediately and log a + // warning. + if c.state.State != define.ContainerStateStopping { + logrus.Warnf( + "Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered", + c.ID(), define.ContainerStateStopping, c.state.State, + ) + return nil + } + c.newContainerEvent(events.Stop) c.state.PID = 0 @@ -2116,7 +2155,7 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume // Check for an exit file, and handle one if present func (c *Container) checkExitFile() error { // If the container's not running, nothing to do. - if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { + if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) { return nil } diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go index 825e77387..5d2bc9099 100644 --- a/libpod/define/containerstate.go +++ b/libpod/define/containerstate.go @@ -28,6 +28,9 @@ const ( // ContainerStateRemoving indicates the container is in the process of // being removed. ContainerStateRemoving ContainerStatus = iota + // ContainerStateStopping indicates the container is in the process of + // being stopped. + ContainerStateStopping ContainerStatus = iota ) // ContainerStatus returns a string representation for users @@ -50,6 +53,8 @@ func (t ContainerStatus) String() string { return "exited" case ContainerStateRemoving: return "removing" + case ContainerStateStopping: + return "stopping" } return "bad state" } diff --git a/libpod/rootless_cni_linux.go b/libpod/rootless_cni_linux.go index ce8a87759..9a980750f 100644 --- a/libpod/rootless_cni_linux.go +++ b/libpod/rootless_cni_linux.go @@ -110,6 +110,8 @@ func DeallocRootlessCNI(ctx context.Context, c *Container) error { logrus.Warn(err) } logrus.Debugf("rootless CNI: removing infra container %q", infra.ID()) + infra.lock.Lock() + defer infra.lock.Unlock() if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil { return err } diff --git a/test/system/050-stop.bats b/test/system/050-stop.bats index f604ea2e2..548fd56ee 100644 --- a/test/system/050-stop.bats +++ b/test/system/050-stop.bats @@ -67,4 +67,32 @@ load helpers done } +# Regression test for #8501 +@test "podman stop - unlock while waiting for timeout" { + # Test that the container state transitions to "stopping" and that other + # commands can get the container's lock. To do that, run a container that + # ingores SIGTERM such that the Podman would wait 20 seconds for the stop + # to finish. This gives us enough time to try some commands and inspect + # the container's status. + + run_podman run --name stopme -d $IMAGE sh -c \ + "trap 'echo Received SIGTERM, ignoring' SIGTERM; echo READY; while :; do sleep 1; done" + + # Stop the container in the background + $PODMAN stop -t 20 stopme & + + # Other commands can acquire the lock + run_podman ps -a + + # The container state transitioned to "stopping" + run_podman inspect --format '{{.State.Status}}' stopme + is "$output" "stopping" "Status of container should be 'stopping'" + + run_podman kill stopme + + # Exit code should be 137 as it was killed + run_podman inspect --format '{{.State.ExitCode}}' stopme + is "$output" "137" "Exit code of killed container" +} + # vim: filetype=sh |