summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2021-01-14 13:37:16 -0500
committerGitHub <noreply@github.com>2021-01-14 13:37:16 -0500
commita1b49749af97c5a3b6256b5aa0f53897257bc838 (patch)
tree3aa0413a7fabc7faff920018b66dae54bc86120e
parente0211a14fc58657821a2af92d09f115470a38c36 (diff)
parentd54478d8eaec9481d482942b87065af36995d39a (diff)
downloadpodman-a1b49749af97c5a3b6256b5aa0f53897257bc838.tar.gz
podman-a1b49749af97c5a3b6256b5aa0f53897257bc838.tar.bz2
podman-a1b49749af97c5a3b6256b5aa0f53897257bc838.zip
Merge pull request #8906 from vrothberg/fix-8501
container stop: release lock before calling the runtime
-rw-r--r--libpod/container_api.go10
-rw-r--r--libpod/container_internal.go43
-rw-r--r--libpod/define/containerstate.go5
-rw-r--r--libpod/rootless_cni_linux.go2
-rw-r--r--test/system/050-stop.bats28
5 files changed, 84 insertions, 4 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 87ff764e3..0d62a2dd7 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -210,7 +210,13 @@ func (c *Container) Kill(signal uint) error {
}
// TODO: Is killing a paused container OK?
- if c.state.State != define.ContainerStateRunning {
+ switch c.state.State {
+ case define.ContainerStateRunning, define.ContainerStateStopping:
+ // Note that killing containers in "stopping" state is okay.
+ // In that state, the Podman is waiting for the runtime to
+ // stop the container and if that is taking too long, a user
+ // may have decided to kill the container after all.
+ default:
return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String())
}
@@ -539,7 +545,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
}
// Check if state is good
- if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
+ if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
}
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index f4cdb749f..6d4d3ade0 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -764,7 +764,7 @@ func (c *Container) isStopped() (bool, error) {
return true, err
}
- return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused), nil
+ return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), nil
}
// save container state to the database
@@ -1290,10 +1290,49 @@ func (c *Container) stop(timeout uint) error {
return err
}
+ // Set the container state to "stopping" and unlock the container
+ // before handing it over to conmon to unblock other commands. #8501
+ // demonstrates nicely that a high stop timeout will block even simple
+ // commands such as `podman ps` from progressing if the container lock
+ // is held when busy-waiting for the container to be stopped.
+ c.state.State = define.ContainerStateStopping
+ if err := c.save(); err != nil {
+ return errors.Wrapf(err, "error saving container %s state before stopping", c.ID())
+ }
+ if !c.batched {
+ c.lock.Unlock()
+ }
+
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
return err
}
+ if !c.batched {
+ c.lock.Lock()
+ if err := c.syncContainer(); err != nil {
+ switch errors.Cause(err) {
+ // If the container has already been removed (e.g., via
+ // the cleanup process), there's nothing left to do.
+ case define.ErrNoSuchCtr, define.ErrCtrRemoved:
+ return nil
+ default:
+ return err
+ }
+ }
+ }
+
+ // Since we're now subject to a race condition with other processes who
+ // may have altered the state (and other data), let's check if the
+ // state has changed. If so, we should return immediately and log a
+ // warning.
+ if c.state.State != define.ContainerStateStopping {
+ logrus.Warnf(
+ "Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
+ c.ID(), define.ContainerStateStopping, c.state.State,
+ )
+ return nil
+ }
+
c.newContainerEvent(events.Stop)
c.state.PID = 0
@@ -2116,7 +2155,7 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume
// Check for an exit file, and handle one if present
func (c *Container) checkExitFile() error {
// If the container's not running, nothing to do.
- if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+ if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) {
return nil
}
diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go
index 825e77387..5d2bc9099 100644
--- a/libpod/define/containerstate.go
+++ b/libpod/define/containerstate.go
@@ -28,6 +28,9 @@ const (
// ContainerStateRemoving indicates the container is in the process of
// being removed.
ContainerStateRemoving ContainerStatus = iota
+ // ContainerStateStopping indicates the container is in the process of
+ // being stopped.
+ ContainerStateStopping ContainerStatus = iota
)
// ContainerStatus returns a string representation for users
@@ -50,6 +53,8 @@ func (t ContainerStatus) String() string {
return "exited"
case ContainerStateRemoving:
return "removing"
+ case ContainerStateStopping:
+ return "stopping"
}
return "bad state"
}
diff --git a/libpod/rootless_cni_linux.go b/libpod/rootless_cni_linux.go
index ce8a87759..9a980750f 100644
--- a/libpod/rootless_cni_linux.go
+++ b/libpod/rootless_cni_linux.go
@@ -110,6 +110,8 @@ func DeallocRootlessCNI(ctx context.Context, c *Container) error {
logrus.Warn(err)
}
logrus.Debugf("rootless CNI: removing infra container %q", infra.ID())
+ infra.lock.Lock()
+ defer infra.lock.Unlock()
if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil {
return err
}
diff --git a/test/system/050-stop.bats b/test/system/050-stop.bats
index f604ea2e2..548fd56ee 100644
--- a/test/system/050-stop.bats
+++ b/test/system/050-stop.bats
@@ -67,4 +67,32 @@ load helpers
done
}
+# Regression test for #8501
+@test "podman stop - unlock while waiting for timeout" {
+ # Test that the container state transitions to "stopping" and that other
+ # commands can get the container's lock. To do that, run a container that
+ # ingores SIGTERM such that the Podman would wait 20 seconds for the stop
+ # to finish. This gives us enough time to try some commands and inspect
+ # the container's status.
+
+ run_podman run --name stopme -d $IMAGE sh -c \
+ "trap 'echo Received SIGTERM, ignoring' SIGTERM; echo READY; while :; do sleep 1; done"
+
+ # Stop the container in the background
+ $PODMAN stop -t 20 stopme &
+
+ # Other commands can acquire the lock
+ run_podman ps -a
+
+ # The container state transitioned to "stopping"
+ run_podman inspect --format '{{.State.Status}}' stopme
+ is "$output" "stopping" "Status of container should be 'stopping'"
+
+ run_podman kill stopme
+
+ # Exit code should be 137 as it was killed
+ run_podman inspect --format '{{.State.ExitCode}}' stopme
+ is "$output" "137" "Exit code of killed container"
+}
+
# vim: filetype=sh