From 4eff0c8cf284a6007122aec731e4d97059750166 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Wed, 13 Apr 2022 16:21:21 +0200 Subject: pod: add exit policies Add the notion of an "exit policy" to a pod. This policy controls the behaviour when the last container of the pod exits. Initially, there are two policies: - "continue" : the pod continues running. This is the default policy when creating a pod. - "stop" : stop the pod when the last container exits. This is the default behaviour for `play kube`. In order to implement the deferred stop of a pod, add a worker queue to the libpod runtime. The queue will pick up work items and in this case helps resolve deadlocks that would otherwise occur if we attempted to stop a pod during container cleanup. Note that the default restart policy of `play kube` is "Always". Hence, in order to really solve #13464, the YAML files must set a custom restart policy; the tests use "OnFailure". Fixes: #13464 Signed-off-by: Valentin Rothberg --- libpod/container_internal.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'libpod/container_internal.go') diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 5c6719bdf..7494eb3ec 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error { } } + if err := c.stopPodIfNeeded(context.Background()); err != nil { + if lastError == nil { + lastError = err + } else { + logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err) + } + } + return lastError } +// If the container is part of a pod where only the infra container remains +// running, attempt to stop the pod.
+//
+// stopPodIfNeeded returns an error only when the pod cannot be retrieved from
+// the runtime state. A failure of the queued stop itself is logged from within
+// the work item and is not returned to the caller.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+	// An empty pod reference means the container is not part of a pod.
+	if c.config.Pod == "" {
+		return nil
+	}
+
+	pod, err := c.runtime.state.Pod(c.config.Pod)
+	if err != nil {
+		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+	}
+
+	switch pod.config.ExitPolicy {
+	case config.PodExitPolicyContinue:
+		// The pod keeps running; nothing to do.
+		return nil
+
+	case config.PodExitPolicyStop:
+		// Use the runtime's work queue to stop the pod. This resolves
+		// a number of scenarios where we'd otherwise run into
+		// deadlocks. For instance, during `pod stop`, the pod has
+		// already been locked.
+		// Going through the work queue also keeps this simple: we do
+		// not have to worry about future changes that may introduce
+		// more deadlock scenarios.
+		c.runtime.queueWork(func() {
+			// The closure captures ctx, pod and c, so they are used
+			// whenever the work item is executed.
+			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+				// ErrNoSuchPod is deliberately not logged.
+				// NOTE(review): presumably the pod may already
+				// have been removed by the time the work item
+				// runs - confirm.
+				if !errors.Is(err, define.ErrNoSuchPod) {
+					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+				}
+			}
+		})
+	}
+	// Reached after queueing the stop, and for any exit policy that is
+	// not handled by the switch above.
+	return nil
+}
+
 // delete deletes the container and runs any configured poststop
 // hooks.
 func (c *Container) delete(ctx context.Context) error {
-- 
cgit v1.2.3-54-g00ecf