From 4eff0c8cf284a6007122aec731e4d97059750166 Mon Sep 17 00:00:00 2001
From: Valentin Rothberg
Date: Wed, 13 Apr 2022 16:21:21 +0200
Subject: pod: add exit policies

Add the notion of an "exit policy" to a pod. This policy controls the
behaviour when the last container of a pod exits. Initially, there are
two policies:

 - "continue" : the pod continues running. This is the default policy
   when creating a pod.

 - "stop" : stop the pod when the last container exits. This is the
   default behaviour for `play kube`.

In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime. The queue picks up work items and, in this case,
helps resolve deadlocks that would otherwise occur if we attempted to
stop a pod during container cleanup (a standalone sketch of this
pattern follows the patch).

Note that the default restart policy of `play kube` is "Always". Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".

Fixes: #13464

Signed-off-by: Valentin Rothberg
---
 libpod/container_internal.go | 42 ++++++++++++++++++++++++++++++++++++++++++
 libpod/define/pod_inspect.go |  2 ++
 libpod/options.go            | 18 ++++++++++++++++++
 libpod/pod.go                |  4 ++++
 libpod/pod_api.go            | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 libpod/runtime.go            | 14 ++++++++++++++
 libpod/runtime_worker.go     | 41 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 172 insertions(+)
 create mode 100644 libpod/runtime_worker.go

(limited to 'libpod')

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 5c6719bdf..7494eb3ec 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
 		}
 	}
 
+	if err := c.stopPodIfNeeded(context.Background()); err != nil {
+		if lastError == nil {
+			lastError = err
+		} else {
+			logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
+		}
+	}
+
 	return lastError
 }
 
+// If the container is part of a pod where only the infra container remains
+// running, attempt to stop the pod.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+	if c.config.Pod == "" {
+		return nil
+	}
+
+	pod, err := c.runtime.state.Pod(c.config.Pod)
+	if err != nil {
+		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+	}
+
+	switch pod.config.ExitPolicy {
+	case config.PodExitPolicyContinue:
+		return nil
+
+	case config.PodExitPolicyStop:
+		// Use the runtime's work queue to stop the pod. This resolves
+		// a number of scenarios where we'd otherwise run into
+		// deadlocks. For instance, during `pod stop`, the pod has
+		// already been locked.
+		// The work queue is a simple means of avoiding this without having
+		// to worry about future changes that may introduce more deadlock scenarios.
+		c.runtime.queueWork(func() {
+			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+				if !errors.Is(err, define.ErrNoSuchPod) {
+					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+				}
+			}
+		})
+	}
+	return nil
+}
+
 // delete deletes the container and runs any configured poststop
 // hooks.
 func (c *Container) delete(ctx context.Context) error {
diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go
index e85a660a1..219ffade2 100644
--- a/libpod/define/pod_inspect.go
+++ b/libpod/define/pod_inspect.go
@@ -19,6 +19,8 @@ type InspectPodData struct {
 	// CreateCommand is the full command plus arguments of the process the
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
+	// ExitPolicy of the pod.
+	ExitPolicy string `json:"ExitPolicy,omitempty"`
 	// State represents the current state of the pod.
 	State string `json:"State"`
 	// Hostname is the hostname that the pod will set.
diff --git a/libpod/options.go b/libpod/options.go
index 98eb45e76..9b83cb76a 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
 	}
 }
 
+// WithPodExitPolicy sets the exit policy of the pod.
+func WithPodExitPolicy(policy string) PodCreateOption {
+	return func(pod *Pod) error {
+		if pod.valid {
+			return define.ErrPodFinalized
+		}
+
+		parsed, err := config.ParsePodExitPolicy(policy)
+		if err != nil {
+			return err
+		}
+
+		pod.config.ExitPolicy = parsed
+
+		return nil
+	}
+}
+
 // WithPodHostname sets the hostname of the pod.
 func WithPodHostname(hostname string) PodCreateOption {
 	return func(pod *Pod) error {
diff --git a/libpod/pod.go b/libpod/pod.go
index 237c42901..2211d5be7 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/containers/common/pkg/config"
 	"github.com/containers/podman/v4/libpod/define"
 	"github.com/containers/podman/v4/libpod/lock"
 	"github.com/opencontainers/runtime-spec/specs-go"
@@ -70,6 +71,9 @@ type PodConfig struct {
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
 
+	// The pod's exit policy.
+	ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
+
 	// ID of the pod's lock
 	LockID uint32 `json:"lockID"`
 }
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index ba30d878e..73b28822b 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -2,6 +2,7 @@ package libpod
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/containers/common/pkg/cgroups"
 	"github.com/containers/podman/v4/libpod/define"
@@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
+	return p.stopWithTimeout(ctx, cleanup, timeout)
+}
+
+func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
 	if !p.valid {
 		return nil, define.ErrPodRemoved
 	}
@@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	return nil, nil
 }
 
+// Stops the pod if only the infra container remains running.
+func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	infraID := ""
+
+	if p.HasInfraContainer() {
+		infra, err := p.infraContainer()
+		if err != nil {
+			return err
+		}
+		infraID = infra.ID()
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return err
+	}
+
+	for _, ctr := range allCtrs {
+		if ctr.ID() == infraID || ctr.ID() == ignoreID {
+			continue
+		}
+
+		state, err := ctr.State()
+		if err != nil {
+			return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
+		}
+
+		switch state {
+		case define.ContainerStateExited,
+			define.ContainerStateRemoving,
+			define.ContainerStateStopping,
+			define.ContainerStateUnknown:
+			continue
+		default:
+			return nil
+		}
+	}
+
+	_, err = p.stopWithTimeout(ctx, true, -1)
+	return err
+}
+
 // Cleanup cleans up all containers within a pod that have stopped.
 // All containers are cleaned up independently. An error with one container will
 // not prevent other containers being cleaned up.
@@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
 		Namespace:        p.Namespace(),
 		Created:          p.CreatedTime(),
 		CreateCommand:    p.config.CreateCommand,
+		ExitPolicy:       string(p.config.ExitPolicy),
 		State:            podState,
 		Hostname:         p.config.Hostname,
 		Labels:           p.Labels(),
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 6c2323d88..d5daa2f8a 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -86,6 +86,10 @@ type Runtime struct {
 	libimageEventsShutdown chan bool
 	lockManager            lock.Manager
 
+	// Worker
+	workerShutdown chan bool
+	workerChannel  chan func()
+
 	// syslog describes whenever logrus should log to the syslog as well.
 	// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
 	// This bool is just needed so that we can set it for netavark interface.
@@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
 		}
 	}
 
+	runtime.startWorker()
+
 	// Mark the runtime as valid - ready to be used, cannot be modified
 	// further
 	runtime.valid = true
@@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
 		return define.ErrRuntimeStopped
 	}
 
+	if r.workerShutdown != nil {
+		// Signal the worker routine to shut down. The routine will
+		// process all pending work items and then read from the
+		// channel; we're blocked until all work items have been
+		// processed.
+		r.workerShutdown <- true
+	}
+
 	r.valid = false
 
 	// Shutdown all containers if --force is given
diff --git a/libpod/runtime_worker.go b/libpod/runtime_worker.go
new file mode 100644
index 000000000..ca44a27f7
--- /dev/null
+++ b/libpod/runtime_worker.go
@@ -0,0 +1,41 @@
+package libpod
+
+import (
+	"time"
+)
+
+func (r *Runtime) startWorker() {
+	if r.workerChannel == nil {
+		r.workerChannel = make(chan func(), 1)
+		r.workerShutdown = make(chan bool)
+	}
+	go func() {
+		for {
+			// Make sure to process all queued work items before
+			// checking if we're about to shut down.
+			for len(r.workerChannel) > 0 {
+				w := <-r.workerChannel
+				w()
+			}
+
+			select {
+			// We'll read from the shutdown channel only when all
+			// items above have been processed.
+			//
+			// (*Runtime).Shutdown() will block until the
+			// item is read.
+			case <-r.workerShutdown:
+				return
+
+			default:
+				time.Sleep(100 * time.Millisecond)
+			}
+		}
+	}()
+}
+
+func (r *Runtime) queueWork(f func()) {
+	go func() {
+		r.workerChannel <- f
+	}()
+}
--
cgit v1.2.3-54-g00ecf
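
For readers who want to see the worker-queue mechanism in isolation, the
following is a minimal, self-contained Go sketch of the same pattern used
by runtime_worker.go above: a buffered work channel, a drain loop, and a
shutdown channel that is only read once all pending items have been
processed. The `worker` type and the `newWorker`/`Queue`/`Shutdown` names
are invented for this example and are not part of the libpod API.

package main

import (
	"fmt"
	"time"
)

// worker owns a buffered work queue and a shutdown channel, mirroring
// the workerChannel/workerShutdown fields added to the libpod Runtime.
type worker struct {
	work     chan func()
	shutdown chan bool
}

// newWorker starts the background routine that drains the queue.
func newWorker() *worker {
	w := &worker{
		work:     make(chan func(), 1),
		shutdown: make(chan bool),
	}
	go func() {
		for {
			// Process all queued work items before checking
			// whether we have been asked to shut down.
			for len(w.work) > 0 {
				f := <-w.work
				f()
			}
			select {
			case <-w.shutdown:
				// Shutdown() blocks until this read happens, so
				// all previously drained items are done by now.
				return
			default:
				time.Sleep(10 * time.Millisecond)
			}
		}
	}()
	return w
}

// Queue hands a work item to the background routine without blocking the
// caller, analogous to (*Runtime).queueWork.
func (w *worker) Queue(f func()) {
	go func() { w.work <- f }()
}

// Shutdown signals the routine and blocks until the signal is consumed.
func (w *worker) Shutdown() {
	w.shutdown <- true
}

func main() {
	w := newWorker()
	w.Queue(func() { fmt.Println("deferred pod stop would run here") })
	// Queueing is asynchronous, so give the item a moment to reach the
	// channel before shutting down.
	time.Sleep(50 * time.Millisecond)
	w.Shutdown()
}

The polling loop with a short sleep keeps the implementation simple; a
plain select over both channels would risk consuming the shutdown signal
while a work item is still sitting in the queue, which is exactly what the
inner drain loop avoids.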