author    Valentin Rothberg <vrothberg@redhat.com>  2022-04-13 16:21:21 +0200
committer Valentin Rothberg <vrothberg@redhat.com>  2022-05-02 13:29:59 +0200
commit    4eff0c8cf284a6007122aec731e4d97059750166 (patch)
tree      cdbfee34bd64bb295556667129a6a3c5db9b4612 /libpod
parent    77d872ea38ec7b685ec99efe6688d1793c9fa256 (diff)
pod: add exit policies
Add the notion of an "exit policy" to a pod. This policy controls the
behaviour when the last container of a pod exits. Initially, there are
two policies:

 - "continue" : the pod continues running. This is the default policy
   when creating a pod.

 - "stop" : stop the pod when the last container exits. This is the
   default behaviour for `play kube`.

In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime. The queue will pick up work items and, in this
case, helps resolve deadlocks that would otherwise occur if we
attempted to stop a pod during container cleanup.

Note that the default restart policy of `play kube` is "Always". Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".

Fixes: #13464

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
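The policy type and its parser come from containers/common's pkg/config (the diff below references config.PodExitPolicy, config.PodExitPolicyContinue, config.PodExitPolicyStop, and config.ParsePodExitPolicy). For orientation, a minimal sketch of what those definitions plausibly look like; in particular, treating the empty string as "continue" is an assumption of the sketch, not something this diff shows:

    package config

    import "fmt"

    // PodExitPolicy determines what happens when the last container
    // of a pod exits.
    type PodExitPolicy string

    const (
        // PodExitPolicyContinue keeps the pod running; the default
        // when creating a pod.
        PodExitPolicyContinue PodExitPolicy = "continue"
        // PodExitPolicyStop stops the pod once its last container has
        // exited; the default for `play kube`.
        PodExitPolicyStop PodExitPolicy = "stop"
    )

    // ParsePodExitPolicy maps a user-supplied string to a PodExitPolicy.
    func ParsePodExitPolicy(policy string) (PodExitPolicy, error) {
        switch policy {
        case "", string(PodExitPolicyContinue): // empty-string default: assumed
            return PodExitPolicyContinue, nil
        case string(PodExitPolicyStop):
            return PodExitPolicyStop, nil
        default:
            return "", fmt.Errorf("invalid pod exit policy: %q", policy)
        }
    }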
Diffstat (limited to 'libpod')
-rw-r--r--  libpod/container_internal.go  |  42
-rw-r--r--  libpod/define/pod_inspect.go  |   2
-rw-r--r--  libpod/options.go             |  18
-rw-r--r--  libpod/pod.go                 |   4
-rw-r--r--  libpod/pod_api.go             |  51
-rw-r--r--  libpod/runtime.go             |  14
-rw-r--r--  libpod/runtime_worker.go      |  41
7 files changed, 172 insertions, 0 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 5c6719bdf..7494eb3ec 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
         }
     }
 
+    if err := c.stopPodIfNeeded(context.Background()); err != nil {
+        if lastError == nil {
+            lastError = err
+        } else {
+            logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
+        }
+    }
+
     return lastError
 }
 
+// If the container is part of a pod where only the infra container remains
+// running, attempt to stop the pod.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+    if c.config.Pod == "" {
+        return nil
+    }
+
+    pod, err := c.runtime.state.Pod(c.config.Pod)
+    if err != nil {
+        return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+    }
+
+    switch pod.config.ExitPolicy {
+    case config.PodExitPolicyContinue:
+        return nil
+
+    case config.PodExitPolicyStop:
+        // Use the runtime's work queue to stop the pod. This resolves
+        // a number of scenarios where we would otherwise run into
+        // deadlocks; during `pod stop`, for instance, the pod is
+        // already locked. Deferring the stop to the work queue also
+        // spares us from worrying about future changes that may
+        // introduce new deadlock scenarios.
+        c.runtime.queueWork(func() {
+            if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+                if !errors.Is(err, define.ErrNoSuchPod) {
+                    logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+                }
+            }
+        })
+    }
+    return nil
+}
+
 // delete deletes the container and runs any configured poststop
 // hooks.
 func (c *Container) delete(ctx context.Context) error {
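Why queue the stop instead of performing it inline? cleanup() can run while the pod lock is already held (during `pod stop`, for example), and stopIfOnlyInfraRemains() acquires that same non-reentrant lock. A self-contained sketch of the hazard and the queued workaround, with a plain goroutine standing in for the work queue (illustrative code, not libpod's):

    package main

    import "sync"

    type pod struct{ mu sync.Mutex }

    func (p *pod) stop() {
        p.mu.Lock() // hangs forever if the caller still holds p.mu
        defer p.mu.Unlock()
        // ... stop the remaining containers ...
    }

    func main() {
        p := &pod{}
        done := make(chan struct{})

        p.mu.Lock() // simulates `pod stop` holding the pod lock
        // p.stop() // calling inline here would self-deadlock
        go func() { // the queued variant runs once the lock is free
            p.stop()
            close(done)
        }()
        p.mu.Unlock()
        <-done
    }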
diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go
index e85a660a1..219ffade2 100644
--- a/libpod/define/pod_inspect.go
+++ b/libpod/define/pod_inspect.go
@@ -19,6 +19,8 @@ type InspectPodData struct {
     // CreateCommand is the full command plus arguments of the process the
     // container has been created with.
     CreateCommand []string `json:"CreateCommand,omitempty"`
+    // ExitPolicy of the pod.
+    ExitPolicy string `json:"ExitPolicy,omitempty"`
     // State represents the current state of the pod.
     State string `json:"State"`
     // Hostname is the hostname that the pod will set.
diff --git a/libpod/options.go b/libpod/options.go
index 98eb45e76..9b83cb76a 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
     }
 }
 
+// WithPodExitPolicy sets the exit policy of the pod.
+func WithPodExitPolicy(policy string) PodCreateOption {
+    return func(pod *Pod) error {
+        if pod.valid {
+            return define.ErrPodFinalized
+        }
+
+        parsed, err := config.ParsePodExitPolicy(policy)
+        if err != nil {
+            return err
+        }
+
+        pod.config.ExitPolicy = parsed
+
+        return nil
+    }
+}
+
 // WithPodHostname sets the hostname of the pod.
 func WithPodHostname(hostname string) PodCreateOption {
     return func(pod *Pod) error {
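WithPodExitPolicy follows libpod's functional-option pattern for pod creation: each PodCreateOption mutates the pod under construction and is rejected once the pod has been finalized (pod.valid). A self-contained sketch of the pattern with simplified stand-in types; newPod, the inline policy check, and the error texts are illustrative, not libpod's:

    package main

    import (
        "errors"
        "fmt"
    )

    type Pod struct {
        valid      bool
        exitPolicy string
    }

    type PodCreateOption func(*Pod) error

    func WithPodExitPolicy(policy string) PodCreateOption {
        return func(pod *Pod) error {
            if pod.valid {
                return errors.New("pod is finalized")
            }
            if policy != "continue" && policy != "stop" {
                return fmt.Errorf("invalid pod exit policy: %q", policy)
            }
            pod.exitPolicy = policy
            return nil
        }
    }

    func newPod(opts ...PodCreateOption) (*Pod, error) {
        pod := &Pod{}
        for _, opt := range opts {
            if err := opt(pod); err != nil {
                return nil, err
            }
        }
        pod.valid = true // options are rejected from here on
        return pod, nil
    }

    func main() {
        pod, err := newPod(WithPodExitPolicy("stop"))
        if err != nil {
            panic(err)
        }
        fmt.Println(pod.exitPolicy) // "stop"
    }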
diff --git a/libpod/pod.go b/libpod/pod.go
index 237c42901..2211d5be7 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -6,6 +6,7 @@ import (
     "strings"
     "time"
 
+    "github.com/containers/common/pkg/config"
     "github.com/containers/podman/v4/libpod/define"
     "github.com/containers/podman/v4/libpod/lock"
     "github.com/opencontainers/runtime-spec/specs-go"
@@ -70,6 +71,9 @@ type PodConfig struct {
     // container has been created with.
     CreateCommand []string `json:"CreateCommand,omitempty"`
 
+    // The pod's exit policy.
+    ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
+
     // ID of the pod's lock
     LockID uint32 `json:"lockID"`
 }
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index ba30d878e..73b28822b 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -2,6 +2,7 @@ package libpod
 
 import (
     "context"
+    "fmt"
 
     "github.com/containers/common/pkg/cgroups"
     "github.com/containers/podman/v4/libpod/define"
@@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
     p.lock.Lock()
     defer p.lock.Unlock()
 
+    return p.stopWithTimeout(ctx, cleanup, timeout)
+}
+
+func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
     if !p.valid {
         return nil, define.ErrPodRemoved
     }
@@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
     return nil, nil
 }
 
+// Stops the pod if only the infra container remains running.
+func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
+    p.lock.Lock()
+    defer p.lock.Unlock()
+
+    infraID := ""
+
+    if p.HasInfraContainer() {
+        infra, err := p.infraContainer()
+        if err != nil {
+            return err
+        }
+        infraID = infra.ID()
+    }
+
+    allCtrs, err := p.runtime.state.PodContainers(p)
+    if err != nil {
+        return err
+    }
+
+    for _, ctr := range allCtrs {
+        if ctr.ID() == infraID || ctr.ID() == ignoreID {
+            continue
+        }
+
+        state, err := ctr.State()
+        if err != nil {
+            return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
+        }
+
+        switch state {
+        case define.ContainerStateExited,
+            define.ContainerStateRemoving,
+            define.ContainerStateStopping,
+            define.ContainerStateUnknown:
+            continue
+        default:
+            return nil
+        }
+    }
+
+    _, err = p.stopWithTimeout(ctx, true, -1)
+    return err
+}
+
 // Cleanup cleans up all containers within a pod that have stopped.
 // All containers are cleaned up independently. An error with one container will
 // not prevent other containers being cleaned up.
@@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
         Namespace:     p.Namespace(),
         Created:       p.CreatedTime(),
         CreateCommand: p.config.CreateCommand,
+        ExitPolicy:    string(p.config.ExitPolicy),
         State:         podState,
         Hostname:      p.config.Hostname,
         Labels:        p.Labels(),
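Two details are worth spelling out. First, StopWithTimeout is split into a locked wrapper and an unexported stopWithTimeout so that stopIfOnlyInfraRemains, which already holds the pod lock, can stop the pod without locking twice. Second, the loop treats Exited, Removing, Stopping, and Unknown as "no longer running"; any other state means a workload container is still alive and the pod is left untouched. The predicate, pulled out into a hypothetical helper for clarity (not part of the patch):

    // countsAsRunning reports whether a container in the given state
    // should prevent the pod from being stopped.
    func countsAsRunning(state define.ContainerStatus) bool {
        switch state {
        case define.ContainerStateExited,
            define.ContainerStateRemoving,
            define.ContainerStateStopping,
            define.ContainerStateUnknown:
            return false
        default:
            return true
        }
    }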
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 6c2323d88..d5daa2f8a 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -86,6 +86,10 @@ type Runtime struct {
     libimageEventsShutdown chan bool
     lockManager            lock.Manager
 
+    // Worker queue for deferred tasks.
+    workerShutdown chan bool
+    workerChannel  chan func()
+
     // syslog describes whether logrus should log to the syslog as well.
     // Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
     // This bool is just needed so that we can set it for the netavark interface.
@@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
         }
     }
 
+    runtime.startWorker()
+
     // Mark the runtime as valid - ready to be used, cannot be modified
     // further
     runtime.valid = true
@@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
         return define.ErrRuntimeStopped
     }
 
+    if r.workerShutdown != nil {
+        // Signal the worker routine to shut down. The routine will
+        // process all pending work items and then read from the
+        // channel; we're blocked until all work items have been
+        // processed.
+        r.workerShutdown <- true
+    }
+
     r.valid = false
 
     // Shutdown all containers if --force is given
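Because workerShutdown is unbuffered (see runtime_worker.go below), the send here doubles as a rendezvous: it cannot complete until the worker goroutine performs the matching receive, which it only does after draining its queue. A tiny self-contained demo of that channel property (illustrative, not libpod code):

    package main

    import "fmt"

    func main() {
        work := make(chan func(), 1)
        shutdown := make(chan bool) // unbuffered, like workerShutdown

        go func() {
            for {
                select {
                case w := <-work:
                    w()
                case <-shutdown:
                    return
                }
            }
        }()

        ran := make(chan struct{})
        work <- func() { fmt.Println("work item ran"); close(ran) }
        <-ran            // the queued item has been processed...
        shutdown <- true // ...and this send blocks until the worker receives
        fmt.Println("worker acknowledged shutdown")
    }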
diff --git a/libpod/runtime_worker.go b/libpod/runtime_worker.go
new file mode 100644
index 000000000..ca44a27f7
--- /dev/null
+++ b/libpod/runtime_worker.go
@@ -0,0 +1,41 @@
+package libpod
+
+import (
+    "time"
+)
+
+func (r *Runtime) startWorker() {
+    if r.workerChannel == nil {
+        r.workerChannel = make(chan func(), 1)
+        r.workerShutdown = make(chan bool)
+    }
+    go func() {
+        for {
+            // Drain all queued work items before checking
+            // whether we're about to shut down.
+            for len(r.workerChannel) > 0 {
+                w := <-r.workerChannel
+                w()
+            }
+
+            select {
+            // We'll read from the shutdown channel only when all
+            // items above have been processed.
+            //
+            // (*Runtime).Shutdown() will block until the item
+            // is read.
+            case <-r.workerShutdown:
+                return
+
+            default:
+                time.Sleep(100 * time.Millisecond)
+            }
+        }
+    }()
+}
+
+func (r *Runtime) queueWork(f func()) {
+    go func() {
+        r.workerChannel <- f
+    }()
+}
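Two design notes on the worker. queueWork sends from a fresh goroutine, so callers never block even when the one-slot buffer is full, at the cost of no ordering guarantee between concurrently queued items. The loop itself polls with a 100ms sleep; a common alternative shape closes the work channel and ranges over it, which needs no polling, though it would make the send-from-a-goroutine trick unsafe (sending on a closed channel panics). A sketch of that alternative, with illustrative names:

    package main

    import (
        "fmt"
        "sync"
    )

    type worker struct {
        work chan func()
        wg   sync.WaitGroup
    }

    func newWorker() *worker {
        w := &worker{work: make(chan func(), 1)}
        w.wg.Add(1)
        go func() {
            defer w.wg.Done()
            for f := range w.work { // exits once work is closed and drained
                f()
            }
        }()
        return w
    }

    // shutdown processes everything already queued, then returns.
    func (w *worker) shutdown() {
        close(w.work)
        w.wg.Wait()
    }

    func main() {
        w := newWorker()
        w.work <- func() { fmt.Println("queued item ran") }
        w.shutdown()
        fmt.Println("worker drained and stopped")
    }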