author    Valentin Rothberg <vrothberg@redhat.com>  2022-04-13 16:21:21 +0200
committer Valentin Rothberg <vrothberg@redhat.com>  2022-05-02 13:29:59 +0200
commit    4eff0c8cf284a6007122aec731e4d97059750166 (patch)
tree      cdbfee34bd64bb295556667129a6a3c5db9b4612
parent    77d872ea38ec7b685ec99efe6688d1793c9fa256 (diff)
pod: add exit policies
Add the notion of an "exit policy" to a pod. This policy controls the
behaviour when the last container of a pod exits. Initially, there are
two policies:

 - "continue" : the pod continues running. This is the default policy
                when creating a pod.

 - "stop" : stop the pod when the last container exits. This is the
            default behaviour for `play kube`.

In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime. The queue will pick up work items and in this case
helps resolve deadlocks that would otherwise occur if we attempted to
stop a pod during container cleanup.

Note that the default restart policy of `play kube` is "Always". Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".

Fixes: #13464
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
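This diff consumes three symbols from containers/common that it does not define: `config.PodExitPolicy`, `config.PodExitPolicies`, and `config.ParsePodExitPolicy`. As a reading aid, here is a minimal sketch of what those definitions plausibly look like, inferred from their uses in this patch; treating the empty string as the "continue" default is an assumption:

```go
// Package config sketches the exit-policy pieces from
// containers/common/pkg/config; the real definitions are not in this diff.
package config

import "fmt"

// PodExitPolicy determines a pod's behaviour when its last container exits.
type PodExitPolicy string

const (
	// PodExitPolicyContinue keeps the pod running; the default for
	// `podman pod create`.
	PodExitPolicyContinue PodExitPolicy = "continue"
	// PodExitPolicyStop stops the pod when the last container exits;
	// the default for `podman play kube`.
	PodExitPolicyStop PodExitPolicy = "stop"
)

// PodExitPolicies lists the supported policies, e.g. for shell completion.
var PodExitPolicies = []string{string(PodExitPolicyContinue), string(PodExitPolicyStop)}

// ParsePodExitPolicy validates a policy string. Mapping the empty string
// to "continue" is an assumption, not confirmed by this diff.
func ParsePodExitPolicy(policy string) (PodExitPolicy, error) {
	switch policy {
	case "", string(PodExitPolicyContinue):
		return PodExitPolicyContinue, nil
	case string(PodExitPolicyStop):
		return PodExitPolicyStop, nil
	default:
		return "", fmt.Errorf("invalid pod exit policy: %q", policy)
	}
}
```

The error string matches what the system test below expects for `--exit-policy invalid`.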
-rw-r--r--cmd/podman/common/completion.go5
-rw-r--r--cmd/podman/pods/create.go4
-rw-r--r--docs/source/markdown/podman-pod-create.1.md11
-rw-r--r--libpod/container_internal.go42
-rw-r--r--libpod/define/pod_inspect.go2
-rw-r--r--libpod/options.go18
-rw-r--r--libpod/pod.go4
-rw-r--r--libpod/pod_api.go51
-rw-r--r--libpod/runtime.go14
-rw-r--r--libpod/runtime_worker.go41
-rw-r--r--pkg/domain/entities/pods.go2
-rw-r--r--pkg/domain/infra/abi/play.go6
-rw-r--r--pkg/specgen/generate/pod_create.go2
-rw-r--r--pkg/specgen/podspecgen.go2
-rw-r--r--test/system/200-pod.bats69
15 files changed, 271 insertions, 2 deletions
diff --git a/cmd/podman/common/completion.go b/cmd/podman/common/completion.go
index ddf922b2a..6149a4465 100644
--- a/cmd/podman/common/completion.go
+++ b/cmd/podman/common/completion.go
@@ -492,6 +492,11 @@ func AutocompleteImages(cmd *cobra.Command, args []string, toComplete string) ([
return getImages(cmd, toComplete)
}
+// AutocompletePodExitPolicy - Autocomplete pod exit policy.
+func AutocompletePodExitPolicy(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+ return config.PodExitPolicies, cobra.ShellCompDirectiveNoFileComp
+}
+
// AutocompleteCreateRun - Autocomplete only the first argument as image and then do file completion.
func AutocompleteCreateRun(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if !validCurrentCmdLine(cmd, args, toComplete) {
diff --git a/cmd/podman/pods/create.go b/cmd/podman/pods/create.go
index 4623ade63..62f820790 100644
--- a/cmd/podman/pods/create.go
+++ b/cmd/podman/pods/create.go
@@ -72,6 +72,10 @@ func init() {
flags.StringVarP(&createOptions.Name, nameFlagName, "n", "", "Assign a name to the pod")
_ = createCommand.RegisterFlagCompletionFunc(nameFlagName, completion.AutocompleteNone)
+ policyFlag := "exit-policy"
+ flags.StringVarP(&createOptions.ExitPolicy, policyFlag, "", string(containerConfig.Engine.PodExitPolicy), "Behaviour when the last container exits")
+ _ = createCommand.RegisterFlagCompletionFunc(policyFlag, common.AutocompletePodExitPolicy)
+
infraImageFlagName := "infra-image"
var defInfraImage string
if !registry.IsRemote() {
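For context, a self-contained sketch of the flag wiring above with cobra; the hard-coded "continue" default stands in for the value the real code reads from containerConfig.Engine.PodExitPolicy (containers.conf):

```go
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

func main() {
	var exitPolicy string
	cmd := &cobra.Command{
		Use: "pod-create-demo",
		RunE: func(cmd *cobra.Command, args []string) error {
			fmt.Println("exit policy:", exitPolicy)
			return nil
		},
	}
	// "continue" stands in for the containers.conf-provided default.
	cmd.Flags().StringVar(&exitPolicy, "exit-policy", "continue",
		"Behaviour when the last container exits")
	// Offer the valid policies as shell completions, no file completion.
	_ = cmd.RegisterFlagCompletionFunc("exit-policy",
		func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
			return []string{"continue", "stop"}, cobra.ShellCompDirectiveNoFileComp
		})
	_ = cmd.Execute()
}
```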
diff --git a/docs/source/markdown/podman-pod-create.1.md b/docs/source/markdown/podman-pod-create.1.md
index 714909b98..fa431b611 100644
--- a/docs/source/markdown/podman-pod-create.1.md
+++ b/docs/source/markdown/podman-pod-create.1.md
@@ -75,6 +75,15 @@ Set custom DNS options in the /etc/resolv.conf file that will be shared between
Set custom DNS search domains in the /etc/resolv.conf file that will be shared between all containers in the pod.
+#### **--exit-policy**=**continue** | *stop*
+
+Set the exit policy of the pod when the last container exits. Supported policies are:
+
+| Exit Policy | Description |
+| ------------------ | --------------------------------------------------------------------------- |
+| *continue* | The pod continues running when the last container exits. Used by default. |
+| *stop* | The pod is stopped when the last container exits. Used in `play kube`. |
+
#### **--gidmap**=*container_gid:host_gid:amount*
GID map for the user namespace. Using this flag will run the container with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags.
@@ -554,7 +563,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10
```
## SEE ALSO
-**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **containers.conf(1)**
+**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-play-kube(1)](podman-play-kube.1.md)**, **containers.conf(1)**
## HISTORY
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 5c6719bdf..7494eb3ec 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
}
}
+ if err := c.stopPodIfNeeded(context.Background()); err != nil {
+ if lastError == nil {
+ lastError = err
+ } else {
+ logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
+ }
+ }
+
return lastError
}
+// If the container is part of a pod where only the infra container remains
+// running, attempt to stop the pod.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+ if c.config.Pod == "" {
+ return nil
+ }
+
+ pod, err := c.runtime.state.Pod(c.config.Pod)
+ if err != nil {
+ return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+ }
+
+ switch pod.config.ExitPolicy {
+ case config.PodExitPolicyContinue:
+ return nil
+
+ case config.PodExitPolicyStop:
+ // Use the runtime's work queue to stop the pod. This resolves
+ // a number of scenarios where we'd otherwise run into
+ // deadlocks. For instance, during `pod stop`, the pod has
+ // already been locked.
+ // The work queue is a simple way to defer the stop without having
+ // to worry about future changes that may introduce more deadlock
+ // scenarios.
+ c.runtime.queueWork(func() {
+ if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+ if !errors.Is(err, define.ErrNoSuchPod) {
+ logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+ }
+ }
+ })
+ }
+ return nil
+}
+
// delete deletes the container and runs any configured poststop
// hooks.
func (c *Container) delete(ctx context.Context) error {
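The deadlock the comments above allude to is re-entrancy: during `pod stop` the pod lock is already held when container cleanup runs, so stopping the pod synchronously from cleanup would block on that same lock. A toy, self-contained illustration of the queue-based escape (not podman code):

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type pod struct{ mu sync.Mutex }

// stop takes the pod lock, mirroring Pod.StopWithTimeout.
func (p *pod) stop() {
	p.mu.Lock()
	defer p.mu.Unlock()
	fmt.Println("pod stopped")
}

func main() {
	p := &pod{}
	queue := make(chan func(), 1)

	// Worker goroutine, analogous to Runtime.startWorker.
	go func() {
		for w := range queue {
			w()
		}
	}()

	p.mu.Lock() // `pod stop` holds the lock while cleaning up containers.
	// Calling p.stop() here directly would deadlock: sync.Mutex is not
	// re-entrant. Queueing the stop defers it until the lock is free.
	queue <- p.stop
	p.mu.Unlock()

	time.Sleep(100 * time.Millisecond) // give the worker time to run
}
```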
diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go
index e85a660a1..219ffade2 100644
--- a/libpod/define/pod_inspect.go
+++ b/libpod/define/pod_inspect.go
@@ -19,6 +19,8 @@ type InspectPodData struct {
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
+ // ExitPolicy of the pod.
+ ExitPolicy string `json:"ExitPolicy,omitempty"`
// State represents the current state of the pod.
State string `json:"State"`
// Hostname is the hostname that the pod will set.
diff --git a/libpod/options.go b/libpod/options.go
index 98eb45e76..9b83cb76a 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
}
}
+// WithPodExitPolicy sets the exit policy of the pod.
+func WithPodExitPolicy(policy string) PodCreateOption {
+ return func(pod *Pod) error {
+ if pod.valid {
+ return define.ErrPodFinalized
+ }
+
+ parsed, err := config.ParsePodExitPolicy(policy)
+ if err != nil {
+ return err
+ }
+
+ pod.config.ExitPolicy = parsed
+
+ return nil
+ }
+}
+
// WithPodHostname sets the hostname of the pod.
func WithPodHostname(hostname string) PodCreateOption {
return func(pod *Pod) error {
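WithPodExitPolicy follows libpod's functional-option pattern: each PodCreateOption mutates the pod under construction and the constructor applies them in order. A simplified, self-contained sketch of the pattern (the constructor and validation here are illustrative, not the actual libpod API):

```go
package main

import "fmt"

// Pod is a stand-in for libpod's Pod; only the field we need is modeled.
type Pod struct{ exitPolicy string }

// PodCreateOption mutates a pod under construction.
type PodCreateOption func(*Pod) error

// WithPodExitPolicy validates and sets the exit policy, mirroring the
// shape (not the exact code) of the option added in this patch.
func WithPodExitPolicy(policy string) PodCreateOption {
	return func(p *Pod) error {
		if policy != "continue" && policy != "stop" {
			return fmt.Errorf("invalid pod exit policy: %q", policy)
		}
		p.exitPolicy = policy
		return nil
	}
}

// NewPod applies each option in order, failing on the first error.
func NewPod(opts ...PodCreateOption) (*Pod, error) {
	p := &Pod{exitPolicy: "continue"} // default policy
	for _, opt := range opts {
		if err := opt(p); err != nil {
			return nil, err
		}
	}
	return p, nil
}

func main() {
	p, err := NewPod(WithPodExitPolicy("stop"))
	fmt.Println(p.exitPolicy, err) // stop <nil>
}
```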
diff --git a/libpod/pod.go b/libpod/pod.go
index 237c42901..2211d5be7 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -6,6 +6,7 @@ import (
"strings"
"time"
+ "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/lock"
"github.com/opencontainers/runtime-spec/specs-go"
@@ -70,6 +71,9 @@ type PodConfig struct {
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
+ // The pod's exit policy.
+ ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
+
// ID of the pod's lock
LockID uint32 `json:"lockID"`
}
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index ba30d878e..73b28822b 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -2,6 +2,7 @@ package libpod
import (
"context"
+ "fmt"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/podman/v4/libpod/define"
@@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
p.lock.Lock()
defer p.lock.Unlock()
+ return p.stopWithTimeout(ctx, cleanup, timeout)
+}
+
+func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
if !p.valid {
return nil, define.ErrPodRemoved
}
@@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
return nil, nil
}
+// Stops the pod if only the infra container remains running.
+func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ infraID := ""
+
+ if p.HasInfraContainer() {
+ infra, err := p.infraContainer()
+ if err != nil {
+ return err
+ }
+ infraID = infra.ID()
+ }
+
+ allCtrs, err := p.runtime.state.PodContainers(p)
+ if err != nil {
+ return err
+ }
+
+ for _, ctr := range allCtrs {
+ if ctr.ID() == infraID || ctr.ID() == ignoreID {
+ continue
+ }
+
+ state, err := ctr.State()
+ if err != nil {
+ return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
+ }
+
+ switch state {
+ case define.ContainerStateExited,
+ define.ContainerStateRemoving,
+ define.ContainerStateStopping,
+ define.ContainerStateUnknown:
+ continue
+ default:
+ return nil
+ }
+ }
+
+ _, err = p.stopWithTimeout(ctx, true, -1)
+ return err
+}
+
// Cleanup cleans up all containers within a pod that have stopped.
// All containers are cleaned up independently. An error with one container will
// not prevent other containers being cleaned up.
@@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
Namespace: p.Namespace(),
Created: p.CreatedTime(),
CreateCommand: p.config.CreateCommand,
+ ExitPolicy: string(p.config.ExitPolicy),
State: podState,
Hostname: p.config.Hostname,
Labels: p.Labels(),
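The switch in stopIfOnlyInfraRemains treats a container as no longer keeping the pod alive once it is exited, being removed, stopping, or in an unknown state. The predicate in isolation, as a self-contained sketch (the enum merely mirrors libpod's define.ContainerStatus constants):

```go
package main

import "fmt"

// ContainerStatus mirrors libpod's define.ContainerStatus (an int-backed
// enum); only the states relevant here are modeled.
type ContainerStatus int

const (
	StateRunning ContainerStatus = iota
	StateExited
	StateRemoving
	StateStopping
	StateUnknown
)

// effectivelyDone reports whether a state should not keep the pod alive
// under the "stop" exit policy; it mirrors the switch in
// stopIfOnlyInfraRemains.
func effectivelyDone(s ContainerStatus) bool {
	switch s {
	case StateExited, StateRemoving, StateStopping, StateUnknown:
		return true
	default:
		return false
	}
}

func main() {
	fmt.Println(effectivelyDone(StateRunning)) // false: pod keeps running
	fmt.Println(effectivelyDone(StateExited))  // true
}
```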
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 6c2323d88..d5daa2f8a 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -86,6 +86,10 @@ type Runtime struct {
libimageEventsShutdown chan bool
lockManager lock.Manager
+ // Worker
+ workerShutdown chan bool
+ workerChannel chan func()
+
// syslog describes whether logrus should log to the syslog as well.
// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
// This bool is just needed so that we can set it for netavark interface.
@@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
}
+ runtime.startWorker()
+
// Mark the runtime as valid - ready to be used, cannot be modified
// further
runtime.valid = true
@@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
return define.ErrRuntimeStopped
}
+ if r.workerShutdown != nil {
+ // Signal the worker routine to shut down. The routine will
+ // process all pending work items and then read from the
+ // channel; we're blocked until all work items have been
+ // processed.
+ r.workerShutdown <- true
+ }
+
r.valid = false
// Shutdown all containers if --force is given
diff --git a/libpod/runtime_worker.go b/libpod/runtime_worker.go
new file mode 100644
index 000000000..ca44a27f7
--- /dev/null
+++ b/libpod/runtime_worker.go
@@ -0,0 +1,41 @@
+package libpod
+
+import (
+ "time"
+)
+
+func (r *Runtime) startWorker() {
+ if r.workerChannel == nil {
+ r.workerChannel = make(chan func(), 1)
+ r.workerShutdown = make(chan bool)
+ }
+ go func() {
+ for {
+ // Make sure to drain all queued work items before
+ // checking if we're about to shut down.
+ for len(r.workerChannel) > 0 {
+ w := <-r.workerChannel
+ w()
+ }
+
+ select {
+ // We'll read from the shutdown channel only when all
+ // items above have been processed.
+ //
+ // (*Runtime).Shutdown() will block until the
+ // item is read.
+ case <-r.workerShutdown:
+ return
+
+ default:
+ time.Sleep(100 * time.Millisecond)
+ }
+ }
+ }()
+}
+
+func (r *Runtime) queueWork(f func()) {
+ go func() {
+ r.workerChannel <- f
+ }()
+}
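Two design points in the worker are easy to miss: workerShutdown is unbuffered, so the send in (*Runtime).Shutdown blocks until the goroutine has drained workerChannel and reached the select; and queueWork sends from a fresh goroutine so a caller holding locks never blocks on a full channel. A self-contained rendition of that handshake (timings illustrative):

```go
package main

import (
	"fmt"
	"time"
)

type runtime struct {
	workerChannel  chan func()
	workerShutdown chan bool
}

func (r *runtime) startWorker() {
	go func() {
		for {
			// Drain all pending work items before checking for shutdown.
			for len(r.workerChannel) > 0 {
				(<-r.workerChannel)()
			}
			select {
			case <-r.workerShutdown:
				return
			default:
				time.Sleep(10 * time.Millisecond)
			}
		}
	}()
}

func main() {
	r := &runtime{workerChannel: make(chan func(), 1), workerShutdown: make(chan bool)}
	r.startWorker()
	for i := 0; i < 3; i++ {
		i := i
		// Like queueWork: send from a goroutine so the caller never blocks.
		go func() { r.workerChannel <- func() { fmt.Println("ran item", i) } }()
	}
	time.Sleep(50 * time.Millisecond) // let the sends land
	r.workerShutdown <- true          // blocks until pending items were processed
	fmt.Println("worker shut down")
}
```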
diff --git a/pkg/domain/entities/pods.go b/pkg/domain/entities/pods.go
index 1e25e0872..a19edcbf0 100644
--- a/pkg/domain/entities/pods.go
+++ b/pkg/domain/entities/pods.go
@@ -122,6 +122,7 @@ type PodCreateOptions struct {
CreateCommand []string `json:"create_command,omitempty"`
Devices []string `json:"devices,omitempty"`
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
+ ExitPolicy string `json:"exit_policy,omitempty"`
Hostname string `json:"hostname,omitempty"`
Infra bool `json:"infra,omitempty"`
InfraImage string `json:"infra_image,omitempty"`
@@ -319,6 +320,7 @@ func ToPodSpecGen(s specgen.PodSpecGenerator, p *PodCreateOptions) (*specgen.Pod
}
s.Pid = out
s.Hostname = p.Hostname
+ s.ExitPolicy = p.ExitPolicy
s.Labels = p.Labels
s.Devices = p.Devices
s.SecurityOpt = p.SecurityOpt
diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go
index 0da07bab8..b3ded7db6 100644
--- a/pkg/domain/infra/abi/play.go
+++ b/pkg/domain/infra/abi/play.go
@@ -197,7 +197,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
return nil, errors.Errorf("pod does not have a name")
}
- podOpt := entities.PodCreateOptions{Infra: true, Net: &entities.NetOptions{NoHosts: options.NoHosts}}
+ podOpt := entities.PodCreateOptions{
+ Infra: true,
+ Net: &entities.NetOptions{NoHosts: options.NoHosts},
+ ExitPolicy: string(config.PodExitPolicyStop),
+ }
podOpt, err = kube.ToPodOpt(ctx, podName, podOpt, podYAML)
if err != nil {
return nil, err
diff --git a/pkg/specgen/generate/pod_create.go b/pkg/specgen/generate/pod_create.go
index a3408b402..fce32d688 100644
--- a/pkg/specgen/generate/pod_create.go
+++ b/pkg/specgen/generate/pod_create.go
@@ -197,6 +197,8 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
options = append(options, libpod.WithPodHostname(p.Hostname))
}
+ options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy))
+
return options, nil
}
diff --git a/pkg/specgen/podspecgen.go b/pkg/specgen/podspecgen.go
index 759caa0c0..1bb64448f 100644
--- a/pkg/specgen/podspecgen.go
+++ b/pkg/specgen/podspecgen.go
@@ -19,6 +19,8 @@ type PodBasicConfig struct {
// all containers in the pod as long as the UTS namespace is shared.
// Optional.
Hostname string `json:"hostname,omitempty"`
+ // ExitPolicy determines the pod's exit and stop behaviour.
+ ExitPolicy string `json:"exit_policy,omitempty"`
// Labels are key-value pairs that are used to add metadata to pods.
// Optional.
Labels map[string]string `json:"labels,omitempty"`
diff --git a/test/system/200-pod.bats b/test/system/200-pod.bats
index 64f95f723..39982848f 100644
--- a/test/system/200-pod.bats
+++ b/test/system/200-pod.bats
@@ -406,7 +406,76 @@ EOF
run_podman pod inspect test --format {{.InfraConfig.HostNetwork}}
is "$output" "true" "Host network sharing with only ipc should be true"
run_podman pod rm test
+}
+
+# Wait for the pod (1st arg) to transition into the state (2nd arg)
+function _ensure_pod_state() {
+ for i in {0..5}; do
+ run_podman pod inspect $1 --format "{{.State}}"
+ if [[ $output == "$2" ]]; then
+ break
+ fi
+ sleep 0.5
+ done
+
+ is "$output" "$2" "unexpected pod state"
+}
+
+@test "pod exit policies" {
+ # Test setting exit policies
+ run_podman pod create
+ podID="$output"
+ run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+ is "$output" "continue" "default exit policy"
+ run_podman pod rm $podID
+
+ run_podman pod create --exit-policy stop
+ podID="$output"
+ run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+ is "$output" "stop" "custom exit policy"
+ run_podman pod rm $podID
+
+ run_podman 125 pod create --exit-policy invalid
+ is "$output" "Error: .*error running pod create option: invalid pod exit policy: \"invalid\"" "invalid exit policy"
+
+ # Test exit-policy behaviour
+ run_podman pod create --exit-policy continue
+ podID="$output"
+ run_podman run --pod $podID $IMAGE true
+ run_podman pod inspect $podID --format "{{.State}}"
+ _ensure_pod_state $podID Degraded
+ run_podman pod rm $podID
+
+ run_podman pod create --exit-policy stop
+ podID="$output"
+ run_podman run --pod $podID $IMAGE true
+ run_podman pod inspect $podID --format "{{.State}}"
+ _ensure_pod_state $podID Exited
+ run_podman pod rm $podID
+}
+
+@test "pod exit policies - play kube" {
+ # play-kube sets the exit policy to "stop"
+ local name="$(random_string 10 | tr A-Z a-z)"
+
+ kubeFile="apiVersion: v1
+kind: Pod
+metadata:
+ name: $name-pod
+spec:
+ containers:
+ - command:
+ - \"true\"
+ image: $IMAGE
+ name: ctr
+ restartPolicy: OnFailure"
+
+ echo "$kubeFile" > $PODMAN_TMPDIR/test.yaml
+ run_podman play kube $PODMAN_TMPDIR/test.yaml
+ run_podman pod inspect $name-pod --format "{{.ExitPolicy}}"
+ is "$output" "stop" "custom exit policy"
+ _ensure_pod_state $name-pod Exited
+ run_podman pod rm $name-pod
}
# vim: filetype=sh