author     Valentin Rothberg <vrothberg@redhat.com>   2022-04-13 16:21:21 +0200
committer  Valentin Rothberg <vrothberg@redhat.com>   2022-05-02 13:29:59 +0200
commit     4eff0c8cf284a6007122aec731e4d97059750166
tree       cdbfee34bd64bb295556667129a6a3c5db9b4612
parent     77d872ea38ec7b685ec99efe6688d1793c9fa256
pod: add exit policies
Add the notion of an "exit policy" to a pod. This policy controls the
behaviour when the last container of a pod exits. Initially, there are
two policies:
- "continue" : the pod continues running. This is the default policy
when creating a pod.
- "stop" : stop the pod when the last container exits. This is the
default behaviour for `play kube`.
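
For illustration, a quick usage sketch of the two policies; the commands
and expected states mirror the system tests added by this commit
(`alpine` is just a stand-in image):

```bash
# Default policy "continue": the pod keeps running (state "Degraded")
# after its only workload container has exited.
podID=$(podman pod create)
podman run --pod $podID alpine true
podman pod inspect $podID --format '{{.ExitPolicy}} / {{.State}}'
# => continue / Degraded

# Policy "stop": the pod, including its infra container, is stopped
# once the last workload container has exited.
podID=$(podman pod create --exit-policy stop)
podman run --pod $podID alpine true
podman pod inspect $podID --format '{{.ExitPolicy}} / {{.State}}'
# => stop / Exited   (the stop is queued asynchronously, so it may take a moment)
```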
In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime. The queue picks up work items and, in this case,
helps resolve deadlocks that would otherwise occur if we attempted to
stop a pod during container cleanup.
Note that the default restart policy of `play kube` is "Always". Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".
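
An end-to-end sketch of the resulting `play kube` flow (the file name
`pod.yaml` and pod name `test-pod` are illustrative; the flow is
modelled on the new system test, and the deferred stop is carried out
by the worker queue described above):

```bash
# pod.yaml describes a single-container pod whose spec sets
# "restartPolicy: OnFailure" (anything but "Always").
podman play kube pod.yaml

# play kube now defaults to the "stop" exit policy ...
podman pod inspect test-pod --format '{{.ExitPolicy}}'
# => stop

# ... so once the container exits successfully, the pod transitions
# to "Exited" instead of lingering in "Degraded" state (#13464).
podman pod inspect test-pod --format '{{.State}}'
# => Exited
```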
Fixes: #13464
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
 cmd/podman/common/completion.go             |  5
 cmd/podman/pods/create.go                   |  4
 docs/source/markdown/podman-pod-create.1.md | 11
 libpod/container_internal.go                | 42
 libpod/define/pod_inspect.go                |  2
 libpod/options.go                           | 18
 libpod/pod.go                               |  4
 libpod/pod_api.go                           | 51
 libpod/runtime.go                           | 14
 libpod/runtime_worker.go                    | 41
 pkg/domain/entities/pods.go                 |  2
 pkg/domain/infra/abi/play.go                |  6
 pkg/specgen/generate/pod_create.go          |  2
 pkg/specgen/podspecgen.go                   |  2
 test/system/200-pod.bats                    | 69
 15 files changed, 271 insertions(+), 2 deletions(-)
diff --git a/cmd/podman/common/completion.go b/cmd/podman/common/completion.go
index ddf922b2a..6149a4465 100644
--- a/cmd/podman/common/completion.go
+++ b/cmd/podman/common/completion.go
@@ -492,6 +492,11 @@ func AutocompleteImages(cmd *cobra.Command, args []string, toComplete string) ([
 	return getImages(cmd, toComplete)
 }
 
+// AutocompletePodExitPolicy - Autocomplete pod exit policy.
+func AutocompletePodExitPolicy(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+	return config.PodExitPolicies, cobra.ShellCompDirectiveNoFileComp
+}
+
 // AutocompleteCreateRun - Autocomplete only the fist argument as image and then do file completion.
 func AutocompleteCreateRun(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
 	if !validCurrentCmdLine(cmd, args, toComplete) {
diff --git a/cmd/podman/pods/create.go b/cmd/podman/pods/create.go
index 4623ade63..62f820790 100644
--- a/cmd/podman/pods/create.go
+++ b/cmd/podman/pods/create.go
@@ -72,6 +72,10 @@ func init() {
 	flags.StringVarP(&createOptions.Name, nameFlagName, "n", "", "Assign a name to the pod")
 	_ = createCommand.RegisterFlagCompletionFunc(nameFlagName, completion.AutocompleteNone)
 
+	policyFlag := "exit-policy"
+	flags.StringVarP(&createOptions.ExitPolicy, policyFlag, "", string(containerConfig.Engine.PodExitPolicy), "Behaviour when the last container exits")
+	_ = createCommand.RegisterFlagCompletionFunc(policyFlag, common.AutocompletePodExitPolicy)
+
 	infraImageFlagName := "infra-image"
 	var defInfraImage string
 	if !registry.IsRemote() {
diff --git a/docs/source/markdown/podman-pod-create.1.md b/docs/source/markdown/podman-pod-create.1.md
index 714909b98..fa431b611 100644
--- a/docs/source/markdown/podman-pod-create.1.md
+++ b/docs/source/markdown/podman-pod-create.1.md
@@ -75,6 +75,15 @@ Set custom DNS options in the /etc/resolv.conf file that will be shared between
 
 Set custom DNS search domains in the /etc/resolv.conf file that will be shared between all containers in the pod.
 
+#### **--exit-policy**=**continue** | *stop*
+
+Set the exit policy of the pod when the last container exits. Supported policies are:
+
+| Exit Policy | Description                                                                |
+| ----------- | -------------------------------------------------------------------------- |
+| *continue*  | The pod continues running when the last container exits. Used by default.  |
+| *stop*      | The pod is stopped when the last container exits. Used in `play kube`.     |
+
 #### **--gidmap**=*container_gid:host_gid:amount*
 
 GID map for the user namespace. Using this flag will run the container with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags.
@@ -554,7 +563,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10
 ```
 
 ## SEE ALSO
-**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **containers.conf(1)**
+**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-play-kube(1)](podman-play-kube.1.md)**, **containers.conf(1)**
 
 ## HISTORY
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 5c6719bdf..7494eb3ec 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
 		}
 	}
 
+	if err := c.stopPodIfNeeded(context.Background()); err != nil {
+		if lastError == nil {
+			lastError = err
+		} else {
+			logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
+		}
+	}
+
 	return lastError
 }
 
+// If the container is part of a pod where only the infra container remains
+// running, attempt to stop the pod.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+	if c.config.Pod == "" {
+		return nil
+	}
+
+	pod, err := c.runtime.state.Pod(c.config.Pod)
+	if err != nil {
+		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+	}
+
+	switch pod.config.ExitPolicy {
+	case config.PodExitPolicyContinue:
+		return nil
+
+	case config.PodExitPolicyStop:
+		// Use the runtime's work queue to stop the pod. This resolves
+		// a number of scenarios where we'd otherwise run into
+		// deadlocks. For instance, during `pod stop`, the pod has
+		// already been locked.
+		// The work queue is a simple means without having to worry about
+		// future changes that may introduce more deadlock scenarios.
+		c.runtime.queueWork(func() {
+			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+				if !errors.Is(err, define.ErrNoSuchPod) {
+					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+				}
+			}
+		})
+	}
+	return nil
+}
+
 // delete deletes the container and runs any configured poststop
 // hooks.
 func (c *Container) delete(ctx context.Context) error {
diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go
index e85a660a1..219ffade2 100644
--- a/libpod/define/pod_inspect.go
+++ b/libpod/define/pod_inspect.go
@@ -19,6 +19,8 @@ type InspectPodData struct {
 	// CreateCommand is the full command plus arguments of the process the
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
+	// ExitPolicy of the pod.
+	ExitPolicy string `json:"ExitPolicy,omitempty"`
 	// State represents the current state of the pod.
 	State string `json:"State"`
 	// Hostname is the hostname that the pod will set.
diff --git a/libpod/options.go b/libpod/options.go
index 98eb45e76..9b83cb76a 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
 	}
 }
 
+// WithPodExitPolicy sets the exit policy of the pod.
+func WithPodExitPolicy(policy string) PodCreateOption {
+	return func(pod *Pod) error {
+		if pod.valid {
+			return define.ErrPodFinalized
+		}
+
+		parsed, err := config.ParsePodExitPolicy(policy)
+		if err != nil {
+			return err
+		}
+
+		pod.config.ExitPolicy = parsed
+
+		return nil
+	}
+}
+
 // WithPodHostname sets the hostname of the pod.
 func WithPodHostname(hostname string) PodCreateOption {
 	return func(pod *Pod) error {
diff --git a/libpod/pod.go b/libpod/pod.go
index 237c42901..2211d5be7 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/containers/common/pkg/config"
 	"github.com/containers/podman/v4/libpod/define"
 	"github.com/containers/podman/v4/libpod/lock"
 	"github.com/opencontainers/runtime-spec/specs-go"
@@ -70,6 +71,9 @@ type PodConfig struct {
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
 
+	// The pod's exit policy.
+	ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
+
 	// ID of the pod's lock
 	LockID uint32 `json:"lockID"`
 }
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index ba30d878e..73b28822b 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -2,6 +2,7 @@ package libpod
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/containers/common/pkg/cgroups"
 	"github.com/containers/podman/v4/libpod/define"
@@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
+	return p.stopWithTimeout(ctx, cleanup, timeout)
+}
+
+func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
 	if !p.valid {
 		return nil, define.ErrPodRemoved
 	}
@@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	return nil, nil
 }
 
+// Stops the pod if only the infra containers remains running.
+func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	infraID := ""
+
+	if p.HasInfraContainer() {
+		infra, err := p.infraContainer()
+		if err != nil {
+			return err
+		}
+		infraID = infra.ID()
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return err
+	}
+
+	for _, ctr := range allCtrs {
+		if ctr.ID() == infraID || ctr.ID() == ignoreID {
+			continue
+		}
+
+		state, err := ctr.State()
+		if err != nil {
+			return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
+		}
+
+		switch state {
+		case define.ContainerStateExited,
+			define.ContainerStateRemoving,
+			define.ContainerStateStopping,
+			define.ContainerStateUnknown:
+			continue
+		default:
+			return nil
+		}
+	}
+
+	_, err = p.stopWithTimeout(ctx, true, -1)
+	return err
+}
+
 // Cleanup cleans up all containers within a pod that have stopped.
 // All containers are cleaned up independently. An error with one container will
 // not prevent other containers being cleaned up.
@@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
 		Namespace:     p.Namespace(),
 		Created:       p.CreatedTime(),
 		CreateCommand: p.config.CreateCommand,
+		ExitPolicy:    string(p.config.ExitPolicy),
 		State:         podState,
 		Hostname:      p.config.Hostname,
 		Labels:        p.Labels(),
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 6c2323d88..d5daa2f8a 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -86,6 +86,10 @@ type Runtime struct {
 	libimageEventsShutdown chan bool
 	lockManager            lock.Manager
 
+	// Worker
+	workerShutdown chan bool
+	workerChannel  chan func()
+
 	// syslog describes whenever logrus should log to the syslog as well.
 	// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
 	// This bool is just needed so that we can set it for netavark interface.
@@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
 		}
 	}
 
+	runtime.startWorker()
+
 	// Mark the runtime as valid - ready to be used, cannot be modified
 	// further
 	runtime.valid = true
@@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
 		return define.ErrRuntimeStopped
 	}
 
+	if r.workerShutdown != nil {
+		// Signal the worker routine to shutdown. The routine will
+		// process all pending work items and then read from the
+		// channel; we're blocked until all work items have been
+		// processed.
+		r.workerShutdown <- true
+	}
+
 	r.valid = false
 
 	// Shutdown all containers if --force is given
diff --git a/libpod/runtime_worker.go b/libpod/runtime_worker.go
new file mode 100644
index 000000000..ca44a27f7
--- /dev/null
+++ b/libpod/runtime_worker.go
@@ -0,0 +1,41 @@
+package libpod
+
+import (
+	"time"
+)
+
+func (r *Runtime) startWorker() {
+	if r.workerChannel == nil {
+		r.workerChannel = make(chan func(), 1)
+		r.workerShutdown = make(chan bool)
+	}
+	go func() {
+		for {
+			// Make sure to read all workers before
+			// checking if we're about to shutdown.
+			for len(r.workerChannel) > 0 {
+				w := <-r.workerChannel
+				w()
+			}
+
+			select {
+			// We'll read from the shutdown channel only when all
+			// items above have been processed.
+			//
+			// (*Runtime).Shutdown() will block until until the
+			// item is read.
+			case <-r.workerShutdown:
+				return
+
+			default:
+				time.Sleep(100 * time.Millisecond)
+			}
+		}
+	}()
+}
+
+func (r *Runtime) queueWork(f func()) {
+	go func() {
+		r.workerChannel <- f
+	}()
+}
diff --git a/pkg/domain/entities/pods.go b/pkg/domain/entities/pods.go
index 1e25e0872..a19edcbf0 100644
--- a/pkg/domain/entities/pods.go
+++ b/pkg/domain/entities/pods.go
@@ -122,6 +122,7 @@ type PodCreateOptions struct {
 	CreateCommand []string `json:"create_command,omitempty"`
 	Devices       []string `json:"devices,omitempty"`
 	DeviceReadBPs []string `json:"device_read_bps,omitempty"`
+	ExitPolicy    string   `json:"exit_policy,omitempty"`
 	Hostname      string   `json:"hostname,omitempty"`
 	Infra         bool     `json:"infra,omitempty"`
 	InfraImage    string   `json:"infra_image,omitempty"`
@@ -319,6 +320,7 @@ func ToPodSpecGen(s specgen.PodSpecGenerator, p *PodCreateOptions) (*specgen.Pod
 	}
 	s.Pid = out
 	s.Hostname = p.Hostname
+	s.ExitPolicy = p.ExitPolicy
 	s.Labels = p.Labels
 	s.Devices = p.Devices
 	s.SecurityOpt = p.SecurityOpt
diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go
index 0da07bab8..b3ded7db6 100644
--- a/pkg/domain/infra/abi/play.go
+++ b/pkg/domain/infra/abi/play.go
@@ -197,7 +197,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
 		return nil, errors.Errorf("pod does not have a name")
 	}
 
-	podOpt := entities.PodCreateOptions{Infra: true, Net: &entities.NetOptions{NoHosts: options.NoHosts}}
+	podOpt := entities.PodCreateOptions{
+		Infra:      true,
+		Net:        &entities.NetOptions{NoHosts: options.NoHosts},
+		ExitPolicy: string(config.PodExitPolicyStop),
+	}
 	podOpt, err = kube.ToPodOpt(ctx, podName, podOpt, podYAML)
 	if err != nil {
 		return nil, err
diff --git a/pkg/specgen/generate/pod_create.go b/pkg/specgen/generate/pod_create.go
index a3408b402..fce32d688 100644
--- a/pkg/specgen/generate/pod_create.go
+++ b/pkg/specgen/generate/pod_create.go
@@ -197,6 +197,8 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
 		options = append(options, libpod.WithPodHostname(p.Hostname))
 	}
 
+	options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy))
+
 	return options, nil
 }
 
diff --git a/pkg/specgen/podspecgen.go b/pkg/specgen/podspecgen.go
index 759caa0c0..1bb64448f 100644
--- a/pkg/specgen/podspecgen.go
+++ b/pkg/specgen/podspecgen.go
@@ -19,6 +19,8 @@ type PodBasicConfig struct {
 	// all containers in the pod as long as the UTS namespace is shared.
 	// Optional.
 	Hostname string `json:"hostname,omitempty"`
+	// ExitPolicy determines the pod's exit and stop behaviour.
+	ExitPolicy string `json:"exit_policy,omitempty"`
 	// Labels are key-value pairs that are used to add metadata to pods.
 	// Optional.
 	Labels map[string]string `json:"labels,omitempty"`
diff --git a/test/system/200-pod.bats b/test/system/200-pod.bats
index 64f95f723..39982848f 100644
--- a/test/system/200-pod.bats
+++ b/test/system/200-pod.bats
@@ -406,7 +406,76 @@ EOF
     run_podman pod inspect test --format {{.InfraConfig.HostNetwork}}
     is "$output" "true" "Host network sharing with only ipc should be true"
     run_podman pod rm test
+}
+
+# Wait for the pod (1st arg) to transition into the state (2nd arg)
+function _ensure_pod_state() {
+    for i in {0..5}; do
+        run_podman pod inspect $1 --format "{{.State}}"
+        if [[ $output == "$2" ]]; then
+            break
+        fi
+        sleep 0.5
+    done
+
+    is "$output" "$2" "unexpected pod state"
+}
+
+@test "pod exit policies" {
+    # Test setting exit policies
+    run_podman pod create
+    podID="$output"
+    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+    is "$output" "continue" "default exit policy"
+    run_podman pod rm $podID
+
+    run_podman pod create --exit-policy stop
+    podID="$output"
+    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+    is "$output" "stop" "custom exit policy"
+    run_podman pod rm $podID
+
+    run_podman 125 pod create --exit-policy invalid
+    is "$output" "Error: .*error running pod create option: invalid pod exit policy: \"invalid\"" "invalid exit policy"
+
+    # Test exit-policy behaviour
+    run_podman pod create --exit-policy continue
+    podID="$output"
+    run_podman run --pod $podID $IMAGE true
+    run_podman pod inspect $podID --format "{{.State}}"
+    _ensure_pod_state $podID Degraded
+    run_podman pod rm $podID
+
+    run_podman pod create --exit-policy stop
+    podID="$output"
+    run_podman run --pod $podID $IMAGE true
+    run_podman pod inspect $podID --format "{{.State}}"
+    _ensure_pod_state $podID Exited
+    run_podman pod rm $podID
+}
+
+@test "pod exit policies - play kube" {
+    # play-kube sets the exit policy to "stop"
+    local name="$(random_string 10 | tr A-Z a-z)"
+
+    kubeFile="apiVersion: v1
+kind: Pod
+metadata:
+  name: $name-pod
+spec:
+  containers:
+  - command:
+    - \"true\"
+    image: $IMAGE
+    name: ctr
+  restartPolicy: OnFailure"
+
+    echo "$kubeFile" > $PODMAN_TMPDIR/test.yaml
+    run_podman play kube $PODMAN_TMPDIR/test.yaml
+    run_podman pod inspect $name-pod --format "{{.ExitPolicy}}"
+    is "$output" "stop" "custom exit policy"
+    _ensure_pod_state $name-pod Exited
+    run_podman pod rm $name-pod
 }
 
 # vim: filetype=sh