| author | Valentin Rothberg <vrothberg@redhat.com> | 2022-08-30 11:17:25 +0200 |
| --- | --- | --- |
| committer | Valentin Rothberg <vrothberg@redhat.com> | 2022-09-06 08:56:55 +0200 |
| commit | 274d34a25a3ed7b69a6e4caec07e845157048c96 (patch) | |
| tree | 614541562404a3fac4b66cb4473a4d5aa298c0fb | |
| parent | bdfc4df1f20b18c785c67b79369b9011303889cc (diff) | |
kube play: support auto updates and rollbacks
Add auto-update support to `podman kube play`. Auto-update policies can
be configured for:
* the entire pod via the `io.containers.autoupdate` annotation
* a specific container via the `io.containers.autoupdate/$name` annotation
To make use of rollbacks, the `io.containers.sdnotify` policy should be
set to `container` such that the workload running _inside_ the container
can send the READY message via the NOTIFY_SOCKET once ready. For
further details on auto updates and rollbacks, please refer to the
article linked below [1].
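As a rough sketch, the workload's entrypoint could signal readiness like this (assuming `systemd-notify` is available in the image, as in this commit's system tests; the script itself is only illustrative):
```bash
#!/bin/sh
# Podman exports NOTIFY_SOCKET into the container when the sdnotify
# policy is set to "container"; systemd-notify sends READY=1 over it.
systemd-notify --ready
# Keep the workload running until the unit stops it.
trap 'echo "Received SIGTERM, finishing"; exit 0' TERM
while :; do sleep 1; done
```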
Since auto updates and rollbacks are based on Podman's systemd integration,
the k8s YAML must be executed via the `podman-kube@` systemd template.
For further details on how to run k8s YAML in systemd via Podman, please
refer to the article linked below [2].
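For instance, assuming the YAML below is stored at `/path/to/test.yaml` (a placeholder path), the template instance can be started like this, mirroring the new system test:
```bash
# Instantiate the shipped podman-kube@ template with the escaped YAML path.
service_name="podman-kube@$(systemd-escape /path/to/test.yaml).service"
systemctl start "$service_name"
systemctl is-active "$service_name"
```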
An example k8s YAML may look as follows:
```YAML
apiVersion: v1
kind: Pod
metadata:
  annotations:
    io.containers.autoupdate: "local"
    io.containers.autoupdate/b: "registry"
  labels:
    app: test
  name: test_pod
spec:
  containers:
  - command:
    - top
    image: alpine
    name: a
  - command:
    - top
    image: alpine
    name: b
```
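Once the pod runs under the template, the effective policies and any pending updates can be checked with a dry run before an actual update is performed (the format string is the one used by the new system tests):
```bash
# Report per-container policy and update status without changing anything.
podman auto-update --dry-run \
    --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
# Without --dry-run, a failed update (no READY message from the new
# container) causes the unit to be rolled back to the previous image.
podman auto-update
```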
[1] https://www.redhat.com/sysadmin/podman-auto-updates-rollbacks
[2] https://www.redhat.com/sysadmin/kubernetes-workloads-podman-systemd
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
-rw-r--r-- | docs/source/markdown/podman-auto-update.1.md.in | 12
-rw-r--r-- | pkg/autoupdate/autoupdate.go | 21
-rw-r--r-- | pkg/domain/infra/abi/play.go | 35
-rw-r--r-- | pkg/specgen/generate/kube/kube.go | 8
-rw-r--r-- | pkg/systemd/notifyproxy/notifyproxy.go | 65
-rw-r--r-- | pkg/systemd/notifyproxy/notifyproxy_test.go | 2
-rw-r--r-- | test/system/250-systemd.bats | 29
-rw-r--r-- | test/system/255-auto-update.bats | 97
-rw-r--r-- | test/system/helpers.systemd.bash | 14
9 files changed, 240 insertions, 43 deletions
diff --git a/docs/source/markdown/podman-auto-update.1.md.in b/docs/source/markdown/podman-auto-update.1.md.in index bc92d6165..b724f1157 100644 --- a/docs/source/markdown/podman-auto-update.1.md.in +++ b/docs/source/markdown/podman-auto-update.1.md.in @@ -29,6 +29,18 @@ This data is then being used in the auto-update sequence to instruct systemd (vi Note that **podman auto-update** relies on systemd. The systemd units are expected to be generated with **[podman-generate-systemd --new](podman-generate-systemd.1.md#--new)**, or similar units that create new containers in order to run the updated images. Systemd units that start and stop a container cannot run a new image. +### Auto Updates and Kubernetes YAML + +Podman supports auto updates for Kubernetes workloads. As mentioned above, `podman auto-update` requires the containers to be running systemd. Podman ships with a systemd template that can be instantiated with a Kubernetes YAML file, see podman-generate-systemd(1). + +To enable auto updates for containers running in a Kubernetes workload, set the following Podman-specific annotations in the YAML: + * `io.containers.autoupdate: "registry|local"` to apply the auto-update policy to all containers + * `io.containers.autoupdate/$container: "registry|local"` to apply the auto-update policy to `$container` only + * `io.containers.sdnotify: "conmon|container"` to apply the sdnotify policy to all containers + * `io.containers.sdnotify/$container: "conmon|container"` to apply the sdnotify policy to `$container` only + +By default, the autoupdate policy is set to "disabled", the sdnotify policy is set to "conmon". + ### Systemd Unit and Timer Podman ships with a `podman-auto-update.service` systemd unit. This unit is triggered daily at midnight by the `podman-auto-update.timer` systemd timer. The timer can be altered for custom time-based updates if desired. The unit can further be invoked by other systemd units (e.g., via the dependency tree) or manually via **systemctl start podman-auto-update.service**. diff --git a/pkg/autoupdate/autoupdate.go b/pkg/autoupdate/autoupdate.go index 17cea6719..9cf77d135 100644 --- a/pkg/autoupdate/autoupdate.go +++ b/pkg/autoupdate/autoupdate.go @@ -188,13 +188,8 @@ func AutoUpdate(ctx context.Context, runtime *libpod.Runtime, options entities.A // updateUnit auto updates the tasks in the specified systemd unit. func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) []error { var errors []error - // Sanity check: we'll support that in the future. - if len(tasks) != 1 { - errors = append(errors, fmt.Errorf("only 1 task per unit supported but unit %s has %d", unit, len(tasks))) - return errors - } - tasksUpdated := false + for _, task := range tasks { err := func() error { // Use an anonymous function to avoid spaghetti continue's updateAvailable, err := task.updateAvailable(ctx) @@ -255,6 +250,9 @@ func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) [] } if err := u.restartSystemdUnit(ctx, unit); err != nil { + for _, task := range tasks { + task.status = statusFailed + } err = fmt.Errorf("restarting unit %s during rollback: %w", unit, err) errors = append(errors, err) return errors @@ -283,7 +281,16 @@ func (t *task) report() *entities.AutoUpdateReport { func (t *task) updateAvailable(ctx context.Context) (bool, error) { switch t.policy { case PolicyRegistryImage: - return t.registryUpdateAvailable(ctx) + // Errors checking for updates only should not be fatal. 
+ // Especially on Edge systems, connection may be limited or + // there may just be a temporary downtime of the registry. + // But make sure to leave some breadcrumbs in the debug logs + // such that potential issues _can_ be analyzed if needed. + available, err := t.registryUpdateAvailable(ctx) + if err != nil { + logrus.Debugf("Error checking updates for image %s: %v (ignoring error)", t.rawImageName, err) + } + return available, nil case PolicyLocalImage: return t.localUpdateAvailable() default: diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index 6ea20a4f2..12786afcd 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -661,9 +661,10 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY opts = append(opts, libpod.WithSdNotifyMode(sdNotifyMode)) + var proxy *notifyproxy.NotifyProxy // Create a notify proxy for the container. if sdNotifyMode != "" && sdNotifyMode != define.SdNotifyModeIgnore { - proxy, err := notifyproxy.New("") + proxy, err = notifyproxy.New("") if err != nil { return nil, err } @@ -675,6 +676,9 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY if err != nil { return nil, err } + if proxy != nil { + proxy.AddContainer(ctr) + } containers = append(containers, ctr) } @@ -774,21 +778,26 @@ func (ic *ContainerEngine) getImageAndLabelInfo(ctx context.Context, cwd string, } // Handle kube annotations - for k, v := range annotations { - switch k { - // Auto update annotation without container name will apply to - // all containers within the pod - case autoupdate.Label, autoupdate.AuthfileLabel: - labels[k] = v - // Auto update annotation with container name will apply only - // to the specified container - case fmt.Sprintf("%s/%s", autoupdate.Label, container.Name), - fmt.Sprintf("%s/%s", autoupdate.AuthfileLabel, container.Name): - prefixAndCtr := strings.Split(k, "/") - labels[prefixAndCtr[0]] = v + setLabel := func(label string) { + var result string + ctrSpecific := fmt.Sprintf("%s/%s", label, container.Name) + for k, v := range annotations { + switch k { + case label: + result = v + case ctrSpecific: + labels[label] = v + return + } + } + if result != "" { + labels[label] = result } } + setLabel(autoupdate.Label) + setLabel(autoupdate.AuthfileLabel) + return pulledImage, labels, nil } diff --git a/pkg/specgen/generate/kube/kube.go b/pkg/specgen/generate/kube/kube.go index 5862d3f1c..9fd0adecf 100644 --- a/pkg/specgen/generate/kube/kube.go +++ b/pkg/specgen/generate/kube/kube.go @@ -7,6 +7,7 @@ import ( "fmt" "math" "net" + "os" "regexp" "runtime" "strconv" @@ -26,6 +27,7 @@ import ( "github.com/containers/podman/v4/pkg/k8s.io/apimachinery/pkg/api/resource" "github.com/containers/podman/v4/pkg/specgen" "github.com/containers/podman/v4/pkg/specgen/generate" + systemdDefine "github.com/containers/podman/v4/pkg/systemd/define" "github.com/containers/podman/v4/pkg/util" "github.com/docker/docker/pkg/system" "github.com/docker/go-units" @@ -445,6 +447,12 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener } } + // Make sure the container runs in a systemd unit which is + // stored as a label at container creation. 
+ if unit := os.Getenv(systemdDefine.EnvVariable); unit != "" { + s.Labels[systemdDefine.EnvVariable] = unit + } + return s, nil } diff --git a/pkg/systemd/notifyproxy/notifyproxy.go b/pkg/systemd/notifyproxy/notifyproxy.go index 9e6eb4cf0..1bfab9ca0 100644 --- a/pkg/systemd/notifyproxy/notifyproxy.go +++ b/pkg/systemd/notifyproxy/notifyproxy.go @@ -1,12 +1,17 @@ package notifyproxy import ( + "errors" + "fmt" + "io" "io/ioutil" "net" "os" "strings" "syscall" + "time" + "github.com/containers/podman/v4/libpod/define" "github.com/coreos/go-systemd/v22/daemon" "github.com/sirupsen/logrus" ) @@ -39,6 +44,7 @@ func SendMessage(socketPath string, message string) error { type NotifyProxy struct { connection *net.UnixConn socketPath string + container Container // optional } // New creates a NotifyProxy. The specified temp directory can be left empty. @@ -77,9 +83,26 @@ func (p *NotifyProxy) close() error { return p.connection.Close() } +// AddContainer associates a container with the proxy. +func (p *NotifyProxy) AddContainer(container Container) { + p.container = container +} + +// ErrNoReadyMessage is returned when we are waiting for the READY message of a +// container that is not in the running state anymore. +var ErrNoReadyMessage = errors.New("container stopped running before READY message was received") + +// Container avoids a circular dependency among this package and libpod. +type Container interface { + State() (define.ContainerStatus, error) + ID() string +} + // WaitAndClose waits until receiving the `READY` notify message and close the // listener. Note that the this function must only be executed inside a systemd // service which will kill the process after a given timeout. +// If the (optional) container stopped running before the `READY` is received, +// the waiting gets canceled and ErrNoReadyMessage is returned. func (p *NotifyProxy) WaitAndClose() error { defer func() { if err := p.close(); err != nil { @@ -87,16 +110,48 @@ func (p *NotifyProxy) WaitAndClose() error { } }() + const bufferSize = 1024 + sBuilder := strings.Builder{} for { - buf := make([]byte, 1024) - num, err := p.connection.Read(buf) - if err != nil { + // Set a read deadline of one second such that we achieve a + // non-blocking read and can check if the container has already + // stopped running; in that case no READY message will be send + // and we're done. 
+ if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil { return err } - for _, s := range strings.Split(string(buf[:num]), "\n") { - if s == daemon.SdNotifyReady { + + for { + buffer := make([]byte, bufferSize) + num, err := p.connection.Read(buffer) + if err != nil { + if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) { + return err + } + } + sBuilder.Write(buffer[:num]) + if num != bufferSize || buffer[num-1] == '\n' { + break + } + } + + for _, line := range strings.Split(sBuilder.String(), "\n") { + if line == daemon.SdNotifyReady { return nil } } + sBuilder.Reset() + + if p.container == nil { + continue + } + + state, err := p.container.State() + if err != nil { + return err + } + if state != define.ContainerStateRunning { + return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID()) + } } } diff --git a/pkg/systemd/notifyproxy/notifyproxy_test.go b/pkg/systemd/notifyproxy/notifyproxy_test.go index ce63fc9cd..066046cb8 100644 --- a/pkg/systemd/notifyproxy/notifyproxy_test.go +++ b/pkg/systemd/notifyproxy/notifyproxy_test.go @@ -41,7 +41,7 @@ func TestWaitAndClose(t *testing.T) { default: } - sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else") + sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else\n") done := func() bool { for i := 0; i < 10; i++ { select { diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats index 8f4471f91..dd1a0f05a 100644 --- a/test/system/250-systemd.bats +++ b/test/system/250-systemd.bats @@ -301,24 +301,16 @@ LISTEN_FDNAMES=listen_fdnames" | sort) } @test "podman-kube@.service template" { - # If running from a podman source directory, build and use the source - # version of the play-kube-@ unit file - unit_name="podman-kube@.service" - unit_file="contrib/systemd/system/${unit_name}" - if [[ -e ${unit_file}.in ]]; then - echo "# [Building & using $unit_name from source]" >&3 - # Force regenerating unit file (existing one may have /usr/bin path) - rm -f $unit_file - BINDIR=$(dirname $PODMAN) make $unit_file - cp $unit_file $UNIT_DIR/$unit_name - fi - + install_kube_template # Create the YAMl file yaml_source="$PODMAN_TMPDIR/test.yaml" cat >$yaml_source <<EOF apiVersion: v1 kind: Pod metadata: + annotations: + io.containers.autoupdate: "local" + io.containers.autoupdate/b: "registry" labels: app: test name: test_pod @@ -327,8 +319,11 @@ spec: - command: - top image: $IMAGE - name: test - resources: {} + name: a + - command: + - top + image: $IMAGE + name: b EOF # Dispatch the YAML file @@ -349,6 +344,12 @@ EOF run_podman 125 container rm $service_container is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)" + # Add a simple `auto-update --dry-run` test here to avoid too much redundancy + # with 255-auto-update.bats + run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}" + is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,local.*" "global auto-update policy gets applied" + is "$output" ".*$service_name,.* (test_pod-b),$IMAGE,false,registry.*" "container-specified auto-update policy gets applied" + # Kill the pod and make sure the service is not running. 
# The restart policy is set to "never" since there is no # design yet for propagating exit codes up to the service diff --git a/test/system/255-auto-update.bats b/test/system/255-auto-update.bats index c6f9600b6..1f350e87f 100644 --- a/test/system/255-auto-update.bats +++ b/test/system/255-auto-update.bats @@ -266,8 +266,6 @@ EOF # Generate a healthy image that will run correctly. run_podman build -t quay.io/libpod/$image -f $dockerfile1 - podman image inspect --format "{{.ID}}" $image - oldID="$output" generate_service $image local /runme --sdnotify=container noTag _wait_service_ready container-$cname.service @@ -277,7 +275,7 @@ EOF # Generate an unhealthy image that will fail. run_podman build -t quay.io/libpod/$image -f $dockerfile2 - podman image inspect --format "{{.ID}}" $image + run_podman image inspect --format "{{.ID}}" $image newID="$output" run_podman auto-update --dry-run --format "{{.Unit}},{{.Image}},{{.Updated}},{{.Policy}}" @@ -409,4 +407,97 @@ EOF _confirm_update $cname $ori_image } +@test "podman-kube@.service template with rollback" { + # sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just + # assume that we work only with crun, nothing else. + # [copied from 260-sdnotify.bats] + runtime=$(podman_runtime) + if [[ "$runtime" != "crun" ]]; then + skip "this test only works with crun, not $runtime" + fi + + install_kube_template + + dockerfile1=$PODMAN_TMPDIR/Dockerfile.1 + cat >$dockerfile1 <<EOF +FROM quay.io/libpod/fedora:31 +RUN echo -e "#!/bin/sh\n\ +printenv NOTIFY_SOCKET; echo READY; systemd-notify --ready;\n\ +trap 'echo Received SIGTERM, finishing; exit' SIGTERM; echo WAITING; while :; do sleep 0.1; done" \ +>> /runme +RUN chmod +x /runme +EOF + + dockerfile2=$PODMAN_TMPDIR/Dockerfile.2 + cat >$dockerfile2 <<EOF +FROM quay.io/libpod/fedora:31 +RUN echo -e "#!/bin/sh\n\ +exit 1" >> /runme +RUN chmod +x /runme +EOF + local_image=localhost/image:$(random_string 10) + + # Generate a healthy image that will run correctly. + run_podman build -t $local_image -f $dockerfile1 + run_podman image inspect --format "{{.ID}}" $local_image + oldID="$output" + + # Create the YAMl file + yaml_source="$PODMAN_TMPDIR/test.yaml" + cat >$yaml_source <<EOF +apiVersion: v1 +kind: Pod +metadata: + annotations: + io.containers.autoupdate: "registry" + io.containers.autoupdate/b: "local" + io.containers.sdnotify/b: "container" + labels: + app: test + name: test_pod +spec: + containers: + - command: + - top + image: $IMAGE + name: a + - command: + - /runme + image: $local_image + name: b +EOF + + # Dispatch the YAML file + service_name="podman-kube@$(systemd-escape $yaml_source).service" + systemctl start $service_name + systemctl is-active $service_name + + # Make sure the containers are properly configured + run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}" + is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied" + is "$output" ".*$service_name,.* (test_pod-b),$local_image,false,local.*" "container-specified auto-update policy gets applied" + + # Generate a broken image that will fail. 
+ run_podman build -t $local_image -f $dockerfile2 + run_podman image inspect --format "{{.ID}}" $local_image + newID="$output" + + assert "$oldID" != "$newID" "broken image really is a new one" + + # Make sure container b sees the new image + run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}" + is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied" + is "$output" ".*$service_name,.* (test_pod-b),$local_image,pending,local.*" "container b sees the new image" + + # Now update and check for the rollback + run_podman auto-update --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}" + is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,rolled back,registry.*" "container a was rolled back as the update of b failed" + is "$output" ".*$service_name,.* (test_pod-b),$local_image,rolled back,local.*" "container b was rolled back as its update has failed" + + # Clean up + systemctl stop $service_name + run_podman rmi -f $(pause_image) $local_image $newID $oldID + rm -f $UNIT_DIR/$unit_name +} + # vim: filetype=sh diff --git a/test/system/helpers.systemd.bash b/test/system/helpers.systemd.bash index d9abc087d..afbab6e08 100644 --- a/test/system/helpers.systemd.bash +++ b/test/system/helpers.systemd.bash @@ -32,3 +32,17 @@ journalctl() { systemd-run() { command systemd-run $_DASHUSER "$@"; } + +install_kube_template() { + # If running from a podman source directory, build and use the source + # version of the play-kube-@ unit file + unit_name="podman-kube@.service" + unit_file="contrib/systemd/system/${unit_name}" + if [[ -e ${unit_file}.in ]]; then + echo "# [Building & using $unit_name from source]" >&3 + # Force regenerating unit file (existing one may have /usr/bin path) + rm -f $unit_file + BINDIR=$(dirname $PODMAN) make $unit_file + cp $unit_file $UNIT_DIR/$unit_name + fi +} |