summaryrefslogtreecommitdiff
path: root/test/system
diff options
context:
space:
mode:
authorValentin Rothberg <vrothberg@redhat.com>2022-09-07 11:45:30 +0200
committerValentin Rothberg <vrothberg@redhat.com>2022-09-09 13:02:05 +0200
commitaad29e759c78b415a3b0393d7aba2bddbbc0cd3e (patch)
tree86597d8d17a5bab7ee0417166c448249f86954eb /test/system
parent6d8bafe57a65970ead17a83cb1983629b3a2aedb (diff)
downloadpodman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.gz
podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.bz2
podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.zip
health check: add on-failure actions
For systems that have extreme robustness requirements (edge devices, particularly those in difficult to access environments), it is important that applications continue running in all circumstances. When the application fails, Podman must restart it automatically to provide this robustness. Otherwise, these devices may require customer IT to physically gain access to restart, which can be prohibitively difficult. Add a new `--on-failure` flag that supports four actions: - **none**: Take no action. - **kill**: Kill the container. - **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy. - **stop**: Stop the container. To remain backwards compatible, **none** is the default action. Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
Diffstat (limited to 'test/system')
-rw-r--r--test/system/220-healthcheck.bats96
-rw-r--r--test/system/250-systemd.bats51
-rw-r--r--test/system/helpers.bash54
3 files changed, 164 insertions, 37 deletions
diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats
index c502ad669..00ec1dd79 100644
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -20,44 +20,8 @@ function _check_health {
done
}
-
@test "podman healthcheck" {
- # Create an image with a healthcheck script; said script will
- # pass until the file /uh-oh gets created (by us, via exec)
- cat >${PODMAN_TMPDIR}/healthcheck <<EOF
-#!/bin/sh
-
-if test -e /uh-oh; then
- echo "Uh-oh on stdout!"
- echo "Uh-oh on stderr!" >&2
- exit 1
-else
- echo "Life is Good on stdout"
- echo "Life is Good on stderr" >&2
- exit 0
-fi
-EOF
-
- cat >${PODMAN_TMPDIR}/entrypoint <<EOF
-#!/bin/sh
-
-while :; do
- sleep 1
-done
-EOF
-
- cat >${PODMAN_TMPDIR}/Containerfile <<EOF
-FROM $IMAGE
-
-COPY healthcheck /healthcheck
-COPY entrypoint /entrypoint
-
-RUN chmod 755 /healthcheck /entrypoint
-
-CMD ["/entrypoint"]
-EOF
-
- run_podman build -t healthcheck_i ${PODMAN_TMPDIR}
+ _build_health_check_image healthcheck_i
# Run that healthcheck image.
run_podman run -d --name healthcheck_c \
@@ -66,6 +30,9 @@ EOF
--health-retries 3 \
healthcheck_i
+ run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
+ is "$output" "none" "default on-failure action is none"
+
# We can't check for 'starting' because a 1-second interval is too
# short; it could run healthcheck before we get to our first check.
#
@@ -109,4 +76,59 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
run_podman rmi healthcheck_i
}
+@test "podman healthcheck --health-on-failure" {
+ run_podman 125 create --health-on-failure=kill $IMAGE
+ is "$output" "Error: cannot set on-failure action to kill without a health check"
+
+ ctr="healthcheck_c"
+ img="healthcheck_i"
+
+ for policy in none kill restart stop;do
+ if [[ $policy == "none" ]];then
+ # Do not remove the /uh-oh file for `none` as we want to
+ # demonstrate that no action was taken
+ _build_health_check_image $img
+ else
+ _build_health_check_image $img cleanfile
+ fi
+
+ # Run that healthcheck image.
+ run_podman run -d --name $ctr \
+ --health-cmd /healthcheck \
+ --health-on-failure=$policy \
+ $img
+
+ # healthcheck should succeed
+ run_podman healthcheck run $ctr
+
+ # Now cause the healthcheck to fail
+ run_podman exec $ctr touch /uh-oh
+
+ # healthcheck should now fail, with exit status 1 and 'unhealthy' output
+ run_podman 1 healthcheck run $ctr
+ # FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
+ is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
+
+ run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
+ if [[ $policy == "restart" ]];then
+ # Container has been restarted and health check works again
+ is "$output" "running $policy" "container has been restarted"
+ run_podman healthcheck run $ctr
+ elif [[ $policy == "none" ]];then
+ # Container is still running and health check still broken
+ is "$output" "running $policy" "container continued running"
+ run_podman 1 healthcheck run $ctr
+ # FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
+ is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
+ else
+ # kill and stop yield the container into a non-running state
+ is "$output" ".* $policy" "container was stopped/killed"
+ assert "$output" != "running $policy"
+ fi
+
+ run_podman rm -f -t0 $ctr
+ run_podman rmi -f $img
+ done
+}
+
# vim: filetype=sh
diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats
index b449e49d8..3f6296b36 100644
--- a/test/system/250-systemd.bats
+++ b/test/system/250-systemd.bats
@@ -304,6 +304,57 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
run_podman network rm -f $netname
}
+@test "podman create --health-on-failure=kill" {
+ img="healthcheck_i"
+ _build_health_check_image $img
+
+ cname=$(random_string)
+ run_podman create --name $cname \
+ --health-cmd /healthcheck \
+ --health-on-failure=kill \
+ --restart=on-failure \
+ $img
+
+ # run container in systemd unit
+ service_setup
+
+ run_podman container inspect $cname --format "{{.ID}}"
+ oldID="$output"
+
+ run_podman healthcheck run $cname
+
+ # Now cause the healthcheck to fail
+ run_podman exec $cname touch /uh-oh
+
+ # healthcheck should now fail, with exit status 1 and 'unhealthy' output
+ run_podman 1 healthcheck run $cname
+ is "$output" "unhealthy" "output from 'podman healthcheck run'"
+
+ # What is expected to happen now:
+ # 1) The container gets killed as the health check has failed
+ # 2) Systemd restarts the service as the restart policy is set to "on-failure"
+ # 3) The /uh-oh file is gone and $cname has another ID
+
+ # Wait at most 10 seconds for the service to be restarted
+ local timeout=10
+ while [[ $timeout -gt 1 ]]; do
+ run_podman '?' container inspect $cname
+ if [[ $status == 0 ]]; then
+ if [[ "$output" != "$oldID" ]]; then
+ break
+ fi
+ fi
+ sleep 1
+ let timeout=$timeout-1
+ done
+
+ run_podman healthcheck run $cname
+
+ # stop systemd container
+ service_cleanup
+ run_podman rmi -f $img
+}
+
@test "podman-kube@.service template" {
install_kube_template
# Create the YAMl file
diff --git a/test/system/helpers.bash b/test/system/helpers.bash
index f2eb3016c..b0d4b526a 100644
--- a/test/system/helpers.bash
+++ b/test/system/helpers.bash
@@ -894,5 +894,59 @@ function _podman_commands() {
awk '/^Available Commands:/{ok=1;next}/^Options:/{ok=0}ok { print $1 }' <<<"$output" | grep .
}
+###############################
+# _build_health_check_image # Builds a container image with a configured health check
+###############################
+#
+# The health check will fail once the /uh-oh file exists.
+#
+# First argument is the desired name of the image
+# Second argument, if present and non-null, forces removal of the /uh-oh file once the check failed; this way the container can be restarted
+#
+
+function _build_health_check_image {
+ local imagename="$1"
+ local cleanfile=""
+
+ if [[ ! -z "$2" ]]; then
+ cleanfile="rm -f /uh-oh"
+ fi
+ # Create an image with a healthcheck script; said script will
+ # pass until the file /uh-oh gets created (by us, via exec)
+ cat >${PODMAN_TMPDIR}/healthcheck <<EOF
+#!/bin/sh
+
+if test -e /uh-oh; then
+ echo "Uh-oh on stdout!"
+ echo "Uh-oh on stderr!" >&2
+ ${cleanfile}
+ exit 1
+else
+ echo "Life is Good on stdout"
+ echo "Life is Good on stderr" >&2
+ exit 0
+fi
+EOF
+
+ cat >${PODMAN_TMPDIR}/entrypoint <<EOF
+#!/bin/sh
+
+trap 'echo Received SIGTERM, finishing; exit' SIGTERM; echo WAITING; while :; do sleep 0.1; done
+EOF
+
+ cat >${PODMAN_TMPDIR}/Containerfile <<EOF
+FROM $IMAGE
+
+COPY healthcheck /healthcheck
+COPY entrypoint /entrypoint
+
+RUN chmod 755 /healthcheck /entrypoint
+
+CMD ["/entrypoint"]
+EOF
+
+ run_podman build -t $imagename ${PODMAN_TMPDIR}
+}
+
# END miscellaneous tools
###############################################################################