summaryrefslogtreecommitdiff
path: root/libpod/healthcheck.go
diff options
context:
space:
mode:
authorValentin Rothberg <vrothberg@redhat.com>2022-09-07 11:45:30 +0200
committerValentin Rothberg <vrothberg@redhat.com>2022-09-09 13:02:05 +0200
commitaad29e759c78b415a3b0393d7aba2bddbbc0cd3e (patch)
tree86597d8d17a5bab7ee0417166c448249f86954eb /libpod/healthcheck.go
parent6d8bafe57a65970ead17a83cb1983629b3a2aedb (diff)
downloadpodman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.gz
podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.bz2
podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.zip
health check: add on-failure actions
For systems that have extreme robustness requirements (edge devices, particularly those in difficult to access environments), it is important that applications continue running in all circumstances. When the application fails, Podman must restart it automatically to provide this robustness. Otherwise, these devices may require customer IT to physically gain access to restart, which can be prohibitively difficult. Add a new `--on-failure` flag that supports four actions: - **none**: Take no action. - **kill**: Kill the container. - **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy. - **stop**: Stop the container. To remain backwards compatible, **none** is the default action. Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
Diffstat (limited to 'libpod/healthcheck.go')
-rw-r--r--libpod/healthcheck.go41
1 files changed, 40 insertions, 1 deletions
diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go
index 9b9d12b17..e835af9f0 100644
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@@ -2,6 +2,7 @@ package libpod
import (
"bufio"
+ "context"
"errors"
"fmt"
"io/ioutil"
@@ -12,6 +13,7 @@ import (
"github.com/containers/podman/v4/libpod/define"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
)
const (
@@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
if err != nil {
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
}
+
hcStatus, err := checkHealthCheckCanBeRun(container)
if err == nil {
- return container.runHealthCheck()
+ hcStatus, err := container.runHealthCheck()
+ if err := container.processHealthCheckStatus(hcStatus); err != nil {
+ return hcStatus, err
+ }
+ return hcStatus, err
}
return hcStatus, err
}
@@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
hcResult = define.HealthCheckFailure
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
}
+
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}
+
return hcResult, hcErr
}
+func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
+ if status == define.HealthCheckSuccess {
+ return nil
+ }
+
+ switch c.config.HealthCheckOnFailureAction {
+ case define.HealthCheckOnFailureActionNone: // Nothing to do
+
+ case define.HealthCheckOnFailureActionKill:
+ if err := c.Kill(uint(unix.SIGKILL)); err != nil {
+ return fmt.Errorf("killing container health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionRestart:
+ if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
+ return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
+ }
+
+ case define.HealthCheckOnFailureActionStop:
+ if err := c.Stop(); err != nil {
+ return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err)
+ }
+
+ default: // Should not happen but better be safe than sorry
+ return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction)
+ }
+
+ return nil
+}
+
func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
cstate, err := c.State()
if err != nil {