diff options
author | Valentin Rothberg <vrothberg@redhat.com> | 2022-09-07 11:45:30 +0200 |
---|---|---|
committer | Valentin Rothberg <vrothberg@redhat.com> | 2022-09-09 13:02:05 +0200 |
commit | aad29e759c78b415a3b0393d7aba2bddbbc0cd3e (patch) | |
tree | 86597d8d17a5bab7ee0417166c448249f86954eb /libpod/healthcheck.go | |
parent | 6d8bafe57a65970ead17a83cb1983629b3a2aedb (diff) | |
download | podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.gz podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.tar.bz2 podman-aad29e759c78b415a3b0393d7aba2bddbbc0cd3e.zip |
health check: add on-failure actions
For systems that have extreme robustness requirements (edge devices,
particularly those in difficult-to-access environments), it is important
that applications continue running in all circumstances. When the
application fails, Podman must restart it automatically to provide this
robustness. Otherwise, these devices may require customer IT to
physically gain access to restart them, which can be prohibitively difficult.
Add a new `--on-failure` flag that supports four actions:
- **none**: Take no action.
- **kill**: Kill the container.
- **restart**: Restart the container. Do not combine the `restart`
action with the `--restart` flag. When running inside of
a systemd unit, consider using the `kill` or `stop`
action instead to make use of systemd's restart policy.
- **stop**: Stop the container.
To remain backwards compatible, **none** is the default action.
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
Diffstat (limited to 'libpod/healthcheck.go')
-rw-r--r-- | libpod/healthcheck.go | 41 |
1 files changed, 40 insertions, 1 deletions
diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 9b9d12b17..e835af9f0 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -2,6 +2,7 @@ package libpod import ( "bufio" + "context" "errors" "fmt" "io/ioutil" @@ -12,6 +13,7 @@ import ( "github.com/containers/podman/v4/libpod/define" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) const ( @@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) { if err != nil { return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err) } + hcStatus, err := checkHealthCheckCanBeRun(container) if err == nil { - return container.runHealthCheck() + hcStatus, err := container.runHealthCheck() + if err := container.processHealthCheckStatus(hcStatus); err != nil { + return hcStatus, err + } + return hcStatus, err } return hcStatus, err } @@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) { hcResult = define.HealthCheckFailure hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String()) } + hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog) if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil { return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err) } + return hcResult, hcErr } +func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error { + if status == define.HealthCheckSuccess { + return nil + } + + switch c.config.HealthCheckOnFailureAction { + case define.HealthCheckOnFailureActionNone: // Nothing to do + + case define.HealthCheckOnFailureActionKill: + if err := c.Kill(uint(unix.SIGKILL)); err != nil { + return fmt.Errorf("killing container health-check turned unhealthy: %w", err) + } + + case define.HealthCheckOnFailureActionRestart: + if err := c.RestartWithTimeout(context.Background(), 
c.config.StopTimeout); err != nil { + return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err) + } + + case define.HealthCheckOnFailureActionStop: + if err := c.Stop(); err != nil { + return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err) + } + + default: // Should not happen but better be safe than sorry + return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction) + } + + return nil +} + func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) { cstate, err := c.State() if err != nil { |