diff options
author | baude <bbaude@redhat.com> | 2019-03-06 12:12:35 -0600 |
---|---|---|
committer | baude <bbaude@redhat.com> | 2019-03-12 14:29:18 -0500 |
commit | 03716cf7f331fa5b5ffab23dcc863bedd66b5dfc (patch) | |
tree | 866b063270455a6cd0dc91d95e91cf1d3ca7b53d /libpod | |
parent | 7038cac53c4c93cd088fdbb097eee8d45494c3b8 (diff) | |
download | podman-03716cf7f331fa5b5ffab23dcc863bedd66b5dfc.tar.gz podman-03716cf7f331fa5b5ffab23dcc863bedd66b5dfc.tar.bz2 podman-03716cf7f331fa5b5ffab23dcc863bedd66b5dfc.zip |
healthcheck phase 2
integration of healthcheck into create and run as well as inspect.
healthcheck enhancements are as follows:
* add the following options to create|run so that non-docker images can
define healthchecks at the container level.
* --healthcheck-command
* --healthcheck-retries
* --healthcheck-interval
* --healthcheck-start-period
* podman create|run --healthcheck-command=none disables healthcheck as
described by an image.
* the healthcheck itself and the healthcheck "history" can now be
observed in podman inspect
* added the wiring for healthcheck history, which logs the health history
of the container, the current failing streak, and log entries
for the last five attempts, each of which has start and stop times, a
result, and a log of stderr/stdout truncated (if needed) to 500 characters.
The timings themselves are not implemented in this PR but will be in a
future enablement (i.e. the next one).
Signed-off-by: baude <bbaude@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container.go | 2 | ||||
-rw-r--r-- | libpod/container_inspect.go | 11 | ||||
-rw-r--r-- | libpod/healthcheck.go | 129 |
3 files changed, 131 insertions, 11 deletions
diff --git a/libpod/container.go b/libpod/container.go index 2381f53ad..ec4e31026 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -368,7 +368,7 @@ type ContainerConfig struct { Systemd bool `json:"systemd"` // HealtchCheckConfig has the health check command and related timings - HealthCheckConfig *manifest.Schema2HealthConfig + HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"` } // ContainerStatus returns a string representation for users diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index e2730c282..a76163692 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -127,6 +127,17 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data) IsInfra: c.IsInfra(), } + if c.config.HealthCheckConfig != nil { + // This container has a healthcheck defined in it; we need to add it's state + healthCheckState, err := c.GetHealthCheckLog() + if err != nil { + // An error here is not considered fatal; no health state will be displayed + logrus.Error(err) + } else { + data.State.Healthcheck = healthCheckState + } + } + // Copy port mappings into network settings if config.PortMappings != nil { data.NetworkSettings.Ports = config.PortMappings diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 81addb9a8..d2c0ea0fb 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -1,9 +1,15 @@ package libpod import ( + "bufio" + "bytes" + "io/ioutil" "os" + "path/filepath" "strings" + "time" + "github.com/containers/libpod/pkg/inspect" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -30,8 +36,29 @@ const ( HealthCheckInternalError HealthCheckStatus = iota // HealthCheckDefined means the healthcheck was found on the container HealthCheckDefined HealthCheckStatus = iota + + // MaxHealthCheckNumberLogs is the maximum number of attempts we keep + // in the healtcheck history file + MaxHealthCheckNumberLogs int = 5 + // MaxHealthCheckLogLength in characters + 
MaxHealthCheckLogLength = 500 + + // HealthCheckHealthy describes a healthy container + HealthCheckHealthy string = "healthy" + // HealthCheckUnhealthy describes an unhealthy container + HealthCheckUnhealthy string = "unhealthy" ) +// hcWriteCloser allows us to use bufio as a WriteCloser +type hcWriteCloser struct { + *bufio.Writer +} + +// Used to add a closer to bufio +func (hcwc hcWriteCloser) Close() error { + return nil +} + // HealthCheck verifies the state and validity of the healthcheck configuration // on the container and then executes the healthcheck func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) { @@ -48,33 +75,51 @@ func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) { // RunHealthCheck runs the health check as defined by the container func (c *Container) RunHealthCheck() (HealthCheckStatus, error) { - var newCommand []string + var ( + newCommand []string + returnCode int + capture bytes.Buffer + ) hcStatus, err := checkHealthCheckCanBeRun(c) if err != nil { return hcStatus, err } hcCommand := c.HealthCheckConfig().Test if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" { - newCommand = []string{"sh", "-c"} - newCommand = append(newCommand, hcCommand[1:]...) 
+ newCommand = []string{"sh", "-c", strings.Join(hcCommand[1:], " ")} } else { newCommand = hcCommand } - // TODO when history/logging is implemented for healthcheck, we need to change the output streams - // so we can capture i/o + captureBuffer := bufio.NewWriter(&capture) + hcw := hcWriteCloser{ + captureBuffer, + } streams := new(AttachStreams) - streams.OutputStream = os.Stdout - streams.ErrorStream = os.Stderr + streams.OutputStream = hcw + streams.ErrorStream = hcw streams.InputStream = os.Stdin streams.AttachOutput = true streams.AttachError = true streams.AttachInput = true logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID()) - if err := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0); err != nil { - return HealthCheckFailure, err + timeStart := time.Now() + hcResult := HealthCheckSuccess + hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0) + if hcErr != nil { + hcResult = HealthCheckFailure + returnCode = 1 + } + timeEnd := time.Now() + eventLog := capture.String() + if len(eventLog) > MaxHealthCheckLogLength { + eventLog = eventLog[:MaxHealthCheckLogLength] } - return HealthCheckSuccess, nil + hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog) + if err := c.updateHealthCheckLog(hcl); err != nil { + return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID()) + } + return hcResult, hcErr } func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) { @@ -90,3 +135,67 @@ func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) { } return HealthCheckDefined, nil } + +func newHealthCheckLog(start, end time.Time, exitCode int, log string) inspect.HealthCheckLog { + return inspect.HealthCheckLog{ + Start: start.Format(time.RFC3339Nano), + End: end.Format(time.RFC3339Nano), + ExitCode: exitCode, + Output: log, + } +} + +// UpdateHealthCheckLog parses the health check results and writes 
the log +func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error { + healthCheck, err := c.GetHealthCheckLog() + if err != nil { + return err + } + if hcl.ExitCode == 0 { + // set status to healthy, reset failing state to 0 + healthCheck.Status = HealthCheckHealthy + healthCheck.FailingStreak = 0 + } else { + if len(healthCheck.Status) < 1 { + healthCheck.Status = HealthCheckHealthy + } + // increment failing streak + healthCheck.FailingStreak = healthCheck.FailingStreak + 1 + // if failing streak > retries, then status to unhealthy + if int(healthCheck.FailingStreak) > c.HealthCheckConfig().Retries { + healthCheck.Status = HealthCheckUnhealthy + } + } + healthCheck.Log = append(healthCheck.Log, hcl) + if len(healthCheck.Log) > MaxHealthCheckNumberLogs { + healthCheck.Log = healthCheck.Log[1:] + } + newResults, err := json.Marshal(healthCheck) + if err != nil { + return errors.Wrapf(err, "unable to marshall healthchecks for writing") + } + return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700) +} + +// HealthCheckLogPath returns the path for where the health check log is +func (c *Container) healthCheckLogPath() string { + return filepath.Join(filepath.Dir(c.LogPath()), "healthcheck.log") +} + +// GetHealthCheckLog returns HealthCheck results by reading the container's +// health check log file. 
If the health check log file does not exist, then +// an empty healthcheck struct is returned +func (c *Container) GetHealthCheckLog() (inspect.HealthCheckResults, error) { + var healthCheck inspect.HealthCheckResults + if _, err := os.Stat(c.healthCheckLogPath()); os.IsNotExist(err) { + return healthCheck, nil + } + b, err := ioutil.ReadFile(c.healthCheckLogPath()) + if err != nil { + return healthCheck, errors.Wrapf(err, "failed to read health check log file %s", c.healthCheckLogPath()) + } + if err := json.Unmarshal(b, &healthCheck); err != nil { + return healthCheck, errors.Wrapf(err, "failed to unmarshal existing healthcheck results in %s", c.healthCheckLogPath()) + } + return healthCheck, nil +} |