From c8c2aab50d73ab767b8eb720b4d1abbc9bb36b62 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Mon, 12 Sep 2022 15:22:21 +0200 Subject: health checks: restart timers Restart the health-check timers instead of starting them. This will suppress annoying errors stating that an already running timer cannot be started anymore. Also make sure that the transient units/timers are stopped and removed when stopping a container. Fixes: #15691 Signed-off-by: Valentin Rothberg --- libpod/container_internal.go | 6 ++++++ libpod/healthcheck_linux.go | 2 +- test/system/220-healthcheck.bats | 6 ++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 227bb7f1f..d61812cb7 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1286,6 +1286,12 @@ func (c *Container) stop(timeout uint) error { c.lock.Unlock() } + if c.config.HealthCheckConfig != nil { + if err := c.removeTransientFiles(context.Background()); err != nil { + logrus.Error(err.Error()) + } + } + stopErr := c.ociRuntime.StopContainer(c, timeout, all) if !c.batched { diff --git a/libpod/healthcheck_linux.go b/libpod/healthcheck_linux.go index 3fb6dfb91..6948f976a 100644 --- a/libpod/healthcheck_linux.go +++ b/libpod/healthcheck_linux.go @@ -70,7 +70,7 @@ func (c *Container) startTimer() error { startFile := fmt.Sprintf("%s.service", c.ID()) startChan := make(chan string) - if _, err := conn.StartUnitContext(context.Background(), startFile, "fail", startChan); err != nil { + if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil { return err } if err := systemdOpSuccessful(startChan); err != nil { diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 00ec1dd79..a1b24d293 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -106,8 +106,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" # 
healthcheck should now fail, with exit status 1 and 'unhealthy' output run_podman 1 healthcheck run $ctr - # FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists - is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'" + is "$output" "unhealthy" "output from 'podman healthcheck run'" run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}" if [[ $policy == "restart" ]];then @@ -118,8 +117,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" # Container is still running and health check still broken is "$output" "running $policy" "container continued running" run_podman 1 healthcheck run $ctr - # FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists - is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'" + is "$output" "unhealthy" "output from 'podman healthcheck run'" else # kill and stop yield the container into a non-running state is "$output" ".* $policy" "container was stopped/killed" -- cgit v1.2.3-54-g00ecf