summaryrefslogtreecommitdiff
path: root/libpod/healthcheck_linux.go
blob: 1e03db5420e7582bcaece11d3a40992ed0603580 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package libpod

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strings"

	"github.com/containers/podman/v4/pkg/errorhandling"
	"github.com/containers/podman/v4/pkg/rootless"
	"github.com/containers/podman/v4/pkg/systemd"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

// createTimer systemd timers for healthchecks of a container
func (c *Container) createTimer() error {
	if c.disableHealthCheckSystemd() {
		return nil
	}
	podman, err := os.Executable()
	if err != nil {
		return errors.Wrapf(err, "failed to get path for podman for a health check timer")
	}

	var cmd = []string{}
	if rootless.IsRootless() {
		cmd = append(cmd, "--user")
	}
	path := os.Getenv("PATH")
	if path != "" {
		cmd = append(cmd, "--setenv=PATH="+path)
	}
	cmd = append(cmd, "--unit", c.ID(), fmt.Sprintf("--on-unit-inactive=%s", c.HealthCheckConfig().Interval.String()), "--timer-property=AccuracySec=1s", podman, "healthcheck", "run", c.ID())

	conn, err := systemd.ConnectToDBUS()
	if err != nil {
		return errors.Wrapf(err, "unable to get systemd connection to add healthchecks")
	}
	conn.Close()
	logrus.Debugf("creating systemd-transient files: %s %s", "systemd-run", cmd)
	systemdRun := exec.Command("systemd-run", cmd...)
	if output, err := systemdRun.CombinedOutput(); err != nil {
		return errors.Errorf("%s", output)
	}
	return nil
}

// Wait for a message on the channel.  Throw an error if the message is not "done".
func systemdOpSuccessful(c chan string) error {
	msg := <-c
	switch msg {
	case "done":
		return nil
	default:
		return fmt.Errorf("expected %q but received %q", "done", msg)
	}
}

// startTimer starts a systemd timer for the healthchecks
func (c *Container) startTimer() error {
	if c.disableHealthCheckSystemd() {
		return nil
	}
	conn, err := systemd.ConnectToDBUS()
	if err != nil {
		return errors.Wrapf(err, "unable to get systemd connection to start healthchecks")
	}
	defer conn.Close()

	startFile := fmt.Sprintf("%s.service", c.ID())
	startChan := make(chan string)
	if _, err := conn.StartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
		return err
	}
	if err := systemdOpSuccessful(startChan); err != nil {
		return fmt.Errorf("starting systemd health-check timer %q: %w", startFile, err)
	}

	return nil
}

// removeTransientFiles removes the systemd timer and unit files
// for the container
func (c *Container) removeTransientFiles(ctx context.Context) error {
	if c.disableHealthCheckSystemd() {
		return nil
	}
	conn, err := systemd.ConnectToDBUS()
	if err != nil {
		return errors.Wrapf(err, "unable to get systemd connection to remove healthchecks")
	}
	defer conn.Close()

	// Errors are returned at the very end. Let's make sure to stop and
	// clean up as much as possible.
	stopErrors := []error{}

	// Stop the timer before the service to make sure the timer does not
	// fire after the service is stopped.
	timerChan := make(chan string)
	timerFile := fmt.Sprintf("%s.timer", c.ID())
	if _, err := conn.StopUnitContext(ctx, timerFile, "fail", timerChan); err != nil {
		if !strings.HasSuffix(err.Error(), ".timer not loaded.") {
			stopErrors = append(stopErrors, fmt.Errorf("removing health-check timer %q: %w", timerFile, err))
		}
	} else if err := systemdOpSuccessful(timerChan); err != nil {
		stopErrors = append(stopErrors, fmt.Errorf("stopping systemd health-check timer %q: %w", timerFile, err))
	}

	// Reset the service before stopping it to make sure it's being removed
	// on stop.
	serviceChan := make(chan string)
	serviceFile := fmt.Sprintf("%s.service", c.ID())
	if err := conn.ResetFailedUnitContext(ctx, serviceFile); err != nil {
		logrus.Debugf("Failed to reset unit file: %q", err)
	}
	if _, err := conn.StopUnitContext(ctx, serviceFile, "fail", serviceChan); err != nil {
		if !strings.HasSuffix(err.Error(), ".service not loaded.") {
			stopErrors = append(stopErrors, fmt.Errorf("removing health-check service %q: %w", serviceFile, err))
		}
	} else if err := systemdOpSuccessful(serviceChan); err != nil {
		stopErrors = append(stopErrors, fmt.Errorf("stopping systemd health-check service %q: %w", serviceFile, err))
	}

	return errorhandling.JoinErrors(stopErrors)
}