summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel J Walsh <dwalsh@redhat.com>2021-03-24 07:49:29 -0400
committerValentin Rothberg <rothberg@redhat.com>2021-08-20 11:12:05 +0200
commitc22f3e8b4e2593ca81d08924889f7e7251c83089 (patch)
tree757b724f94af33be8fee12658f7f55d81db92f4f
parent30b036c5d394bb523fa13074b1731ad4b6259693 (diff)
downloadpodman-c22f3e8b4e2593ca81d08924889f7e7251c83089.tar.gz
podman-c22f3e8b4e2593ca81d08924889f7e7251c83089.tar.bz2
podman-c22f3e8b4e2593ca81d08924889f7e7251c83089.zip
Implement SD-NOTIFY proxy in conmon
This leverages conmon's ability to proxy the SD-NOTIFY socket. This prevents locking caused by OCI runtime blocking, waiting for SD-NOTIFY messages, and instead passes the messages directly up to the host. NOTE: Also re-enable the auto-update tests which has been disabled due to flakiness. With this change, Podman properly integrates into systemd. Fixes: #7316 Signed-off-by: Joseph Gooch <mrwizard@dok.org> Signed-off-by: Daniel J Walsh <dwalsh@redhat.com> Signed-off-by: Valentin Rothberg <rothberg@redhat.com>
-rw-r--r--libpod/container.go4
-rw-r--r--libpod/container_internal_linux.go40
-rw-r--r--libpod/diff.go1
-rw-r--r--libpod/oci_conmon_exec_linux.go2
-rw-r--r--libpod/oci_conmon_linux.go49
-rw-r--r--test/system/255-auto-update.bats1
-rw-r--r--test/system/260-sdnotify.bats4
7 files changed, 79 insertions, 22 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 80fd35c09..c57250d72 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -126,6 +126,10 @@ type Container struct {
// This is true if a container is restored from a checkpoint.
restoreFromCheckpoint bool
+ // Used to query the NOTIFY_SOCKET once along with setting up
+ // mounts etc.
+ notifySocket string
+
slirp4netnsSubnet *net.IPNet
}
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index f21aebb09..8b73c82de 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -352,6 +352,10 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return nil, err
}
+ if err := c.mountNotifySocket(g); err != nil {
+ return nil, err
+ }
+
// Get host UID and GID based on the container process UID and GID.
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
if err != nil {
@@ -777,6 +781,41 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
return g.Config, nil
}
+// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
+// and if the sdnotify mode is set to container. It also sets c.notifySocket
+// to avoid redundantly looking up the env variable.
+func (c *Container) mountNotifySocket(g generate.Generator) error {
+ notify, ok := os.LookupEnv("NOTIFY_SOCKET")
+ if !ok {
+ return nil
+ }
+ c.notifySocket = notify
+
+ if c.config.SdNotifyMode != define.SdNotifyModeContainer {
+ return nil
+ }
+
+ notifyDir := filepath.Join(c.bundlePath(), "notify")
+ logrus.Debugf("checking notify %q dir", notifyDir)
+ if err := os.MkdirAll(notifyDir, 0755); err != nil {
+ if !os.IsExist(err) {
+ return errors.Wrapf(err, "unable to create notify %q dir", notifyDir)
+ }
+ }
+ if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
+ return errors.Wrapf(err, "relabel failed %q", notifyDir)
+ }
+ logrus.Debugf("add bindmount notify %q dir", notifyDir)
+ if _, ok := c.state.BindMounts["/run/notify"]; !ok {
+ c.state.BindMounts["/run/notify"] = notifyDir
+ }
+
+ // Set the container's notify socket to the proxy socket created by conmon
+ g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
+
+ return nil
+}
+
// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
@@ -1730,6 +1769,7 @@ rootless=%d
c.state.BindMounts[dest] = src
}
}
+
return nil
}
diff --git a/libpod/diff.go b/libpod/diff.go
index cdd5e79cb..6a50bef32 100644
--- a/libpod/diff.go
+++ b/libpod/diff.go
@@ -14,6 +14,7 @@ var initInodes = map[string]bool{
"/etc/resolv.conf": true,
"/proc": true,
"/run": true,
+ "/run/notify": true,
"/run/.containerenv": true,
"/run/secrets": true,
"/sys": true,
diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go
index 05a4e19b0..469bc7d86 100644
--- a/libpod/oci_conmon_exec_linux.go
+++ b/libpod/oci_conmon_exec_linux.go
@@ -462,7 +462,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
Setpgid: true,
}
- err = startCommandGivenSelinux(execCmd)
+ err = startCommandGivenSelinux(execCmd, c)
// We don't need children pipes on the parent side
errorhandling.CloseQuiet(childSyncPipe)
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 846d3815a..ff25be234 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -364,11 +364,6 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
- if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
- if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
- env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
- }
- }
if path, ok := os.LookupEnv("PATH"); ok {
env = append(env, fmt.Sprintf("PATH=%s", path))
}
@@ -1014,12 +1009,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
}
- if ctr.config.SdNotifyMode == define.SdNotifyModeIgnore {
- if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
- logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
- }
- }
-
pidfile := ctr.config.PidFile
if pidfile == "" {
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
@@ -1027,6 +1016,10 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
+ if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.notifySocket != "" {
+ args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.notifySocket))
+ }
+
if ctr.config.Spec.Process.Terminal {
args = append(args, "-t")
} else if ctr.config.Stdin {
@@ -1171,7 +1164,8 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
}
}
- err = startCommandGivenSelinux(cmd)
+ err = startCommandGivenSelinux(cmd, ctr)
+
// regardless of whether we errored or not, we no longer need the children pipes
childSyncPipe.Close()
childStartPipe.Close()
@@ -1203,7 +1197,13 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
// conmon not having a pid file is a valid state, so don't set it if we don't have it
logrus.Infof("Got Conmon PID as %d", conmonPID)
ctr.state.ConmonPID = conmonPID
- if ctr.config.SdNotifyMode != define.SdNotifyModeIgnore {
+
+ // Send the MAINPID via sdnotify if needed.
+ switch ctr.config.SdNotifyMode {
+ case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
+ // Nothing to do or conmon takes care of it already.
+
+ default:
if sent, err := daemon.SdNotify(false, fmt.Sprintf("MAINPID=%d", conmonPID)); err != nil {
logrus.Errorf("Error notifying systemd of Conmon PID: %v", err)
} else if sent {
@@ -1239,11 +1239,6 @@ func (r *ConmonOCIRuntime) configureConmonEnv(ctr *Container, runtimeDir string)
}
extraFiles := make([]*os.File, 0)
- if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
- if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
- env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
- }
- }
if !r.sdNotify {
if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok {
env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1")
@@ -1335,7 +1330,23 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
// startCommandGivenSelinux starts a container ensuring to set the labels of
// the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled
-func startCommandGivenSelinux(cmd *exec.Cmd) error {
+func startCommandGivenSelinux(cmd *exec.Cmd, ctr *Container) error {
+ // Make sure to unset the NOTIFY_SOCKET and reset if afterwards if needed.
+ switch ctr.config.SdNotifyMode {
+ case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
+ if ctr.notifySocket != "" {
+ if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
+ logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
+ }
+
+ defer func() {
+ if err := os.Setenv("NOTIFY_SOCKET", ctr.notifySocket); err != nil {
+ logrus.Errorf("Error resetting NOTIFY_SOCKET=%s", ctr.notifySocket)
+ }
+ }()
+ }
+ }
+
if !selinux.GetEnabled() {
return cmd.Start()
}
diff --git a/test/system/255-auto-update.bats b/test/system/255-auto-update.bats
index 8bb32b5b7..7766ca3f9 100644
--- a/test/system/255-auto-update.bats
+++ b/test/system/255-auto-update.bats
@@ -221,7 +221,6 @@ function _confirm_update() {
}
@test "podman auto-update - label io.containers.autoupdate=local with rollback" {
- skip "This test flakes way too often, see #11175"
# sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just
# assume that we work only with crun, nothing else.
# [copied from 260-sdnotify.bats]
diff --git a/test/system/260-sdnotify.bats b/test/system/260-sdnotify.bats
index acb30de47..b5d3f9b86 100644
--- a/test/system/260-sdnotify.bats
+++ b/test/system/260-sdnotify.bats
@@ -130,6 +130,8 @@ function _assert_mainpid_is_conmon() {
_stop_socat
}
+# These tests can fail in dev. environment because of SELinux.
+# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "sdnotify : container" {
# Sigh... we need to pull a humongous image because it has systemd-notify.
# (IMPORTANT: fedora:32 and above silently removed systemd-notify; this
@@ -150,7 +152,7 @@ function _assert_mainpid_is_conmon() {
wait_for_ready $cid
run_podman logs $cid
- is "${lines[0]}" "/.*/container\.sock/notify" "NOTIFY_SOCKET is passed to container"
+ is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container"
# With container, READY=1 isn't necessarily the last message received;
# just look for it anywhere in received messages