From 27ca091c08674b1a34058c9639d60455023ffa30 Mon Sep 17 00:00:00 2001
From: Daniel J Walsh <dwalsh@redhat.com>
Date: Tue, 21 Aug 2018 07:31:00 -0400
Subject: Add proper support for systemd inside of podman

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
---
 cmd/podman/common.go     |  4 ++++
 cmd/podman/create.go     | 15 ++++++++++++++-
 completions/bash/podman  |  1 +
 docs/podman-create.1.md  | 13 +++++++++++++
 docs/podman-run.1.md     | 13 +++++++++++++
 pkg/spec/createconfig.go |  7 ++++---
 pkg/spec/spec.go         | 43 +++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/cmd/podman/common.go b/cmd/podman/common.go
index 93b16f4e3..43a0fe061 100644
--- a/cmd/podman/common.go
+++ b/cmd/podman/common.go
@@ -383,6 +383,10 @@ var createFlags = []cli.Flag{
 		Name:  "sysctl",
 		Usage: "Sysctl options (default [])",
 	},
+	cli.BoolTFlag{
+		Name:  "systemd",
+		Usage: "Run container in systemd mode if the command executable is systemd or init",
+	},
 	cli.StringSliceFlag{
 		Name:  "tmpfs",
 		Usage: "Mount a temporary filesystem (`tmpfs`) into a container (default [])",
diff --git a/cmd/podman/create.go b/cmd/podman/create.go
index 209064eff..e7e349306 100644
--- a/cmd/podman/create.go
+++ b/cmd/podman/create.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 	"syscall"
@@ -509,7 +510,7 @@ func parseCreateOpts(ctx context.Context, c *cli.Context, runtime *libpod.Runtim
 
 	// STOP SIGNAL
 	stopSignal := syscall.SIGTERM
-	signalString := "SIGTERM"
+	signalString := ""
 	if data != nil {
 		signalString = data.ContainerConfig.StopSignal
 	}
@@ -648,6 +649,17 @@ func parseCreateOpts(ctx context.Context, c *cli.Context, runtime *libpod.Runtim
 		return nil, errors.Errorf("invalid image-volume type %q. Pick one of bind, tmpfs, or ignore", c.String("image-volume"))
 	}
 
+	var systemd bool
+	if c.BoolT("systemd") && ((filepath.Base(command[0]) == "init") || (filepath.Base(command[0]) == "systemd")) {
+		systemd = true
+		if signalString == "" {
+			stopSignal, err = signal.ParseSignal("RTMIN+3")
+			if err != nil {
+				return nil, errors.Wrapf(err, "error parsing systemd signal")
+			}
+		}
+	}
+
 	config := &cc.CreateConfig{
 		Runtime:           runtime,
 		Annotations:       annotations,
@@ -726,6 +738,7 @@ func parseCreateOpts(ctx context.Context, c *cli.Context, runtime *libpod.Runtim
 		StopSignal:  stopSignal,
 		StopTimeout: c.Uint("stop-timeout"),
 		Sysctl:      sysctl,
+		Systemd:     systemd,
 		Tmpfs:       c.StringSlice("tmpfs"),
 		Tty:         tty,
 		User:        user,
diff --git a/completions/bash/podman b/completions/bash/podman
index 083211fc1..91c08a78c 100644
--- a/completions/bash/podman
+++ b/completions/bash/podman
@@ -1494,6 +1494,7 @@ _podman_container_run() {
 		--subgidname
 		--subuidname
 		--sysctl
+		--systemd
 		--uidmap
 		--ulimit
 		--user -u
diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md
index f27a84b1d..8523d0c78 100644
--- a/docs/podman-create.1.md
+++ b/docs/podman-create.1.md
@@ -544,6 +544,19 @@ Network Namespace - current sysctls allowed:
 
 Note: if you use the --network=host option these sysctls will not be allowed.
 
+**--systemd**=*true*|*false*
+
+Run container in systemd mode. The default is *true*.
+
+If the command you are running inside of the container is systemd or init, podman
+will set up tmpfs mount points in the following directories:
+
+/run, /run/lock, /tmp, /sys/fs/cgroup/systemd, /var/log/journal
+
+It will also set the default stop signal to SIGRTMIN+3.
+
+This allows systemd to run in a confined container without any modifications.
+
 **--tmpfs**=[] Create a tmpfs mount
 
 Mount a temporary filesystem (`tmpfs`) mount into a container, for example:
diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md
index 7b23f356f..59b9d4b01 100644
--- a/docs/podman-run.1.md
+++ b/docs/podman-run.1.md
@@ -578,6 +578,19 @@ Network Namespace - current sysctls allowed:
 
 Note: if you use the `--network=host` option these sysctls will not be allowed.
 
+**--systemd**=*true*|*false*
+
+Run container in systemd mode. The default is *true*.
+
+If the command you are running inside of the container is systemd or init, podman
+will set up tmpfs mount points in the following directories:
+
+/run, /run/lock, /tmp, /sys/fs/cgroup/systemd, /var/log/journal
+
+It will also set the default stop signal to SIGRTMIN+3.
+
+This allows systemd to run in a confined container without any modifications.
+
 **--tmpfs**=[] Create a tmpfs mount
 
 Mount a temporary filesystem (`tmpfs`) mount into a container, for example:
diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go
index c7eaeb9f7..486281200 100644
--- a/pkg/spec/createconfig.go
+++ b/pkg/spec/createconfig.go
@@ -115,9 +115,10 @@ type CreateConfig struct {
 	Resources          CreateResourceConfig
 	Rm                 bool //rm
 	ShmDir             string
-	StopSignal         syscall.Signal       // stop-signal
-	StopTimeout        uint                 // stop-timeout
-	Sysctl             map[string]string    //sysctl
+	StopSignal         syscall.Signal    // stop-signal
+	StopTimeout        uint              // stop-timeout
+	Sysctl             map[string]string //sysctl
+	Systemd            bool
 	Tmpfs              []string             // tmpfs
 	Tty                bool                 //tty
 	UsernsMode         container.UsernsMode //userns
diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go
index 26b93f5fe..11bc880cb 100644
--- a/pkg/spec/spec.go
+++ b/pkg/spec/spec.go
@@ -4,6 +4,7 @@ import (
 	"os"
 	"strings"
 
+	"github.com/containers/libpod/libpod"
 	"github.com/containers/libpod/pkg/rootless"
 	"github.com/docker/docker/daemon/caps"
 	"github.com/docker/docker/pkg/mount"
@@ -221,6 +222,12 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint
 		}
 	}
 
+	if config.Systemd && (strings.HasSuffix(config.Command[0], "init") ||
+		strings.HasSuffix(config.Command[0], "systemd")) {
+		if err := setupSystemd(config, &g); err != nil {
+			return nil, errors.Wrap(err, "failed to setup systemd")
+		}
+	}
 	for _, i := range config.Tmpfs {
 		// Default options if nothing passed
 		options := []string{"rw", "private", "noexec", "nosuid", "nodev", "size=65536k"}
@@ -353,6 +360,42 @@ func blockAccessToKernelFilesystems(config *CreateConfig, g *generate.Generator)
 	}
 }
 
+// setupSystemd adds the tmpfs mounts systemd expects: /run, /run/lock and /tmp
+// on tmpfs, plus writable /sys/fs/cgroup/systemd and /var/log/journal. Any
+// destination libpod.MountExists finds in config's volume mounts is skipped.
+func setupSystemd(config *CreateConfig, g *generate.Generator) error {
+	mounts, err := config.GetVolumeMounts([]spec.Mount{})
+	if err != nil {
+		return err
+	}
+	options := []string{"rw", "private", "noexec", "nosuid", "nodev"}
+	for _, dest := range []string{"/run", "/run/lock", "/sys/fs/cgroup/systemd"} { // mounted with size=65536k
+		if libpod.MountExists(mounts, dest) {
+			continue
+		}
+		tmpfsMnt := spec.Mount{
+			Destination: dest,
+			Type:        "tmpfs",
+			Source:      "tmpfs",
+			Options:     append(options, "tmpcopyup", "size=65536k"),
+		}
+		g.AddMount(tmpfsMnt)
+	}
+	for _, dest := range []string{"/tmp", "/var/log/journal"} { // mounted without a size option
+		if libpod.MountExists(mounts, dest) {
+			continue
+		}
+		tmpfsMnt := spec.Mount{
+			Destination: dest,
+			Type:        "tmpfs",
+			Source:      "tmpfs",
+			Options:     append(options, "tmpcopyup"),
+		}
+		g.AddMount(tmpfsMnt)
+	}
+	return nil
+}
+
 func addPidNS(config *CreateConfig, g *generate.Generator) error {
 	pidMode := config.PidMode
 	if IsNS(string(pidMode)) {
-- 
cgit v1.2.3-54-g00ecf