From b94a5e241095a55a6838970148d296e109b2afd1 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Fri, 6 Sep 2019 15:29:03 +0200
Subject: utils: use the user session for systemd

When running rootless, use the user session bus.  This is already
implemented in pkg/cgroups, so just re-use it.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 utils/utils_supported.go | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/utils/utils_supported.go b/utils/utils_supported.go
index 8b0ba4438..8bc232179 100644
--- a/utils/utils_supported.go
+++ b/utils/utils_supported.go
@@ -3,6 +3,8 @@
 package utils
 
 import (
+	"github.com/containers/libpod/pkg/cgroups"
+	"github.com/containers/libpod/pkg/rootless"
 	systemdDbus "github.com/coreos/go-systemd/dbus"
 	"github.com/godbus/dbus"
 )
@@ -10,9 +12,19 @@ import (
 // RunUnderSystemdScope adds the specified pid to a systemd scope
 func RunUnderSystemdScope(pid int, slice string, unitName string) error {
 	var properties []systemdDbus.Property
-	conn, err := systemdDbus.New()
-	if err != nil {
-		return err
+	var conn *systemdDbus.Conn
+	var err error
+
+	if rootless.IsRootless() {
+		conn, err = cgroups.GetUserConnection(rootless.GetRootlessUID())
+		if err != nil {
+			return err
+		}
+	} else {
+		conn, err = systemdDbus.New()
+		if err != nil {
+			return err
+		}
 	}
 	properties = append(properties, systemdDbus.PropSlice(slice))
 	properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
-- 
cgit v1.2.3-54-g00ecf


From afd0818326aa37f03a3bc74f0269a06a403db16d Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Fri, 6 Sep 2019 15:30:30 +0200
Subject: rootless: automatically create a systemd scope

When running in rootless mode with systemd as the cgroup manager,
automatically create a systemd scope if the user doesn't own the
current cgroup.

This solves a couple of issues:

On cgroup v2, a process must be in a directory owned by the
unprivileged user before it can be moved to a different cgroup tree.
This is not always the case, e.g. when creating a session with
`su -l`.

Closes: https://github.com/containers/libpod/issues/3937

Also, running systemd in a container previously required invoking
"systemd-run --scope --user podman ..." explicitly; this is now done
automatically as part of this PR.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 cmd/podman/main_local.go           | 49 ++++++++++++++++++++++++------
 libpod/oci_internal_linux.go       | 54 ++++++++++++++++++++-------------
 pkg/cgroups/cgroups_supported.go   | 62 ++++++++++++++++++++++++++++++++++++++
 pkg/cgroups/cgroups_unsupported.go |  6 ++++
 4 files changed, 140 insertions(+), 31 deletions(-)

diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go
index 0feba609b..56874baad 100644
--- a/cmd/podman/main_local.go
+++ b/cmd/podman/main_local.go
@@ -5,6 +5,7 @@ package main
 
 import (
 	"context"
+	"fmt"
 	"log/syslog"
 	"os"
 	"runtime/pprof"
@@ -18,6 +19,7 @@ import (
 	"github.com/containers/libpod/pkg/rootless"
 	"github.com/containers/libpod/pkg/tracing"
 	"github.com/containers/libpod/pkg/util"
+	"github.com/containers/libpod/utils"
 	"github.com/opentracing/opentracing-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
@@ -120,6 +122,10 @@ func profileOff(cmd *cobra.Command) error {
 }
 
 func setupRootless(cmd *cobra.Command, args []string) error {
+	if !rootless.IsRootless() {
+		return nil
+	}
+
 	matches, err := rootless.ConfigurationMatches()
 	if err != nil {
 		return err
@@ -128,9 +134,6 @@ func setupRootless(cmd *cobra.Command, args []string) error {
 		logrus.Warningf("the current user namespace doesn't match the configuration in /etc/subuid or /etc/subgid")
 		logrus.Warningf("you can use `%s system migrate` to recreate the user namespace and restart the containers", os.Args[0])
 	}
-	if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") {
-		return nil
-	}
 
 	podmanCmd := cliconfig.PodmanCommand{
 		Command:     cmd,
@@ -139,6 +142,39 @@ func setupRootless(cmd *cobra.Command, args []string) error {
 		Remote:      remoteclient,
 	}
 
+	runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd)
+	if err != nil {
+		return errors.Wrapf(err, "could not get runtime")
+	}
+	defer runtime.DeferredShutdown(false)
+
+	// do it only after podman has already re-execed and running with uid==0.
+	if os.Geteuid() == 0 {
+		ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
+		if err != nil {
+			return err
+		}
+
+		if !ownsCgroup {
+			unitName := fmt.Sprintf("podman-%d.scope", os.Getpid())
+			if err := utils.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil {
+				conf, err := runtime.GetConfig()
+				if err != nil {
+					return err
+				}
+				if conf.CgroupManager == libpod.SystemdCgroupsManager {
+					logrus.Warnf("Failed to add podman to systemd sandbox cgroup: %v", err)
+				} else {
+					logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err)
+				}
+			}
+		}
+	}
+
+	if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") {
+		return nil
+	}
+
 	pausePidPath, err := util.GetRootlessPauseProcessPidPath()
 	if err != nil {
 		return errors.Wrapf(err, "could not get pause process pid file path")
@@ -158,13 +194,6 @@ func setupRootless(cmd *cobra.Command, args []string) error {
 	}
 
 	// if there is no pid file, try to join existing containers, and create a pause process.
-
-	runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd)
-	if err != nil {
-		return errors.Wrapf(err, "could not get runtime")
-	}
-	defer runtime.DeferredShutdown(false)
-
 	ctrs, err := runtime.GetRunningContainers()
 	if err != nil {
 		logrus.Errorf(err.Error())
diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go
index f9e935d86..4df1e4010 100644
--- a/libpod/oci_internal_linux.go
+++ b/libpod/oci_internal_linux.go
@@ -21,6 +21,7 @@ import (
 	"github.com/containers/libpod/pkg/cgroups"
 	"github.com/containers/libpod/pkg/errorhandling"
 	"github.com/containers/libpod/pkg/lookup"
+	"github.com/containers/libpod/pkg/rootless"
 	"github.com/containers/libpod/pkg/util"
 	"github.com/containers/libpod/utils"
 	"github.com/coreos/go-systemd/activation"
@@ -359,35 +360,46 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error {
 // moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
 // it then signals for conmon to start by sending nonse data down the start fd
 func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error {
+	mustCreateCgroup := true
 	// If cgroup creation is disabled - just signal.
 	if ctr.config.NoCgroups {
-		return writeConmonPipeData(startFd)
+		mustCreateCgroup = false
 	}
 
-	cgroupParent := ctr.CgroupParent()
-	if r.cgroupManager == SystemdCgroupsManager {
-		unitName := createUnitName("libpod-conmon", ctr.ID())
-
-		realCgroupParent := cgroupParent
-		splitParent := strings.Split(cgroupParent, "/")
-		if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
-			realCgroupParent = splitParent[len(splitParent)-1]
+	if rootless.IsRootless() {
+		ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
+		if err != nil {
+			return err
 		}
+		mustCreateCgroup = !ownsCgroup
+	}
 
-		logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
-		if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
-			logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
-		}
-	} else {
-		cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
-		control, err := cgroups.New(cgroupPath, &spec.LinuxResources{})
-		if err != nil {
-			logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
+	if mustCreateCgroup {
+		cgroupParent := ctr.CgroupParent()
+		if r.cgroupManager == SystemdCgroupsManager {
+			unitName := createUnitName("libpod-conmon", ctr.ID())
+
+			realCgroupParent := cgroupParent
+			splitParent := strings.Split(cgroupParent, "/")
+			if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
+				realCgroupParent = splitParent[len(splitParent)-1]
+			}
+
+			logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
+			if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
+				logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
+			}
 		} else {
-			// we need to remove this defer and delete the cgroup once conmon exits
-			// maybe need a conmon monitor?
-			if err := control.AddPid(cmd.Process.Pid); err != nil {
+			cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
+			control, err := cgroups.New(cgroupPath, &spec.LinuxResources{})
+			if err != nil {
 				logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
+			} else {
+				// we need to remove this defer and delete the cgroup once conmon exits
+				// maybe need a conmon monitor?
+				if err := control.AddPid(cmd.Process.Pid); err != nil {
+					logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
+				}
 			}
 		}
 	}
diff --git a/pkg/cgroups/cgroups_supported.go b/pkg/cgroups/cgroups_supported.go
index fcd44dfc8..2a36777d4 100644
--- a/pkg/cgroups/cgroups_supported.go
+++ b/pkg/cgroups/cgroups_supported.go
@@ -3,8 +3,15 @@
 package cgroups
 
 import (
+	"bufio"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
 	"sync"
 	"syscall"
+
+	"github.com/pkg/errors"
 )
 
 var (
@@ -25,3 +32,58 @@ func IsCgroup2UnifiedMode() (bool, error) {
 	})
 	return isUnified, isUnifiedErr
 }
+
+// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the
+// cgroup directories listed for this process in /proc/self/cgroup.  With
+// cgroup v1 only the "name=systemd" hierarchy is inspected; with cgroup v2
+// every listed entry is checked.
+func UserOwnsCurrentSystemdCgroup() (bool, error) {
+	uid := os.Geteuid()
+
+	cgroup2, err := IsCgroup2UnifiedMode()
+	if err != nil {
+		return false, err
+	}
+
+	f, err := os.Open("/proc/self/cgroup")
+	if err != nil {
+		return false, errors.Wrapf(err, "open file /proc/self/cgroup")
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		// Each line has the form "hierarchy-ID:controller-list:cgroup-path".
+		parts := strings.SplitN(scanner.Text(), ":", 3)
+		if len(parts) < 3 {
+			continue
+		}
+
+		var cgroupPath string
+		if cgroup2 {
+			cgroupPath = filepath.Join(cgroupRoot, parts[2])
+		} else {
+			if parts[1] != "name=systemd" {
+				continue
+			}
+			cgroupPath = filepath.Join(cgroupRoot, "systemd", parts[2])
+		}
+
+		st, err := os.Stat(cgroupPath)
+		if err != nil {
+			return false, err
+		}
+		// The comma-ok assertion also covers a nil Sys() value and cannot panic.
+		stat, ok := st.Sys().(*syscall.Stat_t)
+		if !ok {
+			return false, fmt.Errorf("error stat cgroup path %s", cgroupPath)
+		}
+		if int(stat.Uid) != uid {
+			return false, nil
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return false, errors.Wrapf(err, "parsing file /proc/self/cgroup")
+	}
+	return true, nil
+}
diff --git a/pkg/cgroups/cgroups_unsupported.go b/pkg/cgroups/cgroups_unsupported.go
index 9dc196e42..cd140fbf3 100644
--- a/pkg/cgroups/cgroups_unsupported.go
+++ b/pkg/cgroups/cgroups_unsupported.go
@@ -6,3 +6,9 @@ package cgroups
 func IsCgroup2UnifiedMode() (bool, error) {
 	return false, nil
 }
+
+// UserOwnsCurrentSystemdCgroup is the stub defined in cgroups_unsupported.go;
+// it performs no check and always returns false with a nil error.
+func UserOwnsCurrentSystemdCgroup() (bool, error) {
+	return false, nil
+}
-- 
cgit v1.2.3-54-g00ecf


From 7e88bf7fd0207783e8feecb7ec7206df96897f4e Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Mon, 9 Sep 2019 11:12:45 +0200
Subject: rootless: run pause process in its own scope

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 cmd/podman/main_local.go | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go
index 56874baad..cad256615 100644
--- a/cmd/podman/main_local.go
+++ b/cmd/podman/main_local.go
@@ -6,9 +6,11 @@ package main
 import (
 	"context"
 	"fmt"
+	"io/ioutil"
 	"log/syslog"
 	"os"
 	"runtime/pprof"
+	"strconv"
 	"strings"
 	"syscall"
 
@@ -121,6 +123,24 @@ func profileOff(cmd *cobra.Command) error {
 	return nil
 }
 
+// movePauseProcessToScope reads the rootless pause process pid from its pid
+// file and adds that pid to the "podman-pause.scope" unit under user.slice.
+func movePauseProcessToScope() error {
+	pausePidPath, err := util.GetRootlessPauseProcessPidPath()
+	if err != nil {
+		return errors.Wrapf(err, "could not get pause process pid file path")
+	}
+	data, err := ioutil.ReadFile(pausePidPath)
+	if err != nil {
+		return errors.Wrapf(err, "cannot read pause pid file")
+	}
+	pid, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 0)
+	if err != nil {
+		return errors.Wrapf(err, "cannot parse pid file %s", pausePidPath)
+	}
+	return utils.RunUnderSystemdScope(int(pid), "user.slice", "podman-pause.scope")
+}
+
 func setupRootless(cmd *cobra.Command, args []string) error {
 	if !rootless.IsRootless() {
 		return nil
@@ -206,6 +226,17 @@ func setupRootless(cmd *cobra.Command, args []string) error {
 	}
 
 	became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
+	if err := movePauseProcessToScope(); err != nil {
+		conf, err := runtime.GetConfig()
+		if err != nil {
+			return err
+		}
+		if conf.CgroupManager == libpod.SystemdCgroupsManager {
+			logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err)
+		} else {
+			logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err)
+		}
+	}
 	if err != nil {
 		logrus.Errorf(err.Error())
 		os.Exit(1)
-- 
cgit v1.2.3-54-g00ecf