From afd0818326aa37f03a3bc74f0269a06a403db16d Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 6 Sep 2019 15:30:30 +0200 Subject: rootless: automatically create a systemd scope when running in rootless mode and using systemd as cgroup manager create automatically a systemd scope when the user doesn't own the current cgroup. This solves a couple of issues: on cgroup v2 it is necessary that a process before it can moved to a different cgroup tree must be in a directory owned by the unprivileged user. This is not always true, e.g. when creating a session with su -l. Closes: https://github.com/containers/libpod/issues/3937 Also, for running systemd in a container it was before necessary to specify "systemd-run --scope --user podman ...", now this is done automatically as part of this PR. Signed-off-by: Giuseppe Scrivano --- cmd/podman/main_local.go | 49 ++++++++++++++++++++++++------ libpod/oci_internal_linux.go | 54 ++++++++++++++++++++------------- pkg/cgroups/cgroups_supported.go | 62 ++++++++++++++++++++++++++++++++++++++ pkg/cgroups/cgroups_unsupported.go | 6 ++++ 4 files changed, 140 insertions(+), 31 deletions(-) diff --git a/cmd/podman/main_local.go b/cmd/podman/main_local.go index 0feba609b..56874baad 100644 --- a/cmd/podman/main_local.go +++ b/cmd/podman/main_local.go @@ -5,6 +5,7 @@ package main import ( "context" + "fmt" "log/syslog" "os" "runtime/pprof" @@ -18,6 +19,7 @@ import ( "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/tracing" "github.com/containers/libpod/pkg/util" + "github.com/containers/libpod/utils" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -120,6 +122,10 @@ func profileOff(cmd *cobra.Command) error { } func setupRootless(cmd *cobra.Command, args []string) error { + if !rootless.IsRootless() { + return nil + } + matches, err := rootless.ConfigurationMatches() if err != nil { return err @@ -128,9 +134,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { logrus.Warningf("the current user namespace doesn't match the configuration in /etc/subuid or /etc/subgid") logrus.Warningf("you can use `%s system migrate` to recreate the user namespace and restart the containers", os.Args[0]) } - if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") { - return nil - } podmanCmd := cliconfig.PodmanCommand{ Command: cmd, @@ -139,6 +142,39 @@ func setupRootless(cmd *cobra.Command, args []string) error { Remote: remoteclient, } + runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) + if err != nil { + return errors.Wrapf(err, "could not get runtime") + } + defer runtime.DeferredShutdown(false) + + // do it only after podman has already re-execed and running with uid==0. + if os.Geteuid() == 0 { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + if err != nil { + return err + } + + if !ownsCgroup { + unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) + if err := utils.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { + conf, err := runtime.GetConfig() + if err != nil { + return err + } + if conf.CgroupManager == libpod.SystemdCgroupsManager { + logrus.Warnf("Failed to add podman to systemd sandbox cgroup: %v", err) + } else { + logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) + } + } + } + } + + if os.Geteuid() == 0 || cmd == _searchCommand || cmd == _versionCommand || cmd == _mountCommand || cmd == _migrateCommand || strings.HasPrefix(cmd.Use, "help") { + return nil + } + pausePidPath, err := util.GetRootlessPauseProcessPidPath() if err != nil { return errors.Wrapf(err, "could not get pause process pid file path") @@ -158,13 +194,6 @@ func setupRootless(cmd *cobra.Command, args []string) error { } // if there is no pid file, try to join existing containers, and create a pause process. - - runtime, err := libpodruntime.GetRuntime(getContext(), &podmanCmd) - if err != nil { - return errors.Wrapf(err, "could not get runtime") - } - defer runtime.DeferredShutdown(false) - ctrs, err := runtime.GetRunningContainers() if err != nil { logrus.Errorf(err.Error()) diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index f9e935d86..4df1e4010 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -21,6 +21,7 @@ import ( "github.com/containers/libpod/pkg/cgroups" "github.com/containers/libpod/pkg/errorhandling" "github.com/containers/libpod/pkg/lookup" + "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/containers/libpod/utils" "github.com/coreos/go-systemd/activation" @@ -359,35 +360,46 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error { // moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup // it then signals for conmon to start by sending nonse data down the start fd func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { + mustCreateCgroup := true // If cgroup creation is disabled - just signal. if ctr.config.NoCgroups { - return writeConmonPipeData(startFd) + mustCreateCgroup = false } - cgroupParent := ctr.CgroupParent() - if r.cgroupManager == SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] + if rootless.IsRootless() { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + if err != nil { + return err } + mustCreateCgroup = !ownsCgroup + } - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) - if err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + if mustCreateCgroup { + cgroupParent := ctr.CgroupParent() + if r.cgroupManager == SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } } else { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? - if err := control.AddPid(cmd.Process.Pid); err != nil { + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + if err != nil { logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + if err := control.AddPid(cmd.Process.Pid); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } } } } diff --git a/pkg/cgroups/cgroups_supported.go b/pkg/cgroups/cgroups_supported.go index fcd44dfc8..2a36777d4 100644 --- a/pkg/cgroups/cgroups_supported.go +++ b/pkg/cgroups/cgroups_supported.go @@ -3,8 +3,15 @@ package cgroups import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" "sync" "syscall" + + "github.com/pkg/errors" ) var ( @@ -25,3 +32,58 @@ func IsCgroup2UnifiedMode() (bool, error) { }) return isUnified, isUnifiedErr } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. +func UserOwnsCurrentSystemdCgroup() (bool, error) { + uid := os.Geteuid() + + cgroup2, err := IsCgroup2UnifiedMode() + if err != nil { + return false, err + } + + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return false, errors.Wrapf(err, "open file /proc/self/cgroup") + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, ":", 3) + + if len(parts) < 3 { + continue + } + + var cgroupPath string + + if cgroup2 { + cgroupPath = filepath.Join(cgroupRoot, parts[2]) + } else { + if parts[1] != "name=systemd" { + continue + } + cgroupPath = filepath.Join(cgroupRoot, "systemd", parts[2]) + } + + st, err := os.Stat(cgroupPath) + if err != nil { + return false, err + } + s := st.Sys() + if s == nil { + return false, fmt.Errorf("error stat cgroup path %s", cgroupPath) + } + + if int(s.(*syscall.Stat_t).Uid) != uid { + return false, nil + } + } + if err := scanner.Err(); err != nil { + return false, errors.Wrapf(err, "parsing file /proc/self/cgroup") + } + return true, nil +} diff --git a/pkg/cgroups/cgroups_unsupported.go b/pkg/cgroups/cgroups_unsupported.go index 9dc196e42..cd140fbf3 100644 --- a/pkg/cgroups/cgroups_unsupported.go +++ b/pkg/cgroups/cgroups_unsupported.go @@ -6,3 +6,9 @@ package cgroups func IsCgroup2UnifiedMode() (bool, error) { return false, nil } + +// UserOwnsCurrentSystemdCgroup checks whether the current EUID owns the +// current cgroup. +func UserOwnsCurrentSystemdCgroup() (bool, error) { + return false, nil +} -- cgit v1.2.3-54-g00ecf