From 6ee5f740a4ecb70636b888e78b02065ee984636c Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 18 Jun 2020 13:56:30 +0200 Subject: podman: add new cgroup mode split When running under systemd there is no need to create yet another cgroup for the container. With conmon-delegated the current cgroup will be split in two sub cgroups: - supervisor - container The supervisor cgroup will hold conmon and the podman process, while the container cgroup is used by the OCI runtime (using the cgroupfs backend). Closes: https://github.com/containers/libpod/issues/6400 Signed-off-by: Giuseppe Scrivano --- libpod/container.go | 22 +++++++++++++++++++--- libpod/container_internal_linux.go | 12 +++++++++++- libpod/container_validate.go | 4 ++++ libpod/oci_conmon.go | 7 +++++++ libpod/oci_conmon_linux.go | 10 ++++++++-- libpod/options.go | 2 +- libpod/runtime_ctr.go | 4 ++-- 7 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 libpod/oci_conmon.go (limited to 'libpod') diff --git a/libpod/container.go b/libpod/container.go index c85249676..20688e3ee 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -17,6 +17,7 @@ import ( "github.com/containers/libpod/libpod/lock" "github.com/containers/libpod/pkg/namespaces" "github.com/containers/libpod/pkg/rootless" + "github.com/containers/libpod/utils" "github.com/containers/storage" "github.com/cri-o/ocicni/pkg/ocicni" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -1089,10 +1090,25 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in // CGroupPath returns a cgroups "path" for a given container. func (c *Container) CGroupPath() (string, error) { - switch c.runtime.config.Engine.CgroupManager { - case config.CgroupfsCgroupsManager: + switch { + case c.config.CgroupsMode == cgroupSplit: + if c.config.CgroupParent != "" { + return "", errors.Errorf("cannot specify cgroup-parent with cgroup-mode %q", cgroupSplit) + } + cg, err := utils.GetCgroupProcess(c.state.ConmonPID) + if err != nil { + return "", err + } + // Use the conmon cgroup for two reasons: we validate the container + // delegation was correct, and the conmon cgroup doesn't change at runtime + // while we are not sure about the container that can create sub cgroups. + if !strings.HasSuffix(cg, "supervisor") { + return "", errors.Errorf("invalid cgroup for conmon %q", cg) + } + return strings.TrimSuffix(cg, "/supervisor") + "/container", nil + case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager: return filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID())), nil - case config.SystemdCgroupsManager: + case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager: if rootless.IsRootless() { uid := rootless.GetRootlessUID() parts := strings.SplitN(c.config.CgroupParent, "/", 2) diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 5ee6726e0..2c78f6bd2 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -31,6 +31,7 @@ import ( "github.com/containers/libpod/pkg/resolvconf" "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" + "github.com/containers/libpod/utils" "github.com/containers/storage/pkg/archive" securejoin "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/user" @@ -1505,8 +1506,17 @@ func (c *Container) getOCICgroupPath() (string, error) { switch { case (rootless.IsRootless() && !unified) || c.config.NoCgroups: return "", nil + case c.config.CgroupsMode == cgroupSplit: + if c.config.CgroupParent != "" { + return c.config.CgroupParent, nil + } + selfCgroup, err := utils.GetOwnCgroup() + if err != nil { + return "", err + } + return filepath.Join(selfCgroup, "container"), nil case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager: - // When runc is set to use Systemd as a cgroup manager, it + // When the OCI runtime is set to use Systemd as a cgroup manager, it // expects cgroups to be passed as follows: // slice:prefix:name systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID()) diff --git a/libpod/container_validate.go b/libpod/container_validate.go index b7f0aadff..a53a1839d 100644 --- a/libpod/container_validate.go +++ b/libpod/container_validate.go @@ -34,6 +34,10 @@ func (c *Container) validate() error { return errors.Wrapf(define.ErrInvalidArg, "cannot both create a network namespace and join another container's network namespace") } + if c.config.CgroupsMode == cgroupSplit && c.config.CgroupParent != "" { + return errors.Wrapf(define.ErrInvalidArg, "cannot specify --cgroup-mode=split with a cgroup-parent") + } + // Not creating cgroups has a number of requirements, mostly related to // the PID namespace. if c.config.NoCgroups || c.config.CgroupsMode == "disabled" { diff --git a/libpod/oci_conmon.go b/libpod/oci_conmon.go new file mode 100644 index 000000000..74060b357 --- /dev/null +++ b/libpod/oci_conmon.go @@ -0,0 +1,7 @@ +package libpod + +const ( + // cgroupSplit is the cgroup mode for reusing the current cgroup both + // for conmon and for the container payload. + cgroupSplit = "split" +) diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index d8a89047e..26e5d70b0 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -881,6 +881,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co return err } + if ctr.config.CgroupsMode == cgroupSplit { + if err := utils.MoveUnderCgroupSubtree("supervisor"); err != nil { + return err + } + } + args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) if ctr.config.Spec.Process.Terminal { @@ -1173,7 +1179,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p "--socket-dir-path", r.socketsDir, } - if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups { + if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit { args = append(args, "-s") } @@ -1275,7 +1281,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec // If cgroup creation is disabled - just signal. switch ctr.config.CgroupsMode { - case "disabled", "no-conmon": + case "disabled", "no-conmon", cgroupSplit: mustCreateCgroup = false } diff --git a/libpod/options.go b/libpod/options.go index 7a60870a0..28be1bc03 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -1049,7 +1049,7 @@ func WithCgroupsMode(mode string) CtrCreateOption { case "disabled": ctr.config.NoCgroups = true ctr.config.CgroupsMode = mode - case "enabled", "no-conmon": + case "enabled", "no-conmon", cgroupSplit: ctr.config.CgroupsMode = mode default: return errors.Wrapf(define.ErrInvalidArg, "Invalid cgroup mode %q", mode) diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index f1752cbeb..dd6602acb 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -233,9 +233,9 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) } ctr.config.CgroupParent = podCgroup - case rootless.IsRootless(): + case rootless.IsRootless() && ctr.config.CgroupsMode != cgroupSplit: ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent - default: + case ctr.config.CgroupsMode != cgroupSplit: ctr.config.CgroupParent = SystemdDefaultCgroupParent } } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { -- cgit v1.2.3-54-g00ecf