summary | refs | log | tree | commit | diff
path: root/libpod
diff options
context:
space:
mode:
author: Giuseppe Scrivano <gscrivan@redhat.com>, 2020-06-18 13:56:30 +0200
committer: Giuseppe Scrivano <gscrivan@redhat.com>, 2020-06-25 17:16:12 +0200
commit: 6ee5f740a4ecb70636b888e78b02065ee984636c (patch)
tree: fb01a86c33a53b78420426a6780ce938b25d068f /libpod
parent: 35cca198b31582b75304e3614f8942b83a38422e (diff)
download: podman-6ee5f740a4ecb70636b888e78b02065ee984636c.tar.gz
podman-6ee5f740a4ecb70636b888e78b02065ee984636c.tar.bz2
podman-6ee5f740a4ecb70636b888e78b02065ee984636c.zip
podman: add new cgroup mode split
When running under systemd, there is no need to create yet another cgroup for the container.

With conmon-delegated, the current cgroup will be split into two sub-cgroups:

- supervisor
- container

The supervisor cgroup will hold conmon and the podman process, while the container cgroup is used by the OCI runtime (using the cgroupfs backend).

Closes: https://github.com/containers/libpod/issues/6400

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Diffstat (limited to 'libpod')
-rw-r--r-- libpod/container.go | 22
-rw-r--r-- libpod/container_internal_linux.go | 12
-rw-r--r-- libpod/container_validate.go | 4
-rw-r--r-- libpod/oci_conmon.go | 7
-rw-r--r-- libpod/oci_conmon_linux.go | 10
-rw-r--r-- libpod/options.go | 2
-rw-r--r-- libpod/runtime_ctr.go | 4
7 files changed, 52 insertions, 9 deletions
diff --git a/libpod/container.go b/libpod/container.go
index c85249676..20688e3ee 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -17,6 +17,7 @@ import (
"github.com/containers/libpod/libpod/lock"
"github.com/containers/libpod/pkg/namespaces"
"github.com/containers/libpod/pkg/rootless"
+ "github.com/containers/libpod/utils"
"github.com/containers/storage"
"github.com/cri-o/ocicni/pkg/ocicni"
spec "github.com/opencontainers/runtime-spec/specs-go"
@@ -1089,10 +1090,25 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in
// CGroupPath returns a cgroups "path" for a given container.
func (c *Container) CGroupPath() (string, error) {
- switch c.runtime.config.Engine.CgroupManager {
- case config.CgroupfsCgroupsManager:
+ switch {
+ case c.config.CgroupsMode == cgroupSplit:
+ if c.config.CgroupParent != "" {
+ return "", errors.Errorf("cannot specify cgroup-parent with cgroup-mode %q", cgroupSplit)
+ }
+ cg, err := utils.GetCgroupProcess(c.state.ConmonPID)
+ if err != nil {
+ return "", err
+ }
+ // Use the conmon cgroup for two reasons: we validate the container
+ // delegation was correct, and the conmon cgroup doesn't change at runtime
+ // while we are not sure about the container that can create sub cgroups.
+ if !strings.HasSuffix(cg, "supervisor") {
+ return "", errors.Errorf("invalid cgroup for conmon %q", cg)
+ }
+ return strings.TrimSuffix(cg, "/supervisor") + "/container", nil
+ case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager:
return filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID())), nil
- case config.SystemdCgroupsManager:
+ case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
if rootless.IsRootless() {
uid := rootless.GetRootlessUID()
parts := strings.SplitN(c.config.CgroupParent, "/", 2)
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 5ee6726e0..2c78f6bd2 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -31,6 +31,7 @@ import (
"github.com/containers/libpod/pkg/resolvconf"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
+ "github.com/containers/libpod/utils"
"github.com/containers/storage/pkg/archive"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/user"
@@ -1505,8 +1506,17 @@ func (c *Container) getOCICgroupPath() (string, error) {
switch {
case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
return "", nil
+ case c.config.CgroupsMode == cgroupSplit:
+ if c.config.CgroupParent != "" {
+ return c.config.CgroupParent, nil
+ }
+ selfCgroup, err := utils.GetOwnCgroup()
+ if err != nil {
+ return "", err
+ }
+ return filepath.Join(selfCgroup, "container"), nil
case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
- // When runc is set to use Systemd as a cgroup manager, it
+ // When the OCI runtime is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
diff --git a/libpod/container_validate.go b/libpod/container_validate.go
index b7f0aadff..a53a1839d 100644
--- a/libpod/container_validate.go
+++ b/libpod/container_validate.go
@@ -34,6 +34,10 @@ func (c *Container) validate() error {
return errors.Wrapf(define.ErrInvalidArg, "cannot both create a network namespace and join another container's network namespace")
}
+ if c.config.CgroupsMode == cgroupSplit && c.config.CgroupParent != "" {
+ return errors.Wrapf(define.ErrInvalidArg, "cannot specify --cgroup-mode=split with a cgroup-parent")
+ }
+
// Not creating cgroups has a number of requirements, mostly related to
// the PID namespace.
if c.config.NoCgroups || c.config.CgroupsMode == "disabled" {
diff --git a/libpod/oci_conmon.go b/libpod/oci_conmon.go
new file mode 100644
index 000000000..74060b357
--- /dev/null
+++ b/libpod/oci_conmon.go
@@ -0,0 +1,7 @@
+package libpod
+
+const (
+ // cgroupSplit is the cgroup mode for reusing the current cgroup both
+ // for conmon and for the container payload.
+ cgroupSplit = "split"
+)
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index d8a89047e..26e5d70b0 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -881,6 +881,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
return err
}
+ if ctr.config.CgroupsMode == cgroupSplit {
+ if err := utils.MoveUnderCgroupSubtree("supervisor"); err != nil {
+ return err
+ }
+ }
+
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
if ctr.config.Spec.Process.Terminal {
@@ -1173,7 +1179,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"--socket-dir-path", r.socketsDir,
}
- if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups {
+ if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
args = append(args, "-s")
}
@@ -1275,7 +1281,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
// If cgroup creation is disabled - just signal.
switch ctr.config.CgroupsMode {
- case "disabled", "no-conmon":
+ case "disabled", "no-conmon", cgroupSplit:
mustCreateCgroup = false
}
diff --git a/libpod/options.go b/libpod/options.go
index 7a60870a0..28be1bc03 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -1049,7 +1049,7 @@ func WithCgroupsMode(mode string) CtrCreateOption {
case "disabled":
ctr.config.NoCgroups = true
ctr.config.CgroupsMode = mode
- case "enabled", "no-conmon":
+ case "enabled", "no-conmon", cgroupSplit:
ctr.config.CgroupsMode = mode
default:
return errors.Wrapf(define.ErrInvalidArg, "Invalid cgroup mode %q", mode)
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index f1752cbeb..dd6602acb 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -233,9 +233,9 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
- case rootless.IsRootless():
+ case rootless.IsRootless() && ctr.config.CgroupsMode != cgroupSplit:
ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent
- default:
+ case ctr.config.CgroupsMode != cgroupSplit:
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {