From c2284962c798a11f3c956ee237f27cfd3b0fcb21 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Mon, 15 Jul 2019 16:44:56 -0400 Subject: Add support for launching containers without CGroups This is mostly used with Systemd, which really wants to manage CGroups itself when managing containers via unit file. Signed-off-by: Matthew Heon --- cmd/podman/common.go | 4 ++ cmd/podman/shared/create.go | 1 + cmd/podman/shared/intermediate.go | 2 + contrib/cirrus/container_test.sh | 1 + contrib/cirrus/integration_test.sh | 2 + docs/podman-create.1.md | 6 ++ docs/podman-run.1.md | 6 ++ libpod.conf | 4 ++ libpod/container.go | 3 + libpod/container_inspect.go | 10 +++ libpod/container_internal.go | 8 +++ libpod/container_internal_linux.go | 2 +- libpod/container_top_linux.go | 4 ++ libpod/define/errors.go | 4 ++ libpod/oci.go | 30 +++++---- libpod/oci_internal_linux.go | 11 ++- libpod/oci_linux.go | 4 +- libpod/options.go | 29 ++++++++ libpod/runtime.go | 86 ++++++++++++----------- libpod/runtime_ctr.go | 135 +++++++++++++++++++++++-------------- libpod/stats.go | 4 ++ pkg/spec/createconfig.go | 4 ++ pkg/spec/spec.go | 12 ++++ test/e2e/run_test.go | 71 +++++++++++++++++++ 24 files changed, 334 insertions(+), 109 deletions(-) diff --git a/cmd/podman/common.go b/cmd/podman/common.go index 9724d18c6..0115e6ef1 100644 --- a/cmd/podman/common.go +++ b/cmd/podman/common.go @@ -134,6 +134,10 @@ func getCreateFlags(c *cliconfig.PodmanCommand) { "cgroupns", "host", "cgroup namespace to use", ) + createFlags.String( + "cgroups", "enabled", + "control container cgroup configuration", + ) createFlags.String( "cgroup-parent", "", "Optional parent cgroup for the container", diff --git a/cmd/podman/shared/create.go b/cmd/podman/shared/create.go index acbd53dba..fc8197721 100644 --- a/cmd/podman/shared/create.go +++ b/cmd/podman/shared/create.go @@ -695,6 +695,7 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod. 
CapDrop: c.StringSlice("cap-drop"), CidFile: c.String("cidfile"), Cgroupns: c.String("cgroupns"), + Cgroups: c.String("cgroups"), CgroupParent: c.String("cgroup-parent"), Command: command, UserCommand: userCommand, diff --git a/cmd/podman/shared/intermediate.go b/cmd/podman/shared/intermediate.go index 5aaac8687..cccdd1bea 100644 --- a/cmd/podman/shared/intermediate.go +++ b/cmd/podman/shared/intermediate.go @@ -370,6 +370,8 @@ func NewIntermediateLayer(c *cliconfig.PodmanCommand, remote bool) GenericCLIRes m["blkio-weight-device"] = newCRStringSlice(c, "blkio-weight-device") m["cap-add"] = newCRStringSlice(c, "cap-add") m["cap-drop"] = newCRStringSlice(c, "cap-drop") + m["cgroupns"] = newCRString(c, "cgroupns") + m["cgroups"] = newCRString(c, "cgroups") m["cgroup-parent"] = newCRString(c, "cgroup-parent") m["cidfile"] = newCRString(c, "cidfile") m["conmon-pidfile"] = newCRString(c, "conmon-pidfile") diff --git a/contrib/cirrus/container_test.sh b/contrib/cirrus/container_test.sh index 27baf0ad7..9d3f09f60 100644 --- a/contrib/cirrus/container_test.sh +++ b/contrib/cirrus/container_test.sh @@ -126,6 +126,7 @@ if [ $install -eq 1 ]; then make TAGS="${TAGS}" install.bin PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.man PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.cni PREFIX=/usr ETCDIR=/etc + make TAGS="${TAGS}" install.config PREFIX=/usr ETCDIR=/etc make TAGS="${TAGS}" install.systemd PREFIX=/usr ETCDIR=/etc fi diff --git a/contrib/cirrus/integration_test.sh b/contrib/cirrus/integration_test.sh index 552f2ba73..00c3b0ec3 100755 --- a/contrib/cirrus/integration_test.sh +++ b/contrib/cirrus/integration_test.sh @@ -45,6 +45,7 @@ case "$SPECIALMODE" in export OCI_RUNTIME=/usr/bin/crun make make install PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries make local${TESTSUITE} ;; @@ -57,6 +58,7 @@ case "$SPECIALMODE" in none) make make install PREFIX=/usr ETCDIR=/etc + make install.config PREFIX=/usr make test-binaries if [[ 
"$TEST_REMOTE_CLIENT" == "true" ]] then diff --git a/docs/podman-create.1.md b/docs/podman-create.1.md index 9924e7dff..3bd5ed44c 100644 --- a/docs/podman-create.1.md +++ b/docs/podman-create.1.md @@ -73,6 +73,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. +Valid values are *enabled* and *disabled*, which the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). + **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. diff --git a/docs/podman-run.1.md b/docs/podman-run.1.md index c642b50b6..cb5a96098 100644 --- a/docs/podman-run.1.md +++ b/docs/podman-run.1.md @@ -87,6 +87,12 @@ Set the cgroup namespace mode for the container, by default **host** is used. **private**: create a new cgroup namespace. **ns:**: join the namespace at the specified path. +**--cgroups**=*mode* + +Determines whether the container will create CGroups. +Valid values are *enabled* and *disabled*, which the default being *enabled*. +The *disabled* option will force the container to not create CGroups, and thus conflicts with CGroup options (**--cgroupns** and **--cgroup-parent**). + **--cgroup-parent**=*cgroup* Path to cgroups under which the cgroup for the container will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. 
diff --git a/libpod.conf b/libpod.conf index 81fece5d2..47f66ecc1 100644 --- a/libpod.conf +++ b/libpod.conf @@ -122,6 +122,10 @@ runtime = "runc" # libpod will use it for reporting nicer errors. runtime_supports_json = ["crun", "runc"] +# List of all the OCI runtimes that support --cgroup-manager=disabled to disable +# creation of CGroups for containers. +runtime_supports_nocgroups = ["crun"] + # Paths to look for a valid OCI runtime (runc, runv, etc) # If the paths are empty or no valid path was found, then the `$PATH` # environment variable will be used as the fallback. diff --git a/libpod/container.go b/libpod/container.go index 9c01d2adf..3d8e58375 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -356,6 +356,9 @@ type ContainerConfig struct { StopTimeout uint `json:"stopTimeout,omitempty"` // Time container was created CreatedTime time.Time `json:"createdTime"` + // NoCgroups indicates that the container will not create CGroups. It is + // incompatible with CgroupParent. + NoCgroups bool `json:"noCgroups,omitempty"` // Cgroup parent of the container CgroupParent string `json:"cgroupParent"` // LogPath log location diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 1b6dd829c..3c32a2f45 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -268,6 +268,11 @@ type InspectContainerHostConfig struct { // populated. // TODO. Cgroup string `json:"Cgroup"` + // Cgroups contains the container's CGroup mode. + // Allowed values are "default" (container is creating CGroups) and + // "disabled" (container is not creating CGroups). + // This is Libpod-specific and not included in `docker inspect`. + Cgroups string `json:"Cgroups"` // Links is unused, and provided purely for Docker compatibility. 
Links []string `json:"Links"` // OOMScoreAdj is an adjustment that will be made to the container's OOM @@ -958,6 +963,11 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named restartPolicy.Name = c.config.RestartPolicy restartPolicy.MaximumRetryCount = c.config.RestartRetries hostConfig.RestartPolicy = restartPolicy + if c.config.NoCgroups { + hostConfig.Cgroups = "disabled" + } else { + hostConfig.Cgroups = "default" + } hostConfig.Dns = make([]string, 0, len(c.config.DNSServer)) for _, dns := range c.config.DNSServer { diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ffc6c11ee..1ca20a05d 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1119,6 +1119,10 @@ func (c *Container) stop(timeout uint) error { // Internal, non-locking function to pause a container func (c *Container) pause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups") + } + if err := c.ociRuntime.pauseContainer(c); err != nil { return err } @@ -1132,6 +1136,10 @@ func (c *Container) pause() error { // Internal, non-locking function to unpause a container func (c *Container) unpause() error { + if c.config.NoCgroups { + return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups") + } + if err := c.ociRuntime.unpauseContainer(c); err != nil { return err } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index e96af8536..5dc53582f 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -375,7 +375,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { if err != nil { return nil, err } - if rootless.IsRootless() && !unified { + if (rootless.IsRootless() && !unified) || c.config.NoCgroups { g.SetLinuxCgroupsPath("") } else if c.runtime.config.CgroupManager == SystemdCgroupsManager { // When runc is set to use Systemd as a cgroup 
manager, it diff --git a/libpod/container_top_linux.go b/libpod/container_top_linux.go index ce471838d..5f4f28130 100644 --- a/libpod/container_top_linux.go +++ b/libpod/container_top_linux.go @@ -15,6 +15,10 @@ import ( // Top gathers statistics about the running processes in a container. It returns a // []string for output func (c *Container) Top(descriptors []string) ([]string, error) { + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot run top on container %s as it did not create a cgroup", c.ID()) + } + conStat, err := c.State() if err != nil { return nil, errors.Wrapf(err, "unable to look up state for %s", c.ID()) diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 9d532263c..004acd58f 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -61,6 +61,10 @@ var ( // the user. ErrDetach = utils.ErrDetach + // ErrNoCgroups indicates that the container does not have its own + // CGroup. + ErrNoCgroups = errors.New("this container does not have a cgroup") + // ErrRuntimeStopped indicates that the runtime has already been shut // down and no further operations can be performed on it ErrRuntimeStopped = errors.New("runtime has already been stopped") diff --git a/libpod/oci.go b/libpod/oci.go index 8a873ca5b..9879fa90e 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -48,19 +48,20 @@ const ( // OCIRuntime represents an OCI-compatible runtime that libpod can call into // to perform container operations type OCIRuntime struct { - name string - path string - conmonPath string - conmonEnv []string - cgroupManager string - tmpDir string - exitsDir string - socketsDir string - logSizeMax int64 - noPivot bool - reservePorts bool - supportsJSON bool - sdNotify bool + name string + path string + conmonPath string + conmonEnv []string + cgroupManager string + tmpDir string + exitsDir string + socketsDir string + logSizeMax int64 + noPivot bool + reservePorts bool + supportsJSON bool + supportsNoCgroups bool + 
sdNotify bool } // ociError is used to parse the OCI runtime JSON log. It is not part of the @@ -73,7 +74,7 @@ type ociError struct { // Make a new OCI runtime with provided options. // The first path that points to a valid executable will be used. -func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON bool) (*OCIRuntime, error) { +func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON, supportsNoCgroups bool) (*OCIRuntime, error) { if name == "" { return nil, errors.Wrapf(define.ErrInvalidArg, "the OCI runtime must be provided a non-empty name") } @@ -93,6 +94,7 @@ func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *R // TODO: probe OCI runtime for feature and enable automatically if // available. runtime.supportsJSON = supportsJSON + runtime.supportsNoCgroups = supportsNoCgroups foundPath := false for _, path := range paths { diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go index 48b7370e0..f9e935d86 100644 --- a/libpod/oci_internal_linux.go +++ b/libpod/oci_internal_linux.go @@ -263,7 +263,7 @@ func (r *OCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath string) []string { // set the conmon API version to be able to use the correct sync struct keys args := []string{"--api-version", "1"} - if r.cgroupManager == SystemdCgroupsManager { + if r.cgroupManager == SystemdCgroupsManager && !ctr.config.NoCgroups { args = append(args, "-s") } args = append(args, "-c", ctr.ID()) @@ -307,6 +307,10 @@ func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath if ociLogPath != "" { args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) } + if ctr.config.NoCgroups { + logrus.Debugf("Running with no 
CGroups") + args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") + } return args } @@ -355,6 +359,11 @@ func startCommandGivenSelinux(cmd *exec.Cmd) error { // moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup // it then signals for conmon to start by sending nonse data down the start fd func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { + // If cgroup creation is disabled - just signal. + if ctr.config.NoCgroups { + return writeConmonPipeData(startFd) + } + cgroupParent := ctr.CgroupParent() if r.cgroupManager == SystemdCgroupsManager { unitName := createUnitName("libpod-conmon", ctr.ID()) diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 6dba1260c..091b6d155 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -402,10 +402,12 @@ func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { } var args []string - if rootless.IsRootless() { + if rootless.IsRootless() || ctr.config.NoCgroups { // we don't use --all for rootless containers as the OCI runtime might use // the cgroups to determine the PIDs, but for rootless containers there is // not any. + // Same logic for NoCgroups - we can't use cgroups as the user + // explicitly requested none be created. 
args = []string{"kill", ctr.ID(), "KILL"} } else { args = []string{"kill", "--all", ctr.ID(), "KILL"} diff --git a/libpod/options.go b/libpod/options.go index 6df1ca5be..d28cb3d8c 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -847,6 +847,10 @@ func WithPIDNSFrom(nsCtr *Container) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "container has joined pod %s and dependency container %s is not a member of the pod", ctr.config.Pod, nsCtr.ID()) } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "container has disabled creation of CGroups, which is incompatible with sharing a PID namespace") + } + ctr.config.PIDNsCtr = nsCtr.ID() return nil @@ -1056,6 +1060,27 @@ func WithLogPath(path string) CtrCreateOption { } } +// WithNoCgroups disables the creation of CGroups for the new container. +func WithNoCgroups() CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + + if ctr.config.CgroupParent != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups conflicts with CgroupParent") + } + + if ctr.config.PIDNsCtr != "" { + return errors.Wrapf(define.ErrInvalidArg, "NoCgroups requires a private PID namespace and cannot be used when PID namespace is shared with another container") + } + + ctr.config.NoCgroups = true + + return nil + } +} + // WithCgroupParent sets the Cgroup Parent of the new container. 
func WithCgroupParent(parent string) CtrCreateOption { return func(ctr *Container) error { @@ -1067,6 +1092,10 @@ func WithCgroupParent(parent string) CtrCreateOption { return errors.Wrapf(define.ErrInvalidArg, "cgroup parent cannot be empty") } + if ctr.config.NoCgroups { + return errors.Wrapf(define.ErrInvalidArg, "CgroupParent conflicts with NoCgroups") + } + ctr.config.CgroupParent = parent return nil diff --git a/libpod/runtime.go b/libpod/runtime.go index 323a46266..80b58654e 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -157,8 +157,12 @@ type RuntimeConfig struct { OCIRuntime string `toml:"runtime"` // OCIRuntimes are the set of configured OCI runtimes (default is runc) OCIRuntimes map[string][]string `toml:"runtimes"` - // RuntimeSupportsJSON is the list of the OCI runtimes that support --format=json + // RuntimeSupportsJSON is the list of the OCI runtimes that support + // --format=json. RuntimeSupportsJSON []string `toml:"runtime_supports_json"` + // RuntimeSupportsNoCgroups is a list of OCI runtimes that support + // running containers without CGroups. + RuntimeSupportsNoCgroups []string `toml:"runtime_supports_nocgroups"` // RuntimePath is the path to OCI runtime binary for launching // containers. // The first path pointing to a valid file will be used @@ -259,21 +263,22 @@ type RuntimeConfig struct { // If they were not, we may override them with information from the database, // if it exists and differs from what is present in the system already. 
type runtimeConfiguredFrom struct { - storageGraphDriverSet bool - storageGraphRootSet bool - storageRunRootSet bool - libpodStaticDirSet bool - libpodTmpDirSet bool - volPathSet bool - conmonPath bool - conmonEnvVars bool - initPath bool - ociRuntimes bool - runtimePath bool - cniPluginDir bool - noPivotRoot bool - runtimeSupportsJSON bool - ociRuntime bool + storageGraphDriverSet bool + storageGraphRootSet bool + storageRunRootSet bool + libpodStaticDirSet bool + libpodTmpDirSet bool + volPathSet bool + conmonPath bool + conmonEnvVars bool + initPath bool + ociRuntimes bool + runtimePath bool + cniPluginDir bool + noPivotRoot bool + runtimeSupportsJSON bool + runtimeSupportsNoCgroups bool + ociRuntime bool } func defaultRuntimeConfig() (RuntimeConfig, error) { @@ -603,6 +608,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. if tmpConfig.RuntimeSupportsJSON != nil { runtime.configuredFrom.runtimeSupportsJSON = true } + if tmpConfig.RuntimeSupportsNoCgroups != nil { + runtime.configuredFrom.runtimeSupportsNoCgroups = true + } if tmpConfig.OCIRuntime != "" { runtime.configuredFrom.ociRuntime = true } @@ -649,6 +657,9 @@ func newRuntimeFromConfig(ctx context.Context, userConfigPath string, options .. 
if !runtime.configuredFrom.runtimeSupportsJSON { runtime.config.RuntimeSupportsJSON = tmpConfig.RuntimeSupportsJSON } + if !runtime.configuredFrom.runtimeSupportsNoCgroups { + runtime.config.RuntimeSupportsNoCgroups = tmpConfig.RuntimeSupportsNoCgroups + } if !runtime.configuredFrom.ociRuntime { runtime.config.OCIRuntime = tmpConfig.OCIRuntime } @@ -1009,6 +1020,16 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { } } + // Make lookup tables for runtime support + supportsJSON := make(map[string]bool) + supportsNoCgroups := make(map[string]bool) + for _, r := range runtime.config.RuntimeSupportsJSON { + supportsJSON[r] = true + } + for _, r := range runtime.config.RuntimeSupportsNoCgroups { + supportsNoCgroups[r] = true + } + // Get us at least one working OCI runtime. runtime.ociRuntimes = make(map[string]*OCIRuntime) @@ -1026,15 +1047,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { name := filepath.Base(runtime.config.RuntimePath[0]) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } @@ -1045,15 +1061,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { // Initialize remaining OCI runtimes for name, paths := range runtime.config.OCIRuntimes { - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := 
newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { // Don't fatally error. // This will allow us to ship configs including optional @@ -1073,15 +1084,10 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) { if strings.HasPrefix(runtime.config.OCIRuntime, "/") { name := filepath.Base(runtime.config.OCIRuntime) - supportsJSON := false - for _, r := range runtime.config.RuntimeSupportsJSON { - if r == name { - supportsJSON = true - break - } - } + json := supportsJSON[name] + nocgroups := supportsNoCgroups[name] - ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, supportsJSON) + ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, json, nocgroups) if err != nil { return err } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index acd317d20..e421c09f0 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -8,7 +8,7 @@ import ( "strings" "time" - config2 "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/events" "github.com/containers/libpod/pkg/rootless" "github.com/containers/storage/pkg/stringid" @@ -35,7 +35,7 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.newContainer(ctx, rSpec, options...) 
} @@ -45,7 +45,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config r.lock.Lock() defer r.lock.Unlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctr, err := r.initContainerVariables(rSpec, config) @@ -67,7 +67,7 @@ func (r *Runtime) RestoreContainer(ctx context.Context, rSpec *spec.Spec, config func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConfig) (c *Container, err error) { if rSpec == nil { - return nil, errors.Wrapf(config2.ErrInvalidArg, "must provide a valid runtime spec to create container") + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide a valid runtime spec to create container") } ctr := new(Container) ctr.config = new(ContainerConfig) @@ -100,7 +100,7 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf ctr.state.BindMounts = make(map[string]string) - ctr.config.StopTimeout = config2.CtrRemoveTimeout + ctr.config.StopTimeout = define.CtrRemoveTimeout ctr.config.OCIRuntime = r.defaultOCIRuntime.name @@ -152,7 +152,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai }() ctr.valid = true - ctr.state.State = config2.ContainerStateConfigured + ctr.state.State = define.ContainerStateConfigured ctr.runtime = r if ctr.config.OCIRuntime == "" { @@ -160,11 +160,18 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai } else { ociRuntime, ok := r.ociRuntimes[ctr.config.OCIRuntime] if !ok { - return nil, errors.Wrapf(config2.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not available", ctr.config.OCIRuntime) } ctr.ociRuntime = ociRuntime } + // Check NoCgroups support + if ctr.config.NoCgroups { + if !ctr.ociRuntime.supportsNoCgroups { + return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not compatible with 
NoCgroups", ctr.ociRuntime.name) + } + } + var pod *Pod if ctr.config.Pod != "" { // Get the pod from state @@ -183,43 +190,67 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctr.config.Name = name } - // Check CGroup parent sanity, and set it if it was not set - switch r.config.CgroupManager { - case CgroupfsCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + // If CGroups are disabled, we MUST create a PID namespace. + // Otherwise, the OCI runtime won't be able to stop our container. + if ctr.config.NoCgroups { + if ctr.config.Spec.Linux == nil { + return nil, errors.Wrapf(define.ErrInvalidArg, "must provide Linux namespace configuration in OCI spec when using NoCgroups") + } + foundPid := false + for _, ns := range ctr.config.Spec.Linux.Namespaces { + if ns.Type == spec.PIDNamespace { + foundPid = true + if ns.Path != "" { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace - cannot use another") } - if podCgroup == "" { - return nil, errors.Wrapf(config2.ErrInternal, "pod %s cgroup is not set", pod.ID()) + break + } + } + if !foundPid { + return nil, errors.Wrapf(define.ErrInvalidArg, "containers not creating CGroups must create a private PID namespace") + } + } + + // Check CGroup parent sanity, and set it if it was not set. + // Only if we're actually configuring CGroups. 
+ if !ctr.config.NoCgroups { + switch r.config.CgroupManager { + case CgroupfsCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + if podCgroup == "" { + return nil, errors.Wrapf(define.ErrInternal, "pod %s cgroup is not set", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else { + ctr.config.CgroupParent = CgroupfsDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else { - ctr.config.CgroupParent = CgroupfsDefaultCgroupParent + } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") } - } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") - } - case SystemdCgroupsManager: - if ctr.config.CgroupParent == "" { - if pod != nil && pod.config.UsePodCgroup { - podCgroup, err := pod.CgroupPath() - if err != nil { - return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + case SystemdCgroupsManager: + if ctr.config.CgroupParent == "" { + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else if rootless.IsRootless() { + ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent + } else { + ctr.config.CgroupParent = SystemdDefaultCgroupParent } - ctr.config.CgroupParent = podCgroup - } else if rootless.IsRootless() { - ctr.config.CgroupParent = SystemdDefaultRootlessCgroupParent - } else { - ctr.config.CgroupParent = SystemdDefaultCgroupParent + } else if len(ctr.config.CgroupParent) < 6 || 
!strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { + return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") } - } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { - return nil, errors.Wrapf(config2.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") + default: + return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } - default: - return nil, errors.Wrapf(config2.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) } if ctr.restoreFromCheckpoint { @@ -262,7 +293,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai ctrNamedVolumes = append(ctrNamedVolumes, dbVol) // The volume exists, we're good continue - } else if errors.Cause(err) != config2.ErrNoSuchVolume { + } else if errors.Cause(err) != define.ErrNoSuchVolume { return nil, errors.Wrapf(err, "error retrieving named volume %s for new container", vol.Name) } @@ -386,7 +417,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if !r.valid { - return config2.ErrRuntimeStopped + return define.ErrRuntimeStopped } // Update the container to get current state @@ -402,7 +433,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } - if c.state.State == config2.ContainerStatePaused { + if c.state.State == define.ContainerStatePaused { if err := c.ociRuntime.killContainer(c, 9); err != nil { return err } @@ -416,7 +447,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } // Check that the container's in a good state to be removed - if c.state.State == config2.ContainerStateRunning { + if c.state.State == define.ContainerStateRunning { if err := 
c.stop(c.StopTimeout()); err != nil { return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID()) } @@ -439,7 +470,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } if len(deps) != 0 { depsStr := strings.Join(deps, ", ") - return errors.Wrapf(config2.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) + return errors.Wrapf(define.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) } } @@ -483,8 +514,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, // Delete the container. // Not needed in Configured and Exited states, where the container // doesn't exist in the runtime - if c.state.State != config2.ContainerStateConfigured && - c.state.State != config2.ContainerStateExited { + if c.state.State != define.ContainerStateConfigured && + c.state.State != define.ContainerStateExited { if err := c.delete(ctx); err != nil { if cleanupErr == nil { cleanupErr = err @@ -514,7 +545,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, if !volume.IsCtrSpecific() { continue } - if err := runtime.removeVolume(ctx, volume, false); err != nil && err != config2.ErrNoSuchVolume && err != config2.ErrVolumeBeingUsed { + if err := runtime.removeVolume(ctx, volume, false); err != nil && err != define.ErrNoSuchVolume && err != define.ErrVolumeBeingUsed { logrus.Errorf("cleanup volume (%s): %v", v, err) } } @@ -529,7 +560,7 @@ func (r *Runtime) GetContainer(id string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.Container(id) @@ -541,7 +572,7 @@ func (r *Runtime) HasContainer(id string) (bool, error) { defer r.lock.RUnlock() if !r.valid { - return false, config2.ErrRuntimeStopped + return false, define.ErrRuntimeStopped } return 
r.state.HasContainer(id) @@ -554,7 +585,7 @@ func (r *Runtime) LookupContainer(idOrName string) (*Container, error) { defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } return r.state.LookupContainer(idOrName) } @@ -568,7 +599,7 @@ func (r *Runtime) GetContainers(filters ...ContainerFilter) ([]*Container, error defer r.lock.RUnlock() if !r.valid { - return nil, config2.ErrRuntimeStopped + return nil, define.ErrRuntimeStopped } ctrs, err := r.state.AllContainers() @@ -601,7 +632,7 @@ func (r *Runtime) GetAllContainers() ([]*Container, error) { func (r *Runtime) GetRunningContainers() ([]*Container, error) { running := func(c *Container) bool { state, _ := c.State() - return state == config2.ContainerStateRunning + return state == define.ContainerStateRunning } return r.GetContainers(running) } @@ -629,7 +660,7 @@ func (r *Runtime) GetLatestContainer() (*Container, error) { return nil, errors.Wrapf(err, "unable to find latest container") } if len(ctrs) == 0 { - return nil, config2.ErrNoSuchCtr + return nil, define.ErrNoSuchCtr } for containerIndex, ctr := range ctrs { createdTime := ctr.config.CreatedTime diff --git a/libpod/stats.go b/libpod/stats.go index 776870bd2..5513abce5 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -19,6 +19,10 @@ func (c *Container) GetContainerStats(previousStats *ContainerStats) (*Container stats.ContainerID = c.ID() stats.Name = c.Name() + if c.config.NoCgroups { + return nil, errors.Wrapf(define.ErrNoCgroups, "cannot get stats for container %s as it did not create a cgroup", c.ID()) + } + if !c.batched { c.lock.Lock() defer c.lock.Unlock() diff --git a/pkg/spec/createconfig.go b/pkg/spec/createconfig.go index 3f70e5935..c17172016 100644 --- a/pkg/spec/createconfig.go +++ b/pkg/spec/createconfig.go @@ -64,6 +64,7 @@ type CreateConfig struct { CidFile string ConmonPidFile string Cgroupns string + Cgroups string CgroupParent string // cgroup-parent Command []string // 
Full command that will be used UserCommand []string // User-entered command (or image CMD) @@ -206,6 +207,9 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l logrus.Debugf("adding container to pod %s", c.Pod) options = append(options, runtime.WithPod(pod)) } + if c.Cgroups == "disabled" { + options = append(options, libpod.WithNoCgroups()) + } if len(c.PortBindings) > 0 { portBindings, err = c.CreatePortBindings() if err != nil { diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go index 44bbda885..38f9c7306 100644 --- a/pkg/spec/spec.go +++ b/pkg/spec/spec.go @@ -396,6 +396,18 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM } } + switch config.Cgroups { + case "disabled": + if addedResources { + return nil, errors.New("cannot specify resource limits when cgroups are disabled") + } + configSpec.Linux.Resources = &spec.LinuxResources{} + case "enabled", "": + // Do nothing + default: + return nil, errors.New("unrecognized option for cgroups; supported are 'enabled' and 'disabled'") + } + // Add annotations if configSpec.Annotations == nil { configSpec.Annotations = make(map[string]string) diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go index 6e102cfa5..4d2cee8e3 100644 --- a/test/e2e/run_test.go +++ b/test/e2e/run_test.go @@ -903,4 +903,75 @@ USER mail` } Expect(found).To(BeTrue()) }) + + It("podman run with cgroups=disabled runs without cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := podmanTest.Podman([]string{"run", "--name", ctrName, "-d", "--cgroups=disabled", ALPINE, "top"}) + 
container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + Expect(inspectOut[0].HostConfig.CgroupParent).To(Equal("")) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output:\n%s\n", ctrCgroups) + Expect(curCgroups).To(Equal(ctrCgroups)) + }) + + It("podman run with cgroups=enabled makes cgroups", func() { + SkipIfRemote() + SkipIfRootless() + // Only works on crun + if !strings.Contains(podmanTest.OCIRuntime, "crun") { + Skip("Test only works on crun") + } + + curCgroupsBytes, err := ioutil.ReadFile("/proc/self/cgroup") + Expect(err).To(BeNil()) + var curCgroups string = string(curCgroupsBytes) + fmt.Printf("Output:\n%s\n", curCgroups) + Expect(curCgroups).To(Not(Equal(""))) + + ctrName := "testctr" + container := podmanTest.Podman([]string{"run", "--name", ctrName, "-d", "--cgroups=enabled", ALPINE, "top"}) + container.WaitWithDefaultTimeout() + Expect(container.ExitCode()).To(Equal(0)) + + // Get PID and get cgroups of that PID + inspectOut := podmanTest.InspectContainer(ctrName) + Expect(len(inspectOut)).To(Equal(1)) + pid := inspectOut[0].State.Pid + Expect(pid).To(Not(Equal(0))) + + ctrCgroupsBytes, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + Expect(err).To(BeNil()) + var ctrCgroups string = string(ctrCgroupsBytes) + fmt.Printf("Output:\n%s\n", ctrCgroups) + Expect(curCgroups).To(Not(Equal(ctrCgroups))) + }) + + It("podman run with cgroups=garbage errors", func() { + session := podmanTest.Podman([]string{"run", "-d", "--cgroups=garbage", ALPINE, "top"}) + session.WaitWithDefaultTimeout() + Expect(session.ExitCode()).To(Not(Equal(0))) + }) }) -- cgit v1.2.3-54-g00ecf