Diffstat (limited to 'libpod')
-rw-r--r--   libpod/container.go                        |  19
-rw-r--r--   libpod/container_config.go                 |   5
-rw-r--r--   libpod/container_inspect.go                |   3
-rw-r--r--   libpod/container_internal_linux.go         |   7
-rw-r--r--   libpod/define/container_inspect.go         |   3
-rw-r--r--   libpod/image/docker_registry_options.go    |   1
-rw-r--r--   libpod/kube.go                             |  40
-rw-r--r--   libpod/oci_conmon_linux.go                 |  12
-rw-r--r--   libpod/pod_api.go                          | 241
-rw-r--r--   libpod/runtime_ctr.go                      |   1

10 files changed, 174 insertions, 158 deletions
diff --git a/libpod/container.go b/libpod/container.go
index 9b4ccbd5f..01419500e 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -888,9 +888,22 @@ func (c *Container) NamespacePath(linuxNS LinuxNS) (string, error) { //nolint:in
 	return fmt.Sprintf("/proc/%d/ns/%s", c.state.PID, linuxNS.String()), nil
 }
 
+// CgroupManager returns the cgroup manager used by the given container.
+func (c *Container) CgroupManager() string {
+	cgroupManager := c.config.CgroupManager
+	if cgroupManager == "" {
+		cgroupManager = c.runtime.config.Engine.CgroupManager
+	}
+	return cgroupManager
+}
+
 // CGroupPath returns a cgroups "path" for a given container.
 func (c *Container) CGroupPath() (string, error) {
+	cgroupManager := c.CgroupManager()
+
 	switch {
+	case c.config.NoCgroups || c.config.CgroupsMode == "disabled":
+		return "", errors.Wrapf(define.ErrNoCgroups, "this container is not creating cgroups")
 	case c.config.CgroupsMode == cgroupSplit:
 		if c.config.CgroupParent != "" {
 			return "", errors.Errorf("cannot specify cgroup-parent with cgroup-mode %q", cgroupSplit)
@@ -906,9 +919,9 @@ func (c *Container) CGroupPath() (string, error) {
 			return "", errors.Errorf("invalid cgroup for conmon %q", cg)
 		}
 		return strings.TrimSuffix(cg, "/supervisor") + "/container", nil
-	case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager:
+	case cgroupManager == config.CgroupfsCgroupsManager:
 		return filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID())), nil
-	case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
+	case cgroupManager == config.SystemdCgroupsManager:
 		if rootless.IsRootless() {
 			uid := rootless.GetRootlessUID()
 			parts := strings.SplitN(c.config.CgroupParent, "/", 2)
@@ -922,7 +935,7 @@ func (c *Container) CGroupPath() (string, error) {
 		}
 		return filepath.Join(c.config.CgroupParent, createUnitName("libpod", c.ID())), nil
 	default:
-		return "", errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager %s in use", c.runtime.config.Engine.CgroupManager)
+		return "", errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager %s in use", cgroupManager)
 	}
 }
 
diff --git a/libpod/container_config.go b/libpod/container_config.go
index fc93140dd..e264da4da 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -275,13 +275,16 @@ type ContainerMiscConfig struct {
 	StopTimeout uint `json:"stopTimeout,omitempty"`
 	// Time container was created
 	CreatedTime time.Time `json:"createdTime"`
+	// CgroupManager is the cgroup manager used to create this container.
+	// If empty, the runtime default will be used.
+	CgroupManager string `json:"cgroupManager,omitempty"`
 	// NoCgroups indicates that the container will not create CGroups. It is
 	// incompatible with CgroupParent. Deprecated in favor of CgroupsMode.
 	NoCgroups bool `json:"noCgroups,omitempty"`
 	// CgroupsMode indicates how the container will create cgroups
 	// (disabled, no-conmon, enabled). It supersedes NoCgroups.
 	CgroupsMode string `json:"cgroupsMode,omitempty"`
-	// Cgroup parent of the container
+	// Cgroup parent of the container.
 	CgroupParent string `json:"cgroupParent"`
 	// LogPath log location
 	LogPath string `json:"logPath"`
diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index 835dccd71..b8bce1272 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -729,7 +729,7 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
 	// CGroup parent
 	// Need to check if it's the default, and not print if so.
 	defaultCgroupParent := ""
-	switch c.runtime.config.Engine.CgroupManager {
+	switch c.CgroupManager() {
 	case config.CgroupfsCgroupsManager:
 		defaultCgroupParent = CgroupfsDefaultCgroupParent
 	case config.SystemdCgroupsManager:
@@ -738,6 +738,7 @@ func (c *Container) generateInspectContainerHostConfig(ctrSpec *spec.Spec, named
 	if c.config.CgroupParent != defaultCgroupParent {
 		hostConfig.CgroupParent = c.config.CgroupParent
 	}
+	hostConfig.CgroupManager = c.CgroupManager()
 
 	// PID namespace mode
 	pidMode := ""
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 41cc80789..3a71c6601 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -1965,6 +1965,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
 	if err != nil {
 		return "", err
 	}
+	cgroupManager := c.CgroupManager()
 	switch {
 	case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
 		return "", nil
@@ -1977,14 +1978,14 @@ func (c *Container) getOCICgroupPath() (string, error) {
 			return "", err
 		}
 		return filepath.Join(selfCgroup, "container"), nil
-	case c.runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager:
+	case cgroupManager == config.SystemdCgroupsManager:
 		// When the OCI runtime is set to use Systemd as a cgroup manager, it
 		// expects cgroups to be passed as follows:
 		// slice:prefix:name
 		systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
 		logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
 		return systemdCgroups, nil
-	case c.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager:
+	case cgroupManager == config.CgroupfsCgroupsManager:
 		cgroupPath, err := c.CGroupPath()
 		if err != nil {
 			return "", err
@@ -1992,7 +1993,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
 		logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
 		return cgroupPath, nil
 	default:
-		return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", c.runtime.config.Engine.CgroupManager)
+		return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", cgroupManager)
 	}
 }
 
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index 44c3d515b..38b3a6686 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -236,6 +236,9 @@ type InspectContainerHostConfig struct {
 	// include a Mounts field in inspect.
 	// Format: <src>:<destination>[:<comma-separated options>]
 	Binds []string `json:"Binds"`
+	// CgroupManager is the cgroup manager used by the container.
+	// At present, allowed values are either "cgroupfs" or "systemd".
+	CgroupManager string `json:"CgroupManager,omitempty"`
 	// CgroupMode is the configuration of the container's cgroup namespace.
 	// Populated as follows:
 	// private - a cgroup namespace has been created
diff --git a/libpod/image/docker_registry_options.go b/libpod/image/docker_registry_options.go
index 257b7ae8d..835473a1f 100644
--- a/libpod/image/docker_registry_options.go
+++ b/libpod/image/docker_registry_options.go
@@ -55,6 +55,7 @@ func (o DockerRegistryOptions) GetSystemContext(parent *types.SystemContext, add
 		sc.DockerRegistryUserAgent = parent.DockerRegistryUserAgent
 		sc.OSChoice = parent.OSChoice
 		sc.ArchitectureChoice = parent.ArchitectureChoice
+		sc.BlobInfoCacheDir = parent.BlobInfoCacheDir
 	}
 	return sc
 }
diff --git a/libpod/kube.go b/libpod/kube.go
index 6df79e394..cd5064c84 100644
--- a/libpod/kube.go
+++ b/libpod/kube.go
@@ -307,18 +307,40 @@ func containerToV1Container(c *Container) (v1.Container, []v1.Volume, error) {
 	kubeContainer.StdinOnce = false
 	kubeContainer.TTY = c.config.Spec.Process.Terminal
 
-	// TODO add CPU limit support.
 	if c.config.Spec.Linux != nil &&
-		c.config.Spec.Linux.Resources != nil &&
-		c.config.Spec.Linux.Resources.Memory != nil &&
-		c.config.Spec.Linux.Resources.Memory.Limit != nil {
-		if kubeContainer.Resources.Limits == nil {
-			kubeContainer.Resources.Limits = v1.ResourceList{}
+		c.config.Spec.Linux.Resources != nil {
+		if c.config.Spec.Linux.Resources.Memory != nil &&
+			c.config.Spec.Linux.Resources.Memory.Limit != nil {
+			if kubeContainer.Resources.Limits == nil {
+				kubeContainer.Resources.Limits = v1.ResourceList{}
+			}
+
+			qty := kubeContainer.Resources.Limits.Memory()
+			qty.Set(*c.config.Spec.Linux.Resources.Memory.Limit)
+			kubeContainer.Resources.Limits[v1.ResourceMemory] = *qty
 		}
 
-		qty := kubeContainer.Resources.Limits.Memory()
-		qty.Set(*c.config.Spec.Linux.Resources.Memory.Limit)
-		kubeContainer.Resources.Limits[v1.ResourceMemory] = *qty
+		if c.config.Spec.Linux.Resources.CPU != nil &&
+			c.config.Spec.Linux.Resources.CPU.Quota != nil &&
+			c.config.Spec.Linux.Resources.CPU.Period != nil {
+			quota := *c.config.Spec.Linux.Resources.CPU.Quota
+			period := *c.config.Spec.Linux.Resources.CPU.Period
+
+			if quota > 0 && period > 0 {
+				cpuLimitMilli := int64(1000 * float64(quota) / float64(period))
+
+				// Kubernetes: precision finer than 1m is not allowed
+				if cpuLimitMilli >= 1 {
+					if kubeContainer.Resources.Limits == nil {
+						kubeContainer.Resources.Limits = v1.ResourceList{}
+					}
+
+					qty := kubeContainer.Resources.Limits.Cpu()
+					qty.SetMilli(cpuLimitMilli)
+					kubeContainer.Resources.Limits[v1.ResourceCPU] = *qty
+				}
+			}
+		}
 	}
 
 	return kubeContainer, kubeVolumes, nil
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 7fb374e0d..94630e57b 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -57,7 +57,6 @@ type ConmonOCIRuntime struct {
 	path              string
 	conmonPath        string
 	conmonEnv         []string
-	cgroupManager     string
 	tmpDir            string
 	exitsDir          string
 	socketsDir        string
@@ -102,7 +101,6 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
 	runtime.runtimeFlags = runtimeFlags
 
 	runtime.conmonEnv = runtimeCfg.Engine.ConmonEnvVars
-	runtime.cgroupManager = runtimeCfg.Engine.CgroupManager
 	runtime.tmpDir = runtimeCfg.Engine.TmpDir
 	runtime.logSizeMax = runtimeCfg.Containers.LogSizeMax
 	runtime.noPivot = runtimeCfg.Engine.NoPivotRoot
@@ -149,10 +147,6 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
 	runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
 	runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket")
 
-	if runtime.cgroupManager != config.CgroupfsCgroupsManager && runtime.cgroupManager != config.SystemdCgroupsManager {
-		return nil, errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager specified: %s", runtime.cgroupManager)
-	}
-
 	// Create the exit files and attach sockets directories
 	if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
 		// The directory is allowed to exist
@@ -1325,7 +1319,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
 		args = append(args, rFlags...)
 	}
 
-	if r.cgroupManager == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
+	if ctr.CgroupManager() == config.SystemdCgroupsManager && !ctr.config.NoCgroups && ctr.config.CgroupsMode != cgroupSplit {
 		args = append(args, "-s")
 	}
 
@@ -1442,8 +1436,10 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
 	}
 
 	if mustCreateCgroup {
+		// TODO: This should be a switch - we are not guaranteed that
+		// there are only 2 valid cgroup managers
 		cgroupParent := ctr.CgroupParent()
-		if r.cgroupManager == config.SystemdCgroupsManager {
+		if ctr.CgroupManager() == config.SystemdCgroupsManager {
 			unitName := createUnitName("libpod-conmon", ctr.ID())
 
 			realCgroupParent := cgroupParent
diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 0ae180356..f2ddba9c9 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -6,6 +6,7 @@ import (
 	"github.com/containers/podman/v2/libpod/define"
 	"github.com/containers/podman/v2/libpod/events"
 	"github.com/containers/podman/v2/pkg/cgroups"
+	"github.com/containers/podman/v2/pkg/parallel"
 	"github.com/containers/podman/v2/pkg/rootless"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
@@ -99,47 +100,52 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 		return nil, err
 	}
 
-	ctrErrors := make(map[string]error)
-
 	// TODO: There may be cases where it makes sense to order stops based on
 	// dependencies. Should we bother with this?
 
-	// Stop to all containers
-	for _, ctr := range allCtrs {
-		ctr.lock.Lock()
+	ctrErrChan := make(map[string]<-chan error)
 
-		if err := ctr.syncContainer(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
-
-		// Ignore containers that are not running
-		if ctr.state.State != define.ContainerStateRunning {
-			ctr.lock.Unlock()
-			continue
-		}
-		stopTimeout := ctr.config.StopTimeout
-		if timeout > -1 {
-			stopTimeout = uint(timeout)
-		}
-		if err := ctr.stop(stopTimeout); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+	// Enqueue a function for each container with the parallel executor.
+	for _, ctr := range allCtrs {
+		c := ctr
+		logrus.Debugf("Adding parallel job to stop container %s", c.ID())
+		retChan := parallel.Enqueue(ctx, func() error {
+			// TODO: Might be better to batch stop and cleanup
+			// together?
+			if timeout > -1 {
+				if err := c.StopWithTimeout(uint(timeout)); err != nil {
+					return err
+				}
+			} else {
+				if err := c.Stop(); err != nil {
+					return err
+				}
+			}
 
-		if cleanup {
-			if err := ctr.cleanup(ctx); err != nil {
-				ctrErrors[ctr.ID()] = err
+			if cleanup {
+				return c.Cleanup(ctx)
 			}
-		}
 
-		ctr.lock.Unlock()
+			return nil
+		})
+
+		ctrErrChan[c.ID()] = retChan
 	}
 
 	p.newPodEvent(events.Stop)
 
+	ctrErrors := make(map[string]error)
+
+	// Get returned error for every container we worked on
+	for id, channel := range ctrErrChan {
+		if err := <-channel; err != nil {
+			if errors.Cause(err) == define.ErrCtrStateInvalid || errors.Cause(err) == define.ErrCtrStopped {
+				continue
+			}
+			ctrErrors[id] = err
+		}
+	}
+
 	if len(ctrErrors) > 0 {
 		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error stopping some containers")
 	}
@@ -169,45 +175,29 @@ func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
 		return nil, err
 	}
 
-	ctrErrors := make(map[string]error)
+	ctrErrChan := make(map[string]<-chan error)
 
-	// Clean up all containers
+	// Enqueue a function for each container with the parallel executor.
 	for _, ctr := range allCtrs {
-		ctr.lock.Lock()
-
-		if err := ctr.syncContainer(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
-
-		// Ignore containers that are running/paused
-		if !ctr.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
-			ctr.lock.Unlock()
-			continue
-		}
-
-		// Check for running exec sessions, ignore containers with them.
-		sessions, err := ctr.getActiveExecSessions()
-		if err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
-		if len(sessions) > 0 {
-			ctr.lock.Unlock()
-			continue
-		}
+		c := ctr
+		logrus.Debugf("Adding parallel job to clean up container %s", c.ID())
+		retChan := parallel.Enqueue(ctx, func() error {
+			return c.Cleanup(ctx)
+		})
 
-		// TODO: Should we handle restart policy here?
+		ctrErrChan[c.ID()] = retChan
+	}
 
-		ctr.newContainerEvent(events.Cleanup)
+	ctrErrors := make(map[string]error)
 
-		if err := ctr.cleanup(ctx); err != nil {
-			ctrErrors[ctr.ID()] = err
+	// Get returned error for every container we worked on
+	for id, channel := range ctrErrChan {
+		if err := <-channel; err != nil {
+			if errors.Cause(err) == define.ErrCtrStateInvalid || errors.Cause(err) == define.ErrCtrStopped {
+				continue
+			}
+			ctrErrors[id] = err
 		}
-
-		ctr.lock.Unlock()
 	}
 
 	if len(ctrErrors) > 0 {
@@ -229,7 +219,7 @@ func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
 // containers. The container ID is mapped to the error encountered. The error is
 // set to ErrPodPartialFail.
 // If both error and the map are nil, all containers were paused without error
-func (p *Pod) Pause() (map[string]error, error) {
+func (p *Pod) Pause(ctx context.Context) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
@@ -252,37 +242,34 @@ func (p *Pod) Pause() (map[string]error, error) {
 		return nil, err
 	}
 
-	ctrErrors := make(map[string]error)
+	ctrErrChan := make(map[string]<-chan error)
 
-	// Pause to all containers
+	// Enqueue a function for each container with the parallel executor.
 	for _, ctr := range allCtrs {
-		ctr.lock.Lock()
+		c := ctr
+		logrus.Debugf("Adding parallel job to pause container %s", c.ID())
+		retChan := parallel.Enqueue(ctx, c.Pause)
 
-		if err := ctr.syncContainer(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+		ctrErrChan[c.ID()] = retChan
+	}
 
-		// Ignore containers that are not running
-		if ctr.state.State != define.ContainerStateRunning {
-			ctr.lock.Unlock()
-			continue
-		}
+	p.newPodEvent(events.Pause)
 
-		if err := ctr.pause(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+	ctrErrors := make(map[string]error)
 
-		ctr.lock.Unlock()
+	// Get returned error for every container we worked on
+	for id, channel := range ctrErrChan {
+		if err := <-channel; err != nil {
+			if errors.Cause(err) == define.ErrCtrStateInvalid || errors.Cause(err) == define.ErrCtrStopped {
+				continue
+			}
+			ctrErrors[id] = err
+		}
 	}
 
 	if len(ctrErrors) > 0 {
 		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error pausing some containers")
 	}
-	defer p.newPodEvent(events.Pause)
 	return nil, nil
 }
 
@@ -298,7 +285,7 @@ func (p *Pod) Pause() (map[string]error, error) {
 // containers. The container ID is mapped to the error encountered. The error is
 // set to ErrPodPartialFail.
 // If both error and the map are nil, all containers were unpaused without error.
-func (p *Pod) Unpause() (map[string]error, error) {
+func (p *Pod) Unpause(ctx context.Context) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
@@ -311,38 +298,34 @@ func (p *Pod) Unpause() (map[string]error, error) {
 		return nil, err
 	}
 
-	ctrErrors := make(map[string]error)
+	ctrErrChan := make(map[string]<-chan error)
 
-	// Pause to all containers
+	// Enqueue a function for each container with the parallel executor.
 	for _, ctr := range allCtrs {
-		ctr.lock.Lock()
+		c := ctr
+		logrus.Debugf("Adding parallel job to unpause container %s", c.ID())
+		retChan := parallel.Enqueue(ctx, c.Unpause)
 
-		if err := ctr.syncContainer(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+		ctrErrChan[c.ID()] = retChan
+	}
 
-		// Ignore containers that are not paused
-		if ctr.state.State != define.ContainerStatePaused {
-			ctr.lock.Unlock()
-			continue
-		}
+	p.newPodEvent(events.Unpause)
 
-		if err := ctr.unpause(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+	ctrErrors := make(map[string]error)
 
-		ctr.lock.Unlock()
+	// Get returned error for every container we worked on
+	for id, channel := range ctrErrChan {
+		if err := <-channel; err != nil {
+			if errors.Cause(err) == define.ErrCtrStateInvalid || errors.Cause(err) == define.ErrCtrStopped {
+				continue
+			}
+			ctrErrors[id] = err
+		}
 	}
 
 	if len(ctrErrors) > 0 {
 		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error unpausing some containers")
 	}
-
-	defer p.newPodEvent(events.Unpause)
 	return nil, nil
 }
 
@@ -411,7 +394,7 @@ func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
 // containers. The container ID is mapped to the error encountered. The error is
 // set to ErrPodPartialFail.
 // If both error and the map are nil, all containers were signalled successfully.
-func (p *Pod) Kill(signal uint) (map[string]error, error) {
+func (p *Pod) Kill(ctx context.Context, signal uint) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
@@ -424,44 +407,36 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
 		return nil, err
 	}
 
-	ctrErrors := make(map[string]error)
+	ctrErrChan := make(map[string]<-chan error)
 
-	// Send a signal to all containers
+	// Enqueue a function for each container with the parallel executor.
 	for _, ctr := range allCtrs {
-		ctr.lock.Lock()
-
-		if err := ctr.syncContainer(); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+		c := ctr
+		logrus.Debugf("Adding parallel job to kill container %s", c.ID())
+		retChan := parallel.Enqueue(ctx, func() error {
+			return c.Kill(signal)
+		})
 
-		// Ignore containers that are not running
-		if ctr.state.State != define.ContainerStateRunning {
-			ctr.lock.Unlock()
-			continue
-		}
+		ctrErrChan[c.ID()] = retChan
+	}
 
-		if err := ctr.ociRuntime.KillContainer(ctr, signal, false); err != nil {
-			ctr.lock.Unlock()
-			ctrErrors[ctr.ID()] = err
-			continue
-		}
+	p.newPodEvent(events.Kill)
 
-		logrus.Debugf("Killed container %s with signal %d", ctr.ID(), signal)
+	ctrErrors := make(map[string]error)
 
-		ctr.state.StoppedByUser = true
-		if err := ctr.save(); err != nil {
-			ctrErrors[ctr.ID()] = err
+	// Get returned error for every container we worked on
+	for id, channel := range ctrErrChan {
+		if err := <-channel; err != nil {
+			if errors.Cause(err) == define.ErrCtrStateInvalid || errors.Cause(err) == define.ErrCtrStopped {
+				continue
+			}
+			ctrErrors[id] = err
 		}
-
-		ctr.lock.Unlock()
 	}
 
 	if len(ctrErrors) > 0 {
 		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error killing some containers")
 	}
-	defer p.newPodEvent(events.Kill)
 	return nil, nil
 }
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index abb97293f..51b4c5f03 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -208,6 +208,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
 	// Check CGroup parent sanity, and set it if it was not set.
 	// Only if we're actually configuring CGroups.
 	if !ctr.config.NoCgroups {
+		ctr.config.CgroupManager = r.config.Engine.CgroupManager
 		switch r.config.Engine.CgroupManager {
 		case config.CgroupfsCgroupsManager:
 			if ctr.config.CgroupParent == "" {
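
Notes on the changes above (illustrative sketches, not part of the commit).

The container.go change records the cgroup manager in the container's own config at creation time and falls back to the engine-wide setting when that field is empty (which is what older containers created before this change will have). The sketch below isolates that lookup pattern with hypothetical stand-in types; it does not use the real libpod structs.

package main

import "fmt"

// Minimal stand-ins for the libpod types; only the fields needed for the
// fallback logic are modeled here. These names are illustrative, not libpod's.
type engineConfig struct {
	CgroupManager string // runtime-wide default, e.g. "systemd" or "cgroupfs"
}

type containerConfig struct {
	CgroupManager string // recorded at creation time; empty for older containers
}

type container struct {
	config  containerConfig
	runtime engineConfig
}

// cgroupManager mirrors the new Container.CgroupManager() accessor: prefer the
// value stored in the container's config, fall back to the engine default.
func (c *container) cgroupManager() string {
	if c.config.CgroupManager != "" {
		return c.config.CgroupManager
	}
	return c.runtime.CgroupManager
}

func main() {
	old := &container{runtime: engineConfig{CgroupManager: "systemd"}}
	fresh := &container{
		config:  containerConfig{CgroupManager: "cgroupfs"},
		runtime: engineConfig{CgroupManager: "systemd"},
	}
	fmt.Println(old.cgroupManager())   // "systemd" (fallback to engine default)
	fmt.Println(fresh.cgroupManager()) // "cgroupfs" (value recorded at creation)
}

Every call site that previously read c.runtime.config.Engine.CgroupManager (CGroupPath, inspect, conmon args, the OCI cgroup path) now goes through this accessor, so a container keeps behaving under the manager it was created with even if the engine default later changes.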
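The kube.go change adds CPU limits to generated Kubernetes YAML by converting the OCI quota/period pair into millicores: limit(mCPU) = 1000 * quota / period, skipping anything that rounds below 1m. A self-contained sketch of just that arithmetic (the diff then stores the value via Quantity.SetMilli into the ResourceList):

package main

import "fmt"

// ociCPUToMilliCPU mirrors the conversion used in libpod/kube.go.
// A missing or non-positive quota/period yields 0, meaning no limit is emitted.
func ociCPUToMilliCPU(quota, period int64) int64 {
	if quota <= 0 || period <= 0 {
		return 0
	}
	return int64(1000 * float64(quota) / float64(period))
}

func main() {
	// --cpus=1.5 is typically expressed as quota=150000, period=100000.
	fmt.Println(ociCPUToMilliCPU(150000, 100000)) // 1500, i.e. "1500m" (1.5 CPUs)

	// Kubernetes does not allow precision finer than 1m, so values that
	// round down to zero are skipped by the generator.
	fmt.Println(ociCPUToMilliCPU(50, 100000)) // 0 -> no CPU limit written
}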
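The pod_api.go rewrite replaces the per-container lock/sync/act loops with a fan-out/collect pattern: one job is enqueued per container, its error channel is kept in a map keyed by container ID, and all channels are drained into a ctrErrors map afterwards. The sketch below uses a simplified stand-in for pkg/parallel.Enqueue built on plain goroutines (the real package also bounds how many jobs run at once, which this sketch omits), so it only illustrates the shape of the pattern, not the actual executor.

package main

import (
	"context"
	"errors"
	"fmt"
)

// enqueue is a simplified stand-in for pkg/parallel.Enqueue: run fn in its own
// goroutine and return a channel that will receive fn's error exactly once.
func enqueue(ctx context.Context, fn func() error) <-chan error {
	retChan := make(chan error, 1)
	go func() {
		defer close(retChan)
		select {
		case <-ctx.Done():
			retChan <- ctx.Err()
		default:
			retChan <- fn()
		}
	}()
	return retChan
}

func main() {
	ctx := context.Background()
	containers := []string{"ctr-a", "ctr-b", "ctr-c"}

	// Fan out: one job per container, keyed by ID, as in Pod.StopWithTimeout.
	errChans := make(map[string]<-chan error)
	for _, id := range containers {
		id := id // capture the loop variable, as the diff does with `c := ctr`
		errChans[id] = enqueue(ctx, func() error {
			if id == "ctr-b" {
				return errors.New("simulated stop failure")
			}
			return nil
		})
	}

	// Collect: drain every channel and keep only real failures.
	ctrErrors := make(map[string]error)
	for id, ch := range errChans {
		if err := <-ch; err != nil {
			ctrErrors[id] = err
		}
	}
	fmt.Println(ctrErrors) // map[ctr-b:simulated stop failure]
}

Because the jobs now call the public Stop/Cleanup/Pause/Unpause/Kill methods, "container already stopped" and "invalid state" errors surface as real errors instead of being filtered by in-loop state checks, which is why the collection loop skips ErrCtrStateInvalid and ErrCtrStopped before reporting ErrPodPartialFail.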