From 7e1ea9d26dff92c346bb11640fdab523d513e867 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Wed, 16 May 2018 12:45:09 -0400 Subject: Add per-pod CGroups Pods can now create their own (cgroupfs) cgroups which containers in them can (optionally) use. This presently only works with CGroupFS, systemd cgroups are still WIP Signed-off-by: Matthew Heon Closes: #784 Approved by: rhatdan --- libpod/boltdb_state.go | 37 +++++++++++++++++++++++++++ libpod/options.go | 29 +++++++++++++++++++++ libpod/pod.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ libpod/runtime_ctr.go | 35 ++++++++++++++++++++------ libpod/runtime_pod.go | 35 ++++++++++++++++++++++++++ 5 files changed, 196 insertions(+), 8 deletions(-) (limited to 'libpod') diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 77b2fe5b7..805ff90fd 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error { return err } + podsBucket, err := getPodBucket(tx) + if err != nil { + return err + } + // Iterate through all IDs. Check if they are containers. 
// If they are, unmarshal their state, and then clear // PID, mountpoint, and state for all of them @@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error { err = idBucket.ForEach(func(id, name []byte) error { ctrBkt := ctrsBucket.Bucket(id) if ctrBkt == nil { + // It's a pod + podBkt := podsBucket.Bucket(id) + if podBkt == nil { + // This is neither a pod nor a container + // Error out on the dangling ID + return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id)) + } + + // Get the state + stateBytes := podBkt.Get(stateKey) + if stateBytes == nil { + return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id)) + } + + state := new(podState) + + if err := json.Unmarshal(stateBytes, state); err != nil { + return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id)) + } + + // Clear the CGroup path + state.CgroupPath = "" + + newStateBytes, err := json.Marshal(state) + if err != nil { + return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id)) + } + + if err := podBkt.Put(stateKey, newStateBytes); err != nil { + return errors.Wrapf(err, "error updating state for pod %s in DB", string(id)) + } + // It's not a container, nothing to do return nil } diff --git a/libpod/options.go b/libpod/options.go index 1af788e46..34bde3211 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption { return nil } } + +// WithPodCgroupParent sets the Cgroup Parent of the pod. +func WithPodCgroupParent(path string) PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return ErrPodFinalized + } + + pod.config.CgroupParent = path + + return nil + } +} + +// WithPodCgroups tells containers in this pod to use the cgroup created for +// this pod. +// This can still be overridden at the container level by explicitly specifying +// a CGroup parent. 
+func WithPodCgroups() PodCreateOption {
+	return func(pod *Pod) error {
+		if pod.valid {
+			return ErrPodFinalized
+		}
+
+		pod.config.UsePodCgroup = true
+
+		return nil
+	}
+}
diff --git a/libpod/pod.go b/libpod/pod.go
index e082ef807..19347dde7 100644
--- a/libpod/pod.go
+++ b/libpod/pod.go
@@ -31,6 +31,11 @@ type PodConfig struct {
 	Labels map[string]string `json:"labels"`
 	// CgroupParent contains the pod's CGroup parent
 	CgroupParent string `json:"cgroupParent"`
+	// UsePodCgroup indicates whether the pod will create its own CGroup and
+	// join containers to it.
+	// If true, containers joined to the pod default to the pod cgroup as
+	// their cgroup parent unless they explicitly set a different one
+	UsePodCgroup bool `json:"usePodCgroup"`
 }
 
 // podState represents a pod's state
@@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
 	return p.config.CgroupParent
 }
 
+// UsePodCgroup returns whether containers in the pod will default to this pod's
+// cgroup instead of the default libpod parent
+func (p *Pod) UsePodCgroup() bool {
+	return p.config.UsePodCgroup
+}
+
+// CgroupPath returns the path to the pod's CGroup
+func (p *Pod) CgroupPath() (string, error) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	if err := p.updatePod(); err != nil {
+		return "", err
+	}
+
+	return p.state.CgroupPath, nil
+}
+
 // Creates a new, empty pod
 func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	pod := new(Pod)
@@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	return pod, nil
 }
 
+// Update pod state from database
+func (p *Pod) updatePod() error {
+	if err := p.runtime.state.UpdatePod(p); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Save pod state to database
+func (p *Pod) save() error {
+	if err := p.runtime.state.SavePod(p); err != nil {
+		return errors.Wrapf(err, "error saving pod %s state", p.ID())
+	}
+
+	return nil
+}
+
+// Refresh a pod's state after restart
+func (p *Pod) refresh() error {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return ErrPodRemoved
+	}
+
+	// We need to recreate the pod's cgroup
+	if p.config.UsePodCgroup {
+		switch p.runtime.config.CgroupManager {
+		case SystemdCgroupsManager:
+			// NOOP for now, until proper systemd cgroup management
+			// is implemented
+		case CgroupfsCgroupsManager:
+			p.state.CgroupPath = filepath.Join(p.config.CgroupParent, p.ID())
+
+			logrus.Debugf("setting pod cgroup to %s", p.state.CgroupPath)
+		default:
+			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+		}
+	}
+
+	// Save changes
+	return p.save()
+}
+
 // Start starts all containers within a pod
 // It combines the effects of Init() and Start() on a container
 // If a container has already been initialized it will be started,
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 0f992822a..c6973ff2a 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	ctr.state.State = ContainerStateConfigured
 	ctr.runtime = r
 
+	var pod *Pod
+	if ctr.config.Pod != "" {
+		// Get the pod from state
+		pod, err = r.state.Pod(ctr.config.Pod)
+		if err != nil {
+			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
+		}
+	}
+
 	if ctr.config.Name == "" {
 		name, err := r.generateName()
 		if err != nil {
@@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
switch r.config.CgroupManager { case CgroupfsCgroupsManager: if ctr.config.CgroupParent == "" { - ctr.config.CgroupParent = CgroupfsDefaultCgroupParent + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else { + ctr.config.CgroupParent = CgroupfsDefaultCgroupParent + } } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") } case SystemdCgroupsManager: if ctr.config.CgroupParent == "" { - ctr.config.CgroupParent = SystemdDefaultCgroupParent + if pod != nil && pod.config.UsePodCgroup { + podCgroup, err := pod.CgroupPath() + if err != nil { + return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID()) + } + ctr.config.CgroupParent = podCgroup + } else { + ctr.config.CgroupParent = SystemdDefaultCgroupParent + } } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") } @@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options .. 
// Add the container to the state
 	// TODO: May be worth looking into recovering from name/ID collisions here
 	if ctr.config.Pod != "" {
-		// Get the pod from state
-		pod, err := r.state.Pod(ctr.config.Pod)
-		if err != nil {
-			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
-		}
-
 		// Lock the pod to ensure we can't add containers to pods
 		// being removed
 		pod.lock.Lock()
diff --git a/libpod/runtime_pod.go b/libpod/runtime_pod.go
index 4ca8da9ee..3872a4b67 100644
--- a/libpod/runtime_pod.go
+++ b/libpod/runtime_pod.go
@@ -2,9 +2,12 @@ package libpod
 
 import (
 	"path"
+	"path/filepath"
 	"strings"
 
+	"github.com/containerd/cgroups"
 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 )
 
 // Contains the public Runtime API for pods
@@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
 		} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
 		}
+		// If we are set to use pod cgroups, set the cgroup parent that
+		// all containers in the pod will share
+		// No need to create it with cgroupfs - the first container to
+		// launch should do it for us
+		if pod.config.UsePodCgroup {
+			pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
+		}
 	case SystemdCgroupsManager:
 		if pod.config.CgroupParent == "" {
 			pod.config.CgroupParent = SystemdDefaultCgroupParent
 		} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
 		}
+		// Creating CGroup path is currently a NOOP until proper systemd
+		// cgroup management is merged
 	default:
 		return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
 	}
@@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
 		ctr.valid = false
 	}
 
+	// Remove pod cgroup, if present
+	if p.state.CgroupPath != "" {
+		switch p.runtime.config.CgroupManager {
+		case SystemdCgroupsManager:
+			// NOOP for now, until proper systemd cgroup management
+			// is implemented
+		case CgroupfsCgroupsManager:
+			// Delete the cgroupfs cgroup
+			logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
+
+			cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
+			if err != nil && err != cgroups.ErrCgroupDeleted {
+				return err
+			} else if err == nil {
+				if err := cgroup.Delete(); err != nil {
+					return err
+				}
+			}
+		default:
+			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+		}
+	}
+
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
 		return err
-- 
cgit v1.2.3-54-g00ecf