summary | refs | log | tree | commit | diff
path: root/pkg/cgroups
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/cgroups')
-rw-r--r-- pkg/cgroups/blkio.go 149
-rw-r--r-- pkg/cgroups/cgroups.go 493
-rw-r--r-- pkg/cgroups/cgroups_supported.go 27
-rw-r--r-- pkg/cgroups/cgroups_unsupported.go 8
-rw-r--r-- pkg/cgroups/cpu.go 123
-rw-r--r-- pkg/cgroups/cpuset.go 85
-rw-r--r-- pkg/cgroups/memory.go 66
-rw-r--r-- pkg/cgroups/pids.go 62
-rw-r--r-- pkg/cgroups/systemd.go 92
9 files changed, 1105 insertions, 0 deletions
diff --git a/pkg/cgroups/blkio.go b/pkg/cgroups/blkio.go
new file mode 100644
index 000000000..bacd4eb93
--- /dev/null
+++ b/pkg/cgroups/blkio.go
@@ -0,0 +1,149 @@
+package cgroups
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+)
+
+// blkioHandler provides the Create/Apply/Destroy/Stat operations for the
+// blkio controller.  It carries no state.
+type blkioHandler struct{}
+
+// getBlkioHandler returns a new handler for the blkio controller.
+func getBlkioHandler() *blkioHandler {
+	return new(blkioHandler)
+}
+
+// Apply sets the specified constraints.  Applying blkio constraints is not
+// implemented yet: an error is returned whenever res.BlockIO is set, and nil
+// otherwise.
+func (c *blkioHandler) Apply(ctr *CgroupControl, res *spec.LinuxResources) error {
+	if res.BlockIO != nil {
+		return fmt.Errorf("blkio apply function not implemented yet")
+	}
+	return nil
+}
+
+// Create creates the blkio cgroup directory on cgroup v1 and reports whether
+// it had to be created.  On cgroup v2 it does nothing and reports false.
+func (c *blkioHandler) Create(ctr *CgroupControl) (bool, error) {
+	if !ctr.cgroup2 {
+		return ctr.createCgroupDirectory(Blkio)
+	}
+	return false, nil
+}
+
+// Destroy removes the cgroup v1 directory of the blkio controller.
+func (c *blkioHandler) Destroy(ctr *CgroupControl) error {
+	blkioPath := ctr.getCgroupv1Path(Blkio)
+	return rmDirRecursively(blkioPath)
+}
+
+// parseBlkioDevice parses a "MAJOR:MINOR" device identifier.  ok is false
+// (with a nil error) when s does not consist of exactly two colon-separated
+// fields, so callers can skip lines that are not keyed by a device.
+func parseBlkioDevice(s string) (major, minor uint64, ok bool, err error) {
+	d := strings.Split(s, ":")
+	if len(d) != 2 {
+		return 0, 0, false, nil
+	}
+	// The kernel prints the major number first, then the minor.
+	if major, err = strconv.ParseUint(d[0], 10, 64); err != nil {
+		return 0, 0, false, err
+	}
+	if minor, err = strconv.ParseUint(d[1], 10, 64); err != nil {
+		return 0, 0, false, err
+	}
+	return major, minor, true, nil
+}
+
+// Stat fills a metrics structure with usage stats for the controller.
+//
+// On cgroup v2 the entries are read from io.stat; on cgroup v1 they are read
+// from blkio.throttle.io_service_bytes_recursive.  When the cgroup v1 file
+// does not exist, m is left untouched and nil is returned.
+func (c *blkioHandler) Stat(ctr *CgroupControl, m *Metrics) error {
+	var ioServiceBytesRecursive []BlkIOEntry
+
+	if ctr.cgroup2 {
+		// more details on the io.stat file format: https://facebookmicrosites.github.io/cgroup2/docs/io-controller.html
+		values, err := readCgroup2MapFile(ctr, "io.stat")
+		if err != nil {
+			return err
+		}
+		for k, v := range values {
+			major, minor, ok, err := parseBlkioDevice(k)
+			if err != nil {
+				return err
+			}
+			if !ok {
+				continue
+			}
+
+			for _, item := range v {
+				// Each item has the form "key=value".
+				d := strings.Split(item, "=")
+				if len(d) != 2 {
+					continue
+				}
+				op := d[0]
+
+				// Accommodate the cgroup v1 naming
+				switch op {
+				case "rbytes":
+					op = "read"
+				case "wbytes":
+					op = "write"
+				}
+
+				value, err := strconv.ParseUint(d[1], 10, 64)
+				if err != nil {
+					return err
+				}
+
+				entry := BlkIOEntry{
+					Op:    op,
+					Major: major,
+					Minor: minor,
+					Value: value,
+				}
+				ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
+			}
+		}
+	} else {
+		BlkioRoot := ctr.getCgroupv1Path(Blkio)
+
+		p := filepath.Join(BlkioRoot, "blkio.throttle.io_service_bytes_recursive")
+		f, err := os.Open(p)
+		if err != nil {
+			if os.IsNotExist(err) {
+				return nil
+			}
+			return errors.Wrapf(err, "open %s", p)
+		}
+		defer f.Close()
+
+		scanner := bufio.NewScanner(f)
+		for scanner.Scan() {
+			// Expected line format: "MAJOR:MINOR OP VALUE".  Lines with
+			// fewer than three fields are skipped.
+			parts := strings.Fields(scanner.Text())
+			if len(parts) < 3 {
+				continue
+			}
+			major, minor, ok, err := parseBlkioDevice(parts[0])
+			if err != nil {
+				return err
+			}
+			if !ok {
+				continue
+			}
+
+			value, err := strconv.ParseUint(parts[2], 10, 64)
+			if err != nil {
+				return err
+			}
+			entry := BlkIOEntry{
+				Op:    parts[1],
+				Major: major,
+				Minor: minor,
+				Value: value,
+			}
+			ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
+		}
+		if err := scanner.Err(); err != nil {
+			return errors.Wrapf(err, "parse %s", p)
+		}
+	}
+	m.Blkio = BlkioMetrics{IoServiceBytesRecursive: ioServiceBytesRecursive}
+	return nil
+}
diff --git a/pkg/cgroups/cgroups.go b/pkg/cgroups/cgroups.go
new file mode 100644
index 000000000..f2c6b548e
--- /dev/null
+++ b/pkg/cgroups/cgroups.go
@@ -0,0 +1,493 @@
+package cgroups
+
+import (
+ "bufio"
+ "fmt"
+ "io/ioutil"
+ "math"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+var (
+	// ErrCgroupDeleted means the cgroup was deleted.  Load returns it when
+	// the cgroup v1 directory of one of the known controllers does not exist.
+	ErrCgroupDeleted = errors.New("cgroups: cgroup deleted")
+)
+
+// CgroupControl controls a cgroup hierarchy
+type CgroupControl struct {
+	// cgroup2 holds the value reported by IsCgroup2UnifiedMode when the
+	// control was created.
+	cgroup2 bool
+	// path is the cgroup path.  It is joined below cgroupRoot (and, on
+	// cgroup v1, below the controller name) to locate the cgroup.
+	path string
+	// systemd is set by NewSystemd.  When true, DeleteByPath stops the
+	// systemd unit instead of removing cgroup directories.
+	systemd bool
+	// List of additional cgroup subsystems joined that
+	// do not have a custom handler.
+	additionalControllers []controller
+}
+
+// CPUUsage keeps stats for the CPU usage (unit: nanoseconds)
+type CPUUsage struct {
+	// Kernel is read from cpuacct.usage_sys on cgroup v1.
+	Kernel uint64
+	// Total is read from cpuacct.usage on cgroup v1.
+	Total uint64
+	// PerCPU is read from cpuacct.usage_percpu on cgroup v1; it is not
+	// filled on cgroup v2.
+	PerCPU []uint64
+}
+
+// MemoryUsage keeps stats for the memory usage
+type MemoryUsage struct {
+	// Usage is read from memory.usage_in_bytes (cgroup v1) or
+	// memory.current (cgroup v2).
+	Usage uint64
+	// Limit is read from memory.limit_in_bytes (cgroup v1) or memory.max
+	// (cgroup v2).  A file content of "max" is reported as math.MaxUint64.
+	Limit uint64
+}
+
+// CPUMetrics keeps stats for the CPU usage.  It is filled by the cpu
+// handler's Stat method.
+type CPUMetrics struct {
+	Usage CPUUsage
+}
+
+// BlkIOEntry describes an entry in the blkio stats
+type BlkIOEntry struct {
+	// Op is the operation name as found in the stats file.  On cgroup v2
+	// the "rbytes" and "wbytes" keys are renamed to "read" and "write".
+	Op string
+	// Major and Minor identify the block device the entry refers to.
+	Major uint64
+	Minor uint64
+	// Value is the counter value parsed for Op.
+	Value uint64
+}
+
+// BlkioMetrics keeps usage stats for the blkio cgroup controller
+type BlkioMetrics struct {
+	// IoServiceBytesRecursive holds one entry per device and operation,
+	// read from blkio.throttle.io_service_bytes_recursive (cgroup v1) or
+	// io.stat (cgroup v2).
+	IoServiceBytesRecursive []BlkIOEntry
+}
+
+// MemoryMetrics keeps usage stats for the memory cgroup controller.  It is
+// filled by the memory handler's Stat method.
+type MemoryMetrics struct {
+	Usage MemoryUsage
+}
+
+// PidsMetrics keeps usage stats for the pids cgroup controller
+type PidsMetrics struct {
+	// Current is the value read from the pids.current file.
+	Current uint64
+}
+
+// Metrics keeps usage stats for the cgroup controllers.  CgroupControl.Stat
+// passes one Metrics value to every registered handler, and each handler
+// fills the field for its own controller.
+type Metrics struct {
+	CPU    CPUMetrics
+	Blkio  BlkioMetrics
+	Memory MemoryMetrics
+	Pids   PidsMetrics
+}
+
+// controller describes an entry found directly under cgroupRoot that has no
+// dedicated handler.
+type controller struct {
+	// name is the directory entry name under cgroupRoot.
+	name string
+	// symlink is true when the entry is not a directory; such entries are
+	// skipped when creating, deleting or joining cgroups.
+	symlink bool
+}
+
+// controllerHandler is the set of operations implemented for each cgroup
+// controller registered in the handlers map.
+type controllerHandler interface {
+	// Create sets up the controller for the cgroup.  The boolean result
+	// tells initialize whether to call Destroy if a later step fails.
+	Create(*CgroupControl) (bool, error)
+	// Apply sets the given resource constraints.
+	Apply(*CgroupControl, *spec.LinuxResources) error
+	// Destroy removes what the controller keeps for the cgroup.
+	Destroy(*CgroupControl) error
+	// Stat fills the part of Metrics that belongs to the controller.
+	Stat(*CgroupControl, *Metrics) error
+}
+
+const (
+	// cgroupRoot is the directory under which all cgroup paths are resolved.
+	cgroupRoot = "/sys/fs/cgroup"
+	// _cgroup2SuperMagic is compared against the filesystem type returned
+	// by statfs on /sys/fs/cgroup to detect the cgroup v2 unified mode.
+	_cgroup2SuperMagic = 0x63677270
+	// CPU is the cpu controller
+	CPU = "cpu"
+	// CPUAcct is the cpuacct controller
+	CPUAcct = "cpuacct"
+	// CPUset is the cpuset controller
+	CPUset = "cpuset"
+	// Memory is the memory controller
+	Memory = "memory"
+	// Pids is the pids controller
+	Pids = "pids"
+	// Blkio is the blkio controller
+	Blkio = "blkio"
+)
+
+// handlers maps a controller name to the handler that manages it.
+var handlers map[string]controllerHandler
+
+// init registers the handlers for the controllers this package manages.
+func init() {
+	handlers = map[string]controllerHandler{
+		CPU:    getCPUHandler(),
+		CPUset: getCpusetHandler(),
+		Memory: getMemoryHandler(),
+		Pids:   getPidsHandler(),
+		Blkio:  getBlkioHandler(),
+	}
+}
+
+// getAvailableControllers lists the entries found directly under cgroupRoot,
+// leaving out those whose name is a key of exclude.  Entries that are not
+// directories are flagged as symlinks.  Only cgroup v1 is supported: an
+// error is returned when cgroup2 is true.
+func getAvailableControllers(exclude map[string]controllerHandler, cgroup2 bool) ([]controller, error) {
+	if cgroup2 {
+		return nil, fmt.Errorf("getAvailableControllers not implemented yet for cgroup v2")
+	}
+
+	entries, err := ioutil.ReadDir(cgroupRoot)
+	if err != nil {
+		return nil, errors.Wrapf(err, "read directory %s", cgroupRoot)
+	}
+
+	var available []controller
+	for _, entry := range entries {
+		if _, skip := exclude[entry.Name()]; !skip {
+			available = append(available, controller{
+				name:    entry.Name(),
+				symlink: !entry.IsDir(),
+			})
+		}
+	}
+	return available, nil
+}
+
+// getCgroupv1Path returns the cgroup v1 directory of this cgroup for the
+// controller called name: cgroupRoot/name/path.
+func (c *CgroupControl) getCgroupv1Path(name string) string {
+	elems := []string{cgroupRoot, name, c.path}
+	return filepath.Join(elems...)
+}
+
+// createCgroupv2Path creates the cgroupv2 path and enables all the available controllers.
+//
+// path must start with "/sys/fs/cgroup/".  Every missing component below
+// /sys/fs/cgroup is created with mode 0755.  If the function fails, the
+// directories it created itself are removed again (best effort).
+func createCgroupv2Path(path string) (Err error) {
+	content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
+	if err != nil {
+		return errors.Wrapf(err, "read /sys/fs/cgroup/cgroup.controllers")
+	}
+	if !strings.HasPrefix(path, "/sys/fs/cgroup/") {
+		return fmt.Errorf("invalid cgroup path %s", path)
+	}
+
+	// Build the "+ctrl1 +ctrl2 ..." string written to cgroup.subtree_control.
+	// strings.Fields yields no element for an empty controllers file, so a
+	// bare "+" is never produced.
+	enable := strings.Fields(string(content))
+	for i, c := range enable {
+		enable[i] = "+" + c
+	}
+	resByte := []byte(strings.Join(enable, " "))
+
+	current := "/sys/fs"
+	// Skip the leading "", "sys" and "fs" elements; the first remaining
+	// element is "cgroup" itself, which is never created here.
+	elements := strings.Split(path, "/")[3:]
+	for i, e := range elements {
+		current = filepath.Join(current, e)
+		if i > 0 {
+			if err := os.Mkdir(current, 0755); err != nil {
+				if !os.IsExist(err) {
+					return errors.Wrapf(err, "mkdir %s", current)
+				}
+			} else {
+				// If the directory was created, be sure it is not left around on errors.
+				// Capture the directory by value: "current" keeps changing
+				// while the loop runs, and the deferred calls only run when
+				// the function returns.
+				created := current
+				defer func() {
+					if Err != nil {
+						// Best effort, the original error is what matters.
+						os.Remove(created)
+					}
+				}()
+			}
+		}
+		// We enable the controllers for all the path components except the last one. It is not allowed to add
+		// PIDs if there are already enabled controllers.
+		if i < len(elements)-1 && len(resByte) > 0 {
+			subtreeControl := filepath.Join(current, "cgroup.subtree_control")
+			if err := ioutil.WriteFile(subtreeControl, resByte, 0755); err != nil {
+				return errors.Wrapf(err, "write %s", subtreeControl)
+			}
+		}
+	}
+	return nil
+}
+
+// initialize initializes the specified hierarchy.
+//
+// On cgroup v2 it first creates the unified cgroup path.  It then asks every
+// registered handler to create its part, and on cgroup v1 it also creates
+// the directory for every additional controller that is not a symlink.  If
+// any step fails, the handlers that reported a creation are destroyed again.
+func (c *CgroupControl) initialize() (err error) {
+	createdSoFar := map[string]controllerHandler{}
+	defer func() {
+		if err == nil {
+			return
+		}
+		for name, ctr := range createdSoFar {
+			if cleanupErr := ctr.Destroy(c); cleanupErr != nil {
+				// Only log: the error that triggered the cleanup is the
+				// one reported to the caller.
+				logrus.Warningf("error cleaning up controller %s for %s: %v", name, c.path, cleanupErr)
+			}
+		}
+	}()
+	if c.cgroup2 {
+		if err := createCgroupv2Path(filepath.Join(cgroupRoot, c.path)); err != nil {
+			return errors.Wrapf(err, "error creating cgroup path %s", c.path)
+		}
+	}
+	for name, handler := range handlers {
+		created, err := handler.Create(c)
+		if err != nil {
+			return err
+		}
+		if created {
+			createdSoFar[name] = handler
+		}
+	}
+
+	if !c.cgroup2 {
+		// We won't need to do this for cgroup v2
+		for _, ctr := range c.additionalControllers {
+			if ctr.symlink {
+				continue
+			}
+			path := c.getCgroupv1Path(ctr.name)
+			if err := os.MkdirAll(path, 0755); err != nil {
+				return errors.Wrapf(err, "error creating cgroup path %s for %s", path, ctr.name)
+			}
+		}
+	}
+
+	return nil
+}
+
+// createCgroupDirectory makes sure the cgroup v1 directory for the given
+// controller exists.  It returns true only when the directory was missing
+// and has been created by this call.
+func (c *CgroupControl) createCgroupDirectory(controller string) (bool, error) {
+	cPath := c.getCgroupv1Path(controller)
+
+	switch _, err := os.Stat(cPath); {
+	case err == nil:
+		// Already there, nothing was created.
+		return false, nil
+	case !os.IsNotExist(err):
+		return false, err
+	}
+
+	if err := os.MkdirAll(cPath, 0755); err != nil {
+		return false, errors.Wrapf(err, "error creating cgroup for %s", controller)
+	}
+	return true, nil
+}
+
+// readFileAsUint64 reads the file at path and parses its content as a
+// base-10 unsigned integer, after stripping leading and trailing newlines.
+// The literal content "max" is reported as math.MaxUint64.  On error the
+// returned value is always 0.
+func readFileAsUint64(path string) (uint64, error) {
+	data, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, errors.Wrapf(err, "open %s", path)
+	}
+	v := cleanString(string(data))
+	if v == "max" {
+		return math.MaxUint64, nil
+	}
+	// Parse with an explicit 64-bit size: a bit size of 0 means "uint",
+	// which is only 32 bits wide on 32-bit platforms.
+	ret, err := strconv.ParseUint(v, 10, 64)
+	if err != nil {
+		return 0, errors.Wrapf(err, "parse %s from %s", v, path)
+	}
+	return ret, nil
+}
+
+// New creates a new cgroup control for path and initializes its hierarchy.
+// The resources argument is currently not used.
+func New(path string, resources *spec.LinuxResources) (*CgroupControl, error) {
+	unified, err := IsCgroup2UnifiedMode()
+	if err != nil {
+		return nil, err
+	}
+
+	control := &CgroupControl{cgroup2: unified, path: path}
+	if !unified {
+		// On cgroup v1, remember the controllers that have no handler.
+		extra, err := getAvailableControllers(handlers, false)
+		if err != nil {
+			return nil, err
+		}
+		control.additionalControllers = extra
+	}
+
+	if err := control.initialize(); err != nil {
+		return nil, err
+	}
+	return control, nil
+}
+
+// NewSystemd creates a new cgroup control that is flagged as managed through
+// systemd.  Nothing is created on the system by this call.
+func NewSystemd(path string) (*CgroupControl, error) {
+	unified, err := IsCgroup2UnifiedMode()
+	if err != nil {
+		return nil, err
+	}
+	return &CgroupControl{
+		cgroup2: unified,
+		path:    path,
+		systemd: true,
+	}, nil
+}
+
+// Load loads an existing cgroup control.  On cgroup v1 it returns
+// ErrCgroupDeleted when the directory of one of the known controllers does
+// not exist; other stat errors are ignored.
+func Load(path string) (*CgroupControl, error) {
+	unified, err := IsCgroup2UnifiedMode()
+	if err != nil {
+		return nil, err
+	}
+	control := &CgroupControl{
+		cgroup2: unified,
+		path:    path,
+		systemd: false,
+	}
+	if unified {
+		return control, nil
+	}
+
+	extra, err := getAvailableControllers(handlers, false)
+	if err != nil {
+		return nil, err
+	}
+	control.additionalControllers = extra
+
+	for name := range handlers {
+		if _, statErr := os.Stat(control.getCgroupv1Path(name)); os.IsNotExist(statErr) {
+			// compatible with the error code
+			// used by containerd/cgroups
+			return nil, ErrCgroupDeleted
+		}
+	}
+	return control, nil
+}
+
+// CreateSystemdUnit creates the systemd cgroup for path.  It fails when the
+// control was not created for systemd.
+func (c *CgroupControl) CreateSystemdUnit(path string) error {
+	if c.systemd {
+		return systemdCreate(path)
+	}
+	return fmt.Errorf("the cgroup controller is not using systemd")
+}
+
+// Delete cleans the cgroup by calling DeleteByPath with the control's own
+// path.
+func (c *CgroupControl) Delete() error {
+	ownPath := c.path
+	return c.DeleteByPath(ownPath)
+}
+
+// rmDirRecursively deletes a cgroup directory and its sub-directories.
+// It differs from os.RemoveAll as it doesn't attempt to unlink files.
+// On cgroupfs we are allowed only to rmdir empty directories.
+// A path that does not exist is not an error.
+func rmDirRecursively(path string) error {
+	// Try the directory itself first; this is enough when it has no
+	// children or is already gone.
+	firstErr := os.Remove(path)
+	if firstErr == nil || os.IsNotExist(firstErr) {
+		return nil
+	}
+
+	children, err := ioutil.ReadDir(path)
+	if err != nil {
+		return errors.Wrapf(err, "read %s", path)
+	}
+	for _, child := range children {
+		if !child.IsDir() {
+			continue
+		}
+		if err := rmDirRecursively(filepath.Join(path, child.Name())); err != nil {
+			return err
+		}
+	}
+
+	// Children are gone, retry the directory itself.
+	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
+		return errors.Wrapf(err, "remove %s", path)
+	}
+	return nil
+}
+
+// DeleteByPath deletes the specified cgroup path.
+//
+// For a systemd control the unit for path is stopped.  Otherwise the
+// directories derived from the control's own path are removed: the unified
+// directory on cgroup v2, or the directory of every handler and of every
+// non-symlink additional controller on cgroup v1.  On cgroup v1 all
+// removals are attempted and the last error, if any, is returned.
+func (c *CgroupControl) DeleteByPath(path string) error {
+	switch {
+	case c.systemd:
+		return systemdDestroy(path)
+	case c.cgroup2:
+		return rmDirRecursively(filepath.Join(cgroupRoot, c.path))
+	}
+
+	var lastError error
+	for _, handler := range handlers {
+		if err := handler.Destroy(c); err != nil {
+			lastError = err
+		}
+	}
+
+	for _, extra := range c.additionalControllers {
+		if !extra.symlink {
+			p := c.getCgroupv1Path(extra.name)
+			if err := rmDirRecursively(p); err != nil {
+				lastError = errors.Wrapf(err, "remove %s", p)
+			}
+		}
+	}
+	return lastError
+}
+
+// Update updates the cgroups by applying resources through every registered
+// handler.  It stops at the first handler that fails.
+func (c *CgroupControl) Update(resources *spec.LinuxResources) error {
+	for _, handler := range handlers {
+		applyErr := handler.Apply(c, resources)
+		if applyErr != nil {
+			return applyErr
+		}
+	}
+	return nil
+}
+
+// AddPid moves the specified pid to the cgroup.
+//
+// On cgroup v2 the pid is written to the cgroup.procs file of the unified
+// cgroup.  On cgroup v1 it is written to the tasks file of every handler and
+// of every additional controller that is not a symlink; the first write
+// error aborts the operation.
+func (c *CgroupControl) AddPid(pid int) error {
+	pidString := []byte(fmt.Sprintf("%d\n", pid))
+
+	if c.cgroup2 {
+		procs := filepath.Join(cgroupRoot, c.path, "cgroup.procs")
+		if err := ioutil.WriteFile(procs, pidString, 0644); err != nil {
+			return errors.Wrapf(err, "write %s", procs)
+		}
+		return nil
+	}
+
+	// Collect every tasks file first, then write the pid to each of them.
+	var tasksFiles []string
+	for name := range handlers {
+		tasksFiles = append(tasksFiles, filepath.Join(c.getCgroupv1Path(name), "tasks"))
+	}
+	for _, extra := range c.additionalControllers {
+		if extra.symlink {
+			continue
+		}
+		tasksFiles = append(tasksFiles, filepath.Join(c.getCgroupv1Path(extra.name), "tasks"))
+	}
+
+	for _, tasks := range tasksFiles {
+		if err := ioutil.WriteFile(tasks, pidString, 0644); err != nil {
+			return errors.Wrapf(err, "write %s", tasks)
+		}
+	}
+	return nil
+}
+
+// Stat returns usage statistics for the cgroup, collected from every
+// registered handler.  No metrics are returned if any handler fails.
+func (c *CgroupControl) Stat() (*Metrics, error) {
+	metrics := new(Metrics)
+	for _, handler := range handlers {
+		if err := handler.Stat(c, metrics); err != nil {
+			return nil, err
+		}
+	}
+	return metrics, nil
+}
+
+// readCgroup2MapFile reads the file called name in the cgroup v2 directory
+// of ctr.  Each line is split on white space: the first field becomes the
+// map key and the remaining fields its value.  Lines with fewer than two
+// fields are skipped.  A missing file yields an empty map and no error.
+func readCgroup2MapFile(ctr *CgroupControl, name string) (map[string][]string, error) {
+	result := map[string][]string{}
+
+	p := filepath.Join(cgroupRoot, ctr.path, name)
+	f, err := os.Open(p)
+	switch {
+	case err == nil:
+	case os.IsNotExist(err):
+		return result, nil
+	default:
+		return nil, errors.Wrapf(err, "open file %s", p)
+	}
+	defer f.Close()
+
+	lines := bufio.NewScanner(f)
+	for lines.Scan() {
+		if fields := strings.Fields(lines.Text()); len(fields) >= 2 {
+			result[fields[0]] = fields[1:]
+		}
+	}
+	if err := lines.Err(); err != nil {
+		return nil, errors.Wrapf(err, "parsing file %s", p)
+	}
+	return result, nil
+}
diff --git a/pkg/cgroups/cgroups_supported.go b/pkg/cgroups/cgroups_supported.go
new file mode 100644
index 000000000..fcd44dfc8
--- /dev/null
+++ b/pkg/cgroups/cgroups_supported.go
@@ -0,0 +1,27 @@
+// +build linux
+
+package cgroups
+
+import (
+ "sync"
+ "syscall"
+)
+
+// Result of the cgroup v2 detection.  It is computed once, on the first call
+// to IsCgroup2UnifiedMode.
+var (
+	isUnifiedOnce sync.Once
+	isUnified     bool
+	isUnifiedErr  error
+)
+
+// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
+// The check is a statfs on /sys/fs/cgroup; its outcome, including a possible
+// error, is remembered and returned by every later call.
+func IsCgroup2UnifiedMode() (bool, error) {
+	isUnifiedOnce.Do(func() {
+		var st syscall.Statfs_t
+		isUnifiedErr = syscall.Statfs("/sys/fs/cgroup", &st)
+		isUnified = isUnifiedErr == nil && st.Type == _cgroup2SuperMagic
+	})
+	return isUnified, isUnifiedErr
+}
diff --git a/pkg/cgroups/cgroups_unsupported.go b/pkg/cgroups/cgroups_unsupported.go
new file mode 100644
index 000000000..9dc196e42
--- /dev/null
+++ b/pkg/cgroups/cgroups_unsupported.go
@@ -0,0 +1,8 @@
+// +build !linux
+
+package cgroups
+
+// IsCgroup2UnifiedMode returns whether we are running in cgroup 2 cgroup2 mode.
+func IsCgroup2UnifiedMode() (bool, error) {
+ return false, nil
+}
diff --git a/pkg/cgroups/cpu.go b/pkg/cgroups/cpu.go
new file mode 100644
index 000000000..03677f1ef
--- /dev/null
+++ b/pkg/cgroups/cpu.go
@@ -0,0 +1,123 @@
+package cgroups
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+)
+
+// cpuHandler provides the Create/Apply/Destroy/Stat operations for the cpu
+// controller.  It carries no state.
+type cpuHandler struct{}
+
+// getCPUHandler returns a new handler for the cpu controller.
+func getCPUHandler() *cpuHandler {
+	return new(cpuHandler)
+}
+
+// cleanString strips all leading and trailing newline characters from s.
+// Other white space is left untouched.
+func cleanString(s string) string {
+	isNewline := func(r rune) bool { return r == '\n' }
+	return strings.TrimFunc(s, isNewline)
+}
+
+// readAcct reads the file called name from the cgroup v1 cpuacct directory
+// of ctr and returns its content as an unsigned integer.
+func readAcct(ctr *CgroupControl, name string) (uint64, error) {
+	acctDir := ctr.getCgroupv1Path(CPUAcct)
+	return readFileAsUint64(filepath.Join(acctDir, name))
+}
+
+// readAcctList reads the file called name from the cgroup v1 cpuacct
+// directory of ctr and parses it as a white-space separated list of base-10
+// unsigned integers.  An empty file yields a nil slice.
+func readAcctList(ctr *CgroupControl, name string) ([]uint64, error) {
+	var r []uint64
+
+	p := filepath.Join(ctr.getCgroupv1Path(CPUAcct), name)
+	data, err := ioutil.ReadFile(p)
+	if err != nil {
+		return nil, errors.Wrapf(err, "reading %s", p)
+	}
+	// strings.Fields drops every run of white space (including the trailing
+	// newline), so no value is lost if the separators are not single spaces.
+	for _, s := range strings.Fields(string(data)) {
+		v, err := strconv.ParseUint(s, 10, 64)
+		if err != nil {
+			return nil, errors.Wrapf(err, "parsing %s", s)
+		}
+		r = append(r, v)
+	}
+	return r, nil
+}
+
+// Apply sets the specified constraints.  Applying cpu constraints is not
+// implemented yet: an error is returned whenever res.CPU is set, and nil
+// otherwise.
+func (c *cpuHandler) Apply(ctr *CgroupControl, res *spec.LinuxResources) error {
+	if res.CPU != nil {
+		return fmt.Errorf("cpu apply not implemented yet")
+	}
+	return nil
+}
+
+// Create creates the cpu cgroup directory on cgroup v1 and reports whether
+// it had to be created.  On cgroup v2 it does nothing and reports false.
+func (c *cpuHandler) Create(ctr *CgroupControl) (bool, error) {
+	if !ctr.cgroup2 {
+		return ctr.createCgroupDirectory(CPU)
+	}
+	return false, nil
+}
+
+// Destroy removes the cgroup v1 directory of the cpu controller.
+func (c *cpuHandler) Destroy(ctr *CgroupControl) error {
+	cpuPath := ctr.getCgroupv1Path(CPU)
+	return rmDirRecursively(cpuPath)
+}
+
+// Stat fills a metrics structure with usage stats for the controller.
+//
+// On cgroup v2 the values come from cpu.stat: usage_usec is the total usage
+// and system_usec the kernel usage, both converted from microseconds to
+// nanoseconds.  On cgroup v1 they come from the cpuacct.usage,
+// cpuacct.usage_sys and cpuacct.usage_percpu files; a missing file leaves
+// the corresponding value at zero.
+func (c *cpuHandler) Stat(ctr *CgroupControl, m *Metrics) error {
+	var err error
+	usage := CPUUsage{}
+	if ctr.cgroup2 {
+		values, err := readCgroup2MapFile(ctr, "cpu.stat")
+		if err != nil {
+			return err
+		}
+		if val, found := values["usage_usec"]; found {
+			usage.Total, err = strconv.ParseUint(cleanString(val[0]), 10, 64)
+			if err != nil {
+				return err
+			}
+			usage.Total *= 1000
+		}
+		if val, found := values["system_usec"]; found {
+			usage.Kernel, err = strconv.ParseUint(cleanString(val[0]), 10, 64)
+			if err != nil {
+				return err
+			}
+			usage.Kernel *= 1000
+		}
+		// FIXME: How to read usage.PerCPU?
+	} else {
+		usage.Total, err = readAcct(ctr, "cpuacct.usage")
+		if err != nil {
+			if !os.IsNotExist(errors.Cause(err)) {
+				return err
+			}
+			usage.Total = 0
+		}
+		usage.Kernel, err = readAcct(ctr, "cpuacct.usage_sys")
+		if err != nil {
+			if !os.IsNotExist(errors.Cause(err)) {
+				return err
+			}
+			usage.Kernel = 0
+		}
+		usage.PerCPU, err = readAcctList(ctr, "cpuacct.usage_percpu")
+		if err != nil {
+			if !os.IsNotExist(errors.Cause(err)) {
+				return err
+			}
+			usage.PerCPU = nil
+		}
+	}
+	m.CPU = CPUMetrics{Usage: usage}
+	return nil
+}
diff --git a/pkg/cgroups/cpuset.go b/pkg/cgroups/cpuset.go
new file mode 100644
index 000000000..46d0484f2
--- /dev/null
+++ b/pkg/cgroups/cpuset.go
@@ -0,0 +1,85 @@
+package cgroups
+
+import (
+ "fmt"
+ "io/ioutil"
+ "path/filepath"
+ "strings"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+)
+
+type cpusetHandler struct {
+}
+
+// cpusetCopyFileFromParent makes sure the cpuset file called file in dir has
+// a value and returns that value.
+//
+// The current value is read from dir/file, or from dir/file.effective on
+// cgroup v2.  If it is empty, the value is looked up recursively in the
+// parent directories and then written to dir/file.  The recursion fails once
+// it reaches cgroupRoot.
+func cpusetCopyFileFromParent(dir, file string, cgroupv2 bool) ([]byte, error) {
+	if dir == cgroupRoot {
+		return nil, fmt.Errorf("could not find parent to initialize cpuset %s", file)
+	}
+	path := filepath.Join(dir, file)
+	parentPath := path
+	if cgroupv2 {
+		parentPath = fmt.Sprintf("%s.effective", parentPath)
+	}
+	data, err := ioutil.ReadFile(parentPath)
+	if err != nil {
+		// Report the file that was actually read, which differs from path
+		// on cgroup v2.
+		return nil, errors.Wrapf(err, "open %s", parentPath)
+	}
+	if len(strings.Trim(string(data), "\n")) != 0 {
+		return data, nil
+	}
+	data, err = cpusetCopyFileFromParent(filepath.Dir(dir), file, cgroupv2)
+	if err != nil {
+		return nil, err
+	}
+	if err := ioutil.WriteFile(path, data, 0644); err != nil {
+		return nil, errors.Wrapf(err, "write %s", path)
+	}
+	return data, nil
+}
+
+// cpusetCopyFromParent initializes cpuset.cpus and then cpuset.mems in path
+// through cpusetCopyFileFromParent, stopping at the first error.
+func cpusetCopyFromParent(path string, cgroupv2 bool) error {
+	cpusetFiles := [...]string{"cpuset.cpus", "cpuset.mems"}
+	for i := range cpusetFiles {
+		_, err := cpusetCopyFileFromParent(path, cpusetFiles[i], cgroupv2)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// getCpusetHandler returns a new handler for the cpuset controller.
+func getCpusetHandler() *cpusetHandler {
+	return new(cpusetHandler)
+}
+
+// Apply sets the specified constraints.  Applying cpuset constraints is not
+// implemented yet: an error is returned whenever res.CPU is set, and nil
+// otherwise.
+func (c *cpusetHandler) Apply(ctr *CgroupControl, res *spec.LinuxResources) error {
+	if res.CPU != nil {
+		return fmt.Errorf("cpuset apply not implemented yet")
+	}
+	return nil
+}
+
+// Create sets up the cpuset controller for the cgroup.
+//
+// On cgroup v2 the cpuset files of the unified cgroup directory are
+// initialized and true is always reported.  On cgroup v1 the cpuset
+// directory is created if it is missing, and the files are initialized only
+// when the directory was created by this call.
+func (c *cpusetHandler) Create(ctr *CgroupControl) (bool, error) {
+	if ctr.cgroup2 {
+		unifiedPath := filepath.Join(cgroupRoot, ctr.path)
+		return true, cpusetCopyFromParent(unifiedPath, true)
+	}
+
+	created, err := ctr.createCgroupDirectory(CPUset)
+	if err == nil && created {
+		return true, cpusetCopyFromParent(ctr.getCgroupv1Path(CPUset), false)
+	}
+	return created, err
+}
+
+// Destroy removes the cgroup v1 directory of the cpuset controller.
+func (c *cpusetHandler) Destroy(ctr *CgroupControl) error {
+	cpusetPath := ctr.getCgroupv1Path(CPUset)
+	return rmDirRecursively(cpusetPath)
+}
+
+// Stat fills a metrics structure with usage stats for the controller
+func (c *cpusetHandler) Stat(ctr *CgroupControl, m *Metrics) error {
+ return nil
+}
diff --git a/pkg/cgroups/memory.go b/pkg/cgroups/memory.go
new file mode 100644
index 000000000..b3991f7e3
--- /dev/null
+++ b/pkg/cgroups/memory.go
@@ -0,0 +1,66 @@
+package cgroups
+
+import (
+ "fmt"
+ "path/filepath"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// memHandler provides the Create/Apply/Destroy/Stat operations for the
+// memory controller.  It carries no state.
+type memHandler struct{}
+
+// getMemoryHandler returns a new handler for the memory controller.
+func getMemoryHandler() *memHandler {
+	return new(memHandler)
+}
+
+// Apply sets the specified constraints.  Applying memory constraints is not
+// implemented yet: an error is returned whenever res.Memory is set, and nil
+// otherwise.
+func (c *memHandler) Apply(ctr *CgroupControl, res *spec.LinuxResources) error {
+	if res.Memory != nil {
+		return fmt.Errorf("memory apply not implemented yet")
+	}
+	return nil
+}
+
+// Create creates the memory cgroup directory on cgroup v1 and reports
+// whether it had to be created.  On cgroup v2 it does nothing and reports
+// false.
+func (c *memHandler) Create(ctr *CgroupControl) (bool, error) {
+	if !ctr.cgroup2 {
+		return ctr.createCgroupDirectory(Memory)
+	}
+	return false, nil
+}
+
+// Destroy removes the cgroup v1 directory of the memory controller.
+func (c *memHandler) Destroy(ctr *CgroupControl) error {
+	memoryPath := ctr.getCgroupv1Path(Memory)
+	return rmDirRecursively(memoryPath)
+}
+
+// Stat fills a metrics structure with usage stats for the controller
+func (c *memHandler) Stat(ctr *CgroupControl, m *Metrics) error {
+ var err error
+ usage := MemoryUsage{}
+
+ var memoryRoot string
+ filenames := map[string]string{}
+
+ if ctr.cgroup2 {
+ memoryRoot = filepath.Join(cgroupRoot, ctr.path)
+ filenames["usage"] = "memory.current"
+ filenames["limit"] = "memory.max"
+ } else {
+ memoryRoot = ctr.getCgroupv1Path(Memory)
+ filenames["usage"] = "memory.usage_in_bytes"
+ filenames["limit"] = "memory.limit_in_bytes"
+ }
+ usage.Usage, err = readFileAsUint64(filepath.Join(memoryRoot, filenames["usage"]))
+ if err != nil {
+ return err
+ }
+ usage.Limit, err = readFileAsUint64(filepath.Join(memoryRoot, filenames["limit"]))
+ if err != nil {
+ return err
+ }
+
+ m.Memory = MemoryMetrics{Usage: usage}
+ return nil
+}
diff --git a/pkg/cgroups/pids.go b/pkg/cgroups/pids.go
new file mode 100644
index 000000000..65b9b5b34
--- /dev/null
+++ b/pkg/cgroups/pids.go
@@ -0,0 +1,62 @@
+package cgroups
+
+import (
+ "fmt"
+ "io/ioutil"
+ "path/filepath"
+
+ spec "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// pidHandler provides the Create/Apply/Destroy/Stat operations for the pids
+// controller.  It carries no state.
+type pidHandler struct{}
+
+// getPidsHandler returns a new handler for the pids controller.
+func getPidsHandler() *pidHandler {
+	return new(pidHandler)
+}
+
+// Apply sets the specified constraints by writing res.Pids.Limit to the
+// pids.max file of the cgroup.  Nothing is done when res.Pids is nil.  A
+// negative limit is written as "max", i.e. no limit.
+func (c *pidHandler) Apply(ctr *CgroupControl, res *spec.LinuxResources) error {
+	if res.Pids == nil {
+		return nil
+	}
+	var PIDRoot string
+
+	if ctr.cgroup2 {
+		PIDRoot = filepath.Join(cgroupRoot, ctr.path)
+	} else {
+		PIDRoot = ctr.getCgroupv1Path(Pids)
+	}
+
+	// pids.max only accepts "max" or a non-negative number, so a negative
+	// limit must not be written verbatim.
+	limit := []byte("max\n")
+	if res.Pids.Limit >= 0 {
+		limit = []byte(fmt.Sprintf("%d\n", res.Pids.Limit))
+	}
+
+	p := filepath.Join(PIDRoot, "pids.max")
+	return ioutil.WriteFile(p, limit, 0644)
+}
+
+// Create creates the pids cgroup directory on cgroup v1 and reports whether
+// it had to be created.  On cgroup v2 it does nothing and reports false,
+// like the other handlers: there is no per-controller directory in the
+// unified hierarchy.
+func (c *pidHandler) Create(ctr *CgroupControl) (bool, error) {
+	if ctr.cgroup2 {
+		return false, nil
+	}
+	return ctr.createCgroupDirectory(Pids)
+}
+
+// Destroy removes the cgroup v1 directory of the pids controller.
+func (c *pidHandler) Destroy(ctr *CgroupControl) error {
+	pidsPath := ctr.getCgroupv1Path(Pids)
+	return rmDirRecursively(pidsPath)
+}
+
+// Stat fills a metrics structure with usage stats for the controller
+func (c *pidHandler) Stat(ctr *CgroupControl, m *Metrics) error {
+ var PIDRoot string
+
+ if ctr.cgroup2 {
+ PIDRoot = filepath.Join(cgroupRoot, ctr.path)
+ } else {
+ PIDRoot = ctr.getCgroupv1Path(Pids)
+ }
+
+ current, err := readFileAsUint64(filepath.Join(PIDRoot, "pids.current"))
+ if err != nil {
+ return err
+ }
+
+ m.Pids = PidsMetrics{Current: current}
+ return nil
+}
diff --git a/pkg/cgroups/systemd.go b/pkg/cgroups/systemd.go
new file mode 100644
index 000000000..e72e456bc
--- /dev/null
+++ b/pkg/cgroups/systemd.go
@@ -0,0 +1,92 @@
+package cgroups
+
+import (
+ "fmt"
+ "path/filepath"
+ "strings"
+
+ systemdDbus "github.com/coreos/go-systemd/dbus"
+ "github.com/godbus/dbus"
+)
+
+// systemdCreate starts a transient systemd unit for the cgroup at path.
+//
+// The last element of path is used as the unit name and the rest, without
+// the trailing slash, as the slice the unit wants.  Two attempts are made:
+// the first one also requests the Delegate property, the second one does
+// not.  The error of the last failed attempt is returned when both fail.
+func systemdCreate(path string) error {
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	slice, name := filepath.Split(path)
+	slice = strings.TrimSuffix(slice, "/")
+
+	var lastError error
+	for attempt := 0; attempt < 2; attempt++ {
+		boolProps := map[string]bool{
+			"DefaultDependencies": false,
+			"MemoryAccounting":    true,
+			"CPUAccounting":       true,
+			"BlockIOAccounting":   true,
+		}
+		if attempt == 0 {
+			boolProps["Delegate"] = true
+		}
+
+		properties := []systemdDbus.Property{
+			systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)),
+			systemdDbus.PropWants(slice),
+		}
+		for key, enabled := range boolProps {
+			properties = append(properties, systemdDbus.Property{
+				Name:  key,
+				Value: dbus.MakeVariant(enabled),
+			})
+		}
+
+		done := make(chan string)
+		if _, err := conn.StartTransientUnit(name, "replace", properties, done); err != nil {
+			lastError = err
+			continue
+		}
+		// Wait for the job result before reporting success.
+		<-done
+		return nil
+	}
+	return lastError
+}
+
+/*
+ systemdDestroy is copied from containerd/cgroups/systemd.go file, that
+ has the following license:
+
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// systemdDestroy stops the systemd unit named after the last element of
+// path and waits for the stop job result.
+func systemdDestroy(path string) error {
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	unit := filepath.Base(path)
+	done := make(chan string)
+	if _, err := conn.StopUnit(unit, "replace", done); err != nil {
+		return err
+	}
+	<-done
+	return nil
+}