aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/opencontainers/runc/libcontainer/cgroups
diff options
context:
space:
mode:
authorMatthew Heon <matthew.heon@gmail.com>2017-11-01 11:24:59 -0400
committerMatthew Heon <matthew.heon@gmail.com>2017-11-01 11:24:59 -0400
commita031b83a09a8628435317a03f199cdc18b78262f (patch)
treebc017a96769ce6de33745b8b0b1304ccf38e9df0 /vendor/github.com/opencontainers/runc/libcontainer/cgroups
parent2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff)
downloadpodman-a031b83a09a8628435317a03f199cdc18b78262f.tar.gz
podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.bz2
podman-a031b83a09a8628435317a03f199cdc18b78262f.zip
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer/cgroups')
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go64
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go3
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go360
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go237
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go125
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go121
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go163
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go80
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go61
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go3
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go71
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go313
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go40
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go43
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go41
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go35
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go73
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go78
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go128
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go108
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go55
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go553
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go462
23 files changed, 3217 insertions, 0 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
new file mode 100644
index 000000000..25ff51589
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
@@ -0,0 +1,64 @@
+// +build linux
+
+package cgroups
+
+import (
+ "fmt"
+
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type Manager interface {
+ // Applies cgroup configuration to the process with the specified pid
+ Apply(pid int) error
+
+ // Returns the PIDs inside the cgroup set
+ GetPids() ([]int, error)
+
+ // Returns the PIDs inside the cgroup set & all sub-cgroups
+ GetAllPids() ([]int, error)
+
+ // Returns statistics for the cgroup set
+ GetStats() (*Stats, error)
+
+ // Toggles the freezer cgroup according with specified state
+ Freeze(state configs.FreezerState) error
+
+ // Destroys the cgroup set
+ Destroy() error
+
+ // The option func SystemdCgroups() and Cgroupfs() require following attributes:
+ // Paths map[string]string
+ // Cgroups *configs.Cgroup
+ // Paths maps cgroup subsystem to path at which it is mounted.
+ // Cgroups specifies specific cgroup settings for the various subsystems
+
+ // Returns cgroup paths to save in a state file and to be able to
+ // restore the object later.
+ GetPaths() map[string]string
+
+ // Sets the cgroup as configured.
+ Set(container *configs.Config) error
+}
+
+type NotFoundError struct {
+ Subsystem string
+}
+
+func (e *NotFoundError) Error() string {
+ return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
+}
+
+func NewNotFoundError(sub string) error {
+ return &NotFoundError{
+ Subsystem: sub,
+ }
+}
+
+func IsNotFound(err error) bool {
+ if err == nil {
+ return false
+ }
+ _, ok := err.(*NotFoundError)
+ return ok
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
new file mode 100644
index 000000000..278d507e2
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go
@@ -0,0 +1,3 @@
+// +build !linux
+
+package cgroups
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
new file mode 100644
index 000000000..22d82acb4
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@@ -0,0 +1,360 @@
+// +build linux
+
+package fs
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+)
+
+var (
+ subsystems = subsystemSet{
+ &CpusetGroup{},
+ &DevicesGroup{},
+ &MemoryGroup{},
+ &CpuGroup{},
+ &CpuacctGroup{},
+ &PidsGroup{},
+ &BlkioGroup{},
+ &HugetlbGroup{},
+ &NetClsGroup{},
+ &NetPrioGroup{},
+ &PerfEventGroup{},
+ &FreezerGroup{},
+ &NameGroup{GroupName: "name=systemd", Join: true},
+ }
+ HugePageSizes, _ = cgroups.GetHugePageSize()
+)
+
+var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
+
+type subsystemSet []subsystem
+
+func (s subsystemSet) Get(name string) (subsystem, error) {
+ for _, ss := range s {
+ if ss.Name() == name {
+ return ss, nil
+ }
+ }
+ return nil, errSubsystemDoesNotExist
+}
+
+type subsystem interface {
+ // Name returns the name of the subsystem.
+ Name() string
+ // Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
+ GetStats(path string, stats *cgroups.Stats) error
+ // Removes the cgroup represented by 'cgroupData'.
+ Remove(*cgroupData) error
+ // Creates and joins the cgroup represented by 'cgroupData'.
+ Apply(*cgroupData) error
+ // Set the cgroup represented by cgroup.
+ Set(path string, cgroup *configs.Cgroup) error
+}
+
+type Manager struct {
+ mu sync.Mutex
+ Cgroups *configs.Cgroup
+ Paths map[string]string
+}
+
+// The absolute path to the root of the cgroup hierarchies.
+var cgroupRootLock sync.Mutex
+var cgroupRoot string
+
+// Gets the cgroupRoot.
+func getCgroupRoot() (string, error) {
+ cgroupRootLock.Lock()
+ defer cgroupRootLock.Unlock()
+
+ if cgroupRoot != "" {
+ return cgroupRoot, nil
+ }
+
+ root, err := cgroups.FindCgroupMountpointDir()
+ if err != nil {
+ return "", err
+ }
+
+ if _, err := os.Stat(root); err != nil {
+ return "", err
+ }
+
+ cgroupRoot = root
+ return cgroupRoot, nil
+}
+
+type cgroupData struct {
+ root string
+ innerPath string
+ config *configs.Cgroup
+ pid int
+}
+
+func (m *Manager) Apply(pid int) (err error) {
+ if m.Cgroups == nil {
+ return nil
+ }
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
+ var c = m.Cgroups
+
+ d, err := getCgroupData(m.Cgroups, pid)
+ if err != nil {
+ return err
+ }
+
+ m.Paths = make(map[string]string)
+ if c.Paths != nil {
+ for name, path := range c.Paths {
+ _, err := d.path(name)
+ if err != nil {
+ if cgroups.IsNotFound(err) {
+ continue
+ }
+ return err
+ }
+ m.Paths[name] = path
+ }
+ return cgroups.EnterPid(m.Paths, pid)
+ }
+
+ for _, sys := range subsystems {
+ // TODO: Apply should, ideally, be reentrant or be broken up into a separate
+ // create and join phase so that the cgroup hierarchy for a container can be
+ // created then join consists of writing the process pids to cgroup.procs
+ p, err := d.path(sys.Name())
+ if err != nil {
+ // The non-presence of the devices subsystem is
+ // considered fatal for security reasons.
+ if cgroups.IsNotFound(err) && sys.Name() != "devices" {
+ continue
+ }
+ return err
+ }
+ m.Paths[sys.Name()] = p
+
+ if err := sys.Apply(d); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (m *Manager) Destroy() error {
+ if m.Cgroups.Paths != nil {
+ return nil
+ }
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if err := cgroups.RemovePaths(m.Paths); err != nil {
+ return err
+ }
+ m.Paths = make(map[string]string)
+ return nil
+}
+
+func (m *Manager) GetPaths() map[string]string {
+ m.mu.Lock()
+ paths := m.Paths
+ m.mu.Unlock()
+ return paths
+}
+
+func (m *Manager) GetStats() (*cgroups.Stats, error) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ stats := cgroups.NewStats()
+ for name, path := range m.Paths {
+ sys, err := subsystems.Get(name)
+ if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
+ continue
+ }
+ if err := sys.GetStats(path, stats); err != nil {
+ return nil, err
+ }
+ }
+ return stats, nil
+}
+
+func (m *Manager) Set(container *configs.Config) error {
+ // If Paths are set, then we are just joining cgroups paths
+ // and there is no need to set any values.
+ if m.Cgroups.Paths != nil {
+ return nil
+ }
+
+ paths := m.GetPaths()
+ for _, sys := range subsystems {
+ path := paths[sys.Name()]
+ if err := sys.Set(path, container.Cgroups); err != nil {
+ return err
+ }
+ }
+
+ if m.Paths["cpu"] != "" {
+ if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Freeze toggles the container's freezer cgroup depending on the state
+// provided
+func (m *Manager) Freeze(state configs.FreezerState) error {
+ paths := m.GetPaths()
+ dir := paths["freezer"]
+ prevState := m.Cgroups.Resources.Freezer
+ m.Cgroups.Resources.Freezer = state
+ freezer, err := subsystems.Get("freezer")
+ if err != nil {
+ return err
+ }
+ err = freezer.Set(dir, m.Cgroups)
+ if err != nil {
+ m.Cgroups.Resources.Freezer = prevState
+ return err
+ }
+ return nil
+}
+
+func (m *Manager) GetPids() ([]int, error) {
+ paths := m.GetPaths()
+ return cgroups.GetPids(paths["devices"])
+}
+
+func (m *Manager) GetAllPids() ([]int, error) {
+ paths := m.GetPaths()
+ return cgroups.GetAllPids(paths["devices"])
+}
+
+func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
+ root, err := getCgroupRoot()
+ if err != nil {
+ return nil, err
+ }
+
+ if (c.Name != "" || c.Parent != "") && c.Path != "" {
+ return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
+ }
+
+ // XXX: Do not remove this code. Path safety is important! -- cyphar
+ cgPath := libcontainerUtils.CleanPath(c.Path)
+ cgParent := libcontainerUtils.CleanPath(c.Parent)
+ cgName := libcontainerUtils.CleanPath(c.Name)
+
+ innerPath := cgPath
+ if innerPath == "" {
+ innerPath = filepath.Join(cgParent, cgName)
+ }
+
+ return &cgroupData{
+ root: root,
+ innerPath: innerPath,
+ config: c,
+ pid: pid,
+ }, nil
+}
+
+func (raw *cgroupData) path(subsystem string) (string, error) {
+ mnt, err := cgroups.FindCgroupMountpoint(subsystem)
+ // If we didn't mount the subsystem, there is no point we make the path.
+ if err != nil {
+ return "", err
+ }
+
+ // If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
+ if filepath.IsAbs(raw.innerPath) {
+ // Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
+ return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
+ }
+
+ // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
+ // process could in container and shared pid namespace with host, and
+ // /proc/1/cgroup could point to whole other world of cgroups.
+ parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
+ if err != nil {
+ return "", err
+ }
+
+ return filepath.Join(parentPath, raw.innerPath), nil
+}
+
+func (raw *cgroupData) join(subsystem string) (string, error) {
+ path, err := raw.path(subsystem)
+ if err != nil {
+ return "", err
+ }
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return "", err
+ }
+ if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
+ return "", err
+ }
+ return path, nil
+}
+
+func writeFile(dir, file, data string) error {
+ // Normally dir should not be empty, one case is that cgroup subsystem
+ // is not mounted, we will get empty dir, and we want it fail here.
+ if dir == "" {
+ return fmt.Errorf("no such directory for %s", file)
+ }
+ if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
+ return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+ }
+ return nil
+}
+
+func readFile(dir, file string) (string, error) {
+ data, err := ioutil.ReadFile(filepath.Join(dir, file))
+ return string(data), err
+}
+
+func removePath(p string, err error) error {
+ if err != nil {
+ return err
+ }
+ if p != "" {
+ return os.RemoveAll(p)
+ }
+ return nil
+}
+
+func CheckCpushares(path string, c uint64) error {
+ var cpuShares uint64
+
+ if c == 0 {
+ return nil
+ }
+
+ fd, err := os.Open(filepath.Join(path, "cpu.shares"))
+ if err != nil {
+ return err
+ }
+ defer fd.Close()
+
+ _, err = fmt.Fscanf(fd, "%d", &cpuShares)
+ if err != nil && err != io.EOF {
+ return err
+ }
+
+ if c > cpuShares {
+ return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
+ } else if c < cpuShares {
+ return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
new file mode 100644
index 000000000..a142cb991
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
@@ -0,0 +1,237 @@
+// +build linux
+
+package fs
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type BlkioGroup struct {
+}
+
+func (s *BlkioGroup) Name() string {
+ return "blkio"
+}
+
+func (s *BlkioGroup) Apply(d *cgroupData) error {
+ _, err := d.join("blkio")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.BlkioWeight != 0 {
+ if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
+ return err
+ }
+ }
+
+ if cgroup.Resources.BlkioLeafWeight != 0 {
+ if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
+ return err
+ }
+ }
+ for _, wd := range cgroup.Resources.BlkioWeightDevice {
+ if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
+ return err
+ }
+ if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
+ return err
+ }
+ }
+ for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
+ if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
+ return err
+ }
+ }
+ for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
+ if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
+ return err
+ }
+ }
+ for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
+ if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
+ return err
+ }
+ }
+ for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
+ if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *BlkioGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("blkio"))
+}
+
+/*
+examples:
+
+ blkio.sectors
+ 8:0 6792
+
+ blkio.io_service_bytes
+ 8:0 Read 1282048
+ 8:0 Write 2195456
+ 8:0 Sync 2195456
+ 8:0 Async 1282048
+ 8:0 Total 3477504
+ Total 3477504
+
+ blkio.io_serviced
+ 8:0 Read 124
+ 8:0 Write 104
+ 8:0 Sync 104
+ 8:0 Async 124
+ 8:0 Total 228
+ Total 228
+
+ blkio.io_queued
+ 8:0 Read 0
+ 8:0 Write 0
+ 8:0 Sync 0
+ 8:0 Async 0
+ 8:0 Total 0
+ Total 0
+*/
+
+func splitBlkioStatLine(r rune) bool {
+ return r == ' ' || r == ':'
+}
+
+func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
+ var blkioStats []cgroups.BlkioStatEntry
+ f, err := os.Open(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return blkioStats, nil
+ }
+ return nil, err
+ }
+ defer f.Close()
+
+ sc := bufio.NewScanner(f)
+ for sc.Scan() {
+ // format: dev type amount
+ fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
+ if len(fields) < 3 {
+ if len(fields) == 2 && fields[0] == "Total" {
+ // skip total line
+ continue
+ } else {
+ return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
+ }
+ }
+
+ v, err := strconv.ParseUint(fields[0], 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ major := v
+
+ v, err = strconv.ParseUint(fields[1], 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ minor := v
+
+ op := ""
+ valueField := 2
+ if len(fields) == 4 {
+ op = fields[2]
+ valueField = 3
+ }
+ v, err = strconv.ParseUint(fields[valueField], 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
+ }
+
+ return blkioStats, nil
+}
+
+func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
+ // Try to read CFQ stats available on all CFQ enabled kernels first
+ if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
+ return getCFQStats(path, stats)
+ }
+ return getStats(path, stats) // Use generic stats as fallback
+}
+
+func getCFQStats(path string, stats *cgroups.Stats) error {
+ var blkioStats []cgroups.BlkioStatEntry
+ var err error
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.SectorsRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoServiceBytesRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoServicedRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoQueuedRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoServiceTimeRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoWaitTimeRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoMergedRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoTimeRecursive = blkioStats
+
+ return nil
+}
+
+func getStats(path string, stats *cgroups.Stats) error {
+ var blkioStats []cgroups.BlkioStatEntry
+ var err error
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoServiceBytesRecursive = blkioStats
+
+ if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
+ return err
+ }
+ stats.BlkioStats.IoServicedRecursive = blkioStats
+
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
new file mode 100644
index 000000000..b712bd0b1
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@@ -0,0 +1,125 @@
+// +build linux
+
+package fs
+
+import (
+ "bufio"
+ "os"
+ "path/filepath"
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type CpuGroup struct {
+}
+
+func (s *CpuGroup) Name() string {
+ return "cpu"
+}
+
+func (s *CpuGroup) Apply(d *cgroupData) error {
+ // We always want to join the cpu group, to allow fair cpu scheduling
+ // on a container basis
+ path, err := d.path("cpu")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return s.ApplyDir(path, d.config, d.pid)
+}
+
+func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
+ // This might happen if we have no cpu cgroup mounted.
+ // Just do nothing and don't fail.
+ if path == "" {
+ return nil
+ }
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return err
+ }
+ // We should set the real-Time group scheduling settings before moving
+ // in the process because if the process is already in SCHED_RR mode
+ // and no RT bandwidth is set, adding it will fail.
+ if err := s.SetRtSched(path, cgroup); err != nil {
+ return err
+ }
+ // because we are not using d.join we need to place the pid into the procs file
+ // unlike the other subsystems
+ if err := cgroups.WriteCgroupProc(path, pid); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.CpuRtPeriod != 0 {
+ if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.CpuRtRuntime != 0 {
+ if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.CpuShares != 0 {
+ if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.CpuPeriod != 0 {
+ if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.CpuQuota != 0 {
+ if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
+ return err
+ }
+ }
+ if err := s.SetRtSched(path, cgroup); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *CpuGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("cpu"))
+}
+
+func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
+ f, err := os.Open(filepath.Join(path, "cpu.stat"))
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return err
+ }
+ defer f.Close()
+
+ sc := bufio.NewScanner(f)
+ for sc.Scan() {
+ t, v, err := getCgroupParamKeyValue(sc.Text())
+ if err != nil {
+ return err
+ }
+ switch t {
+ case "nr_periods":
+ stats.CpuStats.ThrottlingData.Periods = v
+
+ case "nr_throttled":
+ stats.CpuStats.ThrottlingData.ThrottledPeriods = v
+
+ case "throttled_time":
+ stats.CpuStats.ThrottlingData.ThrottledTime = v
+ }
+ }
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
new file mode 100644
index 000000000..53afbaddf
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
@@ -0,0 +1,121 @@
+// +build linux
+
+package fs
+
+import (
+ "fmt"
+ "io/ioutil"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/system"
+)
+
+const (
+ cgroupCpuacctStat = "cpuacct.stat"
+ nanosecondsInSecond = 1000000000
+)
+
+var clockTicks = uint64(system.GetClockTicks())
+
+type CpuacctGroup struct {
+}
+
+func (s *CpuacctGroup) Name() string {
+ return "cpuacct"
+}
+
+func (s *CpuacctGroup) Apply(d *cgroupData) error {
+ // we just want to join this group even though we don't set anything
+ if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+
+ return nil
+}
+
+func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
+ return nil
+}
+
+func (s *CpuacctGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("cpuacct"))
+}
+
+func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
+ userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
+ if err != nil {
+ return err
+ }
+
+ totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
+ if err != nil {
+ return err
+ }
+
+ percpuUsage, err := getPercpuUsage(path)
+ if err != nil {
+ return err
+ }
+
+ stats.CpuStats.CpuUsage.TotalUsage = totalUsage
+ stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
+ stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
+ stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
+ return nil
+}
+
+// Returns user and kernel usage breakdown in nanoseconds.
+func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
+ userModeUsage := uint64(0)
+ kernelModeUsage := uint64(0)
+ const (
+ userField = "user"
+ systemField = "system"
+ )
+
+ // Expected format:
+ // user <usage in ticks>
+ // system <usage in ticks>
+ data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
+ if err != nil {
+ return 0, 0, err
+ }
+ fields := strings.Fields(string(data))
+ if len(fields) != 4 {
+ return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat))
+ }
+ if fields[0] != userField {
+ return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
+ }
+ if fields[2] != systemField {
+ return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
+ }
+ if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
+ return 0, 0, err
+ }
+ if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
+ return 0, 0, err
+ }
+
+ return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
+}
+
+func getPercpuUsage(path string) ([]uint64, error) {
+ percpuUsage := []uint64{}
+ data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
+ if err != nil {
+ return percpuUsage, err
+ }
+ for _, value := range strings.Fields(string(data)) {
+ value, err := strconv.ParseUint(value, 10, 64)
+ if err != nil {
+ return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
+ }
+ percpuUsage = append(percpuUsage, value)
+ }
+ return percpuUsage, nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
new file mode 100644
index 000000000..20c9eafac
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@@ -0,0 +1,163 @@
+// +build linux
+
+package fs
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+)
+
+type CpusetGroup struct {
+}
+
+func (s *CpusetGroup) Name() string {
+ return "cpuset"
+}
+
+func (s *CpusetGroup) Apply(d *cgroupData) error {
+ dir, err := d.path("cpuset")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return s.ApplyDir(dir, d.config, d.pid)
+}
+
+func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.CpusetCpus != "" {
+ if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.CpusetMems != "" {
+ if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (s *CpusetGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("cpuset"))
+}
+
+func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
+
+func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
+ // This might happen if we have no cpuset cgroup mounted.
+ // Just do nothing and don't fail.
+ if dir == "" {
+ return nil
+ }
+ mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
+ if err != nil {
+ return err
+ }
+ root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
+ // 'ensureParent' start with parent because we don't want to
+ // explicitly inherit from parent, it could conflict with
+ // 'cpuset.cpu_exclusive'.
+ if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
+ return err
+ }
+ if err := os.MkdirAll(dir, 0755); err != nil {
+ return err
+ }
+ // We didn't inherit cpuset configs from parent, but we have
+ // to ensure cpuset configs are set before moving task into the
+ // cgroup.
+ // The logic is, if user specified cpuset configs, use these
+ // specified configs, otherwise, inherit from parent. This makes
+ // cpuset configs work correctly with 'cpuset.cpu_exclusive', and
+ // keep backward compatbility.
+ if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
+ return err
+ }
+
+ // because we are not using d.join we need to place the pid into the procs file
+ // unlike the other subsystems
+ if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
+ if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
+ return
+ }
+ if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
+ return
+ }
+ return cpus, mems, nil
+}
+
+// ensureParent makes sure that the parent directory of current is created
+// and populated with the proper cpus and mems files copied from
+// it's parent.
+func (s *CpusetGroup) ensureParent(current, root string) error {
+ parent := filepath.Dir(current)
+ if libcontainerUtils.CleanPath(parent) == root {
+ return nil
+ }
+ // Avoid infinite recursion.
+ if parent == current {
+ return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
+ }
+ if err := s.ensureParent(parent, root); err != nil {
+ return err
+ }
+ if err := os.MkdirAll(current, 0755); err != nil {
+ return err
+ }
+ return s.copyIfNeeded(current, parent)
+}
+
+// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
+// directory to the current directory if the file's contents are 0
+func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
+ var (
+ err error
+ currentCpus, currentMems []byte
+ parentCpus, parentMems []byte
+ )
+
+ if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
+ return err
+ }
+ if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
+ return err
+ }
+
+ if s.isEmpty(currentCpus) {
+ if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
+ return err
+ }
+ }
+ if s.isEmpty(currentMems) {
+ if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (s *CpusetGroup) isEmpty(b []byte) bool {
+ return len(bytes.Trim(b, "\n")) == 0
+}
+
+func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
+ if err := s.Set(path, cgroup); err != nil {
+ return err
+ }
+ return s.copyIfNeeded(path, filepath.Dir(path))
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
new file mode 100644
index 000000000..0ac5b4ed7
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
@@ -0,0 +1,80 @@
+// +build linux
+
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/system"
+)
+
+type DevicesGroup struct {
+}
+
+func (s *DevicesGroup) Name() string {
+ return "devices"
+}
+
+func (s *DevicesGroup) Apply(d *cgroupData) error {
+ _, err := d.join("devices")
+ if err != nil {
+ // We will return error even it's `not found` error, devices
+ // cgroup is hard requirement for container's security.
+ return err
+ }
+ return nil
+}
+
+func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if system.RunningInUserNS() {
+ return nil
+ }
+
+ devices := cgroup.Resources.Devices
+ if len(devices) > 0 {
+ for _, dev := range devices {
+ file := "devices.deny"
+ if dev.Allow {
+ file = "devices.allow"
+ }
+ if err := writeFile(path, file, dev.CgroupString()); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+ if cgroup.Resources.AllowAllDevices != nil {
+ if *cgroup.Resources.AllowAllDevices == false {
+ if err := writeFile(path, "devices.deny", "a"); err != nil {
+ return err
+ }
+
+ for _, dev := range cgroup.Resources.AllowedDevices {
+ if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+
+ if err := writeFile(path, "devices.allow", "a"); err != nil {
+ return err
+ }
+ }
+
+ for _, dev := range cgroup.Resources.DeniedDevices {
+ if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *DevicesGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("devices"))
+}
+
+func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
new file mode 100644
index 000000000..e70dfe3b9
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
@@ -0,0 +1,61 @@
+// +build linux
+
+package fs
+
+import (
+ "fmt"
+ "strings"
+ "time"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type FreezerGroup struct {
+}
+
+func (s *FreezerGroup) Name() string {
+ return "freezer"
+}
+
+func (s *FreezerGroup) Apply(d *cgroupData) error {
+ _, err := d.join("freezer")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
+ switch cgroup.Resources.Freezer {
+ case configs.Frozen, configs.Thawed:
+ if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
+ return err
+ }
+
+ for {
+ state, err := readFile(path, "freezer.state")
+ if err != nil {
+ return err
+ }
+ if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
+ break
+ }
+ time.Sleep(1 * time.Millisecond)
+ }
+ case configs.Undefined:
+ return nil
+ default:
+ return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
+ }
+
+ return nil
+}
+
+func (s *FreezerGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("freezer"))
+}
+
+func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go
new file mode 100644
index 000000000..3ef9e0315
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go
@@ -0,0 +1,3 @@
+// +build !linux
+
+package fs
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
new file mode 100644
index 000000000..2f9727719
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
@@ -0,0 +1,71 @@
+// +build linux
+
+package fs
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type HugetlbGroup struct {
+}
+
+func (s *HugetlbGroup) Name() string {
+ return "hugetlb"
+}
+
+func (s *HugetlbGroup) Apply(d *cgroupData) error {
+ _, err := d.join("hugetlb")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
+ for _, hugetlb := range cgroup.Resources.HugetlbLimit {
+ if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *HugetlbGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("hugetlb"))
+}
+
+func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
+ hugetlbStats := cgroups.HugetlbStats{}
+ for _, pageSize := range HugePageSizes {
+ usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
+ value, err := getCgroupParamUint(path, usage)
+ if err != nil {
+ return fmt.Errorf("failed to parse %s - %v", usage, err)
+ }
+ hugetlbStats.Usage = value
+
+ maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
+ value, err = getCgroupParamUint(path, maxUsage)
+ if err != nil {
+ return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+ }
+ hugetlbStats.MaxUsage = value
+
+ failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
+ value, err = getCgroupParamUint(path, failcnt)
+ if err != nil {
+ return fmt.Errorf("failed to parse %s - %v", failcnt, err)
+ }
+ hugetlbStats.Failcnt = value
+
+ stats.HugetlbStats[pageSize] = hugetlbStats
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
new file mode 100644
index 000000000..ad395a5d6
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@@ -0,0 +1,313 @@
+// +build linux
+
+package fs
+
+import (
+ "bufio"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall" // only for Errno
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+
+ "golang.org/x/sys/unix"
+)
+
+const (
+ cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+ cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
+ cgroupMemoryLimit = "memory.limit_in_bytes"
+)
+
+type MemoryGroup struct {
+}
+
+func (s *MemoryGroup) Name() string {
+ return "memory"
+}
+
+func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
+ path, err := d.path("memory")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ } else if path == "" {
+ return nil
+ }
+ if memoryAssigned(d.config) {
+ if _, err := os.Stat(path); os.IsNotExist(err) {
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return err
+ }
+ // Only enable kernel memory accouting when this cgroup
+ // is created by libcontainer, otherwise we might get
+ // error when people use `cgroupsPath` to join an existed
+ // cgroup whose kernel memory is not initialized.
+ if err := EnableKernelMemoryAccounting(path); err != nil {
+ return err
+ }
+ }
+ }
+ defer func() {
+ if err != nil {
+ os.RemoveAll(path)
+ }
+ }()
+
+ // We need to join memory cgroup after set memory limits, because
+ // kmem.limit_in_bytes can only be set when the cgroup is empty.
+ _, err = d.join("memory")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func EnableKernelMemoryAccounting(path string) error {
+ // Check if kernel memory is enabled
+ // We have to limit the kernel memory here as it won't be accounted at all
+ // until a limit is set on the cgroup and limit cannot be set once the
+ // cgroup has children, or if there are already tasks in the cgroup.
+ for _, i := range []int64{1, -1} {
+ if err := setKernelMemory(path, i); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func setKernelMemory(path string, kernelMemoryLimit int64) error {
+ if path == "" {
+ return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
+ }
+ if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
+ // kernel memory is not enabled on the system so we should do nothing
+ return nil
+ }
+ if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
+ // Check if the error number returned by the syscall is "EBUSY"
+ // The EBUSY signal is returned on attempts to write to the
+ // memory.kmem.limit_in_bytes file if the cgroup has children or
+ // once tasks have been attached to the cgroup
+ if pathErr, ok := err.(*os.PathError); ok {
+ if errNo, ok := pathErr.Err.(syscall.Errno); ok {
+ if errNo == unix.EBUSY {
+ return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
+ }
+ }
+ }
+ return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
+ }
+ return nil
+}
+
+func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
+ // If the memory update is set to -1 we should also
+ // set swap to -1, it means unlimited memory.
+ if cgroup.Resources.Memory == -1 {
+ // Only set swap if it's enabled in kernel
+ if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
+ cgroup.Resources.MemorySwap = -1
+ }
+ }
+
+ // When memory and swap memory are both set, we need to handle the cases
+ // for updating container.
+ if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
+ memoryUsage, err := getMemoryData(path, "")
+ if err != nil {
+ return err
+ }
+
+ // When update memory limit, we should adapt the write sequence
+ // for memory and swap memory, so it won't fail because the new
+ // value and the old value don't fit kernel's validation.
+ if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
+ if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+ return err
+ }
+ if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+ return err
+ }
+ } else {
+ if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+ return err
+ }
+ if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+ return err
+ }
+ }
+ } else {
+ if cgroup.Resources.Memory != 0 {
+ if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.MemorySwap != 0 {
+ if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if err := setMemoryAndSwap(path, cgroup); err != nil {
+ return err
+ }
+
+ if cgroup.Resources.KernelMemory != 0 {
+ if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
+ return err
+ }
+ }
+
+ if cgroup.Resources.MemoryReservation != 0 {
+ if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
+ return err
+ }
+ }
+
+ if cgroup.Resources.KernelMemoryTCP != 0 {
+ if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.OomKillDisable {
+ if err := writeFile(path, "memory.oom_control", "1"); err != nil {
+ return err
+ }
+ }
+ if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
+ return nil
+ } else if *cgroup.Resources.MemorySwappiness <= 100 {
+ if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
+ return err
+ }
+ } else {
+ return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
+ }
+
+ return nil
+}
+
+func (s *MemoryGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("memory"))
+}
+
+func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
+ // Set stats from memory.stat.
+ statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return err
+ }
+ defer statsFile.Close()
+
+ sc := bufio.NewScanner(statsFile)
+ for sc.Scan() {
+ t, v, err := getCgroupParamKeyValue(sc.Text())
+ if err != nil {
+ return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
+ }
+ stats.MemoryStats.Stats[t] = v
+ }
+ stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
+
+ memoryUsage, err := getMemoryData(path, "")
+ if err != nil {
+ return err
+ }
+ stats.MemoryStats.Usage = memoryUsage
+ swapUsage, err := getMemoryData(path, "memsw")
+ if err != nil {
+ return err
+ }
+ stats.MemoryStats.SwapUsage = swapUsage
+ kernelUsage, err := getMemoryData(path, "kmem")
+ if err != nil {
+ return err
+ }
+ stats.MemoryStats.KernelUsage = kernelUsage
+ kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
+ if err != nil {
+ return err
+ }
+ stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
+
+ useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
+ value, err := getCgroupParamUint(path, useHierarchy)
+ if err != nil {
+ return err
+ }
+ if value == 1 {
+ stats.MemoryStats.UseHierarchy = true
+ }
+ return nil
+}
+
+func memoryAssigned(cgroup *configs.Cgroup) bool {
+ return cgroup.Resources.Memory != 0 ||
+ cgroup.Resources.MemoryReservation != 0 ||
+ cgroup.Resources.MemorySwap > 0 ||
+ cgroup.Resources.KernelMemory > 0 ||
+ cgroup.Resources.KernelMemoryTCP > 0 ||
+ cgroup.Resources.OomKillDisable ||
+ (cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
+}
+
+func getMemoryData(path, name string) (cgroups.MemoryData, error) {
+ memoryData := cgroups.MemoryData{}
+
+ moduleName := "memory"
+ if name != "" {
+ moduleName = strings.Join([]string{"memory", name}, ".")
+ }
+ usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
+ maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
+ failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
+ limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
+
+ value, err := getCgroupParamUint(path, usage)
+ if err != nil {
+ if moduleName != "memory" && os.IsNotExist(err) {
+ return cgroups.MemoryData{}, nil
+ }
+ return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
+ }
+ memoryData.Usage = value
+ value, err = getCgroupParamUint(path, maxUsage)
+ if err != nil {
+ if moduleName != "memory" && os.IsNotExist(err) {
+ return cgroups.MemoryData{}, nil
+ }
+ return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+ }
+ memoryData.MaxUsage = value
+ value, err = getCgroupParamUint(path, failcnt)
+ if err != nil {
+ if moduleName != "memory" && os.IsNotExist(err) {
+ return cgroups.MemoryData{}, nil
+ }
+ return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
+ }
+ memoryData.Failcnt = value
+ value, err = getCgroupParamUint(path, limit)
+ if err != nil {
+ if moduleName != "memory" && os.IsNotExist(err) {
+ return cgroups.MemoryData{}, nil
+ }
+ return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
+ }
+ memoryData.Limit = value
+
+ return memoryData, nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
new file mode 100644
index 000000000..d8cf1d87c
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
@@ -0,0 +1,40 @@
+// +build linux
+
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type NameGroup struct {
+ GroupName string
+ Join bool
+}
+
+func (s *NameGroup) Name() string {
+ return s.GroupName
+}
+
+func (s *NameGroup) Apply(d *cgroupData) error {
+ if s.Join {
+ // ignore errors if the named cgroup does not exist
+ d.join(s.GroupName)
+ }
+ return nil
+}
+
+func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
+ return nil
+}
+
+func (s *NameGroup) Remove(d *cgroupData) error {
+ if s.Join {
+ removePath(d.path(s.GroupName))
+ }
+ return nil
+}
+
+func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
new file mode 100644
index 000000000..8e74b645e
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
@@ -0,0 +1,43 @@
+// +build linux
+
+package fs
+
+import (
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type NetClsGroup struct {
+}
+
+func (s *NetClsGroup) Name() string {
+ return "net_cls"
+}
+
+func (s *NetClsGroup) Apply(d *cgroupData) error {
+ _, err := d.join("net_cls")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.NetClsClassid != 0 {
+ if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *NetClsGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("net_cls"))
+}
+
+func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
new file mode 100644
index 000000000..d0ab2af89
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
@@ -0,0 +1,41 @@
+// +build linux
+
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type NetPrioGroup struct {
+}
+
+func (s *NetPrioGroup) Name() string {
+ return "net_prio"
+}
+
+func (s *NetPrioGroup) Apply(d *cgroupData) error {
+ _, err := d.join("net_prio")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
+ for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
+ if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *NetPrioGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("net_prio"))
+}
+
+func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
new file mode 100644
index 000000000..5693676d3
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
@@ -0,0 +1,35 @@
+// +build linux
+
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type PerfEventGroup struct {
+}
+
+func (s *PerfEventGroup) Name() string {
+ return "perf_event"
+}
+
+func (s *PerfEventGroup) Apply(d *cgroupData) error {
+ // we just want to join this group even though we don't set anything
+ if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
+ return nil
+}
+
+func (s *PerfEventGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("perf_event"))
+}
+
+func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
new file mode 100644
index 000000000..f1e372055
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
@@ -0,0 +1,73 @@
+// +build linux
+
+package fs
+
+import (
+ "fmt"
+ "path/filepath"
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type PidsGroup struct {
+}
+
+func (s *PidsGroup) Name() string {
+ return "pids"
+}
+
+func (s *PidsGroup) Apply(d *cgroupData) error {
+ _, err := d.join("pids")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ return nil
+}
+
+func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
+ if cgroup.Resources.PidsLimit != 0 {
+ // "max" is the fallback value.
+ limit := "max"
+
+ if cgroup.Resources.PidsLimit > 0 {
+ limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
+ }
+
+ if err := writeFile(path, "pids.max", limit); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (s *PidsGroup) Remove(d *cgroupData) error {
+ return removePath(d.path("pids"))
+}
+
+func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
+ current, err := getCgroupParamUint(path, "pids.current")
+ if err != nil {
+ return fmt.Errorf("failed to parse pids.current - %s", err)
+ }
+
+ maxString, err := getCgroupParamString(path, "pids.max")
+ if err != nil {
+ return fmt.Errorf("failed to parse pids.max - %s", err)
+ }
+
+ // Default if pids.max == "max" is 0 -- which represents "no limit".
+ var max uint64
+ if maxString != "max" {
+ max, err = parseUint(maxString, 10, 64)
+ if err != nil {
+ return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
+ }
+ }
+
+ stats.PidsStats.Current = current
+ stats.PidsStats.Limit = max
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
new file mode 100644
index 000000000..5ff0a1615
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
@@ -0,0 +1,78 @@
+// +build linux
+
+package fs
+
+import (
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
+var (
+ ErrNotValidFormat = errors.New("line is not a valid key value format")
+)
+
+// Saturates negative values at zero and returns a uint64.
+// Due to kernel bugs, some of the memory cgroup stats can be negative.
+func parseUint(s string, base, bitSize int) (uint64, error) {
+ value, err := strconv.ParseUint(s, base, bitSize)
+ if err != nil {
+ intValue, intErr := strconv.ParseInt(s, base, bitSize)
+ // 1. Handle negative values greater than MinInt64 (and)
+ // 2. Handle negative values lesser than MinInt64
+ if intErr == nil && intValue < 0 {
+ return 0, nil
+ } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
+ return 0, nil
+ }
+
+ return value, err
+ }
+
+ return value, nil
+}
+
+// Parses a cgroup param and returns as name, value
+// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
+func getCgroupParamKeyValue(t string) (string, uint64, error) {
+ parts := strings.Fields(t)
+ switch len(parts) {
+ case 2:
+ value, err := parseUint(parts[1], 10, 64)
+ if err != nil {
+ return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err)
+ }
+
+ return parts[0], value, nil
+ default:
+ return "", 0, ErrNotValidFormat
+ }
+}
+
+// Gets a single uint64 value from the specified cgroup file.
+func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
+ fileName := filepath.Join(cgroupPath, cgroupFile)
+ contents, err := ioutil.ReadFile(fileName)
+ if err != nil {
+ return 0, err
+ }
+
+ res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
+ if err != nil {
+ return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
+ }
+ return res, nil
+}
+
+// Gets a string value from the specified cgroup file
+func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
+ contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
+ if err != nil {
+ return "", err
+ }
+
+ return strings.TrimSpace(string(contents)), nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
new file mode 100644
index 000000000..b1efbfd99
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go
@@ -0,0 +1,128 @@
+// +build linux
+
+package rootless
+
+import (
+ "fmt"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fs"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/configs/validate"
+)
+
+// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
+// needlessly. We should probably export this list.
+
+var subsystems = []subsystem{
+ &fs.CpusetGroup{},
+ &fs.DevicesGroup{},
+ &fs.MemoryGroup{},
+ &fs.CpuGroup{},
+ &fs.CpuacctGroup{},
+ &fs.PidsGroup{},
+ &fs.BlkioGroup{},
+ &fs.HugetlbGroup{},
+ &fs.NetClsGroup{},
+ &fs.NetPrioGroup{},
+ &fs.PerfEventGroup{},
+ &fs.FreezerGroup{},
+ &fs.NameGroup{GroupName: "name=systemd"},
+}
+
+type subsystem interface {
+ // Name returns the name of the subsystem.
+ Name() string
+
+ // Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
+ GetStats(path string, stats *cgroups.Stats) error
+}
+
+// The noop cgroup manager is used for rootless containers, because we currently
+// cannot manage cgroups if we are in a rootless setup. This manager is chosen
+// by factory if we are in rootless mode. We error out if any cgroup options are
+// set in the config -- this may change in the future with upcoming kernel features
+// like the cgroup namespace.
+
+type Manager struct {
+ Cgroups *configs.Cgroup
+ Paths map[string]string
+}
+
+func (m *Manager) Apply(pid int) error {
+ // If there are no cgroup settings, there's nothing to do.
+ if m.Cgroups == nil {
+ return nil
+ }
+
+ // We can't set paths.
+ // TODO(cyphar): Implement the case where the runner of a rootless container
+ // owns their own cgroup, which would allow us to set up a
+ // cgroup for each path.
+ if m.Cgroups.Paths != nil {
+ return fmt.Errorf("cannot change cgroup path in rootless container")
+ }
+
+ // We load the paths into the manager.
+ paths := make(map[string]string)
+ for _, sys := range subsystems {
+ name := sys.Name()
+
+ path, err := cgroups.GetOwnCgroupPath(name)
+ if err != nil {
+ // Ignore paths we couldn't resolve.
+ continue
+ }
+
+ paths[name] = path
+ }
+
+ m.Paths = paths
+ return nil
+}
+
+func (m *Manager) GetPaths() map[string]string {
+ return m.Paths
+}
+
+func (m *Manager) Set(container *configs.Config) error {
+ // We have to re-do the validation here, since someone might decide to
+ // update a rootless container.
+ return validate.New().Validate(container)
+}
+
+func (m *Manager) GetPids() ([]int, error) {
+ dir, err := cgroups.GetOwnCgroupPath("devices")
+ if err != nil {
+ return nil, err
+ }
+ return cgroups.GetPids(dir)
+}
+
+func (m *Manager) GetAllPids() ([]int, error) {
+ dir, err := cgroups.GetOwnCgroupPath("devices")
+ if err != nil {
+ return nil, err
+ }
+ return cgroups.GetAllPids(dir)
+}
+
+func (m *Manager) GetStats() (*cgroups.Stats, error) {
+ // TODO(cyphar): We can make this work if we figure out a way to allow usage
+ // of cgroups with a rootless container. While this doesn't
+ // actually require write access to a cgroup directory, the
+ // statistics are not useful if they can be affected by
+ // non-container processes.
+ return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
+}
+
+func (m *Manager) Freeze(state configs.FreezerState) error {
+ // TODO(cyphar): We can make this work if we figure out a way to allow usage
+ // of cgroups with a rootless container.
+ return fmt.Errorf("cannot use freezer cgroup in rootless container")
+}
+
+func (m *Manager) Destroy() error {
+ // We don't have to do anything here because we didn't do any setup.
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
new file mode 100644
index 000000000..8eeedc55b
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@@ -0,0 +1,108 @@
+// +build linux
+
+package cgroups
+
+type ThrottlingData struct {
+ // Number of periods with throttling active
+ Periods uint64 `json:"periods,omitempty"`
+ // Number of periods when the container hit its throttling limit.
+ ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
+ // Aggregate time the container was throttled for in nanoseconds.
+ ThrottledTime uint64 `json:"throttled_time,omitempty"`
+}
+
+// CpuUsage denotes the usage of a CPU.
+// All CPU stats are aggregate since container inception.
+type CpuUsage struct {
+ // Total CPU time consumed.
+ // Units: nanoseconds.
+ TotalUsage uint64 `json:"total_usage,omitempty"`
+ // Total CPU time consumed per core.
+ // Units: nanoseconds.
+ PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
+ // Time spent by tasks of the cgroup in kernel mode.
+ // Units: nanoseconds.
+ UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
+ // Time spent by tasks of the cgroup in user mode.
+ // Units: nanoseconds.
+ UsageInUsermode uint64 `json:"usage_in_usermode"`
+}
+
+type CpuStats struct {
+ CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
+ ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
+}
+
+type MemoryData struct {
+ Usage uint64 `json:"usage,omitempty"`
+ MaxUsage uint64 `json:"max_usage,omitempty"`
+ Failcnt uint64 `json:"failcnt"`
+ Limit uint64 `json:"limit"`
+}
+
+type MemoryStats struct {
+ // memory used for cache
+ Cache uint64 `json:"cache,omitempty"`
+ // usage of memory
+ Usage MemoryData `json:"usage,omitempty"`
+ // usage of memory + swap
+ SwapUsage MemoryData `json:"swap_usage,omitempty"`
+ // usage of kernel memory
+ KernelUsage MemoryData `json:"kernel_usage,omitempty"`
+ // usage of kernel TCP memory
+ KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
+ // if true, memory usage is accounted for throughout a hierarchy of cgroups.
+ UseHierarchy bool `json:"use_hierarchy"`
+
+ Stats map[string]uint64 `json:"stats,omitempty"`
+}
+
+type PidsStats struct {
+ // number of pids in the cgroup
+ Current uint64 `json:"current,omitempty"`
+ // active pids hard limit
+ Limit uint64 `json:"limit,omitempty"`
+}
+
+type BlkioStatEntry struct {
+ Major uint64 `json:"major,omitempty"`
+ Minor uint64 `json:"minor,omitempty"`
+ Op string `json:"op,omitempty"`
+ Value uint64 `json:"value,omitempty"`
+}
+
+type BlkioStats struct {
+ // number of bytes tranferred to and from the block device
+ IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
+ IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
+ IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
+ IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
+ IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
+ IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
+ IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
+ SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
+}
+
+type HugetlbStats struct {
+ // current res_counter usage for hugetlb
+ Usage uint64 `json:"usage,omitempty"`
+ // maximum usage ever recorded.
+ MaxUsage uint64 `json:"max_usage,omitempty"`
+ // number of times hugetlb usage allocation failure.
+ Failcnt uint64 `json:"failcnt"`
+}
+
+type Stats struct {
+ CpuStats CpuStats `json:"cpu_stats,omitempty"`
+ MemoryStats MemoryStats `json:"memory_stats,omitempty"`
+ PidsStats PidsStats `json:"pids_stats,omitempty"`
+ BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
+ // the map is in the format "size of hugepage: stats of the hugepage"
+ HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
+}
+
+func NewStats() *Stats {
+ memoryStats := MemoryStats{Stats: make(map[string]uint64)}
+ hugetlbStats := make(map[string]HugetlbStats)
+ return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
new file mode 100644
index 000000000..7de9ae605
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
@@ -0,0 +1,55 @@
+// +build !linux
+
+package systemd
+
+import (
+ "fmt"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type Manager struct {
+ Cgroups *configs.Cgroup
+ Paths map[string]string
+}
+
+func UseSystemd() bool {
+ return false
+}
+
+func (m *Manager) Apply(pid int) error {
+ return fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) GetPids() ([]int, error) {
+ return nil, fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) GetAllPids() ([]int, error) {
+ return nil, fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) Destroy() error {
+ return fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) GetPaths() map[string]string {
+ return nil
+}
+
+func (m *Manager) GetStats() (*cgroups.Stats, error) {
+ return nil, fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) Set(container *configs.Config) error {
+ return nil, fmt.Errorf("Systemd not supported")
+}
+
+func (m *Manager) Freeze(state configs.FreezerState) error {
+ return fmt.Errorf("Systemd not supported")
+}
+
+func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
+ return fmt.Errorf("Systemd not supported")
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
new file mode 100644
index 000000000..b010b4b32
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@@ -0,0 +1,553 @@
+// +build linux
+
+package systemd
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+
+ systemdDbus "github.com/coreos/go-systemd/dbus"
+ systemdUtil "github.com/coreos/go-systemd/util"
+ "github.com/godbus/dbus"
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fs"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type Manager struct {
+ mu sync.Mutex
+ Cgroups *configs.Cgroup
+ Paths map[string]string
+}
+
+type subsystem interface {
+ // Name returns the name of the subsystem.
+ Name() string
+ // Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
+ GetStats(path string, stats *cgroups.Stats) error
+ // Set the cgroup represented by cgroup.
+ Set(path string, cgroup *configs.Cgroup) error
+}
+
+var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
+
+type subsystemSet []subsystem
+
+func (s subsystemSet) Get(name string) (subsystem, error) {
+ for _, ss := range s {
+ if ss.Name() == name {
+ return ss, nil
+ }
+ }
+ return nil, errSubsystemDoesNotExist
+}
+
+var subsystems = subsystemSet{
+ &fs.CpusetGroup{},
+ &fs.DevicesGroup{},
+ &fs.MemoryGroup{},
+ &fs.CpuGroup{},
+ &fs.CpuacctGroup{},
+ &fs.PidsGroup{},
+ &fs.BlkioGroup{},
+ &fs.HugetlbGroup{},
+ &fs.PerfEventGroup{},
+ &fs.FreezerGroup{},
+ &fs.NetPrioGroup{},
+ &fs.NetClsGroup{},
+ &fs.NameGroup{GroupName: "name=systemd"},
+}
+
+const (
+ testScopeWait = 4
+ testSliceWait = 4
+)
+
+var (
+ connLock sync.Mutex
+ theConn *systemdDbus.Conn
+ hasStartTransientUnit bool
+ hasStartTransientSliceUnit bool
+ hasTransientDefaultDependencies bool
+ hasDelegate bool
+)
+
+func newProp(name string, units interface{}) systemdDbus.Property {
+ return systemdDbus.Property{
+ Name: name,
+ Value: dbus.MakeVariant(units),
+ }
+}
+
+func UseSystemd() bool {
+ if !systemdUtil.IsRunningSystemd() {
+ return false
+ }
+
+ connLock.Lock()
+ defer connLock.Unlock()
+
+ if theConn == nil {
+ var err error
+ theConn, err = systemdDbus.New()
+ if err != nil {
+ return false
+ }
+
+ // Assume we have StartTransientUnit
+ hasStartTransientUnit = true
+
+ // But if we get UnknownMethod error we don't
+ if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
+ hasStartTransientUnit = false
+ return hasStartTransientUnit
+ }
+ }
+ }
+
+ // Ensure the scope name we use doesn't exist. Use the Pid to
+ // avoid collisions between multiple libcontainer users on a
+ // single host.
+ scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
+ testScopeExists := true
+ for i := 0; i <= testScopeWait; i++ {
+ if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
+ testScopeExists = false
+ break
+ }
+ }
+ }
+ time.Sleep(time.Millisecond)
+ }
+
+ // Bail out if we can't kill this scope without testing for DefaultDependencies
+ if testScopeExists {
+ return hasStartTransientUnit
+ }
+
+ // Assume StartTransientUnit on a scope allows DefaultDependencies
+ hasTransientDefaultDependencies = true
+ ddf := newProp("DefaultDependencies", false)
+ if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
+ hasTransientDefaultDependencies = false
+ }
+ }
+ }
+
+ // Not critical because of the stop unit logic above.
+ theConn.StopUnit(scope, "replace", nil)
+
+ // Assume StartTransientUnit on a scope allows Delegate
+ hasDelegate = true
+ dl := newProp("Delegate", true)
+ if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
+ hasDelegate = false
+ }
+ }
+ }
+
+ // Assume we have the ability to start a transient unit as a slice
+ // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
+ // For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
+ hasStartTransientSliceUnit = true
+
+ // To ensure simple clean-up, we create a slice off the root with no hierarchy
+ slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
+ if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
+ if _, ok := err.(dbus.Error); ok {
+ hasStartTransientSliceUnit = false
+ }
+ }
+
+ for i := 0; i <= testSliceWait; i++ {
+ if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
+ hasStartTransientSliceUnit = false
+ break
+ }
+ }
+ } else {
+ break
+ }
+ time.Sleep(time.Millisecond)
+ }
+
+ // Not critical because of the stop unit logic above.
+ theConn.StopUnit(scope, "replace", nil)
+ theConn.StopUnit(slice, "replace", nil)
+ }
+ return hasStartTransientUnit
+}
+
+func (m *Manager) Apply(pid int) error {
+ var (
+ c = m.Cgroups
+ unitName = getUnitName(c)
+ slice = "system.slice"
+ properties []systemdDbus.Property
+ )
+
+ if c.Paths != nil {
+ paths := make(map[string]string)
+ for name, path := range c.Paths {
+ _, err := getSubsystemPath(m.Cgroups, name)
+ if err != nil {
+ // Don't fail if a cgroup hierarchy was not found, just skip this subsystem
+ if cgroups.IsNotFound(err) {
+ continue
+ }
+ return err
+ }
+ paths[name] = path
+ }
+ m.Paths = paths
+ return cgroups.EnterPid(m.Paths, pid)
+ }
+
+ if c.Parent != "" {
+ slice = c.Parent
+ }
+
+ properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
+
+ // if we create a slice, the parent is defined via a Wants=
+ if strings.HasSuffix(unitName, ".slice") {
+ // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
+ if !hasStartTransientSliceUnit {
+ return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
+ }
+ properties = append(properties, systemdDbus.PropWants(slice))
+ } else {
+ // otherwise, we use Slice=
+ properties = append(properties, systemdDbus.PropSlice(slice))
+ }
+
+ // only add pid if its valid, -1 is used w/ general slice creation.
+ if pid != -1 {
+ properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
+ }
+
+ if hasDelegate {
+ // This is only supported on systemd versions 218 and above.
+ properties = append(properties, newProp("Delegate", true))
+ }
+
+ // Always enable accounting, this gets us the same behaviour as the fs implementation,
+ // plus the kernel has some problems with joining the memory cgroup at a later time.
+ properties = append(properties,
+ newProp("MemoryAccounting", true),
+ newProp("CPUAccounting", true),
+ newProp("BlockIOAccounting", true))
+
+ if hasTransientDefaultDependencies {
+ properties = append(properties,
+ newProp("DefaultDependencies", false))
+ }
+
+ if c.Resources.Memory != 0 {
+ properties = append(properties,
+ newProp("MemoryLimit", c.Resources.Memory))
+ }
+
+ if c.Resources.CpuShares != 0 {
+ properties = append(properties,
+ newProp("CPUShares", c.Resources.CpuShares))
+ }
+
+ // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+ if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
+ cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+ properties = append(properties,
+ newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
+ }
+
+ if c.Resources.BlkioWeight != 0 {
+ properties = append(properties,
+ newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
+ }
+
+ // We have to set kernel memory here, as we can't change it once
+ // processes have been attached to the cgroup.
+ if c.Resources.KernelMemory != 0 {
+ if err := setKernelMemory(c); err != nil {
+ return err
+ }
+ }
+
+ if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) {
+ return err
+ }
+
+ if err := joinCgroups(c, pid); err != nil {
+ return err
+ }
+
+ paths := make(map[string]string)
+ for _, s := range subsystems {
+ subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
+ if err != nil {
+ // Don't fail if a cgroup hierarchy was not found, just skip this subsystem
+ if cgroups.IsNotFound(err) {
+ continue
+ }
+ return err
+ }
+ paths[s.Name()] = subsystemPath
+ }
+ m.Paths = paths
+ return nil
+}
+
+func (m *Manager) Destroy() error {
+ if m.Cgroups.Paths != nil {
+ return nil
+ }
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
+ if err := cgroups.RemovePaths(m.Paths); err != nil {
+ return err
+ }
+ m.Paths = make(map[string]string)
+ return nil
+}
+
+func (m *Manager) GetPaths() map[string]string {
+ m.mu.Lock()
+ paths := m.Paths
+ m.mu.Unlock()
+ return paths
+}
+
+func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
+ path, err := getSubsystemPath(c, subsystem)
+ if err != nil {
+ return "", err
+ }
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return "", err
+ }
+ if err := cgroups.WriteCgroupProc(path, pid); err != nil {
+ return "", err
+ }
+ return path, nil
+}
+
+func joinCgroups(c *configs.Cgroup, pid int) error {
+ for _, sys := range subsystems {
+ name := sys.Name()
+ switch name {
+ case "name=systemd":
+ // let systemd handle this
+ case "cpuset":
+ path, err := getSubsystemPath(c, name)
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+ s := &fs.CpusetGroup{}
+ if err := s.ApplyDir(path, c, pid); err != nil {
+ return err
+ }
+ default:
+ _, err := join(c, name, pid)
+ if err != nil {
+ // Even if it's `not found` error, we'll return err
+ // because devices cgroup is hard requirement for
+ // container security.
+ if name == "devices" {
+ return err
+ }
+ // For other subsystems, omit the `not found` error
+ // because they are optional.
+ if !cgroups.IsNotFound(err) {
+ return err
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+// systemd represents slice hierarchy using `-`, so we need to follow suit when
+// generating the path of slice. Essentially, test-a-b.slice becomes
+// test.slice/test-a.slice/test-a-b.slice.
+func ExpandSlice(slice string) (string, error) {
+ suffix := ".slice"
+ // Name has to end with ".slice", but can't be just ".slice".
+ if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
+ return "", fmt.Errorf("invalid slice name: %s", slice)
+ }
+
+ // Path-separators are not allowed.
+ if strings.Contains(slice, "/") {
+ return "", fmt.Errorf("invalid slice name: %s", slice)
+ }
+
+ var path, prefix string
+ sliceName := strings.TrimSuffix(slice, suffix)
+ // if input was -.slice, we should just return root now
+ if sliceName == "-" {
+ return "/", nil
+ }
+ for _, component := range strings.Split(sliceName, "-") {
+ // test--a.slice isn't permitted, nor is -test.slice.
+ if component == "" {
+ return "", fmt.Errorf("invalid slice name: %s", slice)
+ }
+
+ // Append the component to the path and to the prefix.
+ path += prefix + component + suffix + "/"
+ prefix += component + "-"
+ }
+
+ return path, nil
+}
+
+func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
+ mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
+ if err != nil {
+ return "", err
+ }
+
+ initPath, err := cgroups.GetInitCgroup(subsystem)
+ if err != nil {
+ return "", err
+ }
+ // if pid 1 is systemd 226 or later, it will be in init.scope, not the root
+ initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
+
+ slice := "system.slice"
+ if c.Parent != "" {
+ slice = c.Parent
+ }
+
+ slice, err = ExpandSlice(slice)
+ if err != nil {
+ return "", err
+ }
+
+ return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
+}
+
+func (m *Manager) Freeze(state configs.FreezerState) error {
+ path, err := getSubsystemPath(m.Cgroups, "freezer")
+ if err != nil {
+ return err
+ }
+ prevState := m.Cgroups.Resources.Freezer
+ m.Cgroups.Resources.Freezer = state
+ freezer, err := subsystems.Get("freezer")
+ if err != nil {
+ return err
+ }
+ err = freezer.Set(path, m.Cgroups)
+ if err != nil {
+ m.Cgroups.Resources.Freezer = prevState
+ return err
+ }
+ return nil
+}
+
+func (m *Manager) GetPids() ([]int, error) {
+ path, err := getSubsystemPath(m.Cgroups, "devices")
+ if err != nil {
+ return nil, err
+ }
+ return cgroups.GetPids(path)
+}
+
+func (m *Manager) GetAllPids() ([]int, error) {
+ path, err := getSubsystemPath(m.Cgroups, "devices")
+ if err != nil {
+ return nil, err
+ }
+ return cgroups.GetAllPids(path)
+}
+
+func (m *Manager) GetStats() (*cgroups.Stats, error) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ stats := cgroups.NewStats()
+ for name, path := range m.Paths {
+ sys, err := subsystems.Get(name)
+ if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
+ continue
+ }
+ if err := sys.GetStats(path, stats); err != nil {
+ return nil, err
+ }
+ }
+
+ return stats, nil
+}
+
+func (m *Manager) Set(container *configs.Config) error {
+ // If Paths are set, then we are just joining cgroups paths
+ // and there is no need to set any values.
+ if m.Cgroups.Paths != nil {
+ return nil
+ }
+ for _, sys := range subsystems {
+ // Get the subsystem path, but don't error out for not found cgroups.
+ path, err := getSubsystemPath(container.Cgroups, sys.Name())
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+
+ if err := sys.Set(path, container.Cgroups); err != nil {
+ return err
+ }
+ }
+
+ if m.Paths["cpu"] != "" {
+ if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func getUnitName(c *configs.Cgroup) string {
+ // by default, we create a scope unless the user explicitly asks for a slice.
+ if !strings.HasSuffix(c.Name, ".slice") {
+ return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
+ }
+ return c.Name
+}
+
+func setKernelMemory(c *configs.Cgroup) error {
+ path, err := getSubsystemPath(c, "memory")
+ if err != nil && !cgroups.IsNotFound(err) {
+ return err
+ }
+
+ if err := os.MkdirAll(path, 0755); err != nil {
+ return err
+ }
+ return fs.EnableKernelMemoryAccounting(path)
+}
+
+// isUnitExists returns true if the error is that a systemd unit already exists.
+func isUnitExists(err error) bool {
+ if err != nil {
+ if dbusError, ok := err.(dbus.Error); ok {
+ return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
+ }
+ }
+ return false
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
new file mode 100644
index 000000000..7c995efee
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -0,0 +1,462 @@
+// +build linux
+
+package cgroups
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/docker/go-units"
+)
+
+const (
+ cgroupNamePrefix = "name="
+ CgroupProcesses = "cgroup.procs"
+)
+
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
+func FindCgroupMountpoint(subsystem string) (string, error) {
+ mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
+ return mnt, err
+}
+
+func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
+ // We are not using mount.GetMounts() because it's super-inefficient,
+ // parsing it directly sped up x10 times because of not using Sscanf.
+ // It was one of two major performance drawbacks in container start.
+ if !isSubsystemAvailable(subsystem) {
+ return "", "", NewNotFoundError(subsystem)
+ }
+ f, err := os.Open("/proc/self/mountinfo")
+ if err != nil {
+ return "", "", err
+ }
+ defer f.Close()
+
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ txt := scanner.Text()
+ fields := strings.Split(txt, " ")
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+ if opt == subsystem {
+ return fields[4], fields[3], nil
+ }
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return "", "", err
+ }
+
+ return "", "", NewNotFoundError(subsystem)
+}
+
+func isSubsystemAvailable(subsystem string) bool {
+ cgroups, err := ParseCgroupFile("/proc/self/cgroup")
+ if err != nil {
+ return false
+ }
+ _, avail := cgroups[subsystem]
+ return avail
+}
+
+func GetClosestMountpointAncestor(dir, mountinfo string) string {
+ deepestMountPoint := ""
+ for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
+ mountInfoParts := strings.Fields(mountInfoEntry)
+ if len(mountInfoParts) < 5 {
+ continue
+ }
+ mountPoint := mountInfoParts[4]
+ if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
+ deepestMountPoint = mountPoint
+ }
+ }
+ return deepestMountPoint
+}
+
+func FindCgroupMountpointDir() (string, error) {
+ f, err := os.Open("/proc/self/mountinfo")
+ if err != nil {
+ return "", err
+ }
+ defer f.Close()
+
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ text := scanner.Text()
+ fields := strings.Split(text, " ")
+ // Safe as mountinfo encodes mountpoints with spaces as \040.
+ index := strings.Index(text, " - ")
+ postSeparatorFields := strings.Fields(text[index+3:])
+ numPostFields := len(postSeparatorFields)
+
+ // This is an error as we can't detect if the mount is for "cgroup"
+ if numPostFields == 0 {
+ return "", fmt.Errorf("Found no fields post '-' in %q", text)
+ }
+
+ if postSeparatorFields[0] == "cgroup" {
+ // Check that the mount is properly formated.
+ if numPostFields < 3 {
+ return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+ }
+
+ return filepath.Dir(fields[4]), nil
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+
+ return "", NewNotFoundError("cgroup")
+}
+
+type Mount struct {
+ Mountpoint string
+ Root string
+ Subsystems []string
+}
+
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
+ if len(m.Subsystems) == 0 {
+ return "", fmt.Errorf("no subsystem for mount")
+ }
+
+ return getControllerPath(m.Subsystems[0], cgroups)
+}
+
+func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
+ res := make([]Mount, 0, len(ss))
+ scanner := bufio.NewScanner(mi)
+ numFound := 0
+ for scanner.Scan() && numFound < len(ss) {
+ txt := scanner.Text()
+ sepIdx := strings.Index(txt, " - ")
+ if sepIdx == -1 {
+ return nil, fmt.Errorf("invalid mountinfo format")
+ }
+ if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
+ continue
+ }
+ fields := strings.Split(txt, " ")
+ m := Mount{
+ Mountpoint: fields[4],
+ Root: fields[3],
+ }
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+ if !ss[opt] {
+ continue
+ }
+ if strings.HasPrefix(opt, cgroupNamePrefix) {
+ m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
+ } else {
+ m.Subsystems = append(m.Subsystems, opt)
+ }
+ if !all {
+ numFound++
+ }
+ }
+ res = append(res, m)
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ return res, nil
+}
+
+// GetCgroupMounts returns the mounts for the cgroup subsystems.
+// all indicates whether to return just the first instance or all the mounts.
+func GetCgroupMounts(all bool) ([]Mount, error) {
+ f, err := os.Open("/proc/self/mountinfo")
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
+ if err != nil {
+ return nil, err
+ }
+
+ allMap := make(map[string]bool)
+ for s := range allSubsystems {
+ allMap[s] = true
+ }
+ return getCgroupMountsHelper(allMap, f, all)
+}
+
+// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
+func GetAllSubsystems() ([]string, error) {
+ f, err := os.Open("/proc/cgroups")
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ subsystems := []string{}
+
+ s := bufio.NewScanner(f)
+ for s.Scan() {
+ text := s.Text()
+ if text[0] != '#' {
+ parts := strings.Fields(text)
+ if len(parts) >= 4 && parts[3] != "0" {
+ subsystems = append(subsystems, parts[0])
+ }
+ }
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return subsystems, nil
+}
+
+// GetOwnCgroup returns the relative path to the cgroup docker is running in.
+func GetOwnCgroup(subsystem string) (string, error) {
+ cgroups, err := ParseCgroupFile("/proc/self/cgroup")
+ if err != nil {
+ return "", err
+ }
+
+ return getControllerPath(subsystem, cgroups)
+}
+
+func GetOwnCgroupPath(subsystem string) (string, error) {
+ cgroup, err := GetOwnCgroup(subsystem)
+ if err != nil {
+ return "", err
+ }
+
+ return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func GetInitCgroup(subsystem string) (string, error) {
+ cgroups, err := ParseCgroupFile("/proc/1/cgroup")
+ if err != nil {
+ return "", err
+ }
+
+ return getControllerPath(subsystem, cgroups)
+}
+
+func GetInitCgroupPath(subsystem string) (string, error) {
+ cgroup, err := GetInitCgroup(subsystem)
+ if err != nil {
+ return "", err
+ }
+
+ return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
+ mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
+ if err != nil {
+ return "", err
+ }
+
+ // This is needed for nested containers, because in /proc/self/cgroup we
+ // see pathes from host, which don't exist in container.
+ relCgroup, err := filepath.Rel(root, cgroup)
+ if err != nil {
+ return "", err
+ }
+
+ return filepath.Join(mnt, relCgroup), nil
+}
+
+func readProcsFile(dir string) ([]int, error) {
+ f, err := os.Open(filepath.Join(dir, CgroupProcesses))
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var (
+ s = bufio.NewScanner(f)
+ out = []int{}
+ )
+
+ for s.Scan() {
+ if t := s.Text(); t != "" {
+ pid, err := strconv.Atoi(t)
+ if err != nil {
+ return nil, err
+ }
+ out = append(out, pid)
+ }
+ }
+ return out, nil
+}
+
+// ParseCgroupFile parses the given cgroup file, typically from
+// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
+func ParseCgroupFile(path string) (map[string]string, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ return parseCgroupFromReader(f)
+}
+
+// helper function for ParseCgroupFile to make testing easier
+func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
+ s := bufio.NewScanner(r)
+ cgroups := make(map[string]string)
+
+ for s.Scan() {
+ text := s.Text()
+ // from cgroups(7):
+ // /proc/[pid]/cgroup
+ // ...
+ // For each cgroup hierarchy ... there is one entry
+ // containing three colon-separated fields of the form:
+ // hierarchy-ID:subsystem-list:cgroup-path
+ parts := strings.SplitN(text, ":", 3)
+ if len(parts) < 3 {
+ return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
+ }
+
+ for _, subs := range strings.Split(parts[1], ",") {
+ cgroups[subs] = parts[2]
+ }
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+
+ return cgroups, nil
+}
+
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
+
+ if p, ok := cgroups[subsystem]; ok {
+ return p, nil
+ }
+
+ if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
+ return p, nil
+ }
+
+ return "", NewNotFoundError(subsystem)
+}
+
+func PathExists(path string) bool {
+ if _, err := os.Stat(path); err != nil {
+ return false
+ }
+ return true
+}
+
+func EnterPid(cgroupPaths map[string]string, pid int) error {
+ for _, path := range cgroupPaths {
+ if PathExists(path) {
+ if err := WriteCgroupProc(path, pid); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// RemovePaths iterates over the provided paths removing them.
+// We trying to remove all paths five times with increasing delay between tries.
+// If after all there are not removed cgroups - appropriate error will be
+// returned.
+func RemovePaths(paths map[string]string) (err error) {
+ delay := 10 * time.Millisecond
+ for i := 0; i < 5; i++ {
+ if i != 0 {
+ time.Sleep(delay)
+ delay *= 2
+ }
+ for s, p := range paths {
+ os.RemoveAll(p)
+ // TODO: here probably should be logging
+ _, err := os.Stat(p)
+ // We need this strange way of checking cgroups existence because
+ // RemoveAll almost always returns error, even on already removed
+ // cgroups
+ if os.IsNotExist(err) {
+ delete(paths, s)
+ }
+ }
+ if len(paths) == 0 {
+ return nil
+ }
+ }
+ return fmt.Errorf("Failed to remove paths: %v", paths)
+}
+
+func GetHugePageSize() ([]string, error) {
+ var pageSizes []string
+ sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
+ files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
+ if err != nil {
+ return pageSizes, err
+ }
+ for _, st := range files {
+ nameArray := strings.Split(st.Name(), "-")
+ pageSize, err := units.RAMInBytes(nameArray[1])
+ if err != nil {
+ return []string{}, err
+ }
+ sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
+ pageSizes = append(pageSizes, sizeString)
+ }
+
+ return pageSizes, nil
+}
+
+// GetPids returns all pids, that were added to cgroup at path.
+func GetPids(path string) ([]int, error) {
+ return readProcsFile(path)
+}
+
+// GetAllPids returns all pids, that were added to cgroup at path and to all its
+// subcgroups.
+func GetAllPids(path string) ([]int, error) {
+ var pids []int
+ // collect pids from all sub-cgroups
+ err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
+ dir, file := filepath.Split(p)
+ if file != CgroupProcesses {
+ return nil
+ }
+ if iErr != nil {
+ return iErr
+ }
+ cPids, err := readProcsFile(dir)
+ if err != nil {
+ return err
+ }
+ pids = append(pids, cPids...)
+ return nil
+ })
+ return pids, err
+}
+
+// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
+func WriteCgroupProc(dir string, pid int) error {
+ // Normally dir should not be empty, one case is that cgroup subsystem
+ // is not mounted, we will get empty dir, and we want it fail here.
+ if dir == "" {
+ return fmt.Errorf("no such directory for %s", CgroupProcesses)
+ }
+
+ // Dont attach any pid to the cgroup if -1 is specified as a pid
+ if pid != -1 {
+ if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
+ return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
+ }
+ }
+ return nil
+}