diff options
author | Matthew Heon <matthew.heon@gmail.com> | 2017-11-01 11:24:59 -0400 |
---|---|---|
committer | Matthew Heon <matthew.heon@gmail.com> | 2017-11-01 11:24:59 -0400 |
commit | a031b83a09a8628435317a03f199cdc18b78262f (patch) | |
tree | bc017a96769ce6de33745b8b0b1304ccf38e9df0 /vendor/github.com/opencontainers/runc/libcontainer/cgroups | |
parent | 2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff) | |
download | podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.gz podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.bz2 podman-a031b83a09a8628435317a03f199cdc18b78262f.zip |
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer/cgroups')
23 files changed, 3217 insertions, 0 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go new file mode 100644 index 000000000..25ff51589 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -0,0 +1,64 @@ +// +build linux + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Applies cgroup configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside the cgroup set + GetPids() ([]int, error) + + // Returns the PIDs inside the cgroup set & all sub-cgroups + GetAllPids() ([]int, error) + + // Returns statistics for the cgroup set + GetStats() (*Stats, error) + + // Toggles the freezer cgroup according with specified state + Freeze(state configs.FreezerState) error + + // Destroys the cgroup set + Destroy() error + + // The option func SystemdCgroups() and Cgroupfs() require following attributes: + // Paths map[string]string + // Cgroups *configs.Cgroup + // Paths maps cgroup subsystem to path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. + GetPaths() map[string]string + + // Sets the cgroup as configured. 
+ Set(container *configs.Config) error +} + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go new file mode 100644 index 000000000..278d507e2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package cgroups diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go new file mode 100644 index 000000000..22d82acb4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -0,0 +1,360 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sync" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +var ( + subsystems = subsystemSet{ + &CpusetGroup{}, + &DevicesGroup{}, + &MemoryGroup{}, + &CpuGroup{}, + &CpuacctGroup{}, + &PidsGroup{}, + &BlkioGroup{}, + &HugetlbGroup{}, + &NetClsGroup{}, + &NetPrioGroup{}, + &PerfEventGroup{}, + &FreezerGroup{}, + &NameGroup{GroupName: "name=systemd", Join: true}, + } + HugePageSizes, _ = cgroups.GetHugePageSize() +) + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s 
{ + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'cgroupData'. + Remove(*cgroupData) error + // Creates and joins the cgroup represented by 'cgroupData'. + Apply(*cgroupData) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +// The absolute path to the root of the cgroup hierarchies. +var cgroupRootLock sync.Mutex +var cgroupRoot string + +// Gets the cgroupRoot. +func getCgroupRoot() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + root, err := cgroups.FindCgroupMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +type cgroupData struct { + root string + innerPath string + config *configs.Cgroup + pid int +} + +func (m *Manager) Apply(pid int) (err error) { + if m.Cgroups == nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + + var c = m.Cgroups + + d, err := getCgroupData(m.Cgroups, pid) + if err != nil { + return err + } + + m.Paths = make(map[string]string) + if c.Paths != nil { + for name, path := range c.Paths { + _, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return err + } + m.Paths[name] = path + } + return cgroups.EnterPid(m.Paths, pid) + } + + for _, sys := range subsystems { + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join 
consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is + // considered fatal for security reasons. + if cgroups.IsNotFound(err) && sys.Name() != "devices" { + continue + } + return err + } + m.Paths[sys.Name()] = p + + if err := sys.Apply(d); err != nil { + return err + } + } + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := subsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. 
+ if m.Cgroups.Paths != nil { + return nil + } + + paths := m.GetPaths() + for _, sys := range subsystems { + path := paths[sys.Name()] + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state configs.FreezerState) error { + paths := m.GetPaths() + dir := paths["freezer"] + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := subsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(dir, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetPids(paths["devices"]) +} + +func (m *Manager) GetAllPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetAllPids(paths["devices"]) +} + +func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { + root, err := getCgroupRoot() + if err != nil { + return nil, err + } + + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used") + } + + // XXX: Do not remove this code. Path safety is important! 
-- cyphar + cgPath := libcontainerUtils.CleanPath(c.Path) + cgParent := libcontainerUtils.CleanPath(c.Parent) + cgName := libcontainerUtils.CleanPath(c.Name) + + innerPath := cgPath + if innerPath == "" { + innerPath = filepath.Join(cgParent, cgName) + } + + return &cgroupData{ + root: root, + innerPath: innerPath, + config: c, + pid: pid, + }, nil +} + +func (raw *cgroupData) path(subsystem string) (string, error) { + mnt, err := cgroups.FindCgroupMountpoint(subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(raw.innerPath) { + // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. + return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil + } + + // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating + // process could in container and shared pid namespace with host, and + // /proc/1/cgroup could point to whole other world of cgroups. + parentPath, err := cgroups.GetOwnCgroupPath(subsystem) + if err != nil { + return "", err + } + + return filepath.Join(parentPath, raw.innerPath), nil +} + +func (raw *cgroupData) join(subsystem string) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil { + return "", err + } + return path, nil +} + +func writeFile(dir, file, data string) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. 
+ if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func readFile(dir, file string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(dir, file)) + return string(data), err +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} + +func CheckCpushares(path string, c uint64) error { + var cpuShares uint64 + + if c == 0 { + return nil + } + + fd, err := os.Open(filepath.Join(path, "cpu.shares")) + if err != nil { + return err + } + defer fd.Close() + + _, err = fmt.Fscanf(fd, "%d", &cpuShares) + if err != nil && err != io.EOF { + return err + } + + if c > cpuShares { + return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares) + } else if c < cpuShares { + return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares) + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go new file mode 100644 index 000000000..a142cb991 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go @@ -0,0 +1,237 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type BlkioGroup struct { +} + +func (s *BlkioGroup) Name() string { + return "blkio" +} + +func (s *BlkioGroup) Apply(d *cgroupData) error { + _, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.BlkioWeight != 0 { + if err := 
writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { + return err + } + } + + if cgroup.Resources.BlkioLeafWeight != 0 { + if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range cgroup.Resources.BlkioWeightDevice { + if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { + return err + } + } + + return nil +} + +func (s *BlkioGroup) Remove(d *cgroupData) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { + var blkioStats 
[]cgroups.BlkioStatEntry + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return blkioStats, nil + } + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + + v, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return nil, err + } + major := v + + v, err = strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return nil, err + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, err + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + // Try to read CFQ stats available on all CFQ enabled kernels first + if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { + return getCFQStats(path, stats) + } + return getStats(path, stats) // Use generic stats as fallback +} + +func getCFQStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { + return err + } + stats.BlkioStats.SectorsRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { + 
return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { + return err + } + stats.BlkioStats.IoQueuedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoWaitTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { + return err + } + stats.BlkioStats.IoMergedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoTimeRecursive = blkioStats + + return nil +} + +func getStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go new file mode 100644 index 000000000..b712bd0b1 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -0,0 +1,125 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroup struct { +} + +func (s *CpuGroup) 
Name() string { + return "cpu" +} + +func (s *CpuGroup) Apply(d *cgroupData) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + path, err := d.path("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(path, d.config, d.pid) +} + +func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpu cgroup mounted. + // Just do nothing and don't fail. + if path == "" { + return nil + } + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // We should set the real-Time group scheduling settings before moving + // in the process because if the process is already in SCHED_RR mode + // and no RT bandwidth is set, adding it will fail. + if err := s.SetRtSched(path, cgroup); err != nil { + return err + } + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return err + } + + return nil +} + +func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuRtPeriod != 0 { + if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuRtRuntime != 0 { + if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil { + return err + } + } + return nil +} + +func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuShares != 0 { + if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuPeriod != 0 { + if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuQuota != 0 { + if err := 
writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil { + return err + } + } + if err := s.SetRtSched(path, cgroup); err != nil { + return err + } + + return nil +} + +func (s *CpuGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go new file mode 100644 index 000000000..53afbaddf --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go @@ -0,0 +1,121 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + cgroupCpuacctStat = "cpuacct.stat" + nanosecondsInSecond = 1000000000 +) + +var clockTicks = uint64(system.GetClockTicks()) + +type CpuacctGroup struct { +} + +func (s *CpuacctGroup) Name() string { + return "cpuacct" +} + +func (s *CpuacctGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { + return err + } + + return nil +} + +func (s 
*CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *CpuacctGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuacct")) +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. +func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + userModeUsage := uint64(0) + kernelModeUsage := uint64(0) + const ( + userField = "user" + systemField = "system" + ) + + // Expected format: + // user <usage in ticks> + // system <usage in ticks> + data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) + } + if fields[0] != userField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) + } + if fields[2] != systemField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, err + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, err + } + + return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / 
clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + percpuUsage := []uint64{} + data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(string(data)) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go new file mode 100644 index 000000000..20c9eafac --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go @@ -0,0 +1,163 @@ +// +build linux + +package fs + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +type CpusetGroup struct { +} + +func (s *CpusetGroup) Name() string { + return "cpuset" +} + +func (s *CpusetGroup) Apply(d *cgroupData) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.config, d.pid) +} + +func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s 
*CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo") + if err != nil { + return err + } + root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo))) + // 'ensureParent' start with parent because we don't want to + // explicitly inherit from parent, it could conflict with + // 'cpuset.cpu_exclusive'. + if err := s.ensureParent(filepath.Dir(dir), root); err != nil { + return err + } + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + // We didn't inherit cpuset configs from parent, but we have + // to ensure cpuset configs are set before moving task into the + // cgroup. + // The logic is, if user specified cpuset configs, use these + // specified configs, otherwise, inherit from parent. This makes + // cpuset configs work correctly with 'cpuset.cpu_exclusive', and + // keep backward compatbility. + if err := s.ensureCpusAndMems(dir, cgroup); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := cgroups.WriteCgroupProc(dir, pid); err != nil { + return err + } + + return nil +} + +func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. 
+func (s *CpusetGroup) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if libcontainerUtils.CleanPath(parent) == root { + return nil + } + // Avoid infinite recursion. + if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} + +func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error { + if err := s.Set(path, cgroup); err != nil { + return err + } + return s.copyIfNeeded(path, filepath.Dir(path)) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go new file mode 100644 index 000000000..0ac5b4ed7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go @@ -0,0 +1,80 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + 
"github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +type DevicesGroup struct { +} + +func (s *DevicesGroup) Name() string { + return "devices" +} + +func (s *DevicesGroup) Apply(d *cgroupData) error { + _, err := d.join("devices") + if err != nil { + // We will return error even it's `not found` error, devices + // cgroup is hard requirement for container's security. + return err + } + return nil +} + +func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if system.RunningInUserNS() { + return nil + } + + devices := cgroup.Resources.Devices + if len(devices) > 0 { + for _, dev := range devices { + file := "devices.deny" + if dev.Allow { + file = "devices.allow" + } + if err := writeFile(path, file, dev.CgroupString()); err != nil { + return err + } + } + return nil + } + if cgroup.Resources.AllowAllDevices != nil { + if *cgroup.Resources.AllowAllDevices == false { + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range cgroup.Resources.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { + return err + } + } + return nil + } + + if err := writeFile(path, "devices.allow", "a"); err != nil { + return err + } + } + + for _, dev := range cgroup.Resources.DeniedDevices { + if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *DevicesGroup) Remove(d *cgroupData) error { + return removePath(d.path("devices")) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go new file mode 100644 index 000000000..e70dfe3b9 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go @@ -0,0 +1,61 @@ +// +build 
linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroup struct { +} + +func (s *FreezerGroup) Name() string { + return "freezer" +} + +func (s *FreezerGroup) Apply(d *cgroupData) error { + _, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + switch cgroup.Resources.Freezer { + case configs.Frozen, configs.Thawed: + if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + return err + } + + for { + state, err := readFile(path, "freezer.state") + if err != nil { + return err + } + if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { + break + } + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) + } + + return nil +} + +func (s *FreezerGroup) Remove(d *cgroupData) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go new file mode 100644 index 000000000..3ef9e0315 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package fs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go new file mode 100644 index 000000000..2f9727719 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go @@ -0,0 +1,71 @@ +// +build linux + +package fs + 
+import ( + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type HugetlbGroup struct { +} + +func (s *HugetlbGroup) Name() string { + return "hugetlb" +} + +func (s *HugetlbGroup) Apply(d *cgroupData) error { + _, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.Resources.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *HugetlbGroup) Remove(d *cgroupData) error { + return removePath(d.path("hugetlb")) +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go new file mode 100644 index 
000000000..ad395a5d6 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -0,0 +1,313 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" // only for Errno + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + + "golang.org/x/sys/unix" +) + +const ( + cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes" + cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" + cgroupMemoryLimit = "memory.limit_in_bytes" +) + +type MemoryGroup struct { +} + +func (s *MemoryGroup) Name() string { + return "memory" +} + +func (s *MemoryGroup) Apply(d *cgroupData) (err error) { + path, err := d.path("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } else if path == "" { + return nil + } + if memoryAssigned(d.config) { + if _, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // Only enable kernel memory accounting when this cgroup + // is created by libcontainer, otherwise we might get + // error when people use `cgroupsPath` to join an existing + // cgroup whose kernel memory is not initialized. + if err := EnableKernelMemoryAccounting(path); err != nil { + return err + } + } + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + + // We need to join memory cgroup after setting memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + _, err = d.join("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func EnableKernelMemoryAccounting(path string) error { + // Check if kernel memory is enabled + // We have to limit the kernel memory here as it won't be accounted at all + // until a limit is set on the cgroup and limit cannot be set once the + // cgroup has children, or if there are already tasks in the cgroup. 
+ for _, i := range []int64{1, -1} { + if err := setKernelMemory(path, i); err != nil { + return err + } + } + return nil +} + +func setKernelMemory(path string, kernelMemoryLimit int64) error { + if path == "" { + return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit) + } + if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) { + // kernel memory is not enabled on the system so we should do nothing + return nil + } + if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil { + // Check if the error number returned by the syscall is "EBUSY" + // The EBUSY signal is returned on attempts to write to the + // memory.kmem.limit_in_bytes file if the cgroup has children or + // once tasks have been attached to the cgroup + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == unix.EBUSY { + return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit) + } + } + } + return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err) + } + return nil +} + +func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { + // If the memory update is set to -1 we should also + // set swap to -1, it means unlimited memory. + if cgroup.Resources.Memory == -1 { + // Only set swap if it's enabled in kernel + if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { + cgroup.Resources.MemorySwap = -1 + } + } + + // When memory and swap memory are both set, we need to handle the cases + // for updating container. 
+ if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 { + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + + // When update memory limit, we should adapt the write sequence + // for memory and swap memory, so it won't fail because the new + // value and the old value don't fit kernel's validation. + if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } else { + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } else { + if cgroup.Resources.Memory != 0 { + if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + if cgroup.Resources.MemorySwap != 0 { + if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } + + return nil +} + +func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { + if err := setMemoryAndSwap(path, cgroup); err != nil { + return err + } + + if cgroup.Resources.KernelMemory != 0 { + if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil { + return err + } + } + + if cgroup.Resources.MemoryReservation != 0 { + if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + + if cgroup.Resources.KernelMemoryTCP != 0 { + if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", 
strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil { + return err + } + } + if cgroup.Resources.OomKillDisable { + if err := writeFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 { + return nil + } else if *cgroup.Resources.MemorySwappiness <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil { + return err + } + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness) + } + + return nil +} + +func (s *MemoryGroup) Remove(d *cgroupData) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") + if err != nil { + return err + } + stats.MemoryStats.KernelTCPUsage = kernelTCPUsage + + useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".") + value, err := getCgroupParamUint(path, 
useHierarchy) + if err != nil { + return err + } + if value == 1 { + stats.MemoryStats.UseHierarchy = true + } + return nil +} + +func memoryAssigned(cgroup *configs.Cgroup) bool { + return cgroup.Resources.Memory != 0 || + cgroup.Resources.MemoryReservation != 0 || + cgroup.Resources.MemorySwap > 0 || + cgroup.Resources.KernelMemory > 0 || + cgroup.Resources.KernelMemoryTCP > 0 || + cgroup.Resources.OomKillDisable || + (cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1) +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + value, err = getCgroupParamUint(path, limit) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + 
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go new file mode 100644 index 000000000..d8cf1d87c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go @@ -0,0 +1,40 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NameGroup struct { + GroupName string + Join bool +} + +func (s *NameGroup) Name() string { + return s.GroupName +} + +func (s *NameGroup) Apply(d *cgroupData) error { + if s.Join { + // ignore errors if the named cgroup does not exist + d.join(s.GroupName) + } + return nil +} + +func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *NameGroup) Remove(d *cgroupData) error { + if s.Join { + removePath(d.path(s.GroupName)) + } + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go new file mode 100644 index 000000000..8e74b645e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go @@ -0,0 +1,43 @@ +// +build linux + +package fs + +import ( + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetClsGroup struct { +} + +func (s *NetClsGroup) Name() string { + return "net_cls" +} + +func (s *NetClsGroup) Apply(d *cgroupData) error { + _, err := d.join("net_cls") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetClsGroup) Set(path string, cgroup 
*configs.Cgroup) error { + if cgroup.Resources.NetClsClassid != 0 { + if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_cls")) +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go new file mode 100644 index 000000000..d0ab2af89 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go @@ -0,0 +1,41 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetPrioGroup struct { +} + +func (s *NetPrioGroup) Name() string { + return "net_prio" +} + +func (s *NetPrioGroup) Apply(d *cgroupData) error { + _, err := d.join("net_prio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, prioMap := range cgroup.Resources.NetPrioIfpriomap { + if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_prio")) +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go new file mode 100644 index 000000000..5693676d3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go @@ -0,0 +1,35 @@ +// +build linux + +package fs + +import ( + 
"github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PerfEventGroup struct { +} + +func (s *PerfEventGroup) Name() string { + return "perf_event" +} + +func (s *PerfEventGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *PerfEventGroup) Remove(d *cgroupData) error { + return removePath(d.path("perf_event")) +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go new file mode 100644 index 000000000..f1e372055 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go @@ -0,0 +1,73 @@ +// +build linux + +package fs + +import ( + "fmt" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PidsGroup struct { +} + +func (s *PidsGroup) Name() string { + return "pids" +} + +func (s *PidsGroup) Apply(d *cgroupData) error { + _, err := d.join("pids") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. 
+ limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := writeFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroup) Remove(d *cgroupData) error { + return removePath(d.path("pids")) +} + +func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { + current, err := getCgroupParamUint(path, "pids.current") + if err != nil { + return fmt.Errorf("failed to parse pids.current - %s", err) + } + + maxString, err := getCgroupParamString(path, "pids.max") + if err != nil { + return fmt.Errorf("failed to parse pids.max - %s", err) + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". + var max uint64 + if maxString != "max" { + max, err = parseUint(maxString, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go new file mode 100644 index 000000000..5ff0a1615 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go @@ -0,0 +1,78 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Saturates negative values at zero and returns a uint64. +// Due to kernel bugs, some of the memory cgroup stats can be negative. +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. 
Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Parses a cgroup param and returns as name, value +// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// Gets a single uint64 value from the specified cgroup file. +func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { + fileName := filepath.Join(cgroupPath, cgroupFile) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified cgroup file +func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go new file mode 100644 index 000000000..b1efbfd99 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go @@ -0,0 +1,128 @@ +// +build linux + +package rootless + +import ( + "fmt" + + 
"github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" +) + +// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code +// needlessly. We should probably export this list. + +var subsystems = []subsystem{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.NetClsGroup{}, + &fs.NetPrioGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error +} + +// The noop cgroup manager is used for rootless containers, because we currently +// cannot manage cgroups if we are in a rootless setup. This manager is chosen +// by factory if we are in rootless mode. We error out if any cgroup options are +// set in the config -- this may change in the future with upcoming kernel features +// like the cgroup namespace. + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func (m *Manager) Apply(pid int) error { + // If there are no cgroup settings, there's nothing to do. + if m.Cgroups == nil { + return nil + } + + // We can't set paths. + // TODO(cyphar): Implement the case where the runner of a rootless container + // owns their own cgroup, which would allow us to set up a + // cgroup for each path. + if m.Cgroups.Paths != nil { + return fmt.Errorf("cannot change cgroup path in rootless container") + } + + // We load the paths into the manager. 
+ paths := make(map[string]string) + for _, sys := range subsystems { + name := sys.Name() + + path, err := cgroups.GetOwnCgroupPath(name) + if err != nil { + // Ignore paths we couldn't resolve. + continue + } + + paths[name] = path + } + + m.Paths = paths + return nil +} + +func (m *Manager) GetPaths() map[string]string { + return m.Paths +} + +func (m *Manager) Set(container *configs.Config) error { + // We have to re-do the validation here, since someone might decide to + // update a rootless container. + return validate.New().Validate(container) +} + +func (m *Manager) GetPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(dir) +} + +func (m *Manager) GetAllPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(dir) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. While this doesn't + // actually require write access to a cgroup directory, the + // statistics are not useful if they can be affected by + // non-container processes. + return nil, fmt.Errorf("cannot get cgroup stats in rootless container") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. + return fmt.Errorf("cannot use freezer cgroup in rootless container") +} + +func (m *Manager) Destroy() error { + // We don't have to do anything here because we didn't do any setup. 
+ return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go new file mode 100644 index 000000000..8eeedc55b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -0,0 +1,108 @@ +// +build linux + +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. + ThrottledTime uint64 `json:"throttled_time,omitempty"` +} + +// CpuUsage denotes the usage of a CPU. +// All CPU stats are aggregate since container inception. +type CpuUsage struct { + // Total CPU time consumed. + // Units: nanoseconds. + TotalUsage uint64 `json:"total_usage,omitempty"` + // Total CPU time consumed per core. + // Units: nanoseconds. + PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + // Time spent by tasks of the cgroup in kernel mode. + // Units: nanoseconds. + UsageInKernelmode uint64 `json:"usage_in_kernelmode"` + // Time spent by tasks of the cgroup in user mode. + // Units: nanoseconds. 
+ UsageInUsermode uint64 `json:"usage_in_usermode"` +} + +type CpuStats struct { + CpuUsage CpuUsage `json:"cpu_usage,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` +} + +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` + Limit uint64 `json:"limit"` +} + +type MemoryStats struct { + // memory used for cache + Cache uint64 `json:"cache,omitempty"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + // usage of kernel TCP memory + KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` + // if true, memory usage is accounted for throughout a hierarchy of cgroups. + UseHierarchy bool `json:"use_hierarchy"` + + Stats map[string]uint64 `json:"stats,omitempty"` +} + +type PidsStats struct { + // number of pids in the cgroup + Current uint64 `json:"current,omitempty"` + // active pids hard limit + Limit uint64 `json:"limit,omitempty"` +} + +type BlkioStatEntry struct { + Major uint64 `json:"major,omitempty"` + Minor uint64 `json:"minor,omitempty"` + Op string `json:"op,omitempty"` + Value uint64 `json:"value,omitempty"` +} + +type BlkioStats struct { + // number of bytes transferred to and from the block device + IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` + IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` + IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` + IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` + IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` + IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` + IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` + 
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` +} + +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. + MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times hugetlb usage allocation failure. + Failcnt uint64 `json:"failcnt"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + PidsStats PidsStats `json:"pids_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 000000000..7de9ae605 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,55 @@ +// +build !linux + +package systemd + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func UseSystemd() bool { + return false +} + +func (m *Manager) Apply(pid int) error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetAllPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Destroy() error { + return fmt.Errorf("Systemd not supported") +} + 
+func (m *Manager) GetPaths() map[string]string { + return nil +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Set(container *configs.Config) error { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} + +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go new file mode 100644 index 000000000..b010b4b32 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go @@ -0,0 +1,553 @@ +// +build linux + +package systemd + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + systemdDbus "github.com/coreos/go-systemd/dbus" + systemdUtil "github.com/coreos/go-systemd/util" + "github.com/godbus/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Set the cgroup represented by cgroup. 
+ Set(path string, cgroup *configs.Cgroup) error +} + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +var subsystems = subsystemSet{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NetPrioGroup{}, + &fs.NetClsGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +const ( + testScopeWait = 4 + testSliceWait = 4 +) + +var ( + connLock sync.Mutex + theConn *systemdDbus.Conn + hasStartTransientUnit bool + hasStartTransientSliceUnit bool + hasTransientDefaultDependencies bool + hasDelegate bool +) + +func newProp(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func UseSystemd() bool { + if !systemdUtil.IsRunningSystemd() { + return false + } + + connLock.Lock() + defer connLock.Unlock() + + if theConn == nil { + var err error + theConn, err = systemdDbus.New() + if err != nil { + return false + } + + // Assume we have StartTransientUnit + hasStartTransientUnit = true + + // But if we get UnknownMethod error we don't + if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { + hasStartTransientUnit = false + return hasStartTransientUnit + } + } + } + + // Ensure the scope name we use doesn't exist. Use the Pid to + // avoid collisions between multiple libcontainer users on a + // single host. 
+ scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid()) + testScopeExists := true + for i := 0; i <= testScopeWait; i++ { + if _, err := theConn.StopUnit(scope, "replace", nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") { + testScopeExists = false + break + } + } + } + time.Sleep(time.Millisecond) + } + + // Bail out if we can't kill this scope without testing for DefaultDependencies + if testScopeExists { + return hasStartTransientUnit + } + + // Assume StartTransientUnit on a scope allows DefaultDependencies + hasTransientDefaultDependencies = true + ddf := newProp("DefaultDependencies", false) + if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") { + hasTransientDefaultDependencies = false + } + } + } + + // Not critical because of the stop unit logic above. 
+ theConn.StopUnit(scope, "replace", nil) + + // Assume StartTransientUnit on a scope allows Delegate + hasDelegate = true + dl := newProp("Delegate", true) + if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") { + hasDelegate = false + } + } + } + + // Assume we have the ability to start a transient unit as a slice + // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219 + // For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299 + hasStartTransientSliceUnit = true + + // To ensure simple clean-up, we create a slice off the root with no hierarchy + slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid()) + if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil { + if _, ok := err.(dbus.Error); ok { + hasStartTransientSliceUnit = false + } + } + + for i := 0; i <= testSliceWait; i++ { + if _, err := theConn.StopUnit(slice, "replace", nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") { + hasStartTransientSliceUnit = false + break + } + } + } else { + break + } + time.Sleep(time.Millisecond) + } + + // Not critical because of the stop unit logic above. 
+ theConn.StopUnit(scope, "replace", nil) + theConn.StopUnit(slice, "replace", nil) + } + return hasStartTransientUnit +} + +func (m *Manager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := getSubsystemPath(m.Cgroups, name) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(unitName, ".slice") { + // This was broken until systemd v229, but has been back-ported on RHEL environments >= 219 + if !hasStartTransientSliceUnit { + return fmt.Errorf("systemd version does not support ability to start a slice as transient unit") + } + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // otherwise, we use Slice= + properties = append(properties, systemdDbus.PropSlice(slice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + if hasDelegate { + // This is only supported on systemd versions 218 and above. + properties = append(properties, newProp("Delegate", true)) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. 
+ properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + if hasTransientDefaultDependencies { + properties = append(properties, + newProp("DefaultDependencies", false)) + } + + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", c.Resources.Memory)) + } + + if c.Resources.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", c.Resources.CpuShares)) + } + + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { + cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + properties = append(properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + if c.Resources.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight))) + } + + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. 
+ if c.Resources.KernelMemory != 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) { + return err + } + + if err := joinCgroups(c, pid); err != nil { + return err + } + + paths := make(map[string]string) + for _, s := range subsystems { + subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[s.Name()] = subsystemPath + } + m.Paths = paths + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil) + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { + path, err := getSubsystemPath(c, subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return "", err + } + return path, nil +} + +func joinCgroups(c *configs.Cgroup, pid int) error { + for _, sys := range subsystems { + name := sys.Name() + switch name { + case "name=systemd": + // let systemd handle this + case "cpuset": + path, err := getSubsystemPath(c, name) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + s := &fs.CpusetGroup{} + if err := s.ApplyDir(path, c, pid); err != nil { + return err + } + default: + _, err := join(c, name, pid) + if err != nil { + // Even if it's `not found` error, we'll return err + // because devices cgroup is hard requirement for + // 
container security. + if name == "devices" { + return err + } + // For other subsystems, omit the `not found` error + // because they are optional. + if !cgroups.IsNotFound(err) { + return err + } + } + } + } + + return nil +} + +// systemd represents slice hierarchy using `-`, so we need to follow suit when +// generating the path of slice. Essentially, test-a-b.slice becomes +// test.slice/test-a.slice/test-a-b.slice. +func ExpandSlice(slice string) (string, error) { + suffix := ".slice" + // Name has to end with ".slice", but can't be just ".slice". + if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Path-separators are not allowed. + if strings.Contains(slice, "/") { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + var path, prefix string + sliceName := strings.TrimSuffix(slice, suffix) + // if input was -.slice, we should just return root now + if sliceName == "-" { + return "/", nil + } + for _, component := range strings.Split(sliceName, "-") { + // test--a.slice isn't permitted, nor is -test.slice. + if component == "" { + return "", fmt.Errorf("invalid slice name: %s", slice) + } + + // Append the component to the path and to the prefix. 
+ path += prefix + component + suffix + "/" + prefix += component + "-" + } + + return path, nil +} + +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { + mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) + if err != nil { + return "", err + } + + initPath, err := cgroups.GetInitCgroup(subsystem) + if err != nil { + return "", err + } + // if pid 1 is systemd 226 or later, it will be in init.scope, not the root + initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope") + + slice := "system.slice" + if c.Parent != "" { + slice = c.Parent + } + + slice, err = ExpandSlice(slice) + if err != nil { + return "", err + } + + return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil { + return err + } + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := subsystems.Get("freezer") + if err != nil { + return err + } + err = freezer.Set(path, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(path) +} + +func (m *Manager) GetAllPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(path) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := subsystems.Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + // If 
Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups.Paths != nil { + return nil + } + for _, sys := range subsystems { + // Get the subsystem path, but don't error out for not found cgroups. + path, err := getSubsystemPath(container.Cgroups, sys.Name()) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +func getUnitName(c *configs.Cgroup) string { + // by default, we create a scope unless the user explicitly asks for a slice. + if !strings.HasSuffix(c.Name, ".slice") { + return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) + } + return c.Name +} + +func setKernelMemory(c *configs.Cgroup) error { + path, err := getSubsystemPath(c, "memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + return fs.EnableKernelMemoryAccounting(path) +} + +// isUnitExists returns true if the error is that a systemd unit already exists. 
+func isUnitExists(err error) bool { + if err != nil { + if dbusError, ok := err.(dbus.Error); ok { + return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + } + } + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go new file mode 100644 index 000000000..7c995efee --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -0,0 +1,462 @@ +// +build linux + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/docker/go-units" +) + +const ( + cgroupNamePrefix = "name=" + CgroupProcesses = "cgroup.procs" +) + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + mnt, _, err := FindCgroupMountpointAndRoot(subsystem) + return mnt, err +} + +func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. 
+ if !isSubsystemAvailable(subsystem) { + return "", "", NewNotFoundError(subsystem) + } + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", "", err + } + + return "", "", NewNotFoundError(subsystem) +} + +func isSubsystemAvailable(subsystem string) bool { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return false + } + _, avail := cgroups[subsystem] + return avail +} + +func GetClosestMountpointAncestor(dir, mountinfo string) string { + deepestMountPoint := "" + for _, mountInfoEntry := range strings.Split(mountinfo, "\n") { + mountInfoParts := strings.Fields(mountInfoEntry) + if len(mountInfoParts) < 5 { + continue + } + mountPoint := mountInfoParts[4] + if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) { + deepestMountPoint = mountPoint + } + } + return deepestMountPoint +} + +func FindCgroupMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "cgroup" { + // Check that the mount is properly formated. 
+ if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return filepath.Dir(fields[4]), nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("cgroup") +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + scanner := bufio.NewScanner(mi) + numFound := 0 + for scanner.Scan() && numFound < len(ss) { + txt := scanner.Text() + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + return nil, fmt.Errorf("invalid mountinfo format") + } + if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { + continue + } + fields := strings.Split(txt, " ") + m := Mount{ + Mountpoint: fields[4], + Root: fields[3], + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if !ss[opt] { + continue + } + if strings.HasPrefix(opt, cgroupNamePrefix) { + m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) + } else { + m.Subsystems = append(m.Subsystems, opt) + } + if !all { + numFound++ + } + } + res = append(res, m) + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +// GetCgroupMounts returns the mounts for the cgroup subsystems. +// all indicates whether to return just the first instance or all the mounts. 
+func GetCgroupMounts(all bool) ([]Mount, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range allSubsystems { + allMap[s] = true + } + return getCgroupMountsHelper(allMap, f, all) +} + +// GetAllSubsystems returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + if err := s.Err(); err != nil { + return nil, err + } + return subsystems, nil +} + +// GetOwnCgroup returns the relative path to the cgroup docker is running in. 
+func GetOwnCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func GetInitCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/1/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetInitCgroupPath(subsystem string) (string, error) { + cgroup, err := GetInitCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot(subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see pathes from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + +func readProcsFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, CgroupProcesses)) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +// ParseCgroupFile parses the given cgroup file, typically from +// /proc/<pid>/cgroup, into a map of subgroups to cgroup names. 
+func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return parseCgroupFromReader(f) +} + +// helper function for ParseCgroupFile to make testing easier +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + s := bufio.NewScanner(r) + cgroups := make(map[string]string) + + for s.Scan() { + text := s.Text() + // from cgroups(7): + // /proc/[pid]/cgroup + // ... + // For each cgroup hierarchy ... there is one entry + // containing three colon-separated fields of the form: + // hierarchy-ID:subsystem-list:cgroup-path + parts := strings.SplitN(text, ":", 3) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) + } + + for _, subs := range strings.Split(parts[1], ",") { + cgroups[subs] = parts[2] + } + } + if err := s.Err(); err != nil { + return nil, err + } + + return cgroups, nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + return p, nil + } + + return "", NewNotFoundError(subsystem) +} + +func PathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +func EnterPid(cgroupPaths map[string]string, pid int) error { + for _, path := range cgroupPaths { + if PathExists(path) { + if err := WriteCgroupProc(path, pid); err != nil { + return err + } + } + } + return nil +} + +// RemovePaths iterates over the provided paths removing them. +// We trying to remove all paths five times with increasing delay between tries. +// If after all there are not removed cgroups - appropriate error will be +// returned. 
+func RemovePaths(paths map[string]string) (err error) { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + for s, p := range paths { + os.RemoveAll(p) + // TODO: here probably should be logging + _, err := os.Stat(p) + // We need this strange way of checking cgroups existence because + // RemoveAll almost always returns error, even on already removed + // cgroups + if os.IsNotExist(err) { + delete(paths, s) + } + } + if len(paths) == 0 { + return nil + } + } + return fmt.Errorf("Failed to remove paths: %v", paths) +} + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} + +// GetPids returns all pids, that were added to cgroup at path. +func GetPids(path string) ([]int, error) { + return readProcsFile(path) +} + +// GetAllPids returns all pids, that were added to cgroup at path and to all its +// subcgroups. +func GetAllPids(path string) ([]int, error) { + var pids []int + // collect pids from all sub-cgroups + err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error { + dir, file := filepath.Split(p) + if file != CgroupProcesses { + return nil + } + if iErr != nil { + return iErr + } + cPids, err := readProcsFile(dir) + if err != nil { + return err + } + pids = append(pids, cPids...) 
+ return nil + }) + return pids, err +} + +// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file +func WriteCgroupProc(dir string, pid int) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", CgroupProcesses) + } + + // Dont attach any pid to the cgroup if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + } + } + return nil +} |