aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/opencontainers/runc/libcontainer
diff options
context:
space:
mode:
authoropenshift-ci[bot] <75433959+openshift-ci[bot]@users.noreply.github.com>2022-06-27 15:14:20 +0000
committerGitHub <noreply@github.com>2022-06-27 15:14:20 +0000
commit088665d2cfd488004f34dbd0804c3cd00bf94ca4 (patch)
tree0aa7d04edc4009672fcc760aca511b8d10796682 /vendor/github.com/opencontainers/runc/libcontainer
parent8806606ca2e2060750b3f86c6d31bb50125309de (diff)
parent2792e598c7ce1198ec8464a3119504123ae8397c (diff)
downloadpodman-088665d2cfd488004f34dbd0804c3cd00bf94ca4.tar.gz
podman-088665d2cfd488004f34dbd0804c3cd00bf94ca4.tar.bz2
podman-088665d2cfd488004f34dbd0804c3cd00bf94ca4.zip
Merge pull request #14654 from cdoern/cgroup
podman cgroup enhancement
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go15
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go311
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go129
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go166
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go245
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go39
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go15
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go158
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go264
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go62
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go348
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go31
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go32
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go30
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go186
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go24
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go62
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go25
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go87
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go28
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go152
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go99
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go127
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go271
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go48
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go193
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go216
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go72
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go121
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go145
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go8
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/config.go3
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go6
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go19
34 files changed, 3713 insertions, 24 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
index ba2b2266c..b9ba889b7 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
@@ -1,9 +1,24 @@
package cgroups
import (
+ "errors"
+
"github.com/opencontainers/runc/libcontainer/configs"
)
+var (
+ // ErrDevicesUnsupported is the error returned when device rules are
+ // requested but the cgroup manager is not configured to set them.
+ ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
+
+ // DevicesSetV1 and DevicesSetV2 are the functions used to set device
+ // rules for cgroup v1 and v2, respectively. Unless the
+ // libcontainer/cgroups/devices package is imported, both are nil,
+ // so cgroup managers can't manage devices.
+ DevicesSetV1 func(path string, r *configs.Resources) error
+ DevicesSetV2 func(path string, r *configs.Resources) error
+)
+
type Manager interface {
// Apply creates a cgroup, if not yet created, and adds a process
// with the specified pid into that cgroup. A special value of -1
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
new file mode 100644
index 000000000..c81b6562a
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
@@ -0,0 +1,311 @@
+package fs
+
+import (
+ "bufio"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// BlkioGroup is the cgroup v1 "blkio" subsystem handler. It caches the
+// names of the weight control files once they have been detected.
+type BlkioGroup struct {
+ // weightFilename is the detected name of the cgroup-wide weight file;
+ // an empty string means detection has not run yet.
+ weightFilename string
+ // weightDeviceFilename is the detected name of the per-device weight file.
+ weightDeviceFilename string
+}
+
+// Name returns the subsystem name, "blkio".
+func (s *BlkioGroup) Name() string {
+ return "blkio"
+}
+
+// Apply delegates to the package-level apply helper with path and pid;
+// the resources argument is ignored.
+func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
+ return apply(path, pid)
+}
+
+// Set writes the block I/O settings found in r to the control files of
+// the blkio cgroup at path. Zero-valued weights are skipped. The first
+// failed write aborts the operation and its error is returned.
+func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
+	// Make sure the weight file names are known before they are used.
+	s.detectWeightFilenames(path)
+
+	// write stores value in a single control file under path.
+	write := func(file, value string) error {
+		return cgroups.WriteFile(path, file, value)
+	}
+
+	if weight := r.BlkioWeight; weight != 0 {
+		if err := write(s.weightFilename, strconv.FormatUint(uint64(weight), 10)); err != nil {
+			return err
+		}
+	}
+	if leafWeight := r.BlkioLeafWeight; leafWeight != 0 {
+		if err := write("blkio.leaf_weight", strconv.FormatUint(uint64(leafWeight), 10)); err != nil {
+			return err
+		}
+	}
+
+	// Per-device weights.
+	for _, dev := range r.BlkioWeightDevice {
+		if dev.Weight != 0 {
+			if err := write(s.weightDeviceFilename, dev.WeightString()); err != nil {
+				return err
+			}
+		}
+		if dev.LeafWeight != 0 {
+			if err := write("blkio.leaf_weight_device", dev.LeafWeightString()); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Per-device throttling limits, in the same order as before:
+	// read bps, write bps, read iops, write iops.
+	for _, limit := range r.BlkioThrottleReadBpsDevice {
+		if err := write("blkio.throttle.read_bps_device", limit.String()); err != nil {
+			return err
+		}
+	}
+	for _, limit := range r.BlkioThrottleWriteBpsDevice {
+		if err := write("blkio.throttle.write_bps_device", limit.String()); err != nil {
+			return err
+		}
+	}
+	for _, limit := range r.BlkioThrottleReadIOPSDevice {
+		if err := write("blkio.throttle.read_iops_device", limit.String()); err != nil {
+			return err
+		}
+	}
+	for _, limit := range r.BlkioThrottleWriteIOPSDevice {
+		if err := write("blkio.throttle.write_iops_device", limit.String()); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+/*
+examples:
+
+ blkio.sectors
+ 8:0 6792
+
+ blkio.io_service_bytes
+ 8:0 Read 1282048
+ 8:0 Write 2195456
+ 8:0 Sync 2195456
+ 8:0 Async 1282048
+ 8:0 Total 3477504
+ Total 3477504
+
+ blkio.io_serviced
+ 8:0 Read 124
+ 8:0 Write 104
+ 8:0 Sync 104
+ 8:0 Async 124
+ 8:0 Total 228
+ Total 228
+
+ blkio.io_queued
+ 8:0 Read 0
+ 8:0 Write 0
+ 8:0 Sync 0
+ 8:0 Async 0
+ 8:0 Total 0
+ Total 0
+*/
+
+// splitBlkioStatLine reports whether r separates fields in a blkio stat
+// line, i.e. whether it is a space or a colon.
+func splitBlkioStatLine(r rune) bool {
+	switch r {
+	case ' ', ':':
+		return true
+	default:
+		return false
+	}
+}
+
+// getBlkioStat parses the blkio stat file named file in directory dir.
+// Each line is split on spaces and colons and is expected to look like
+// "major:minor value" or "major:minor op value". Two-field lines that
+// start with "Total" are skipped. A missing file yields a nil slice and
+// no error.
+func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
+	var entries []cgroups.BlkioStatEntry
+
+	fd, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return entries, nil
+		}
+		return nil, err
+	}
+	defer fd.Close()
+
+	scanner := bufio.NewScanner(fd)
+	for scanner.Scan() {
+		line := scanner.Text()
+		// format: dev type amount
+		fields := strings.FieldsFunc(line, splitBlkioStatLine)
+		if len(fields) < 3 {
+			if len(fields) == 2 && fields[0] == "Total" {
+				// skip total line
+				continue
+			}
+			return nil, malformedLine(dir, file, line)
+		}
+
+		major, err := strconv.ParseUint(fields[0], 10, 64)
+		if err != nil {
+			return nil, &parseError{Path: dir, File: file, Err: err}
+		}
+		minor, err := strconv.ParseUint(fields[1], 10, 64)
+		if err != nil {
+			return nil, &parseError{Path: dir, File: file, Err: err}
+		}
+
+		// With exactly four fields the third one names the operation
+		// and the value moves one position to the right.
+		op, valueField := "", 2
+		if len(fields) == 4 {
+			op, valueField = fields[2], 3
+		}
+		value, err := strconv.ParseUint(fields[valueField], 10, 64)
+		if err != nil {
+			return nil, &parseError{Path: dir, File: file, Err: err}
+		}
+
+		entries = append(entries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: value})
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, &parseError{Path: dir, File: file, Err: err}
+	}
+
+	return entries, nil
+}
+
+// GetStats fills stats.BlkioStats from the blkio stat files found at path.
+//
+// The stat files are organized into groups that are tried in a fixed
+// order (bfqDebugStats, bfqStats, cfqStats, throttleRecursiveStats,
+// baseStats). A group is skipped when reading its first file fails or
+// yields no entries. Once a group's first file has been read, the
+// remaining files of that group are read too, and the function returns
+// after the last one without trying further groups. If every group is
+// skipped, stats is left untouched and nil is returned.
+func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
+ // blkioStatInfo pairs a stat file name with the stats field that
+ // receives its parsed entries.
+ type blkioStatInfo struct {
+ filename string
+ blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
+ }
+ bfqDebugStats := []blkioStatInfo{
+ {
+ filename: "blkio.bfq.sectors_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_service_time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_wait_time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_merged_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_queued_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
+ },
+ {
+ filename: "blkio.bfq.time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_serviced_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_service_bytes_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
+ },
+ }
+ bfqStats := []blkioStatInfo{
+ {
+ filename: "blkio.bfq.io_serviced_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
+ },
+ {
+ filename: "blkio.bfq.io_service_bytes_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
+ },
+ }
+ cfqStats := []blkioStatInfo{
+ {
+ filename: "blkio.sectors_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
+ },
+ {
+ filename: "blkio.io_service_time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
+ },
+ {
+ filename: "blkio.io_wait_time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
+ },
+ {
+ filename: "blkio.io_merged_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
+ },
+ {
+ filename: "blkio.io_queued_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
+ },
+ {
+ filename: "blkio.time_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
+ },
+ {
+ filename: "blkio.io_serviced_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
+ },
+ {
+ filename: "blkio.io_service_bytes_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
+ },
+ }
+ throttleRecursiveStats := []blkioStatInfo{
+ {
+ filename: "blkio.throttle.io_serviced_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
+ },
+ {
+ filename: "blkio.throttle.io_service_bytes_recursive",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
+ },
+ }
+ baseStats := []blkioStatInfo{
+ {
+ filename: "blkio.throttle.io_serviced",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
+ },
+ {
+ filename: "blkio.throttle.io_service_bytes",
+ blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
+ },
+ }
+ // Groups are tried in this order; the first usable one wins.
+ orderedStats := [][]blkioStatInfo{
+ bfqDebugStats,
+ bfqStats,
+ cfqStats,
+ throttleRecursiveStats,
+ baseStats,
+ }
+
+ var blkioStats []cgroups.BlkioStatEntry
+ var err error
+
+ for _, statGroup := range orderedStats {
+ for i, statInfo := range statGroup {
+ if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
+ // if an error occurs on (or no entries are read from) the
+ // first file, move on to the next group
+ if i == 0 {
+ break
+ }
+ // For a later file of the group, give up and return err,
+ // which is nil when the file merely had no entries.
+ return err
+ }
+ *statInfo.blkioStatEntriesPtr = blkioStats
+ // finish if all stats are gathered
+ if i == len(statGroup)-1 {
+ return nil
+ }
+ }
+ }
+ return nil
+}
+
+// detectWeightFilenames decides, once per BlkioGroup, which weight control
+// files to use: the plain "blkio.weight" pair when blkio.weight exists
+// under path, the "blkio.bfq." pair otherwise. Later calls are no-ops.
+func (s *BlkioGroup) detectWeightFilenames(path string) {
+	// Already detected.
+	if s.weightFilename != "" {
+		return
+	}
+
+	if !cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
+		s.weightFilename = "blkio.bfq.weight"
+		s.weightDeviceFilename = "blkio.bfq.weight_device"
+		return
+	}
+	s.weightFilename = "blkio.weight"
+	s.weightDeviceFilename = "blkio.weight_device"
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
new file mode 100644
index 000000000..6c79f899b
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@@ -0,0 +1,129 @@
+package fs
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "os"
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "golang.org/x/sys/unix"
+)
+
+// CpuGroup is the cgroup v1 "cpu" subsystem handler. It carries no state.
+type CpuGroup struct{}
+
+// Name returns the subsystem name, "cpu".
+func (s *CpuGroup) Name() string {
+ return "cpu"
+}
+
+// Apply creates the cgroup directory at path (including parents), applies
+// the real-time scheduling settings from r, and then adds pid to the
+// cgroup. The first error encountered is returned.
+func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
+ if err := os.MkdirAll(path, 0o755); err != nil {
+ return err
+ }
+ // We should set the real-time group scheduling settings before moving
+ // in the process because if the process is already in SCHED_RR mode
+ // and no RT bandwidth is set, adding it will fail.
+ if err := s.SetRtSched(path, r); err != nil {
+ return err
+ }
+ // Since we are not using apply(), we need to place the pid
+ // into the procs file.
+ return cgroups.WriteCgroupProc(path, pid)
+}
+
+// SetRtSched writes the real-time scheduling period and runtime from r to
+// cpu.rt_period_us and cpu.rt_runtime_us under path. A zero value leaves
+// the corresponding file untouched. The period is written first.
+func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
+	if period := r.CpuRtPeriod; period != 0 {
+		value := strconv.FormatUint(period, 10)
+		if err := cgroups.WriteFile(path, "cpu.rt_period_us", value); err != nil {
+			return err
+		}
+	}
+	if runtime := r.CpuRtRuntime; runtime != 0 {
+		value := strconv.FormatInt(runtime, 10)
+		if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", value); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Set applies the CPU settings from r to the cpu cgroup at path: shares,
+// period and quota, and finally the real-time settings via SetRtSched.
+// Zero-valued settings are skipped.
+//
+// After cpu.shares is written it is read back; if the value read differs
+// from the requested one, an error naming the maximum or minimum allowed
+// value is returned.
+func (s *CpuGroup) Set(path string, r *configs.Resources) error {
+ if r.CpuShares != 0 {
+ shares := r.CpuShares
+ if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
+ return err
+ }
+ // read it back
+ sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
+ if err != nil {
+ return err
+ }
+ // ... and check
+ if shares > sharesRead {
+ return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
+ } else if shares < sharesRead {
+ return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
+ }
+ }
+
+ // period stays non-empty only when the first write of the period
+ // failed with EINVAL and must be retried after the quota is set.
+ var period string
+ if r.CpuPeriod != 0 {
+ period = strconv.FormatUint(r.CpuPeriod, 10)
+ if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
+ // Sometimes when the period to be set is smaller
+ // than the current one, it is rejected by the kernel
+ // (EINVAL) as old_quota/new_period exceeds the parent
+ // cgroup quota limit. If this happens and the quota is
+ // going to be set, ignore the error for now and retry
+ // after setting the quota.
+ if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
+ return err
+ }
+ } else {
+ // The period was written successfully; no retry is needed.
+ period = ""
+ }
+ }
+ if r.CpuQuota != 0 {
+ if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
+ return err
+ }
+ // Retry the period write that was deferred above, if any.
+ if period != "" {
+ if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
+ return err
+ }
+ }
+ }
+ return s.SetRtSched(path, r)
+}
+
+// GetStats reads cpu.stat from the cgroup at path and stores the
+// throttling counters it finds (nr_periods, nr_throttled, throttled_time)
+// in stats.CpuStats.ThrottlingData. Other keys are ignored.
+//
+// A missing cpu.stat file is not an error. A line that cannot be parsed,
+// or a read error while scanning the file, is returned as a parse error
+// carrying the path and file name.
+func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
+	const file = "cpu.stat"
+	f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		t, v, err := fscommon.ParseKeyValue(sc.Text())
+		if err != nil {
+			return &parseError{Path: path, File: file, Err: err}
+		}
+		switch t {
+		case "nr_periods":
+			stats.CpuStats.ThrottlingData.Periods = v
+
+		case "nr_throttled":
+			stats.CpuStats.ThrottlingData.ThrottledPeriods = v
+
+		case "throttled_time":
+			stats.CpuStats.ThrottlingData.ThrottledTime = v
+		}
+	}
+	// Scan also stops on a read error; without this check a partially
+	// read file would be reported as success.
+	if err := sc.Err(); err != nil {
+		return &parseError{Path: path, File: file, Err: err}
+	}
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
new file mode 100644
index 000000000..d3bd7e111
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
@@ -0,0 +1,166 @@
+package fs
+
+import (
+ "bufio"
+ "os"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+const (
+ // Names of the cpuacct files parsed in this file.
+ cgroupCpuacctStat = "cpuacct.stat"
+ cgroupCpuacctUsageAll = "cpuacct.usage_all"
+
+ // nanosecondsInSecond is used to convert clock ticks to nanoseconds.
+ nanosecondsInSecond = 1000000000
+
+ // Column layout of a cpuacct.usage_all data line: the user-mode and
+ // kernel-mode column indexes, and the expected number of columns.
+ userModeColumn = 1
+ kernelModeColumn = 2
+ cuacctUsageAllColumnsNumber = 3
+
+ // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
+ // on Linux it's a constant which is safe to be hard coded,
+ // so we can avoid using cgo here. For details, see:
+ // https://github.com/containerd/cgroups/pull/12
+ clockTicks uint64 = 100
+)
+
+// CpuacctGroup is the cgroup v1 "cpuacct" subsystem handler. It carries
+// no state.
+type CpuacctGroup struct{}
+
+// Name returns the subsystem name, "cpuacct".
+func (s *CpuacctGroup) Name() string {
+ return "cpuacct"
+}
+
+// Apply delegates to the package-level apply helper with path and pid;
+// the resources argument is ignored.
+func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
+ return apply(path, pid)
+}
+
+// Set is a no-op: this handler writes no settings and always returns nil.
+func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
+ return nil
+}
+
+// GetStats collects CPU accounting data for the cgroup at path and stores
+// it in stats.CpuStats.CpuUsage: total usage, the user/kernel breakdown,
+// and the per-CPU figures. If path does not exist nothing is collected
+// and nil is returned. stats is only modified once every read succeeded.
+func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
+	if !cgroups.PathExists(path) {
+		return nil
+	}
+
+	// Gather everything first, in the original order, so that the same
+	// error wins when more than one read would fail.
+	userNs, kernelNs, err := getCpuUsageBreakdown(path)
+	if err != nil {
+		return err
+	}
+	total, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
+	if err != nil {
+		return err
+	}
+	perCPU, err := getPercpuUsage(path)
+	if err != nil {
+		return err
+	}
+	perCPUKernel, perCPUUser, err := getPercpuUsageInModes(path)
+	if err != nil {
+		return err
+	}
+
+	stats.CpuStats.CpuUsage.TotalUsage = total
+	stats.CpuStats.CpuUsage.UsageInUsermode = userNs
+	stats.CpuStats.CpuUsage.UsageInKernelmode = kernelNs
+	stats.CpuStats.CpuUsage.PercpuUsage = perCPU
+	stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = perCPUKernel
+	stats.CpuStats.CpuUsage.PercpuUsageInUsermode = perCPUUser
+	return nil
+}
+
+// getCpuUsageBreakdown returns the user-mode and kernel-mode CPU usage of
+// the cgroup at path, in nanoseconds, as read from cpuacct.stat.
+//
+// The file reports usage in clock ticks. An error is returned when the
+// file cannot be read, does not start with the "user" and "system"
+// fields in that order, or contains a value that is not an unsigned
+// integer.
+func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
+	var userModeUsage, kernelModeUsage uint64
+	const (
+		userField   = "user"
+		systemField = "system"
+		file        = cgroupCpuacctStat
+
+		// nanosecondsPerTick is the exact number of nanoseconds in one
+		// clock tick (nanosecondsInSecond is a multiple of clockTicks).
+		// Multiplying by it instead of multiplying by nanosecondsInSecond
+		// and then dividing by clockTicks gives the same result, but the
+		// intermediate product no longer overflows uint64 for large
+		// tick counts.
+		nanosecondsPerTick = nanosecondsInSecond / clockTicks
+	)
+
+	// Expected format:
+	// user <usage in ticks>
+	// system <usage in ticks>
+	data, err := cgroups.ReadFile(path, file)
+	if err != nil {
+		return 0, 0, err
+	}
+	// TODO: use strings.SplitN instead.
+	fields := strings.Fields(data)
+	if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
+		return 0, 0, malformedLine(path, file, data)
+	}
+	if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
+	}
+	if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
+	}
+
+	return userModeUsage * nanosecondsPerTick, kernelModeUsage * nanosecondsPerTick, nil
+}
+
+// getPercpuUsage parses cpuacct.usage_percpu under path into one value
+// per whitespace-separated field. On error, the values parsed so far are
+// returned together with the error.
+func getPercpuUsage(path string) ([]uint64, error) {
+	const file = "cpuacct.usage_percpu"
+
+	usages := []uint64{}
+	data, err := cgroups.ReadFile(path, file)
+	if err != nil {
+		return usages, err
+	}
+
+	// TODO: use strings.SplitN instead.
+	for _, field := range strings.Fields(data) {
+		usage, err := strconv.ParseUint(field, 10, 64)
+		if err != nil {
+			return usages, &parseError{Path: path, File: file, Err: err}
+		}
+		usages = append(usages, usage)
+	}
+	return usages, nil
+}
+
+// getPercpuUsageInModes parses cpuacct.usage_all under path and returns
+// the per-CPU kernel-mode and user-mode usage, in that order. The first
+// line of the file is treated as a header and skipped, and so is any
+// line that does not have exactly the expected number of space-separated
+// columns. A missing file yields two empty slices and no error.
+func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
+	const file = cgroupCpuacctUsageAll
+
+	kernelUsage := []uint64{}
+	userUsage := []uint64{}
+
+	fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return kernelUsage, userUsage, nil
+		}
+		return nil, nil, err
+	}
+	defer fd.Close()
+
+	scanner := bufio.NewScanner(fd)
+	scanner.Scan() // skipping header line
+
+	for scanner.Scan() {
+		columns := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
+		if len(columns) != cuacctUsageAllColumnsNumber {
+			continue
+		}
+
+		kernel, err := strconv.ParseUint(columns[kernelModeColumn], 10, 64)
+		if err != nil {
+			return nil, nil, &parseError{Path: path, File: file, Err: err}
+		}
+		kernelUsage = append(kernelUsage, kernel)
+
+		user, err := strconv.ParseUint(columns[userModeColumn], 10, 64)
+		if err != nil {
+			return nil, nil, &parseError{Path: path, File: file, Err: err}
+		}
+		userUsage = append(userUsage, user)
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, nil, &parseError{Path: path, File: file, Err: err}
+	}
+
+	return kernelUsage, userUsage, nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
new file mode 100644
index 000000000..550baa427
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@@ -0,0 +1,245 @@
+package fs
+
+import (
+ "errors"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// CpusetGroup is the cgroup v1 "cpuset" subsystem handler. It carries no
+// state.
+type CpusetGroup struct{}
+
+// Name returns the subsystem name, "cpuset".
+func (s *CpusetGroup) Name() string {
+ return "cpuset"
+}
+
+// Apply is a thin wrapper around ApplyDir.
+func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
+ return s.ApplyDir(path, r, pid)
+}
+
+// Set writes the CPU and memory-node lists from r to cpuset.cpus and
+// cpuset.mems under path. An empty list leaves the corresponding file
+// untouched. cpuset.cpus is written first.
+func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
+	if cpus := r.CpusetCpus; cpus != "" {
+		if err := cgroups.WriteFile(path, "cpuset.cpus", cpus); err != nil {
+			return err
+		}
+	}
+	if mems := r.CpusetMems; mems != "" {
+		if err := cgroups.WriteFile(path, "cpuset.mems", mems); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// getCpusetStat reads a cpuset list file (comma-separated numbers and
+// "lo-hi" ranges) and expands it into the individual values, each of
+// which must fit in 16 bits. On error, the values extracted so far are
+// returned together with the error.
+func getCpusetStat(path string, file string) ([]uint16, error) {
+	var ids []uint16
+
+	content, err := fscommon.GetCgroupParamString(path, file)
+	if err != nil {
+		return ids, err
+	}
+	if content == "" {
+		return ids, &parseError{Path: path, File: file, Err: errors.New("empty file")}
+	}
+
+	for _, item := range strings.Split(content, ",") {
+		bounds := strings.SplitN(item, "-", 3)
+		switch len(bounds) {
+		case 1:
+			// A single number.
+			id, err := strconv.ParseUint(item, 10, 16)
+			if err != nil {
+				return ids, &parseError{Path: path, File: file, Err: err}
+			}
+			ids = append(ids, uint16(id))
+		case 2:
+			// An inclusive range "lo-hi".
+			lo, err := strconv.ParseUint(bounds[0], 10, 16)
+			if err != nil {
+				return ids, &parseError{Path: path, File: file, Err: err}
+			}
+			hi, err := strconv.ParseUint(bounds[1], 10, 16)
+			if err != nil {
+				return ids, &parseError{Path: path, File: file, Err: err}
+			}
+			if lo > hi {
+				return ids, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
+			}
+			for id := lo; id <= hi; id++ {
+				ids = append(ids, uint16(id))
+			}
+		case 3:
+			return ids, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
+		}
+	}
+
+	return ids, nil
+}
+
+// GetStats fills stats.CPUSetStats from the cpuset control files under
+// path. Files are read one after another; a file that does not exist is
+// tolerated and reading continues with the next file, while any other
+// error aborts and is returned.
+func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
+ var err error
+
+ stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ // Unlike the values above, this one is read as a signed integer.
+ stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
+ if err != nil && !errors.Is(err, os.ErrNotExist) {
+ return err
+ }
+
+ return nil
+}
+
+// ApplyDir prepares the cpuset cgroup directory dir and moves pid into it.
+// It ensures the parent directories exist and are populated, creates dir
+// itself if needed, makes sure cpuset.cpus and cpuset.mems are set (from
+// r, or else copied from the parent), and finally writes pid to the
+// cgroup. An empty dir is a no-op that returns nil.
+func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
+ // This might happen if we have no cpuset cgroup mounted.
+ // Just do nothing and don't fail.
+ if dir == "" {
+ return nil
+ }
+ // cpusetEnsureParent starts with the parent because we don't want
+ // to explicitly inherit from the parent for dir itself; that could
+ // conflict with 'cpuset.cpu_exclusive'.
+ if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
+ return err
+ }
+ if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
+ return err
+ }
+ // We didn't inherit cpuset configs from parent, but we have
+ // to ensure cpuset configs are set before moving task into the
+ // cgroup.
+ // The logic is, if user specified cpuset configs, use these
+ // specified configs, otherwise, inherit from parent. This makes
+ // cpuset configs work correctly with 'cpuset.cpu_exclusive', and
+ // keep backward compatibility.
+ if err := s.ensureCpusAndMems(dir, r); err != nil {
+ return err
+ }
+ // Since we are not using apply(), we need to place the pid
+ // into the procs file.
+ return cgroups.WriteCgroupProc(dir, pid)
+}
+
+// getCpusetSubsystemSettings reads cpuset.cpus and cpuset.mems from the
+// directory parent and returns their contents. Reading stops at the
+// first failure, whose error is returned.
+func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
+	cpus, err = cgroups.ReadFile(parent, "cpuset.cpus")
+	if err != nil {
+		return cpus, mems, err
+	}
+	mems, err = cgroups.ReadFile(parent, "cpuset.mems")
+	if err != nil {
+		return cpus, mems, err
+	}
+	return cpus, mems, nil
+}
+
+// cpusetEnsureParent makes sure that the parent directories of current
+// are created and populated with the proper cpus and mems files copied
+// from their respective parent. It does that recursively, starting from
+// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
+//
+// The recursion stops, without touching current, once the parent of
+// current exists and is not on a cgroup filesystem.
+func cpusetEnsureParent(current string) error {
+ var st unix.Statfs_t
+
+ parent := filepath.Dir(current)
+ err := unix.Statfs(parent, &st)
+ // The parent exists and is not cgroupfs: nothing to do for current.
+ if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
+ return nil
+ }
+ // Treat non-existing directory as cgroupfs as it will be created,
+ // and the root cpuset directory obviously exists.
+ if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
+ return &os.PathError{Op: "statfs", Path: parent, Err: err}
+ }
+
+ // Handle the ancestors first so that parent exists and is populated
+ // before current is created and inherits from it.
+ if err := cpusetEnsureParent(parent); err != nil {
+ return err
+ }
+ if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
+ return err
+ }
+ return cpusetCopyIfNeeded(current, parent)
+}
+
+// cpusetCopyIfNeeded copies cpuset.cpus and cpuset.mems from the parent
+// directory to the current directory, each one only if the current
+// directory's file is empty (see isEmptyCpuset). The settings of both
+// directories are read before anything is written.
+func cpusetCopyIfNeeded(current, parent string) error {
+	ownCpus, ownMems, err := getCpusetSubsystemSettings(current)
+	if err != nil {
+		return err
+	}
+	inheritedCpus, inheritedMems, err := getCpusetSubsystemSettings(parent)
+	if err != nil {
+		return err
+	}
+
+	// inherit writes the parent's value to file in current, but only
+	// when the current value is empty.
+	inherit := func(file, own, inherited string) error {
+		if !isEmptyCpuset(own) {
+			return nil
+		}
+		return cgroups.WriteFile(current, file, inherited)
+	}
+
+	if err := inherit("cpuset.cpus", ownCpus, inheritedCpus); err != nil {
+		return err
+	}
+	return inherit("cpuset.mems", ownMems, inheritedMems)
+}
+
+// isEmptyCpuset reports whether str, the content of a cpuset file, holds
+// no value: it is either the empty string or a single newline.
+func isEmptyCpuset(str string) bool {
+	switch str {
+	case "", "\n":
+		return true
+	}
+	return false
+}
+
+// ensureCpusAndMems first applies the cpuset settings from r to path via
+// Set, then fills in whichever of cpuset.cpus and cpuset.mems is still
+// empty by copying it from the parent directory of path.
+func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
+ if err := s.Set(path, r); err != nil {
+ return err
+ }
+ return cpusetCopyIfNeeded(path, filepath.Dir(path))
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
new file mode 100644
index 000000000..0bf3d9deb
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
@@ -0,0 +1,39 @@
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// DevicesGroup is the cgroup v1 "devices" subsystem handler. It carries
+// no state.
+type DevicesGroup struct{}
+
+// Name returns the subsystem name, "devices".
+func (s *DevicesGroup) Name() string {
+ return "devices"
+}
+
+// Apply delegates to the package-level apply helper with path and pid,
+// unless r.SkipDevices is set, in which case it does nothing. An empty
+// path is rejected with errSubsystemDoesNotExist.
+func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
+ if r.SkipDevices {
+ return nil
+ }
+ if path == "" {
+ // Return error here, since devices cgroup
+ // is a hard requirement for container's security.
+ return errSubsystemDoesNotExist
+ }
+
+ return apply(path, pid)
+}
+
+// Set applies the device rules from r to the cgroup at path by calling
+// cgroups.DevicesSetV1. When that hook is nil, Set succeeds only if r
+// contains no device rules; otherwise it returns
+// cgroups.ErrDevicesUnsupported.
+func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
+	if cgroups.DevicesSetV1 != nil {
+		return cgroups.DevicesSetV1(path, r)
+	}
+	// No device support available: that only matters if rules were
+	// actually requested.
+	if len(r.Devices) != 0 {
+		return cgroups.ErrDevicesUnsupported
+	}
+	return nil
+}
+
+// GetStats is a no-op: this handler collects no statistics and always
+// returns nil.
+func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
new file mode 100644
index 000000000..f2ab6f130
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
@@ -0,0 +1,15 @@
+package fs
+
+import (
+ "fmt"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+)
+
+// parseError is a package-local alias for fscommon.ParseError.
+type parseError = fscommon.ParseError
+
+// malformedLine is used by all cgroupfs file parsers that expect a line
+// in a particular format but get some garbage instead. It returns a
+// parseError for the given path and file whose wrapped error quotes the
+// offending line.
+func malformedLine(path, file, line string) error {
+ return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
new file mode 100644
index 000000000..987f1bf5e
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
@@ -0,0 +1,158 @@
+package fs
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+// FreezerGroup implements the subsystem interface for the cgroup v1
+// "freezer" controller.
+type FreezerGroup struct{}
+
+// Name returns the controller name, "freezer".
+func (*FreezerGroup) Name() string {
+	return "freezer"
+}
+
+// Apply adds pid to the freezer cgroup at path via the shared apply helper.
+// The resources argument is not used.
+func (*FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// Set applies the freezer state requested in r.Freezer by writing to
+// freezer.state under path.
+//
+// For configs.Frozen it writes the state and reads it back, retrying (up to
+// 1000 iterations) while the kernel reports FREEZING; if freezing fails with
+// an error the cgroup is thawed again on a best-effort basis. configs.Thawed
+// is written once, configs.Undefined is a no-op, and any other value is
+// rejected with an error.
+func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
+ switch r.Freezer {
+ case configs.Frozen:
+ // The deferred function inspects the named result Err, so it runs
+ // for every error return below.
+ defer func() {
+ if Err != nil {
+ // Freezing failed, and it is bad and dangerous
+ // to leave the cgroup in FROZEN or FREEZING
+ // state, so (try to) thaw it back.
+ _ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
+ }
+ }()
+
+ // As per older kernel docs (freezer-subsystem.txt before
+ // kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
+ // userspace should either retry or thaw. While current
+ // kernel cgroup v1 docs no longer mention a need to retry,
+ // even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
+ // freeze a cgroup v1 while new processes keep appearing in it
+ // (either via fork/clone or by writing new PIDs to
+ // cgroup.procs).
+ //
+ // The numbers below are empirically chosen to have a decent
+ // chance to succeed in various scenarios ("runc pause/unpause
+ // with parallel runc exec" and "bare freeze/unfreeze on a very
+ // slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
+ //
+ // Adding any amount of sleep in between retries did not
+ // increase the chances of successful freeze in "pause/unpause
+ // with parallel exec" reproducer. OTOH, adding an occasional
+ // sleep helped for the case where the system is extremely slow
+ // (CentOS 7 VM on GHA CI).
+ //
+ // Alas, this is still a game of chances, since the real fix
+ // belongs to the kernel (cgroup v2 does not have this bug).
+
+ for i := 0; i < 1000; i++ {
+ if i%50 == 49 {
+ // Occasional thaw and sleep improves
+ // the chances to succeed in freezing
+ // in case new processes keep appearing
+ // in the cgroup.
+ _ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
+ time.Sleep(10 * time.Millisecond)
+ }
+
+ if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
+ return err
+ }
+
+ if i%25 == 24 {
+ // Occasional short sleep before reading
+ // the state back also improves the chances to
+ // succeed in freezing in case of a very slow
+ // system.
+ time.Sleep(10 * time.Microsecond)
+ }
+ state, err := cgroups.ReadFile(path, "freezer.state")
+ if err != nil {
+ return err
+ }
+ state = strings.TrimSpace(state)
+ switch state {
+ case "FREEZING":
+ continue
+ case string(configs.Frozen):
+ if i > 1 {
+ logrus.Debugf("frozen after %d retries", i)
+ }
+ return nil
+ default:
+ // should never happen
+ return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
+ }
+ }
+ // Despite our best efforts, it got stuck in FREEZING.
+ return errors.New("unable to freeze")
+ case configs.Thawed:
+ return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
+ case configs.Undefined:
+ return nil
+ default:
+ return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
+ }
+}
+
+// GetStats is a no-op: this implementation records no statistics for the
+// freezer controller.
+func (*FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return nil
+}
+
+// GetState reports the freezer state of the cgroup at path.
+//
+// It reads freezer.state and, while the kernel reports FREEZING, sleeps for
+// a millisecond and retries so that only a stable state is returned. When
+// the state is FROZEN it additionally reads freezer.self_freezing to tell a
+// cgroup frozen directly ("1") from one frozen only through an ancestor
+// ("0", reported as configs.Thawed).
+//
+// A missing file or ENODEV is not treated as an error: a missing
+// freezer.state yields configs.Undefined, a missing freezer.self_freezing
+// yields configs.Frozen. Both checks use errors.Is so that wrapped errors
+// are recognised as well.
+func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
+	for {
+		state, err := cgroups.ReadFile(path, "freezer.state")
+		if err != nil {
+			// If the kernel is too old, then we just treat the freezer as
+			// being in an "undefined" state.
+			if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
+				err = nil
+			}
+			return configs.Undefined, err
+		}
+		switch strings.TrimSpace(state) {
+		case "THAWED":
+			return configs.Thawed, nil
+		case "FROZEN":
+			// Find out whether the cgroup is frozen directly,
+			// or indirectly via an ancestor.
+			self, err := cgroups.ReadFile(path, "freezer.self_freezing")
+			if err != nil {
+				// If the kernel is too old, then we just treat
+				// it as being frozen.
+				if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
+					err = nil
+				}
+				return configs.Frozen, err
+			}
+			switch self {
+			case "0\n":
+				return configs.Thawed, nil
+			case "1\n":
+				return configs.Frozen, nil
+			default:
+				return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
+			}
+		case "FREEZING":
+			// Make sure we get a stable freezer state, so retry if the cgroup
+			// is still undergoing freezing. This should be a temporary delay.
+			time.Sleep(1 * time.Millisecond)
+			continue
+		default:
+			return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
+		}
+	}
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
new file mode 100644
index 000000000..be4dcc341
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
@@ -0,0 +1,264 @@
+package fs
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "sync"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// subsystems lists the cgroup v1 controllers handled by the manager.
+// The manager's Apply, Set and GetStats iterate over it in this order.
+// init may append one more entry when running in cgroup hybrid mode.
+var subsystems = []subsystem{
+ &CpusetGroup{},
+ &DevicesGroup{},
+ &MemoryGroup{},
+ &CpuGroup{},
+ &CpuacctGroup{},
+ &PidsGroup{},
+ &BlkioGroup{},
+ &HugetlbGroup{},
+ &NetClsGroup{},
+ &NetPrioGroup{},
+ &PerfEventGroup{},
+ &FreezerGroup{},
+ &RdmaGroup{},
+ &NameGroup{GroupName: "name=systemd", Join: true},
+}
+
+// errSubsystemDoesNotExist is returned when a required subsystem has no
+// cgroup path.
+var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
+
+// init extends the subsystem list when cgroups-hybrid mode is in use: the
+// extra "" controller indicates that the cgroup v2 hierarchy should be
+// joined as well.
+func init() {
+	if !cgroups.IsCgroup2HybridMode() {
+		return
+	}
+	subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
+}
+
+// subsystem is the contract every cgroup v1 controller in this package
+// implements; the manager drives all controllers through it.
+type subsystem interface {
+ // Name returns the name of the subsystem.
+ Name() string
+ // GetStats fills in the stats for the subsystem.
+ GetStats(path string, stats *cgroups.Stats) error
+ // Apply creates and joins a cgroup, adding pid into it. Some
+ // subsystems use resources to pre-configure the cgroup parents
+ // before creating or joining it.
+ Apply(path string, r *configs.Resources, pid int) error
+ // Set sets the cgroup resources.
+ Set(path string, r *configs.Resources) error
+}
+
+// manager is the cgroup v1 (cgroupfs) implementation returned by NewManager.
+type manager struct {
+ // mu is held by the methods that read or modify paths.
+ mu sync.Mutex
+ // cgroups is the configuration the manager was created with.
+ cgroups *configs.Cgroup
+ // paths maps a subsystem name to its cgroup directory.
+ paths map[string]string
+}
+
+// NewManager returns a cgroup v1 manager for cg.
+//
+// cg.Resources must be non-nil, since some v1 controllers (cpu, cpuset and
+// devices) expect it in Apply, and it must not carry Unified (cgroup v2)
+// settings. When paths is nil the per-subsystem paths are computed from cg
+// with initPaths; otherwise the supplied map is used as is.
+func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
+	switch {
+	case cg.Resources == nil:
+		return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
+	case cg.Resources.Unified != nil:
+		return nil, cgroups.ErrV1NoUnified
+	}
+
+	if paths == nil {
+		computed, err := initPaths(cg)
+		if err != nil {
+			return nil, err
+		}
+		paths = computed
+	}
+
+	mgr := &manager{cgroups: cg, paths: paths}
+	return mgr, nil
+}
+
+// isIgnorableError reports whether err is a permission problem, in the
+// loose sense, that a rootless caller may ignore. Besides the ordinary
+// EPERM this covers EROFS and EACCES, which for an unprivileged user are
+// effectively permission errors too. It always returns false when rootless
+// is false.
+func isIgnorableError(rootless bool, err error) bool {
+	// Errors are never ignored when running as root.
+	if !rootless {
+		return false
+	}
+	// The ordinary permission error.
+	if errors.Is(err, os.ErrPermission) {
+		return true
+	}
+	// Otherwise look for a few specific syscall errors.
+	var errno unix.Errno
+	if !errors.As(err, &errno) {
+		return false
+	}
+	switch errno {
+	case unix.EROFS, unix.EPERM, unix.EACCES:
+		return true
+	}
+	return false
+}
+
+// Apply places pid into the cgroup of every subsystem that has a path.
+//
+// For a rootless container (including euid=0 in a user namespace) with no
+// explicit cgroup path, a permission-type failure does not abort: the
+// subsystem is dropped from m.paths, since its cgroup either could not be
+// created or could not be joined. If limits were requested for such a
+// subsystem, Set later fails with a friendly error (see its path == ""
+// check). Any other failure is returned immediately.
+func (m *manager) Apply(pid int) (err error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	cg := m.cgroups
+
+	for _, sys := range subsystems {
+		name := sys.Name()
+		path, found := m.paths[name]
+		if !found {
+			continue
+		}
+
+		applyErr := sys.Apply(path, cg.Resources, pid)
+		if applyErr == nil {
+			continue
+		}
+		if cg.Path == "" && isIgnorableError(cg.Rootless, applyErr) {
+			delete(m.paths, name)
+			continue
+		}
+		return applyErr
+	}
+	return nil
+}
+
+// Destroy removes the manager's cgroup paths via cgroups.RemovePaths while
+// holding the manager lock.
+func (m *manager) Destroy() error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	err := cgroups.RemovePaths(m.paths)
+	return err
+}
+
+// Path returns the cgroup path recorded for subsys, or "" when there is
+// none.
+func (m *manager) Path(subsys string) string {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	dir := m.paths[subsys]
+	return dir
+}
+
+// GetStats collects statistics from every subsystem that has a non-empty
+// path. The first subsystem error aborts the collection and is returned
+// without any stats.
+func (m *manager) GetStats() (*cgroups.Stats, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	collected := cgroups.NewStats()
+	for _, sys := range subsystems {
+		if dir := m.paths[sys.Name()]; dir != "" {
+			if err := sys.GetStats(dir, collected); err != nil {
+				return nil, err
+			}
+		}
+	}
+	return collected, nil
+}
+
+// Set applies the resources in r to every subsystem.
+//
+// A nil r is a no-op, and cgroup v2 "Unified" settings are rejected with
+// cgroups.ErrV1NoUnified. For a rootless container, errors from the devices
+// subsystem other than cgroups.ErrDevicesUnsupported are ignored, as it is
+// not expected to work there. Errors from any other subsystem are returned;
+// if that subsystem has no path, the error explains that the container
+// could not join or create the cgroup.
+func (m *manager) Set(r *configs.Resources) error {
+	if r == nil {
+		return nil
+	}
+
+	if r.Unified != nil {
+		return cgroups.ErrV1NoUnified
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	for _, sys := range subsystems {
+		name := sys.Name()
+		path := m.paths[name]
+		err := sys.Set(path, r)
+		switch {
+		case err == nil:
+			continue
+		case m.cgroups.Rootless && name == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported):
+			// Rootless: the devices subsystem is not expected to work.
+			continue
+		case path == "":
+			// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
+			// No path was ever created for this cgroup, so its limits
+			// cannot be set (although the attempt was already made).
+			return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", name)
+		default:
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Freeze switches the container's freezer cgroup to state.
+//
+// The requested state is stored in the manager's resources before it is
+// applied; if applying fails, the previously stored state is restored and
+// the error returned. It fails when the container has no freezer cgroup.
+func (m *manager) Freeze(state configs.FreezerState) error {
+	path := m.Path("freezer")
+	if path == "" {
+		return errors.New("cannot toggle freezer: cgroups not configured for container")
+	}
+
+	res := m.cgroups.Resources
+	saved := res.Freezer
+	res.Freezer = state
+
+	err := (&FreezerGroup{}).Set(path, res)
+	if err != nil {
+		res.Freezer = saved
+	}
+	return err
+}
+
+// GetPids returns the result of cgroups.GetPids for the manager's
+// "devices" cgroup path.
+func (m *manager) GetPids() ([]int, error) {
+	dir := m.Path("devices")
+	return cgroups.GetPids(dir)
+}
+
+// GetAllPids returns the result of cgroups.GetAllPids for the manager's
+// "devices" cgroup path.
+func (m *manager) GetAllPids() ([]int, error) {
+	dir := m.Path("devices")
+	return cgroups.GetAllPids(dir)
+}
+
+// GetPaths returns the manager's subsystem-name-to-path map. The map
+// itself is returned, not a copy.
+func (m *manager) GetPaths() map[string]string {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	all := m.paths
+	return all
+}
+
+// GetCgroups returns the cgroup configuration the manager was created
+// with. The error is always nil.
+func (m *manager) GetCgroups() (*configs.Cgroup, error) {
+	cfg := m.cgroups
+	return cfg, nil
+}
+
+// GetFreezerState returns the current state of the container's freezer
+// cgroup, or configs.Undefined when the container has no freezer cgroup.
+func (m *manager) GetFreezerState() (configs.FreezerState, error) {
+	if dir := m.Path("freezer"); dir != "" {
+		return (&FreezerGroup{}).GetState(dir)
+	}
+	// No freezer cgroup for this container: the state is undefined.
+	return configs.Undefined, nil
+}
+
+// Exists reports whether the manager's "devices" cgroup path exists,
+// according to cgroups.PathExists.
+func (m *manager) Exists() bool {
+	dir := m.Path("devices")
+	return cgroups.PathExists(dir)
+}
+
+// OOMKillCount returns the value of the "oom_kill" key in the
+// memory.oom_control file of the cgroup at path.
+func OOMKillCount(path string) (uint64, error) {
+	const (
+		file = "memory.oom_control"
+		key  = "oom_kill"
+	)
+	return fscommon.GetValueByKey(path, file, key)
+}
+
+// OOMKillCount returns the OOM kill count of the container's memory
+// cgroup. For a rootless container a not-exist error is ignored, since the
+// cgroup could not be created in that case.
+func (m *manager) OOMKillCount() (uint64, error) {
+	count, err := OOMKillCount(m.Path("memory"))
+	if err == nil {
+		return count, nil
+	}
+	if m.cgroups.Rootless && os.IsNotExist(err) {
+		return count, nil
+	}
+	return count, err
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
new file mode 100644
index 000000000..8ddd6fdd8
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
@@ -0,0 +1,62 @@
+package fs
+
+import (
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// HugetlbGroup implements the subsystem interface for the cgroup v1
+// "hugetlb" controller.
+type HugetlbGroup struct{}
+
+// Name returns the controller name, "hugetlb".
+func (*HugetlbGroup) Name() string {
+	return "hugetlb"
+}
+
+// Apply adds pid to the hugetlb cgroup at path via the shared apply helper.
+// The resources argument is not used.
+func (*HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// Set writes every limit in r.HugetlbLimit to the matching
+// hugetlb.<pagesize>.limit_in_bytes file under path, stopping at the first
+// write error.
+func (*HugetlbGroup) Set(path string, r *configs.Resources) error {
+	for _, entry := range r.HugetlbLimit {
+		file := "hugetlb." + entry.Pagesize + ".limit_in_bytes"
+		value := strconv.FormatUint(entry.Limit, 10)
+		if err := cgroups.WriteFile(path, file, value); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// GetStats records usage, peak usage and failure count for every page size
+// reported by cgroups.HugePageSizes into stats.HugetlbStats. It does
+// nothing when path does not exist and returns the first read error.
+func (*HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
+	if !cgroups.PathExists(path) {
+		return nil
+	}
+	for _, pageSize := range cgroups.HugePageSizes() {
+		prefix := "hugetlb." + pageSize
+
+		usage, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes")
+		if err != nil {
+			return err
+		}
+
+		maxUsage, err := fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes")
+		if err != nil {
+			return err
+		}
+
+		failcnt, err := fscommon.GetCgroupParamUint(path, prefix+".failcnt")
+		if err != nil {
+			return err
+		}
+
+		stats.HugetlbStats[pageSize] = cgroups.HugetlbStats{
+			Usage:    usage,
+			MaxUsage: maxUsage,
+			Failcnt:  failcnt,
+		}
+	}
+
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
new file mode 100644
index 000000000..b7c75f941
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@@ -0,0 +1,348 @@
+package fs
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "math"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// Names of the memory controller files used by the limit-setting helpers.
+const (
+ // Combined memory+swap limit.
+ cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
+ // Memory limit.
+ cgroupMemoryLimit = "memory.limit_in_bytes"
+ // Current memory usage.
+ cgroupMemoryUsage = "memory.usage_in_bytes"
+ // Peak memory usage.
+ cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
+)
+
+// MemoryGroup implements the subsystem interface for the cgroup v1
+// "memory" controller.
+type MemoryGroup struct{}
+
+// Name returns the controller name, "memory".
+func (*MemoryGroup) Name() string {
+	return "memory"
+}
+
+// Apply adds pid to the memory cgroup at path via the shared apply helper.
+// The resources argument is not used.
+func (*MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// setMemory writes val to memory.limit_in_bytes under path; a val of 0
+// means "not set" and is skipped.
+//
+// When the write fails with EBUSY (the kernel cannot apply the limit
+// because it is lower than the current usage), a more specific error that
+// includes the current and peak usage is returned instead.
+func setMemory(path string, val int64) error {
+	if val == 0 {
+		return nil
+	}
+
+	writeErr := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
+	if !errors.Is(writeErr, unix.EBUSY) {
+		// Success (nil) or an unrelated failure: pass it through.
+		return writeErr
+	}
+
+	current, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
+	if err != nil {
+		return err
+	}
+	peak, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
+	if err != nil {
+		return err
+	}
+
+	return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, current, peak)
+}
+
+// setSwap writes val to memory.memsw.limit_in_bytes under path; a val of 0
+// means "not set" and is skipped.
+func setSwap(path string, val int64) error {
+	if val != 0 {
+		return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
+	}
+	return nil
+}
+
+// setMemoryAndSwap applies r.Memory and r.MemorySwap in an order the kernel
+// will accept.
+//
+// If memory is set to -1 (unlimited) and swap is not set explicitly, swap
+// is also set to -1, provided the swap limit file exists; note that this
+// updates r.MemorySwap. When both values are set, the swap limit is written
+// first if it is unlimited or larger than the current memory limit, so the
+// update is not rejected by kernel validation of new against old values;
+// otherwise the memory limit is written first.
+func setMemoryAndSwap(path string, r *configs.Resources) error {
+	// Only touch swap if it is enabled in the kernel.
+	if r.Memory == -1 && r.MemorySwap == 0 && cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
+		r.MemorySwap = -1
+	}
+
+	swapFirst := false
+	if r.Memory != 0 && r.MemorySwap != 0 {
+		curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
+		if err != nil {
+			return err
+		}
+		swapFirst = r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap)
+	}
+
+	if swapFirst {
+		if err := setSwap(path, r.MemorySwap); err != nil {
+			return err
+		}
+		return setMemory(path, r.Memory)
+	}
+
+	if err := setMemory(path, r.Memory); err != nil {
+		return err
+	}
+	return setSwap(path, r.MemorySwap)
+}
+
+// Set applies the memory settings from r to the cgroup at path: the memory
+// and swap limits, the soft limit (MemoryReservation), the OOM-killer
+// switch and swappiness. KernelMemory and KernelMemoryTCP are ignored.
+//
+// Swappiness is left untouched when it is nil or -1, and values above 100
+// are rejected with an error.
+func (*MemoryGroup) Set(path string, r *configs.Resources) error {
+	if err := setMemoryAndSwap(path, r); err != nil {
+		return err
+	}
+
+	if r.MemoryReservation != 0 {
+		soft := strconv.FormatInt(r.MemoryReservation, 10)
+		if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", soft); err != nil {
+			return err
+		}
+	}
+
+	if r.OomKillDisable {
+		if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
+			return err
+		}
+	}
+
+	swappiness := r.MemorySwappiness
+	if swappiness == nil || int64(*swappiness) == -1 {
+		return nil
+	}
+	if *swappiness > 100 {
+		return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *swappiness)
+	}
+	return cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*swappiness, 10))
+}
+
+// GetStats fills stats.MemoryStats from the memory controller files under
+// path.
+//
+// It parses memory.stat into the raw Stats map (and Cache), then reads the
+// usage data for plain memory, memsw, kmem and kmem.tcp, the use_hierarchy
+// flag and the per-NUMA-node page usage. A missing memory.stat file is not
+// an error: nothing is recorded and nil is returned. A malformed line in
+// memory.stat, or an I/O error while reading it, is reported as a
+// parseError.
+func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
+	const file = "memory.stat"
+	statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	defer statsFile.Close()
+
+	sc := bufio.NewScanner(statsFile)
+	for sc.Scan() {
+		t, v, err := fscommon.ParseKeyValue(sc.Text())
+		if err != nil {
+			return &parseError{Path: path, File: file, Err: err}
+		}
+		stats.MemoryStats.Stats[t] = v
+	}
+	// Scan also stops on a read error; without this check such an error
+	// would be dropped and a truncated stats map reported as complete.
+	if err := sc.Err(); err != nil {
+		return &parseError{Path: path, File: file, Err: err}
+	}
+	stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
+
+	memoryUsage, err := getMemoryData(path, "")
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.Usage = memoryUsage
+	swapUsage, err := getMemoryData(path, "memsw")
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.SwapUsage = swapUsage
+	kernelUsage, err := getMemoryData(path, "kmem")
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.KernelUsage = kernelUsage
+	kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
+
+	value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
+	if err != nil {
+		return err
+	}
+	if value == 1 {
+		stats.MemoryStats.UseHierarchy = true
+	}
+
+	pagesByNUMA, err := getPageUsageByNUMA(path)
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
+
+	return nil
+}
+
+// getMemoryData reads usage, peak usage, failure count and limit for one
+// memory accounting group under path.
+//
+// name selects the group: "" reads the plain memory.* files, any other
+// value (e.g. "memsw", "kmem") reads memory.<name>.*. For a named group a
+// missing usage file yields zero data and no error, because the swap and
+// kmem controllers are optional in the kernel. On any other error zero
+// data is returned together with the error.
+func getMemoryData(path, name string) (cgroups.MemoryData, error) {
+	prefix := "memory"
+	if name != "" {
+		prefix += "." + name
+	}
+
+	var (
+		data cgroups.MemoryData
+		err  error
+	)
+
+	if data.Usage, err = fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes"); err != nil {
+		if name != "" && os.IsNotExist(err) {
+			// Optional controller that is not present.
+			return cgroups.MemoryData{}, nil
+		}
+		return cgroups.MemoryData{}, err
+	}
+	if data.MaxUsage, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes"); err != nil {
+		return cgroups.MemoryData{}, err
+	}
+	if data.Failcnt, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt"); err != nil {
+		return cgroups.MemoryData{}, err
+	}
+	if data.Limit, err = fscommon.GetCgroupParamUint(path, prefix+".limit_in_bytes"); err != nil {
+		return cgroups.MemoryData{}, err
+	}
+
+	return data, nil
+}
+
+// getPageUsageByNUMA parses memory.numa_stat under path into per-counter
+// totals and per-node page counts.
+//
+// A missing file yields empty stats and no error. Lines whose first column
+// is not a known "name=value" counter are skipped; once the first column
+// has been accepted, every further column must have the form N<id>=<pages>
+// or the line is reported as malformed.
+func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
+ const (
+ // Upper bound on the number of fields SplitN produces per line.
+ maxColumns = math.MaxUint8 + 1
+ file = "memory.numa_stat"
+ )
+ stats := cgroups.PageUsageByNUMA{}
+
+ fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
+ if os.IsNotExist(err) {
+ return stats, nil
+ } else if err != nil {
+ return stats, err
+ }
+ defer fd.Close()
+
+ // File format is documented in linux/Documentation/cgroup-v1/memory.txt
+ // and it looks like this:
+ //
+ // total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ // file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ // anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ // unevictable=<total unevictable pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ // hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+ scanner := bufio.NewScanner(fd)
+ for scanner.Scan() {
+ // field points at the counter being filled for the current line;
+ // it is set while handling the first column.
+ var field *cgroups.PageStats
+
+ line := scanner.Text()
+ columns := strings.SplitN(line, " ", maxColumns)
+ for i, column := range columns {
+ byNode := strings.SplitN(column, "=", 2)
+ // Some custom kernels have non-standard fields, like
+ // numa_locality 0 0 0 0 0 0 0 0 0 0
+ // numa_exectime 0
+ if len(byNode) < 2 {
+ if i == 0 {
+ // Ignore/skip those.
+ break
+ } else {
+ // The first column was already validated,
+ // so be strict to the rest.
+ return stats, malformedLine(path, file, line)
+ }
+ }
+ key, val := byNode[0], byNode[1]
+ if i == 0 { // First column: key is name, val is total.
+ field = getNUMAField(&stats, key)
+ if field == nil { // unknown field (new kernel?)
+ break
+ }
+ field.Total, err = strconv.ParseUint(val, 0, 64)
+ if err != nil {
+ return stats, &parseError{Path: path, File: file, Err: err}
+ }
+ field.Nodes = map[uint8]uint64{}
+ } else { // Subsequent columns: key is N<id>, val is usage.
+ if len(key) < 2 || key[0] != 'N' {
+ // This is definitely an error.
+ return stats, malformedLine(path, file, line)
+ }
+
+ // The node id must fit in 8 bits, matching the uint8 map key.
+ n, err := strconv.ParseUint(key[1:], 10, 8)
+ if err != nil {
+ return stats, &parseError{Path: path, File: file, Err: err}
+ }
+
+ usage, err := strconv.ParseUint(val, 10, 64)
+ if err != nil {
+ return stats, &parseError{Path: path, File: file, Err: err}
+ }
+
+ field.Nodes[uint8(n)] = usage
+ }
+
+ }
+ }
+ // A read error discards whatever was parsed so far.
+ if err := scanner.Err(); err != nil {
+ return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
+ }
+
+ return stats, nil
+}
+
+// getNUMAField returns a pointer to the counter inside stats that
+// corresponds to the memory.numa_stat key name, or nil when the key is not
+// one of the known counters.
+func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
+	known := map[string]*cgroups.PageStats{
+		"total":                    &stats.Total,
+		"file":                     &stats.File,
+		"anon":                     &stats.Anon,
+		"unevictable":              &stats.Unevictable,
+		"hierarchical_total":       &stats.Hierarchical.Total,
+		"hierarchical_file":        &stats.Hierarchical.File,
+		"hierarchical_anon":        &stats.Hierarchical.Anon,
+		"hierarchical_unevictable": &stats.Hierarchical.Unevictable,
+	}
+	// A missing key yields the zero value, i.e. a nil pointer.
+	return known[name]
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
new file mode 100644
index 000000000..b8d5d849c
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
@@ -0,0 +1,31 @@
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// NameGroup implements the subsystem interface for a cgroup hierarchy
+// identified only by name, such as "name=systemd".
+type NameGroup struct {
+ // GroupName is the value returned by Name.
+ GroupName string
+ // Join controls whether Apply attempts to add the process to the cgroup.
+ Join bool
+}
+
+// Name returns the configured GroupName.
+func (s *NameGroup) Name() string {
+ return s.GroupName
+}
+
+// Apply adds pid to the named cgroup at path when Join is set. Errors from
+// joining are deliberately ignored, since the named cgroup may not exist;
+// the method always returns nil.
+func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	if !s.Join {
+		return nil
+	}
+	// Ignore errors if the named cgroup does not exist.
+	_ = apply(path, pid)
+	return nil
+}
+
+// Set is a no-op: this implementation applies no resources to a named
+// cgroup.
+func (*NameGroup) Set(_ string, _ *configs.Resources) error {
+	return nil
+}
+
+// GetStats is a no-op: this implementation records no statistics for a
+// named cgroup.
+func (*NameGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
new file mode 100644
index 000000000..abfd09ce8
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
@@ -0,0 +1,32 @@
+package fs
+
+import (
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// NetClsGroup implements the subsystem interface for the cgroup v1
+// "net_cls" controller.
+type NetClsGroup struct{}
+
+// Name returns the controller name, "net_cls".
+func (*NetClsGroup) Name() string {
+	return "net_cls"
+}
+
+// Apply adds pid to the net_cls cgroup at path via the shared apply helper.
+// The resources argument is not used.
+func (*NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// Set writes r.NetClsClassid, in decimal, to net_cls.classid under path.
+// A class id of 0 means "not set" and nothing is written.
+func (*NetClsGroup) Set(path string, r *configs.Resources) error {
+	if r.NetClsClassid == 0 {
+		return nil
+	}
+	classid := strconv.FormatUint(uint64(r.NetClsClassid), 10)
+	return cgroups.WriteFile(path, "net_cls.classid", classid)
+}
+
+// GetStats is a no-op: this implementation records no statistics for the
+// net_cls controller.
+func (*NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
new file mode 100644
index 000000000..da74d3779
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
@@ -0,0 +1,30 @@
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// NetPrioGroup implements the subsystem interface for the cgroup v1
+// "net_prio" controller.
+type NetPrioGroup struct{}
+
+// Name returns the controller name, "net_prio".
+func (*NetPrioGroup) Name() string {
+	return "net_prio"
+}
+
+// Apply adds pid to the net_prio cgroup at path via the shared apply
+// helper. The resources argument is not used.
+func (*NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// Set writes each entry of r.NetPrioIfpriomap, one write per entry, to
+// net_prio.ifpriomap under path and stops at the first write error.
+func (*NetPrioGroup) Set(path string, r *configs.Resources) error {
+	for _, entry := range r.NetPrioIfpriomap {
+		line := entry.CgroupString()
+		if err := cgroups.WriteFile(path, "net_prio.ifpriomap", line); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// GetStats is a no-op: this implementation records no statistics for the
+// net_prio controller.
+func (*NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
new file mode 100644
index 000000000..1092331b2
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
@@ -0,0 +1,186 @@
+package fs
+
+import (
+ "errors"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/utils"
+)
+
+// The absolute path to the root of the cgroup hierarchies.
+// cgroupRoot caches the value once rootPath has determined it;
+// cgroupRootLock is held by rootPath while it reads or sets the cache.
+var (
+ cgroupRootLock sync.Mutex
+ cgroupRoot string
+)
+
+// defaultCgroupRoot is the conventional location tried first by rootPath
+// (via tryDefaultCgroupRoot) before it falls back to parsing mountinfo.
+const defaultCgroupRoot = "/sys/fs/cgroup"
+
+// initPaths computes the cgroup directory for every known subsystem, keyed
+// by subsystem name.
+//
+// Subsystems whose path lookup fails with a not-found error are left out of
+// the result. The one exception is "devices": unless cg.SkipDevices is set,
+// its absence is fatal for security reasons. Any other lookup error aborts
+// the whole computation.
+func initPaths(cg *configs.Cgroup) (map[string]string, error) {
+	root, err := rootPath()
+	if err != nil {
+		return nil, err
+	}
+
+	inner, err := innerPath(cg)
+	if err != nil {
+		return nil, err
+	}
+
+	result := make(map[string]string)
+	for _, sys := range subsystems {
+		name := sys.Name()
+		dir, err := subsysPath(root, inner, name)
+		if err == nil {
+			result[name] = dir
+			continue
+		}
+		// A missing subsystem is tolerated, except for a required
+		// devices subsystem.
+		if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
+			continue
+		}
+		return nil, err
+	}
+
+	return result, nil
+}
+
+// tryDefaultCgroupRoot checks whether defaultCgroupRoot looks like the
+// root of the cgroup v1 hierarchies and returns it if so, or "" otherwise.
+//
+// The path qualifies when it is a directory, is a mount point (its device
+// differs from its parent's), is on a tmpfs, and its first directory entry
+// is itself on a cgroup filesystem. Any failed check or error yields "".
+func tryDefaultCgroupRoot() string {
+	var st, pst unix.Stat_t
+
+	// (1) it should be a directory...
+	// The file type must be compared under the S_IFMT mask: testing the
+	// S_IFDIR bit alone would also accept block devices and sockets,
+	// whose type values include that bit.
+	err := unix.Lstat(defaultCgroupRoot, &st)
+	if err != nil || st.Mode&unix.S_IFMT != unix.S_IFDIR {
+		return ""
+	}
+
+	// (2) ... and a mount point ...
+	err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
+	if err != nil {
+		return ""
+	}
+
+	if st.Dev == pst.Dev {
+		// parent dir has the same dev -- not a mount point
+		return ""
+	}
+
+	// (3) ... of 'tmpfs' fs type.
+	var fst unix.Statfs_t
+	err = unix.Statfs(defaultCgroupRoot, &fst)
+	if err != nil || fst.Type != unix.TMPFS_MAGIC {
+		return ""
+	}
+
+	// (4) it should have at least 1 entry ...
+	dir, err := os.Open(defaultCgroupRoot)
+	if err != nil {
+		return ""
+	}
+	// Release the directory handle on every return path below.
+	defer dir.Close()
+
+	names, err := dir.Readdirnames(1)
+	if err != nil {
+		return ""
+	}
+	if len(names) < 1 {
+		return ""
+	}
+	// ... which is a cgroup mount point.
+	err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
+	if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
+		return ""
+	}
+
+	return defaultCgroupRoot
+}
+
+// rootPath returns the path to the root of the cgroup hierarchies and
+// caches it in cgroupRoot.
+//
+// It first tries the default location; if that does not qualify it parses
+// mountinfo and uses the parent directory of the first cgroup mount.
+func rootPath() (string, error) {
+	cgroupRootLock.Lock()
+	defer cgroupRootLock.Unlock()
+
+	// Already determined by an earlier call.
+	if cgroupRoot != "" {
+		return cgroupRoot, nil
+	}
+
+	// Fast path: the conventional location.
+	if cgroupRoot = tryDefaultCgroupRoot(); cgroupRoot != "" {
+		return cgroupRoot, nil
+	}
+
+	// Slow path: parse mountinfo.
+	mounts, err := cgroups.GetCgroupMounts(false)
+	if err != nil {
+		return "", err
+	}
+	if len(mounts) == 0 {
+		return "", errors.New("no cgroup mount found in mountinfo")
+	}
+
+	// The first cgroup mount is e.g. "/sys/fs/cgroup/memory"; its parent
+	// directory is the root.
+	candidate := filepath.Dir(mounts[0].Mountpoint)
+	if _, err := os.Stat(candidate); err != nil {
+		return "", err
+	}
+
+	cgroupRoot = candidate
+	return cgroupRoot, nil
+}
+
+// innerPath returns the cleaned cgroup path described by c: either c.Path,
+// or c.Parent joined with c.Name when c.Path cleans to "". Setting Path
+// together with Name or Parent is an error.
+func innerPath(c *configs.Cgroup) (string, error) {
+	usesNameOrParent := c.Name != "" || c.Parent != ""
+	if usesNameOrParent && c.Path != "" {
+		return "", errors.New("cgroup: either Path or Name and Parent should be used")
+	}
+
+	// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
+	if cleaned := utils.CleanPath(c.Path); cleaned != "" {
+		return cleaned, nil
+	}
+
+	parent := utils.CleanPath(c.Parent)
+	name := utils.CleanPath(c.Name)
+	return filepath.Join(parent, name), nil
+}
+
+// subsysPath returns the cgroup directory for subsystem.
+//
+// A relative inner path is resolved against the calling process's own
+// cgroup for that subsystem. An absolute inner path is placed under root,
+// in the directory named after the subsystem's mount point.
+func subsysPath(root, inner, subsystem string) (string, error) {
+	if !filepath.IsAbs(inner) {
+		// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the
+		// creating process could be in a container that shares the pid
+		// namespace with the host, and /proc/1/cgroup could point to a
+		// whole other world of cgroups.
+		own, err := cgroups.GetOwnCgroupPath(subsystem)
+		if err != nil {
+			return "", err
+		}
+		return filepath.Join(own, inner), nil
+	}
+
+	// Absolute name/path: do not look relative to the cgroup of the init
+	// process.
+	mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
+	if err != nil {
+		// The subsystem is not mounted, so there is no path to build.
+		return "", err
+	}
+
+	// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
+	return filepath.Join(root, filepath.Base(mnt), inner), nil
+}
+
+// apply creates the cgroup directory at path (mode 0o755, including
+// parents) and adds pid to it. An empty path is a no-op.
+func apply(path string, pid int) error {
+	if path != "" {
+		if err := os.MkdirAll(path, 0o755); err != nil {
+			return err
+		}
+		return cgroups.WriteCgroupProc(path, pid)
+	}
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
new file mode 100644
index 000000000..b86955c8f
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
@@ -0,0 +1,24 @@
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// PerfEventGroup implements the subsystem interface for the cgroup v1
+// "perf_event" controller.
+type PerfEventGroup struct{}
+
+// Name returns the controller name, "perf_event".
+func (*PerfEventGroup) Name() string {
+	return "perf_event"
+}
+
+// Apply adds pid to the perf_event cgroup at path via the shared apply
+// helper. The resources argument is not used.
+func (*PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	return apply(path, pid)
+}
+
+// Set is a no-op: this implementation applies no resources to the
+// perf_event controller.
+func (*PerfEventGroup) Set(_ string, _ *configs.Resources) error {
+	return nil
+}
+
+// GetStats is a no-op: this implementation records no statistics for the
+// perf_event controller.
+func (*PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
new file mode 100644
index 000000000..1f13532a5
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
@@ -0,0 +1,62 @@
+package fs
+
+import (
+ "math"
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// PidsGroup handles the "pids" cgroup subsystem.
+type PidsGroup struct{}
+
+// Name returns the subsystem name, "pids".
+func (g *PidsGroup) Name() string {
+	const subsystem = "pids"
+	return subsystem
+}
+
+// Apply delegates to apply with the given path and pid; the resources
+// argument is ignored.
+func (g *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	err := apply(path, pid)
+	return err
+}
+
+// Set writes r.PidsLimit to "pids.max" under path. A limit of 0 means
+// "leave unchanged" and writes nothing; a negative limit is written as "max".
+func (g *PidsGroup) Set(path string, r *configs.Resources) error {
+	if r.PidsLimit == 0 {
+		return nil
+	}
+
+	// "max" is the fallback value, used for any non-positive limit.
+	value := "max"
+	if r.PidsLimit > 0 {
+		value = strconv.FormatInt(r.PidsLimit, 10)
+	}
+
+	return cgroups.WriteFile(path, "pids.max", value)
+}
+
+// GetStats fills stats.PidsStats from "pids.current" and "pids.max" under
+// path. If path does not exist, stats is left untouched and nil is returned.
+func (g *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
+	if !cgroups.PathExists(path) {
+		return nil
+	}
+
+	current, err := fscommon.GetCgroupParamUint(path, "pids.current")
+	if err != nil {
+		return err
+	}
+	limit, err := fscommon.GetCgroupParamUint(path, "pids.max")
+	if err != nil {
+		return err
+	}
+
+	// If no limit is set, read from pids.max returns "max", which is
+	// converted to MaxUint64 by GetCgroupParamUint. Historically, we
+	// represent "no limit" for pids as 0, thus this conversion.
+	if limit == math.MaxUint64 {
+		limit = 0
+	}
+
+	stats.PidsStats.Current = current
+	stats.PidsStats.Limit = limit
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
new file mode 100644
index 000000000..5bbe0f35f
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
@@ -0,0 +1,25 @@
+package fs
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// RdmaGroup handles the "rdma" cgroup subsystem by forwarding to the shared
+// helpers in the fscommon package.
+type RdmaGroup struct{}
+
+// Name returns the subsystem name, "rdma".
+func (g *RdmaGroup) Name() string {
+	const subsystem = "rdma"
+	return subsystem
+}
+
+// Apply delegates to apply with the given path and pid; the resources
+// argument is ignored.
+func (g *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
+	err := apply(path, pid)
+	return err
+}
+
+// Set forwards path and r to fscommon.RdmaSet and returns its result.
+func (g *RdmaGroup) Set(path string, r *configs.Resources) error {
+	err := fscommon.RdmaSet(path, r)
+	return err
+}
+
+// GetStats forwards path and stats to fscommon.RdmaGetStats and returns its
+// result.
+func (g *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
+	err := fscommon.RdmaGetStats(path, stats)
+	return err
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
new file mode 100644
index 000000000..bbbae4d58
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
@@ -0,0 +1,87 @@
+package fs2
+
+import (
+ "bufio"
+ "os"
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// isCpuSet reports whether any of CpuWeight, CpuQuota or CpuPeriod in r is
+// non-zero.
+func isCpuSet(r *configs.Resources) bool {
+	allZero := r.CpuWeight == 0 && r.CpuQuota == 0 && r.CpuPeriod == 0
+	return !allZero
+}
+
+// setCpu writes the CPU settings from r into dirPath:
+//   - "cpu.weight" gets r.CpuWeight when it is non-zero;
+//   - "cpu.max" gets "<quota> <period>" when either r.CpuQuota or
+//     r.CpuPeriod is non-zero. A non-positive quota is written as "max" and
+//     a zero period is replaced by 100000.
+//
+// Nothing is written when isCpuSet(r) is false.
+func setCpu(dirPath string, r *configs.Resources) error {
+	if !isCpuSet(r) {
+		return nil
+	}
+
+	// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
+	if weight := r.CpuWeight; weight != 0 {
+		if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(weight, 10)); err != nil {
+			return err
+		}
+	}
+
+	if r.CpuQuota == 0 && r.CpuPeriod == 0 {
+		return nil
+	}
+
+	quota := "max"
+	if r.CpuQuota > 0 {
+		quota = strconv.FormatInt(r.CpuQuota, 10)
+	}
+	period := r.CpuPeriod
+	if period == 0 {
+		// This default value is documented in
+		// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
+		period = 100000
+	}
+
+	return cgroups.WriteFile(dirPath, "cpu.max", quota+" "+strconv.FormatUint(period, 10))
+}
+
+// statCpu scans "cpu.stat" in dirPath line by line and copies the keys it
+// recognises into stats.CpuStats. The "*_usec" keys handled here are
+// multiplied by 1000 before being stored; "nr_periods" and "nr_throttled"
+// are stored as read. Unrecognised keys are ignored. Parse and scan
+// failures are returned as *parseError.
+func statCpu(dirPath string, stats *cgroups.Stats) error {
+	const (
+		file  = "cpu.stat"
+		scale = 1000
+	)
+
+	fd, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
+	if err != nil {
+		return err
+	}
+	defer fd.Close()
+
+	scanner := bufio.NewScanner(fd)
+	for scanner.Scan() {
+		key, val, err := fscommon.ParseKeyValue(scanner.Text())
+		if err != nil {
+			return &parseError{Path: dirPath, File: file, Err: err}
+		}
+
+		switch key {
+		case "usage_usec":
+			stats.CpuStats.CpuUsage.TotalUsage = val * scale
+		case "user_usec":
+			stats.CpuStats.CpuUsage.UsageInUsermode = val * scale
+		case "system_usec":
+			stats.CpuStats.CpuUsage.UsageInKernelmode = val * scale
+		case "nr_periods":
+			stats.CpuStats.ThrottlingData.Periods = val
+		case "nr_throttled":
+			stats.CpuStats.ThrottlingData.ThrottledPeriods = val
+		case "throttled_usec":
+			stats.CpuStats.ThrottlingData.ThrottledTime = val * scale
+		}
+	}
+
+	if scanErr := scanner.Err(); scanErr != nil {
+		return &parseError{Path: dirPath, File: file, Err: scanErr}
+	}
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
new file mode 100644
index 000000000..16c45bad8
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
@@ -0,0 +1,28 @@
+package fs2
+
+import (
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// isCpusetSet reports whether r names any CPUs or memory nodes, i.e.
+// whether CpusetCpus or CpusetMems is a non-empty string.
+func isCpusetSet(r *configs.Resources) bool {
+	bothEmpty := r.CpusetCpus == "" && r.CpusetMems == ""
+	return !bothEmpty
+}
+
+// setCpuset writes r.CpusetCpus to "cpuset.cpus" and r.CpusetMems to
+// "cpuset.mems" in dirPath, in that order, skipping whichever is empty.
+// The first write error is returned.
+func setCpuset(dirPath string, r *configs.Resources) error {
+	if !isCpusetSet(r) {
+		return nil
+	}
+
+	settings := []struct{ file, value string }{
+		{"cpuset.cpus", r.CpusetCpus},
+		{"cpuset.mems", r.CpusetMems},
+	}
+	for _, s := range settings {
+		if s.value == "" {
+			continue
+		}
+		if err := cgroups.WriteFile(dirPath, s.file, s.value); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
new file mode 100644
index 000000000..641123a4d
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
@@ -0,0 +1,152 @@
+package fs2
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// supportedControllers returns the raw content of the "cgroup.controllers"
+// file located directly under UnifiedMountpoint.
+func supportedControllers() (string, error) {
+	const file = "/cgroup.controllers"
+	return cgroups.ReadFile(UnifiedMountpoint, file)
+}
+
+// needAnyControllers returns whether we enable some supported controllers or not,
+// based on (1) controllers available and (2) resources that are being set.
+// We don't check "pseudo" controllers such as
+// "freezer" and "devices".
+//
+// A nil r yields (false, nil) without reading the controllers file.
+func needAnyControllers(r *configs.Resources) (bool, error) {
+	if r == nil {
+		return false, nil
+	}
+
+	// list of all available controllers
+	content, err := supportedControllers()
+	if err != nil {
+		return false, err
+	}
+	avail := make(map[string]struct{})
+	for _, name := range strings.Fields(content) {
+		avail[name] = struct{}{}
+	}
+
+	// Each entry pairs "is this resource requested?" with the controller
+	// that would have to be available to honour it.
+	checks := []struct {
+		requested  func(*configs.Resources) bool
+		controller string
+	}{
+		{isPidsSet, "pids"},
+		{isMemorySet, "memory"},
+		{isIoSet, "io"},
+		{isCpuSet, "cpu"},
+		{isCpusetSet, "cpuset"},
+		{isHugeTlbSet, "hugetlb"},
+	}
+	for _, check := range checks {
+		if !check.requested(r) {
+			continue
+		}
+		// check whether the controller is available or not
+		if _, ok := avail[check.controller]; ok {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
+
+// containsDomainController returns whether the current config contains domain controller or not.
+// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
+// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
+//
+// A nil r requests no controllers at all, so it yields false rather than
+// dereferencing the nil pointer.
+func containsDomainController(r *configs.Resources) bool {
+	if r == nil {
+		return false
+	}
+	return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
+}
+
+// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
+//
+// path must start with UnifiedMountpoint, otherwise an error is returned.
+// Every path element below the mountpoint is created with os.Mkdir when it
+// does not exist yet, and for every level except the last one the
+// controllers returned by supportedControllers are written to
+// "cgroup.subtree_control". When the function returns a non-nil error, the
+// deferred cleanups call os.Remove on the directories this call created
+// (Err is the named result they inspect).
+func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
+ if !strings.HasPrefix(path, UnifiedMountpoint) {
+ return fmt.Errorf("invalid cgroup path %s", path)
+ }
+
+ content, err := supportedControllers()
+ if err != nil {
+ return err
+ }
+
+ const (
+ cgTypeFile = "cgroup.type"
+ cgStCtlFile = "cgroup.subtree_control"
+ )
+ // res is the single-write form: "+ctrl1 +ctrl2 ...".
+ ctrs := strings.Fields(content)
+ res := "+" + strings.Join(ctrs, " +")
+
+ // Splitting "/sys/fs/cgroup/..." on "/" yields "", "sys", "fs", "cgroup", ...;
+ // dropping the first three leaves "cgroup" as element 0, which is why the
+ // walk restarts from "/sys/fs" and element 0 (the mountpoint) is never created.
+ elements := strings.Split(path, "/")
+ elements = elements[3:]
+ current := "/sys/fs"
+ for i, e := range elements {
+ current = filepath.Join(current, e)
+ if i > 0 {
+ if err := os.Mkdir(current, 0o755); err != nil {
+ // An already existing directory is fine; anything else is fatal.
+ if !os.IsExist(err) {
+ return err
+ }
+ } else {
+ // If the directory was created, be sure it is not left around on errors.
+ // The shadowing copy pins this iteration's path for the closure.
+ current := current
+ defer func() {
+ if Err != nil {
+ os.Remove(current)
+ }
+ }()
+ }
+ // A read error is ignored here; the resulting empty cgType matches no case below.
+ cgType, _ := cgroups.ReadFile(current, cgTypeFile)
+ cgType = strings.TrimSpace(cgType)
+ switch cgType {
+ // If the cgroup is in an invalid mode (usually this means there's an internal
+ // process in the cgroup tree, because we created a cgroup under an
+ // already-populated-by-other-processes cgroup), then we have to error out if
+ // the user requested controllers which are not thread-aware. However, if all
+ // the controllers requested are thread-aware we can simply put the cgroup into
+ // threaded mode.
+ case "domain invalid":
+ if containsDomainController(c.Resources) {
+ return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
+ } else {
+ // Not entirely correct (in theory we'd always want to be a domain --
+ // since that means we're a properly delegated cgroup subtree) but in
+ // this case there's not much we can do and it's better than giving an
+ // error.
+ _ = cgroups.WriteFile(current, cgTypeFile, "threaded")
+ }
+ // If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
+ // (and you cannot usually take a cgroup out of threaded mode).
+ case "domain threaded":
+ fallthrough
+ case "threaded":
+ if containsDomainController(c.Resources) {
+ return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
+ }
+ }
+ }
+ // enable all supported controllers
+ // (skipped for the last element, i.e. the leaf cgroup itself)
+ if i < len(elements)-1 {
+ if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
+ // try write one by one
+ // (best effort: individual failures are deliberately ignored)
+ allCtrs := strings.Split(res, " ")
+ for _, ctr := range allCtrs {
+ _ = cgroups.WriteFile(current, cgStCtlFile, ctr)
+ }
+ }
+ // Some controllers might not be enabled when rootless or containerized,
+ // but we don't catch the error here. (Caught in setXXX() functions.)
+ }
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
new file mode 100644
index 000000000..9c949c91f
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
@@ -0,0 +1,99 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package fs2
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/utils"
+)
+
+const UnifiedMountpoint = "/sys/fs/cgroup"
+
+// defaultDirPath derives the cgroup directory for c below UnifiedMountpoint.
+// c may specify either Path, or Name and/or Parent; combining Path with
+// either of the other two is rejected with an error.
+func defaultDirPath(c *configs.Cgroup) (string, error) {
+	usesNameOrParent := c.Name != "" || c.Parent != ""
+	if usesNameOrParent && c.Path != "" {
+		return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
+	}
+
+	return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
+}
+
+// _defaultDirPath builds a cgroup directory path under root.
+//
+// The inner path is the cleaned cgPath or, when that is empty, the cleaned
+// cgParent joined with the cleaned cgName. An absolute inner path is joined
+// directly onto root. Otherwise it is placed under the parent directory of
+// the cgroup reported by /proc/self/cgroup.
+// Passing cgPath together with cgName or cgParent is an error.
+func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
+	if cgPath != "" && (cgName != "" || cgParent != "") {
+		return "", errors.New("cgroup: either Path or Name and Parent should be used")
+	}
+
+	// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
+	inner := utils.CleanPath(cgPath)
+	if inner == "" {
+		inner = filepath.Join(utils.CleanPath(cgParent), utils.CleanPath(cgName))
+	}
+	if filepath.IsAbs(inner) {
+		return filepath.Join(root, inner), nil
+	}
+
+	own, err := parseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		return "", err
+	}
+
+	// The current user scope most probably has tasks in it already,
+	// making it impossible to enable controllers for its sub-cgroup.
+	// A parent cgroup (with no tasks in it) is what we need.
+	return filepath.Join(root, filepath.Dir(own), inner), nil
+}
+
+// parseCgroupFile opens the /proc/PID/cgroup style file at path and returns
+// the cgroup path that parseCgroupFromReader extracts from it.
+func parseCgroupFile(path string) (string, error) {
+	file, openErr := os.Open(path)
+	if openErr != nil {
+		return "", openErr
+	}
+	defer file.Close()
+
+	return parseCgroupFromReader(file)
+}
+
+// parseCgroupFromReader scans r line by line and returns the path part of
+// the first "0::<path>" entry, i.e. the one whose first field is "0" and
+// whose second field is empty. A line with fewer than three colon-separated
+// fields is an error, as is reaching the end without a matching entry.
+func parseCgroupFromReader(r io.Reader) (string, error) {
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := scanner.Text()
+		fields := strings.SplitN(line, ":", 3)
+		if len(fields) < 3 {
+			return "", fmt.Errorf("invalid cgroup entry: %q", line)
+		}
+
+		// line is like "0::/user.slice/user-1001.slice/session-1.scope"
+		hierarchy, controllers, cgPath := fields[0], fields[1], fields[2]
+		if hierarchy != "0" || controllers != "" {
+			continue
+		}
+		return cgPath, nil
+	}
+
+	if scanErr := scanner.Err(); scanErr != nil {
+		return "", scanErr
+	}
+	return "", errors.New("cgroup path not found")
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
new file mode 100644
index 000000000..8917a6411
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
@@ -0,0 +1,127 @@
+package fs2
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// setFreezer writes the requested state to "cgroup.freeze" in dirPath
+// ("1" for Frozen, "0" for Thawed) and then re-reads the file through
+// readFreezer to confirm the cgroup reports that state.
+//
+// configs.Undefined is a no-op. If "cgroup.freeze" cannot be opened, a thaw
+// request is silently accepted while a freeze request fails.
+func setFreezer(dirPath string, state configs.FreezerState) error {
+	var want string
+	switch state {
+	case configs.Frozen:
+		want = "1"
+	case configs.Thawed:
+		want = "0"
+	case configs.Undefined:
+		return nil
+	default:
+		return fmt.Errorf("invalid freezer state %q requested", state)
+	}
+
+	fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
+	if err != nil {
+		// We can ignore this request as long as the user didn't ask us to
+		// freeze the container (since without the freezer cgroup, that's a
+		// no-op).
+		if state == configs.Frozen {
+			return fmt.Errorf("freezer not supported: %w", err)
+		}
+		return nil
+	}
+	defer fd.Close()
+
+	if _, err := fd.WriteString(want); err != nil {
+		return err
+	}
+
+	// Confirm that the cgroup did actually change states.
+	actual, err := readFreezer(dirPath, fd)
+	if err != nil {
+		return err
+	}
+	if actual != state {
+		return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actual)
+	}
+	return nil
+}
+
+// getFreezer opens "cgroup.freeze" in dirPath read-only and returns the
+// state decoded by readFreezer. When the file does not exist or opening it
+// fails with ENODEV, the state is reported as Undefined with a nil error.
+func getFreezer(dirPath string) (configs.FreezerState, error) {
+	fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
+	if err == nil {
+		defer fd.Close()
+		return readFreezer(dirPath, fd)
+	}
+
+	// If the kernel is too old, then we just treat the freezer as being in
+	// an "undefined" state.
+	if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
+		return configs.Undefined, nil
+	}
+	return configs.Undefined, err
+}
+
+// readFreezer rewinds fd, reads its first two bytes and maps them to a
+// freezer state: "0\n" is Thawed, "1\n" defers to waitFrozen(dirPath), and
+// anything else is reported as an error.
+func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
+	if _, seekErr := fd.Seek(0, 0); seekErr != nil {
+		return configs.Undefined, seekErr
+	}
+
+	raw := make([]byte, 2)
+	if _, readErr := fd.Read(raw); readErr != nil {
+		return configs.Undefined, readErr
+	}
+
+	content := string(raw)
+	if content == "0\n" {
+		return configs.Thawed, nil
+	}
+	if content == "1\n" {
+		return waitFrozen(dirPath)
+	}
+	return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, raw)
+}
+
+// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
+//
+// It returns Frozen once a "frozen " line whose value starts with '1' is
+// read. Each "frozen " line with any other value counts as one attempt:
+// the function sleeps for waitTime, rewinds the file and scans again, and
+// gives up with a timeout error after maxIter such attempts. If scanning
+// ends without ever reporting frozen, Undefined is returned together with
+// the scanner's error (which may be nil).
+func waitFrozen(dirPath string) (configs.FreezerState, error) {
+	fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
+	if err != nil {
+		return configs.Undefined, err
+	}
+	defer fd.Close()
+
+	// XXX: Simple wait/read/retry is used here. An implementation
+	// based on poll(2) or inotify(7) is possible, but it makes the code
+	// much more complicated. Maybe address this later.
+	const (
+		// Perform maxIter with waitTime in between iterations.
+		waitTime = 10 * time.Millisecond
+		maxIter  = 1000
+	)
+	scanner := bufio.NewScanner(fd)
+	for i := 0; scanner.Scan(); {
+		if i == maxIter {
+			return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
+		}
+		line := scanner.Text()
+		val := strings.TrimPrefix(line, "frozen ")
+		if val != line { // got prefix
+			// HasPrefix rather than val[0]: a bare "frozen " line leaves
+			// val empty, and indexing it would panic.
+			if strings.HasPrefix(val, "1") {
+				return configs.Frozen, nil
+			}
+
+			i++
+			// wait, then re-read
+			time.Sleep(waitTime)
+			_, err := fd.Seek(0, 0)
+			if err != nil {
+				return configs.Undefined, err
+			}
+		}
+	}
+	// Should only reach here either on read error,
+	// or if the file does not contain "frozen " line.
+	return configs.Undefined, scanner.Err()
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
new file mode 100644
index 000000000..d5208d778
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
@@ -0,0 +1,271 @@
+package fs2
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type parseError = fscommon.ParseError
+
+// manager is the cgroup v2 manager value returned by NewManager. It binds a
+// cgroup configuration to one directory in the unified hierarchy.
+type manager struct {
+ // config is the cgroup configuration passed to NewManager; Set and Freeze
+ // update its Resources field.
+ config *configs.Cgroup
+ // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
+ dirPath string
+ // controllers is content of "cgroup.controllers" file.
+ // excludes pseudo-controllers ("devices" and "freezer").
+ // It stays nil until getControllers has populated it.
+ controllers map[string]struct{}
+}
+
+// NewManager creates a manager for cgroup v2 unified hierarchy.
+// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
+// If dirPath is empty, it is automatically set using config; an error from
+// that derivation is returned with a nil manager.
+func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
+	if dirPath == "" {
+		derived, err := defaultDirPath(config)
+		if err != nil {
+			return nil, err
+		}
+		dirPath = derived
+	}
+
+	return &manager{config: config, dirPath: dirPath}, nil
+}
+
+// getControllers populates m.controllers from the "cgroup.controllers" file
+// in m.dirPath, unless it is already populated. A read failure is tolerated
+// (nil is returned and the set stays nil) when the config is rootless and
+// has no explicit Path; otherwise the read error is returned.
+func (m *manager) getControllers() error {
+	if m.controllers != nil {
+		return nil
+	}
+
+	content, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
+	switch {
+	case err == nil:
+		// fall through to parsing below
+	case m.config.Rootless && m.config.Path == "":
+		return nil
+	default:
+		return err
+	}
+
+	names := strings.Fields(content)
+	set := make(map[string]struct{}, len(names))
+	for _, name := range names {
+		set[name] = struct{}{}
+	}
+	m.controllers = set
+
+	return nil
+}
+
+// Apply creates the cgroup directory via CreateCgroupPath and then hands
+// m.dirPath and pid to cgroups.WriteCgroupProc.
+//
+// When creation fails for a rootless config without an explicit Path, the
+// failure is ignored if needAnyControllers reports that no controllers are
+// needed; otherwise it is wrapped in a more informative error.
+func (m *manager) Apply(pid int) error {
+	if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
+		// Related tests:
+		// - "runc create (no limits + no cgrouppath + no permission) succeeds"
+		// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
+		// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
+		if m.config.Rootless && m.config.Path == "" {
+			if need, needErr := needAnyControllers(m.config.Resources); needErr == nil && !need {
+				return nil
+			}
+			return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
+		}
+		return err
+	}
+
+	return cgroups.WriteCgroupProc(m.dirPath, pid)
+}
+
+// GetPids returns the result of cgroups.GetPids for m.dirPath.
+func (m *manager) GetPids() ([]int, error) {
+	pids, err := cgroups.GetPids(m.dirPath)
+	return pids, err
+}
+
+// GetAllPids returns the result of cgroups.GetAllPids for m.dirPath.
+func (m *manager) GetAllPids() ([]int, error) {
+	pids, err := cgroups.GetAllPids(m.dirPath)
+	return pids, err
+}
+
+// GetStats collects pids, memory, io, cpu, hugetlb and rdma statistics for
+// m.dirPath into a fresh cgroups.Stats. Every collector is run even if an
+// earlier one failed. "Not exist" errors are ignored for all collectors
+// except pids. Collected errors are reported as one combined error unless
+// the config is rootless, in which case they are dropped. The stats value
+// is returned in either case.
+func (m *manager) GetStats() (*cgroups.Stats, error) {
+	var errs []error
+	st := cgroups.NewStats()
+
+	// record keeps err unless it is nil, or it is a "not exist" error that
+	// the caller is willing to tolerate.
+	record := func(err error, tolerateMissing bool) {
+		if err == nil {
+			return
+		}
+		if tolerateMissing && os.IsNotExist(err) {
+			return
+		}
+		errs = append(errs, err)
+	}
+
+	// pids (since kernel 4.5)
+	record(statPids(m.dirPath, st), false)
+	// memory (since kernel 4.5)
+	record(statMemory(m.dirPath, st), true)
+	// io (since kernel 4.5)
+	record(statIo(m.dirPath, st), true)
+	// cpu (since kernel 4.15)
+	// Note cpu.stat is available even if the controller is not enabled.
+	record(statCpu(m.dirPath, st), true)
+	// hugetlb (since kernel 5.6)
+	record(statHugeTlb(m.dirPath, st), true)
+	// rdma (since kernel 4.11)
+	record(fscommon.RdmaGetStats(m.dirPath, st), true)
+
+	if len(errs) > 0 && !m.config.Rootless {
+		return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
+	}
+	return st, nil
+}
+
+// Freeze applies state through setFreezer and, on success, records it in
+// m.config.Resources.Freezer. It fails up front when the config carries no
+// Resources.
+func (m *manager) Freeze(state configs.FreezerState) error {
+	if m.config.Resources == nil {
+		return errors.New("cannot toggle freezer: cgroups not configured for container")
+	}
+
+	err := setFreezer(m.dirPath, state)
+	if err == nil {
+		m.config.Resources.Freezer = state
+	}
+	return err
+}
+
+// Destroy returns the result of cgroups.RemovePath for m.dirPath.
+func (m *manager) Destroy() error {
+	err := cgroups.RemovePath(m.dirPath)
+	return err
+}
+
+// Path returns m.dirPath; the argument is ignored.
+func (m *manager) Path(_ string) string {
+	dir := m.dirPath
+	return dir
+}
+
+// Set applies the resources in r to the cgroup at m.dirPath, one controller
+// after another, stopping at the first error. On success r becomes
+// m.config.Resources. A nil r is a no-op.
+func (m *manager) Set(r *configs.Resources) error {
+	if r == nil {
+		return nil
+	}
+	if err := m.getControllers(); err != nil {
+		return err
+	}
+
+	// The steps run strictly in this order.
+	steps := []func() error{
+		// pids (since kernel 4.5)
+		func() error { return setPids(m.dirPath, r) },
+		// memory (since kernel 4.5)
+		func() error { return setMemory(m.dirPath, r) },
+		// io (since kernel 4.5)
+		func() error { return setIo(m.dirPath, r) },
+		// cpu (since kernel 4.15)
+		func() error { return setCpu(m.dirPath, r) },
+		// devices (since kernel 4.15, pseudo-controller)
+		//
+		// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
+		// However, errors from other subsystems are not ignored.
+		// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
+		func() error {
+			err := setDevices(m.dirPath, r)
+			if err != nil && m.config.Rootless && !errors.Is(err, cgroups.ErrDevicesUnsupported) {
+				return nil
+			}
+			return err
+		},
+		// cpuset (since kernel 5.0)
+		func() error { return setCpuset(m.dirPath, r) },
+		// hugetlb (since kernel 5.6)
+		func() error { return setHugeTlb(m.dirPath, r) },
+		// rdma (since kernel 4.11)
+		func() error { return fscommon.RdmaSet(m.dirPath, r) },
+		// freezer (since kernel 5.2, pseudo-controller)
+		func() error { return setFreezer(m.dirPath, r.Freezer) },
+		func() error { return m.setUnified(r.Unified) },
+	}
+	for _, step := range steps {
+		if err := step(); err != nil {
+			return err
+		}
+	}
+
+	m.config.Resources = r
+	return nil
+}
+
+// setDevices forwards to cgroups.DevicesSetV2 when that hook is set. When it
+// is not set, it returns cgroups.ErrDevicesUnsupported if r lists any
+// devices, and nil otherwise.
+func setDevices(dirPath string, r *configs.Resources) error {
+	if set := cgroups.DevicesSetV2; set != nil {
+		return set(dirPath, r)
+	}
+	if len(r.Devices) > 0 {
+		return cgroups.ErrDevicesUnsupported
+	}
+	return nil
+}
+
+// setUnified writes every key/value pair of res as file name and content
+// into m.dirPath. Keys containing a slash are rejected. When a write fails
+// with a permission or not-exist error, the key is inspected so that a
+// malformed key or an unavailable controller is reported specifically; all
+// other failures are wrapped in a generic error.
+func (m *manager) setUnified(res map[string]string) error {
+	for key, value := range res {
+		if strings.Contains(key, "/") {
+			return fmt.Errorf("unified resource %q must be a file name (no slashes)", key)
+		}
+
+		err := cgroups.WriteFile(m.dirPath, key, value)
+		if err == nil {
+			continue
+		}
+
+		// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
+		if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
+			// Check if a controller is available,
+			// to give more specific error if not.
+			dot := strings.IndexByte(key, '.')
+			if dot < 0 {
+				return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", key)
+			}
+			controller := key[:dot]
+			if _, known := m.controllers[controller]; !known && controller != "cgroup" {
+				return fmt.Errorf("unified resource %q can't be set: controller %q not available", key, controller)
+			}
+		}
+		return fmt.Errorf("unable to set unified resource %q: %w", key, err)
+	}
+
+	return nil
+}
+
+// GetPaths returns a single-entry map in which the empty key maps to
+// m.dirPath.
+func (m *manager) GetPaths() map[string]string {
+	return map[string]string{"": m.dirPath}
+}
+
+// GetCgroups returns the manager's cgroup configuration; the error is
+// always nil.
+func (m *manager) GetCgroups() (*configs.Cgroup, error) {
+	cfg := m.config
+	return cfg, nil
+}
+
+// GetFreezerState returns the result of getFreezer for m.dirPath.
+func (m *manager) GetFreezerState() (configs.FreezerState, error) {
+	state, err := getFreezer(m.dirPath)
+	return state, err
+}
+
+// Exists returns the result of cgroups.PathExists for m.dirPath.
+func (m *manager) Exists() bool {
+	found := cgroups.PathExists(m.dirPath)
+	return found
+}
+
+// OOMKillCount returns the value of the "oom_kill" key in the
+// "memory.events" file under path.
+func OOMKillCount(path string) (uint64, error) {
+	const (
+		file = "memory.events"
+		key  = "oom_kill"
+	)
+	return fscommon.GetValueByKey(path, file, key)
+}
+
+// OOMKillCount returns OOMKillCount(m.dirPath). For a rootless config a
+// "not exist" error is suppressed and the count is returned with a nil error.
+func (m *manager) OOMKillCount() (uint64, error) {
+	count, err := OOMKillCount(m.dirPath)
+	// os.IsNotExist(nil) is false, so no separate nil check is needed.
+	if m.config.Rootless && os.IsNotExist(err) {
+		return count, nil
+	}
+	return count, err
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
new file mode 100644
index 000000000..c92a7e64a
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
@@ -0,0 +1,48 @@
+package fs2
+
+import (
+ "strconv"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// isHugeTlbSet reports whether r carries at least one hugetlb limit.
+func isHugeTlbSet(r *configs.Resources) bool {
+	return len(r.HugetlbLimit) != 0
+}
+
+// setHugeTlb writes each entry of r.HugetlbLimit to
+// "hugetlb.<Pagesize>.max" in dirPath, stopping at the first write error.
+func setHugeTlb(dirPath string, r *configs.Resources) error {
+	if !isHugeTlbSet(r) {
+		return nil
+	}
+
+	for _, entry := range r.HugetlbLimit {
+		file := "hugetlb." + entry.Pagesize + ".max"
+		limit := strconv.FormatUint(entry.Limit, 10)
+		if err := cgroups.WriteFile(dirPath, file, limit); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// statHugeTlb fills stats.HugetlbStats for every page size returned by
+// cgroups.HugePageSizes: Usage comes from "hugetlb.<size>.current" and
+// Failcnt from the "max" key of "hugetlb.<size>.events". The first read
+// error aborts the loop and is returned.
+func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
+	for _, pagesize := range cgroups.HugePageSizes() {
+		prefix := "hugetlb." + pagesize
+
+		usage, err := fscommon.GetCgroupParamUint(dirPath, prefix+".current")
+		if err != nil {
+			return err
+		}
+
+		failcnt, err := fscommon.GetValueByKey(dirPath, prefix+".events", "max")
+		if err != nil {
+			return err
+		}
+
+		stats.HugetlbStats[pagesize] = cgroups.HugetlbStats{
+			Usage:   usage,
+			Failcnt: failcnt,
+		}
+	}
+
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
new file mode 100644
index 000000000..b2ff7d340
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
@@ -0,0 +1,193 @@
+package fs2
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "os"
+ "strconv"
+ "strings"
+
+ "github.com/sirupsen/logrus"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// isIoSet reports whether r requests any io setting: a non-zero BlkioWeight
+// or at least one per-device weight or throttle entry.
+func isIoSet(r *configs.Resources) bool {
+	if r.BlkioWeight != 0 {
+		return true
+	}
+	perDevice := len(r.BlkioWeightDevice) +
+		len(r.BlkioThrottleReadBpsDevice) +
+		len(r.BlkioThrottleWriteBpsDevice) +
+		len(r.BlkioThrottleReadIOPSDevice) +
+		len(r.BlkioThrottleWriteIOPSDevice)
+	return perDevice > 0
+}
+
+// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
+// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
+//
+// It returns false when bfq is nil or when the file holds nothing but a
+// single integer (only the default weight, i.e. an older kernel), and true
+// otherwise.
+func bfqDeviceWeightSupported(bfq *os.File) bool {
+	if bfq == nil {
+		return false
+	}
+	// Seek/Read errors are deliberately ignored: on failure n is 0, the
+	// empty content does not parse as a number, and the result is true.
+	_, _ = bfq.Seek(0, 0)
+	buf := make([]byte, 32)
+	n, _ := bfq.Read(buf)
+	// Only look at the bytes actually read. The unused tail of buf is NUL
+	// bytes, which TrimSpace does not strip; parsing the whole buffer would
+	// therefore always fail and wrongly report support.
+	// If only a single number (default weight) is read back, we have older kernel.
+	_, err := strconv.ParseInt(string(bytes.TrimSpace(buf[:n])), 10, 64)
+	return err != nil
+}
+
+// setIo applies the io settings of r to the cgroup at dirPath.
+//
+// The overall weight goes to "io.bfq.weight" when that file can be opened,
+// otherwise to "io.weight" after conversion. Per-device weights are written
+// only when bfqDeviceWeightSupported reports support. Throttle entries are
+// written to "io.max" one by one. Nothing is written when isIoSet(r) is
+// false; the first failing write aborts and is returned.
+func setIo(dirPath string, r *configs.Resources) error {
+ if !isIoSet(r) {
+ return nil
+ }
+
+ // If BFQ IO scheduler is available, use it.
+ // bfq stays nil when the file does not exist; any other open error is fatal.
+ var bfq *os.File
+ if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
+ var err error
+ bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
+ if err == nil {
+ defer bfq.Close()
+ } else if !os.IsNotExist(err) {
+ return err
+ }
+ }
+
+ if r.BlkioWeight != 0 {
+ if bfq != nil { // Use BFQ.
+ if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
+ return err
+ }
+ } else {
+ // Fallback to io.weight with a conversion scheme.
+ v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
+ if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
+ return err
+ }
+ }
+ }
+ // Per-device weights are skipped without error when unsupported (or when bfq is nil).
+ if bfqDeviceWeightSupported(bfq) {
+ for _, wd := range r.BlkioWeightDevice {
+ if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
+ return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
+ }
+ }
+ }
+ // Throttles: each entry is formatted by StringName with the io.max key it sets.
+ for _, td := range r.BlkioThrottleReadBpsDevice {
+ if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
+ return err
+ }
+ }
+ for _, td := range r.BlkioThrottleWriteBpsDevice {
+ if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
+ return err
+ }
+ }
+ for _, td := range r.BlkioThrottleReadIOPSDevice {
+ if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
+ return err
+ }
+ }
+ for _, td := range r.BlkioThrottleWriteIOPSDevice {
+ if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// readCgroup2MapFile reads the file name in dirPath and returns a map from
+// the first whitespace-separated field of each line to the remaining fields
+// of that line. Lines with fewer than two fields are skipped; a later line
+// with the same first field replaces the earlier one. Scan failures are
+// returned as *parseError.
+func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
+	fd, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
+	if err != nil {
+		return nil, err
+	}
+	defer fd.Close()
+
+	entries := map[string][]string{}
+	sc := bufio.NewScanner(fd)
+	for sc.Scan() {
+		fields := strings.Fields(sc.Text())
+		if len(fields) >= 2 {
+			entries[fields[0]] = fields[1:]
+		}
+	}
+	if scanErr := sc.Err(); scanErr != nil {
+		return nil, &parseError{Path: dirPath, File: name, Err: scanErr}
+	}
+	return entries, nil
+}
+
+// statIo parses "io.stat" in dirPath and stores the result in
+// stats.BlkioStats. Each line is keyed by "major:minor"; its "rbytes" and
+// "wbytes" items become Read/Write entries of IoServiceBytesRecursive, and
+// "rios" and "wios" become Read/Write entries of IoServicedRecursive.
+// Other items are logged at debug level and skipped. Number parsing
+// failures are returned as *parseError.
+func statIo(dirPath string, stats *cgroups.Stats) error {
+	const file = "io.stat"
+	values, err := readCgroup2MapFile(dirPath, file)
+	if err != nil {
+		return err
+	}
+
+	// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
+	var parsed cgroups.BlkioStats
+	for device, items := range values {
+		nums := strings.Split(device, ":")
+		if len(nums) != 2 {
+			continue
+		}
+		major, err := strconv.ParseUint(nums[0], 10, 64)
+		if err != nil {
+			return &parseError{Path: dirPath, File: file, Err: err}
+		}
+		minor, err := strconv.ParseUint(nums[1], 10, 64)
+		if err != nil {
+			return &parseError{Path: dirPath, File: file, Err: err}
+		}
+
+		for _, item := range items {
+			kv := strings.Split(item, "=")
+			if len(kv) != 2 {
+				continue
+			}
+
+			// Map to the cgroupv1 naming and layout (in separate tables).
+			var (
+				op    string
+				table *[]cgroups.BlkioStatEntry
+			)
+			switch kv[0] {
+			// Equivalent to cgroupv1's blkio.io_service_bytes.
+			case "rbytes":
+				op, table = "Read", &parsed.IoServiceBytesRecursive
+			case "wbytes":
+				op, table = "Write", &parsed.IoServiceBytesRecursive
+			// Equivalent to cgroupv1's blkio.io_serviced.
+			case "rios":
+				op, table = "Read", &parsed.IoServicedRecursive
+			case "wios":
+				op, table = "Write", &parsed.IoServicedRecursive
+			default:
+				// Skip over entries we cannot map to cgroupv1 stats for now.
+				// In the future we should expand the stats struct to include
+				// them.
+				logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
+				continue
+			}
+
+			value, err := strconv.ParseUint(kv[1], 10, 64)
+			if err != nil {
+				return &parseError{Path: dirPath, File: file, Err: err}
+			}
+
+			*table = append(*table, cgroups.BlkioStatEntry{
+				Op:    op,
+				Major: major,
+				Minor: minor,
+				Value: value,
+			})
+		}
+	}
+
+	stats.BlkioStats = parsed
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
new file mode 100644
index 000000000..adbc4b230
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
@@ -0,0 +1,216 @@
+package fs2
+
+import (
+ "bufio"
+ "errors"
+ "math"
+ "os"
+ "strconv"
+ "strings"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+ // numToStr renders an int64 value as the string to be written to a
+ // cgroup v2 file with a .min, .max, .low, or .high suffix:
+ //
+ //   - 0 yields an empty string;
+ //   - -1 yields "max", for cgroup v1 compatibility (where writing -1
+ //     used to remove the limit);
+ //   - any other value yields its base-10 representation.
+ func numToStr(value int64) (ret string) {
+ switch value {
+ case 0:
+ return ""
+ case -1:
+ return "max"
+ }
+
+ return strconv.FormatInt(value, 10)
+ }
+
+ // isMemorySet reports whether at least one of the MemoryReservation,
+ // Memory, or MemorySwap fields of r is non-zero.
+ func isMemorySet(r *configs.Resources) bool {
+ if r.MemoryReservation != 0 {
+ return true
+ }
+ if r.Memory != 0 {
+ return true
+ }
+ return r.MemorySwap != 0
+ }
+
+ // setMemory writes the memory settings from r to the cgroup at dirPath.
+ // It does nothing when none of the memory fields of r are set. Files are
+ // written in the order memory.swap.max, memory.max, memory.low; a file
+ // whose value string is empty is skipped, and the first failed write
+ // aborts the sequence with its error.
+ func setMemory(dirPath string, r *configs.Resources) error {
+ if !isMemorySet(r) {
+ return nil
+ }
+
+ swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
+ if err != nil {
+ return err
+ }
+ swapStr := numToStr(swap)
+ // numToStr yields "" only for 0. A zero swap value together with a
+ // positive MemorySwap means memory and memorySwap are set to the same
+ // value -- disable swap by writing an explicit "0".
+ if swapStr == "" && r.MemorySwap > 0 {
+ swapStr = "0"
+ }
+
+ // cgroup.Resources.KernelMemory is ignored
+
+ writes := []struct {
+ file string
+ val string
+ }{
+ // never write empty string to `memory.swap.max`, it means set to 0.
+ {file: "memory.swap.max", val: swapStr},
+ {file: "memory.max", val: numToStr(r.Memory)},
+ {file: "memory.low", val: numToStr(r.MemoryReservation)},
+ }
+ for _, w := range writes {
+ if w.val == "" {
+ continue
+ }
+ if err := cgroups.WriteFile(dirPath, w.file, w.val); err != nil {
+ return err
+ }
+ }
+
+ return nil
+ }
+
+ // statMemory fills stats.MemoryStats from the cgroup at dirPath.
+ //
+ // Every "key value" line of memory.stat is stored in MemoryStats.Stats,
+ // Cache is taken from the "file" key, and UseHierarchy is always true.
+ // Usage comes from memory.current and memory.max, and SwapUsage from
+ // memory.swap.current and memory.swap.max, converted to the combined
+ // mem+swap form.
+ //
+ // If the memory.current/max files are missing and dirPath is the unified
+ // mountpoint (the root cgroup), usage is emulated from /proc/meminfo.
+ // A malformed memory.stat line or a scanner failure is returned as a
+ // *parseError; other errors are returned as is.
+ func statMemory(dirPath string, stats *cgroups.Stats) error {
+ const file = "memory.stat"
+ statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
+ if err != nil {
+ return err
+ }
+ defer statsFile.Close()
+
+ sc := bufio.NewScanner(statsFile)
+ for sc.Scan() {
+ t, v, err := fscommon.ParseKeyValue(sc.Text())
+ if err != nil {
+ return &parseError{Path: dirPath, File: file, Err: err}
+ }
+ stats.MemoryStats.Stats[t] = v
+ }
+ if err := sc.Err(); err != nil {
+ return &parseError{Path: dirPath, File: file, Err: err}
+ }
+ stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
+ // Unlike cgroup v1 which has memory.use_hierarchy binary knob,
+ // cgroup v2 is always hierarchical.
+ stats.MemoryStats.UseHierarchy = true
+
+ memoryUsage, err := getMemoryDataV2(dirPath, "")
+ if err != nil {
+ if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
+ // The root cgroup does not have memory.{current,max}
+ // so emulate those using data from /proc/meminfo.
+ return statsFromMeminfo(stats)
+ }
+ return err
+ }
+ stats.MemoryStats.Usage = memoryUsage
+ swapUsage, err := getMemoryDataV2(dirPath, "swap")
+ if err != nil {
+ return err
+ }
+ // As cgroup v1 reports SwapUsage values as mem+swap combined,
+ // while in cgroup v2 swap values do not include memory,
+ // report combined mem+swap for v1 compatibility.
+ swapUsage.Usage += memoryUsage.Usage
+ if swapUsage.Limit != math.MaxUint64 {
+ if memoryUsage.Limit > math.MaxUint64-swapUsage.Limit {
+ // The memory limit is "max" (MaxUint64) or the sum does not
+ // fit in uint64: saturate instead of wrapping around, so an
+ // unlimited memory limit is never reported as a tiny
+ // combined limit.
+ swapUsage.Limit = math.MaxUint64
+ } else {
+ swapUsage.Limit += memoryUsage.Limit
+ }
+ }
+ stats.MemoryStats.SwapUsage = swapUsage
+
+ return nil
+ }
+
+ // getMemoryDataV2 reads the <module>.current and <module>.max files from
+ // the cgroup directory at path and returns them as the Usage and Limit
+ // fields of a cgroups.MemoryData. The module is "memory" when name is
+ // empty, and "memory."+name otherwise (for example "memory.swap").
+ //
+ // When name is non-empty and the .current file does not exist, a zero
+ // MemoryData and a nil error are returned. Any other read failure is
+ // returned as is, together with a zero MemoryData.
+ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
+ memoryData := cgroups.MemoryData{}
+
+ moduleName := "memory"
+ if name != "" {
+ moduleName = "memory." + name
+ }
+ usage := moduleName + ".current"
+ limit := moduleName + ".max"
+
+ value, err := fscommon.GetCgroupParamUint(path, usage)
+ if err != nil {
+ // errors.Is, unlike os.IsNotExist, also matches wrapped errors.
+ if name != "" && errors.Is(err, os.ErrNotExist) {
+ // Ignore ENOENT as there's no swap accounting
+ // if kernel CONFIG_MEMCG_SWAP is not set or
+ // swapaccount=0 kernel boot parameter is given.
+ return cgroups.MemoryData{}, nil
+ }
+ return cgroups.MemoryData{}, err
+ }
+ memoryData.Usage = value
+
+ value, err = fscommon.GetCgroupParamUint(path, limit)
+ if err != nil {
+ return cgroups.MemoryData{}, err
+ }
+ memoryData.Limit = value
+
+ return memoryData, nil
+ }
+
+ // statsFromMeminfo fills the Usage and SwapUsage fields of
+ // stats.MemoryStats from /proc/meminfo. Usage is computed as total minus
+ // free, multiplied by 1024, for both main memory (MemTotal, MemFree) and
+ // swap (SwapTotal, SwapFree); both limits are set to math.MaxUint64.
+ //
+ // A value that cannot be parsed as an unsigned integer, or a scanner
+ // failure, is returned as a *parseError.
+ func statsFromMeminfo(stats *cgroups.Stats) error {
+ const file = "/proc/meminfo"
+ f, err := os.Open(file)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ // Fields we are interested in, keyed by their /proc/meminfo name.
+ var swapTotal, swapFree, memTotal, memFree uint64
+ wanted := map[string]*uint64{
+ "SwapFree": &swapFree,
+ "SwapTotal": &swapTotal,
+ "MemTotal": &memTotal,
+ "MemFree": &memFree,
+ }
+
+ remaining := len(wanted)
+ sc := bufio.NewScanner(f)
+ for sc.Scan() {
+ parts := strings.SplitN(sc.Text(), ":", 3)
+ if len(parts) != 2 {
+ // Should not happen.
+ continue
+ }
+ key := parts[0]
+ dst, ok := wanted[key]
+ if !ok {
+ // Unknown field -- not interested.
+ continue
+ }
+ raw := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
+ val, err := strconv.ParseUint(raw, 10, 64)
+ if err != nil {
+ return &parseError{File: file, Err: errors.New("bad value for " + key)}
+ }
+ *dst = val
+
+ remaining--
+ if remaining == 0 {
+ // Got everything we need -- skip the rest.
+ break
+ }
+ }
+ if err := sc.Err(); err != nil {
+ return &parseError{Path: "", File: file, Err: err}
+ }
+
+ stats.MemoryStats.SwapUsage.Usage = (swapTotal - swapFree) * 1024
+ stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
+
+ stats.MemoryStats.Usage.Usage = (memTotal - memFree) * 1024
+ stats.MemoryStats.Usage.Limit = math.MaxUint64
+
+ return nil
+ }
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
new file mode 100644
index 000000000..c8c4a3658
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
@@ -0,0 +1,72 @@
+package fs2
+
+import (
+ "errors"
+ "math"
+ "os"
+ "strings"
+
+ "golang.org/x/sys/unix"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+ "github.com/opencontainers/runc/libcontainer/configs"
+)
+
+ // isPidsSet reports whether the PidsLimit field of r is non-zero.
+ func isPidsSet(r *configs.Resources) bool {
+ limit := r.PidsLimit
+ return limit != 0
+ }
+
+ // setPids writes r.PidsLimit, converted by numToStr, to the pids.max file
+ // of the cgroup at dirPath. Nothing is written when the limit is not set
+ // or converts to an empty string.
+ func setPids(dirPath string, r *configs.Resources) error {
+ if !isPidsSet(r) {
+ return nil
+ }
+
+ val := numToStr(r.PidsLimit)
+ if val == "" {
+ return nil
+ }
+
+ return cgroups.WriteFile(dirPath, "pids.max", val)
+ }
+
+ // statPidsFromCgroupProcs fills stats.PidsStats without using the
+ // pids.* files: Current is the number of newline characters in
+ // cgroup.procs, and Limit is set to 0. If reading cgroup.procs fails
+ // with ENOTSUP, cgroup.threads is read instead.
+ func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
+ // if the controller is not enabled, let's read PIDS from cgroup.procs
+ // (or threads if cgroup.threads is enabled)
+ data, err := cgroups.ReadFile(dirPath, "cgroup.procs")
+ if errors.Is(err, unix.ENOTSUP) {
+ data, err = cgroups.ReadFile(dirPath, "cgroup.threads")
+ }
+ if err != nil {
+ return err
+ }
+
+ // One entry per line, so the newline count is the entry count.
+ count := strings.Count(data, "\n")
+ stats.PidsStats.Limit = 0
+ stats.PidsStats.Current = uint64(count)
+ return nil
+ }
+
+ // statPids fills stats.PidsStats from the pids.current and pids.max files
+ // of the cgroup at dirPath. When pids.current does not exist, it falls
+ // back to statPidsFromCgroupProcs.
+ func statPids(dirPath string, stats *cgroups.Stats) error {
+ current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
+ if err != nil && os.IsNotExist(err) {
+ return statPidsFromCgroupProcs(dirPath, stats)
+ }
+ if err != nil {
+ return err
+ }
+
+ limit, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
+ if err != nil {
+ return err
+ }
+ // If no limit is set, read from pids.max returns "max", which is
+ // converted to MaxUint64 by GetCgroupParamUint. Historically, we
+ // represent "no limit" for pids as 0, thus this conversion.
+ if limit == math.MaxUint64 {
+ limit = 0
+ }
+
+ stats.PidsStats.Limit = limit
+ stats.PidsStats.Current = current
+ return nil
+ }
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
new file mode 100644
index 000000000..d463d15ee
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
@@ -0,0 +1,121 @@
+package fscommon
+
+import (
+ "bufio"
+ "errors"
+ "math"
+ "os"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+ "github.com/opencontainers/runc/libcontainer/configs"
+ "golang.org/x/sys/unix"
+)
+
+ // parseRdmaKV parses a single "key=value" string and stores the value in
+ // the matching field of entry: "hca_handle" sets HcaHandles and
+ // "hca_object" sets HcaObjects; any other key leaves entry untouched.
+ // The value "max" is stored as math.MaxUint32; otherwise it must be a
+ // base-10 unsigned integer that fits in 32 bits.
+ func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
+ kv := strings.SplitN(raw, "=", 3)
+ if len(kv) != 2 {
+ return errors.New("Unable to parse RDMA entry")
+ }
+ key, rawVal := kv[0], kv[1]
+
+ var value uint32
+ if rawVal == "max" {
+ value = math.MaxUint32
+ } else {
+ n, err := strconv.ParseUint(rawVal, 10, 32)
+ if err != nil {
+ return err
+ }
+ value = uint32(n)
+ }
+
+ switch key {
+ case "hca_handle":
+ entry.HcaHandles = value
+ case "hca_object":
+ entry.HcaObjects = value
+ }
+
+ return nil
+ }
+
+// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
+// example entry: mlx4_0 hca_handle=2 hca_object=2000
+func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
+ rdmaEntries := make([]cgroups.RdmaEntry, 0)
+ fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
+ if err != nil {
+ return nil, err
+ }
+ defer fd.Close() //nolint:errorlint
+ scanner := bufio.NewScanner(fd)
+ for scanner.Scan() {
+ parts := strings.SplitN(scanner.Text(), " ", 4)
+ if len(parts) == 3 {
+ entry := new(cgroups.RdmaEntry)
+ entry.Device = parts[0]
+ err = parseRdmaKV(parts[1], entry)
+ if err != nil {
+ continue
+ }
+ err = parseRdmaKV(parts[2], entry)
+ if err != nil {
+ continue
+ }
+
+ rdmaEntries = append(rdmaEntries, *entry)
+ }
+ }
+ return rdmaEntries, scanner.Err()
+}
+
+ // RdmaGetStats fills stats.RdmaStats with the entries read from the
+ // rdma.current and rdma.max files under path. A missing rdma.current
+ // file is not an error: nil is returned and stats is left untouched.
+ func RdmaGetStats(path string, stats *cgroups.Stats) error {
+ current, err := readRdmaEntries(path, "rdma.current")
+ if errors.Is(err, os.ErrNotExist) {
+ return nil
+ }
+ if err != nil {
+ return err
+ }
+
+ limits, err := readRdmaEntries(path, "rdma.max")
+ if err != nil {
+ return err
+ }
+
+ // If device got removed between reading two files, ignore returning stats.
+ if len(limits) != len(current) {
+ return nil
+ }
+
+ stats.RdmaStats = cgroups.RdmaStats{
+ RdmaLimit: limits,
+ RdmaCurrent: current,
+ }
+
+ return nil
+ }
+
+ // createCmdString builds a space-separated line that starts with the
+ // device name, followed by "hca_handle=N" and/or "hca_object=N" for each
+ // limit in limits that is non-nil.
+ func createCmdString(device string, limits configs.LinuxRdma) string {
+ parts := []string{device}
+ if h := limits.HcaHandles; h != nil {
+ parts = append(parts, "hca_handle="+strconv.FormatUint(uint64(*h), 10))
+ }
+ if o := limits.HcaObjects; o != nil {
+ parts = append(parts, "hca_object="+strconv.FormatUint(uint64(*o), 10))
+ }
+ return strings.Join(parts, " ")
+ }
+
+ // RdmaSet writes one line per device in r.Rdma to the rdma.max file under
+ // path, stopping at and returning the first write error.
+ func RdmaSet(path string, r *configs.Resources) error {
+ for device, limits := range r.Rdma {
+ cmd := createCmdString(device, limits)
+ err := cgroups.WriteFile(path, "rdma.max", cmd)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+ }
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
new file mode 100644
index 000000000..f4a51c9e5
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
@@ -0,0 +1,145 @@
+package fscommon
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "path"
+ "strconv"
+ "strings"
+
+ "github.com/opencontainers/runc/libcontainer/cgroups"
+)
+
+ // Aliases for the file helpers of the cgroups package. Each one is
+ // deprecated in favor of the cgroups function it is assigned from.
+ var (
+ // Deprecated: use cgroups.OpenFile instead.
+ OpenFile = cgroups.OpenFile
+ // Deprecated: use cgroups.ReadFile instead.
+ ReadFile = cgroups.ReadFile
+ // Deprecated: use cgroups.WriteFile instead.
+ WriteFile = cgroups.WriteFile
+ )
+
+ // ParseError describes a failure to parse a file. Path and File are
+ // joined to form the file name shown in the message, and Err is the
+ // underlying error.
+ type ParseError struct {
+ Path string
+ File string
+ Err error
+ }
+
+ // Error returns a message of the form
+ // "unable to parse <Path>/<File>: <underlying error>".
+ func (e *ParseError) Error() string {
+ name := path.Join(e.Path, e.File)
+ return "unable to parse " + name + ": " + e.Err.Error()
+ }
+
+ // Unwrap returns the underlying error, for use with errors.Is and
+ // errors.As.
+ func (e *ParseError) Unwrap() error {
+ return e.Err
+ }
+
+ // ParseUint converts a string to a uint64 integer.
+ // Negative values are returned as zero because, due to kernel bugs,
+ // some of the memory cgroup stats can be negative. This covers both
+ // negative values that fit in a signed integer of bitSize bits and those
+ // that are out of its range. For any other input that
+ // strconv.ParseUint rejects, its value and error are returned unchanged.
+ func ParseUint(s string, base, bitSize int) (uint64, error) {
+ value, err := strconv.ParseUint(s, base, bitSize)
+ if err == nil {
+ return value, nil
+ }
+
+ // Not a valid unsigned number; check whether it is a negative one.
+ // On a range error ParseInt still reports the sign through the
+ // saturated value it returns.
+ intValue, intErr := strconv.ParseInt(s, base, bitSize)
+ if intValue < 0 && (intErr == nil || errors.Is(intErr, strconv.ErrRange)) {
+ return 0, nil
+ }
+
+ return value, err
+ }
+
+ // ParseKeyValue splits a "name value" cgroup parameter line on a single
+ // space and returns the name as a string and the value as a uint64
+ // (converted by ParseUint). For example, "io_service_bytes 1234" is
+ // returned as "io_service_bytes", 1234. A line that does not consist of
+ // exactly two space-separated fields is an error.
+ func ParseKeyValue(t string) (string, uint64, error) {
+ fields := strings.SplitN(t, " ", 3)
+ if len(fields) != 2 {
+ return "", 0, fmt.Errorf("line %q is not in key value format", t)
+ }
+ key, rawVal := fields[0], fields[1]
+
+ val, err := ParseUint(rawVal, 10, 64)
+ if err != nil {
+ return "", 0, err
+ }
+
+ return key, val, nil
+ }
+
+ // GetValueByKey reads "key value" lines from the specified cgroup file
+ // and returns the value of the first line whose key matches, converted
+ // by ParseUint. A conversion failure is returned as a *ParseError. If no
+ // line matches, it returns 0 and a nil error.
+ func GetValueByKey(path, file, key string) (uint64, error) {
+ content, err := cgroups.ReadFile(path, file)
+ if err != nil {
+ return 0, err
+ }
+
+ for _, line := range strings.Split(content, "\n") {
+ fields := strings.Split(line, " ")
+ if len(fields) != 2 || fields[0] != key {
+ continue
+ }
+
+ val, err := ParseUint(fields[1], 10, 64)
+ if err != nil {
+ return val, &ParseError{Path: path, File: file, Err: err}
+ }
+ return val, nil
+ }
+
+ return 0, nil
+ }
+
+ // GetCgroupParamUint reads a single uint64 value from the specified
+ // cgroup file, using ParseUint for the conversion. The literal "max" is
+ // returned as math.MaxUint64. A conversion failure is returned as a
+ // *ParseError.
+ func GetCgroupParamUint(path, file string) (uint64, error) {
+ raw, err := GetCgroupParamString(path, file)
+ if err != nil {
+ return 0, err
+ }
+
+ raw = strings.TrimSpace(raw)
+ if raw == "max" {
+ return math.MaxUint64, nil
+ }
+
+ n, err := ParseUint(raw, 10, 64)
+ if err == nil {
+ return n, nil
+ }
+ return n, &ParseError{Path: path, File: file, Err: err}
+ }
+
+ // GetCgroupParamInt reads a single int64 value from the specified cgroup
+ // file. Surrounding whitespace is ignored, and the literal "max" is
+ // returned as math.MaxInt64. A conversion failure is returned as a
+ // *ParseError.
+ func GetCgroupParamInt(path, file string) (int64, error) {
+ raw, err := cgroups.ReadFile(path, file)
+ if err != nil {
+ return 0, err
+ }
+
+ raw = strings.TrimSpace(raw)
+ if raw == "max" {
+ return math.MaxInt64, nil
+ }
+
+ n, err := strconv.ParseInt(raw, 10, 64)
+ if err == nil {
+ return n, nil
+ }
+ return n, &ParseError{Path: path, File: file, Err: err}
+ }
+
+ // GetCgroupParamString reads the specified cgroup file and returns its
+ // contents with leading and trailing whitespace removed.
+ func GetCgroupParamString(path, file string) (string, error) {
+ raw, err := cgroups.ReadFile(path, file)
+ if err != nil {
+ return "", err
+ }
+
+ trimmed := strings.TrimSpace(raw)
+ return trimmed, nil
+ }
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
index fa195bf90..865344f99 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
@@ -2,8 +2,8 @@ package configs
import "fmt"
-// blockIODevice holds major:minor format supported in blkio cgroup
-type blockIODevice struct {
+// BlockIODevice holds major:minor format supported in blkio cgroup.
+type BlockIODevice struct {
// Major is the device's major number
Major int64 `json:"major"`
// Minor is the device's minor number
@@ -12,7 +12,7 @@ type blockIODevice struct {
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
type WeightDevice struct {
- blockIODevice
+ BlockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
@@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string {
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
- blockIODevice
+ BlockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
index c1b4a0041..7cf2fb657 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -83,9 +83,6 @@ type Syscall struct {
Args []*Arg `json:"args"`
}
-// TODO Windows. Many of these fields should be factored out into those parts
-// which are common across platforms, and those which are platform specific.
-
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
index 784c61820..b4c616d55 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
@@ -35,12 +35,6 @@ type Mount struct {
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
-
- // Optional Command to be run before Source is mounted.
- PremountCmds []Command `json:"premount_cmds"`
-
- // Optional Command to be run after Source is mounted.
- PostmountCmds []Command `json:"postmount_cmds"`
}
func (m *Mount) IsBind() bool {
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
index 6b9fc3435..dbd435341 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@@ -132,19 +132,16 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
return fn(procfd)
}
-// SearchLabels searches a list of key-value pairs for the provided key and
-// returns the corresponding value. The pairs must be separated with '='.
-func SearchLabels(labels []string, query string) string {
- for _, l := range labels {
- parts := strings.SplitN(l, "=", 2)
- if len(parts) < 2 {
- continue
- }
- if parts[0] == query {
- return parts[1]
+// SearchLabels searches through a list of key=value pairs for a given key,
+// returning its value, and a boolean flag telling whether the key exists.
+func SearchLabels(labels []string, key string) (string, bool) {
+ key += "="
+ for _, s := range labels {
+ if strings.HasPrefix(s, key) {
+ return s[len(key):], true
}
}
- return ""
+ return "", false
}
// Annotations returns the bundle path and user defined annotations from the