diff options
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
21 files changed, 592 insertions, 189 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go index debfc1e48..73965f12d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go @@ -1,8 +1,7 @@ -// +build apparmor,linux - package apparmor import ( + "bytes" "fmt" "io/ioutil" "os" @@ -12,11 +11,9 @@ import ( // IsEnabled returns true if apparmor is enabled for the host. func IsEnabled() bool { - if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" { - if _, err = os.Stat("/sbin/apparmor_parser"); err == nil { - buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") - return err == nil && len(buf) > 1 && buf[0] == 'Y' - } + if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { + buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") + return err == nil && bytes.HasPrefix(buf, []byte("Y")) } return false } @@ -24,9 +21,7 @@ func IsEnabled() bool { func setProcAttr(attr, value string) error { // Under AppArmor you can only change your own attr, so use /proc/self/ // instead of /proc/<tid>/ like libapparmor does - path := fmt.Sprintf("/proc/self/attr/%s", attr) - - f, err := os.OpenFile(path, os.O_WRONLY, 0) + f, err := os.OpenFile("/proc/self/attr/"+attr, os.O_WRONLY, 0) if err != nil { return err } @@ -36,14 +31,13 @@ func setProcAttr(attr, value string) error { return err } - _, err = fmt.Fprintf(f, "%s", value) + _, err = f.WriteString(value) return err } // changeOnExec reimplements aa_change_onexec from libapparmor in Go func changeOnExec(name string) error { - value := "exec " + name - if err := setProcAttr("exec", value); err != nil { + if err := setProcAttr("exec", "exec "+name); err != nil { return fmt.Errorf("apparmor failed to apply profile: %s", err) } return nil diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go index d4110cf0b..0bc473f81 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go @@ -1,4 +1,4 @@ -// +build !apparmor !linux +// +build !linux package apparmor diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go new file mode 100644 index 000000000..ae2613cdb --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go @@ -0,0 +1,51 @@ +// +build linux + +package fscommon + +import ( + "bytes" + "os" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// WriteFile writes data to a cgroup file in dir. +// It is supposed to be used for cgroup files only. +func WriteFile(dir, file, data string) error { + fd, err := OpenFile(dir, file, unix.O_WRONLY) + if err != nil { + return err + } + defer fd.Close() + if err := retryingWriteFile(fd, data); err != nil { + return errors.Wrapf(err, "failed to write %q", data) + } + return nil +} + +// ReadFile reads data from a cgroup file in dir. +// It is supposed to be used for cgroup files only. +func ReadFile(dir, file string) (string, error) { + fd, err := OpenFile(dir, file, unix.O_RDONLY) + if err != nil { + return "", err + } + defer fd.Close() + var buf bytes.Buffer + + _, err = buf.ReadFrom(fd) + return buf.String(), err +} + +func retryingWriteFile(fd *os.File, data string) error { + for { + _, err := fd.Write([]byte(data)) + if errors.Is(err, unix.EINTR) { + logrus.Infof("interrupted while writing %s to %s", data, fd.Name()) + continue + } + return err + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/open.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/open.go new file mode 100644 index 000000000..0a7e3d952 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/open.go @@ -0,0 +1,103 @@ +package fscommon + +import ( + "os" + "strings" + "sync" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const ( + cgroupfsDir = "/sys/fs/cgroup" + cgroupfsPrefix = cgroupfsDir + "/" +) + +var ( + // Set to true by fs unit tests + TestMode bool + + cgroupFd int = -1 + prepOnce sync.Once + prepErr error + resolveFlags uint64 +) + +func prepareOpenat2() error { + prepOnce.Do(func() { + fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ + Flags: unix.O_DIRECTORY | unix.O_PATH}) + if err != nil { + prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} + if err != unix.ENOSYS { + logrus.Warnf("falling back to securejoin: %s", prepErr) + } else { + logrus.Debug("openat2 not available, falling back to securejoin") + } + return + } + var st unix.Statfs_t + if err = unix.Fstatfs(fd, &st); err != nil { + prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} + logrus.Warnf("falling back to securejoin: %s", prepErr) + return + } + + cgroupFd = fd + + resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS + if st.Type == unix.CGROUP2_SUPER_MAGIC { + // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks + resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS + } + + }) + + return prepErr +} + +// OpenFile opens a cgroup file in a given dir with given flags. +// It is supposed to be used for cgroup files only. +func OpenFile(dir, file string, flags int) (*os.File, error) { + if dir == "" { + return nil, errors.Errorf("no directory specified for %s", file) + } + mode := os.FileMode(0) + if TestMode && flags&os.O_WRONLY != 0 { + // "emulate" cgroup fs for unit tests + flags |= os.O_TRUNC | os.O_CREATE + mode = 0o600 + } + reldir := strings.TrimPrefix(dir, cgroupfsPrefix) + if len(reldir) == len(dir) { // non-standard path, old system? + return openWithSecureJoin(dir, file, flags, mode) + } + if prepareOpenat2() != nil { + return openWithSecureJoin(dir, file, flags, mode) + } + + relname := reldir + "/" + file + fd, err := unix.Openat2(cgroupFd, relname, + &unix.OpenHow{ + Resolve: resolveFlags, + Flags: uint64(flags) | unix.O_CLOEXEC, + Mode: uint64(mode), + }) + if err != nil { + return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err} + } + + return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil +} + +func openWithSecureJoin(dir, file string, flags int, mode os.FileMode) (*os.File, error) { + path, err := securejoin.SecureJoin(dir, file) + if err != nil { + return nil, err + } + + return os.OpenFile(path, flags, mode) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go new file mode 100644 index 000000000..2e4e837f2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go @@ -0,0 +1,102 @@ +// +build linux + +package fscommon + +import ( + "errors" + "fmt" + "math" + "strconv" + "strings" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// ParseUint converts a string to an uint64 integer. +// Negative values are returned at zero as, due to kernel bugs, +// some of the memory cgroup stats can be negative. +func ParseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// GetCgroupParamKeyValue parses a space-separated "name value" kind of cgroup +// parameter and returns its components. For example, "io_service_bytes 1234" +// will return as "io_service_bytes", 1234. +func GetCgroupParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := ParseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("unable to convert to uint64: %v", err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// GetCgroupParamUint reads a single uint64 value from the specified cgroup file. +// If the value read is "max", the math.MaxUint64 is returned. +func GetCgroupParamUint(path, file string) (uint64, error) { + contents, err := GetCgroupParamString(path, file) + if err != nil { + return 0, err + } + contents = strings.TrimSpace(contents) + if contents == "max" { + return math.MaxUint64, nil + } + + res, err := ParseUint(contents, 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse file %q", path+"/"+file) + } + return res, nil +} + +// GetCgroupParamInt reads a single int64 value from specified cgroup file. +// If the value read is "max", the math.MaxInt64 is returned. +func GetCgroupParamInt(path, file string) (int64, error) { + contents, err := ReadFile(path, file) + if err != nil { + return 0, err + } + contents = strings.TrimSpace(contents) + if contents == "max" { + return math.MaxInt64, nil + } + + res, err := strconv.ParseInt(contents, 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a int from Cgroup file %q", contents, path+"/"+file) + } + return res, nil +} + +// GetCgroupParamString reads a string from the specified cgroup file. +func GetCgroupParamString(path, file string) (string, error) { + contents, err := ReadFile(path, file) + if err != nil { + return "", err + } + + return strings.TrimSpace(contents), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go index 7ac816605..e7f9c4626 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -39,6 +39,33 @@ type CpuStats struct { ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` } +type CPUSetStats struct { + // List of the physical numbers of the CPUs on which processes + // in that cpuset are allowed to execute + CPUs []uint16 `json:"cpus,omitempty"` + // cpu_exclusive flag + CPUExclusive uint64 `json:"cpu_exclusive"` + // List of memory nodes on which processes in that cpuset + // are allowed to allocate memory + Mems []uint16 `json:"mems,omitempty"` + // mem_hardwall flag + MemHardwall uint64 `json:"mem_hardwall"` + // mem_exclusive flag + MemExclusive uint64 `json:"mem_exclusive"` + // memory_migrate flag + MemoryMigrate uint64 `json:"memory_migrate"` + // memory_spread page flag + MemorySpreadPage uint64 `json:"memory_spread_page"` + // memory_spread slab flag + MemorySpreadSlab uint64 `json:"memory_spread_slab"` + // memory_pressure + MemoryPressure uint64 `json:"memory_pressure"` + // sched_load balance flag + SchedLoadBalance uint64 `json:"sched_load_balance"` + // sched_relax_domain_level + SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` +} + type MemoryData struct { Usage uint64 `json:"usage,omitempty"` MaxUsage uint64 `json:"max_usage,omitempty"` @@ -121,6 +148,7 @@ type HugetlbStats struct { type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` + CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` MemoryStats MemoryStats `json:"memory_stats,omitempty"` PidsStats PidsStats `json:"pids_stats,omitempty"` BlkioStats BlkioStats `json:"blkio_stats,omitempty"` diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 6e88b5dff..840817e39 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -15,7 +15,9 @@ import ( "sync" "time" - units "github.com/docker/go-units" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -29,19 +31,19 @@ var ( isUnified bool ) -// HugePageSizeUnitList is a list of the units used by the linux kernel when -// naming the HugePage control files. -// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt -// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed, -// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393 -var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"} - // IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. func IsCgroup2UnifiedMode() bool { isUnifiedOnce.Do(func() { var st unix.Statfs_t - if err := unix.Statfs(unifiedMountpoint, &st); err != nil { - panic("cannot statfs cgroup root") + err := unix.Statfs(unifiedMountpoint, &st) + if err != nil { + if os.IsNotExist(err) && system.RunningInUserNS() { + // ignore the "not found" error if running in userns + logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint) + isUnified = false + return + } + panic(fmt.Sprintf("cannot statfs cgroup root: %s", err)) } isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC }) @@ -86,11 +88,11 @@ func GetAllSubsystems() ([]string, error) { // - freezer: implemented in kernel 5.2 // We assume these are always available, as it is hard to detect availability. pseudo := []string{"devices", "freezer"} - data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers") + data, err := fscommon.ReadFile("/sys/fs/cgroup", "cgroup.controllers") if err != nil { return nil, err } - subsystems := append(pseudo, strings.Fields(string(data))...) + subsystems := append(pseudo, strings.Fields(data)...) return subsystems, nil } f, err := os.Open("/proc/cgroups") @@ -207,20 +209,66 @@ func EnterPid(cgroupPaths map[string]string, pid int) error { return nil } +func rmdir(path string) error { + err := unix.Rmdir(path) + if err == nil || err == unix.ENOENT { + return nil + } + return &os.PathError{Op: "rmdir", Path: path, Err: err} +} + +// RemovePath aims to remove cgroup path. It does so recursively, +// by removing any subdirectories (sub-cgroups) first. +func RemovePath(path string) error { + // try the fast path first + if err := rmdir(path); err == nil { + return nil + } + + infos, err := ioutil.ReadDir(path) + if err != nil { + if os.IsNotExist(err) { + err = nil + } + return err + } + for _, info := range infos { + if info.IsDir() { + // We should remove subcgroups dir first + if err = RemovePath(filepath.Join(path, info.Name())); err != nil { + break + } + } + } + if err == nil { + err = rmdir(path) + } + return err +} + // RemovePaths iterates over the provided paths removing them. // We trying to remove all paths five times with increasing delay between tries. // If after all there are not removed cgroups - appropriate error will be // returned. func RemovePaths(paths map[string]string) (err error) { + const retries = 5 delay := 10 * time.Millisecond - for i := 0; i < 5; i++ { + for i := 0; i < retries; i++ { if i != 0 { time.Sleep(delay) delay *= 2 } for s, p := range paths { - os.RemoveAll(p) - // TODO: here probably should be logging + if err := RemovePath(p); err != nil { + // do not log intermediate iterations + switch i { + case 0: + logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)") + case retries - 1: + logrus.WithError(err).Error("Failed to remove cgroup") + } + + } _, err := os.Stat(p) // We need this strange way of checking cgroups existence because // RemoveAll almost always returns error, even on already removed @@ -230,6 +278,8 @@ func RemovePaths(paths map[string]string) (err error) { } } if len(paths) == 0 { + //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 + paths = make(map[string]string) return nil } } @@ -237,27 +287,50 @@ func RemovePaths(paths map[string]string) (err error) { } func GetHugePageSize() ([]string, error) { - files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) if err != nil { - return []string{}, err + return nil, err } - var fileNames []string - for _, st := range files { - fileNames = append(fileNames, st.Name()) + files, err := dir.Readdirnames(0) + dir.Close() + if err != nil { + return nil, err } - return getHugePageSizeFromFilenames(fileNames) + + return getHugePageSizeFromFilenames(files) } func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { - var pageSizes []string - for _, fileName := range fileNames { - nameArray := strings.Split(fileName, "-") - pageSize, err := units.RAMInBytes(nameArray[1]) + pageSizes := make([]string, 0, len(fileNames)) + + for _, file := range fileNames { + // example: hugepages-1048576kB + val := strings.TrimPrefix(file, "hugepages-") + if len(val) == len(file) { + // unexpected file name: no prefix found + continue + } + // The suffix is always "kB" (as of Linux 5.9) + eLen := len(val) - 2 + val = strings.TrimSuffix(val, "kB") + if len(val) != eLen { + logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file) + continue + } + size, err := strconv.Atoi(val) if err != nil { - return []string{}, err + return nil, err + } + // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 + // but in our case the size is in KB already. + if size >= (1 << 20) { + val = strconv.Itoa(size>>20) + "GB" + } else if size >= (1 << 10) { + val = strconv.Itoa(size>>10) + "MB" + } else { + val += "KB" } - sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList) - pageSizes = append(pageSizes, sizeString) + pageSizes = append(pageSizes, val) } return pageSizes, nil @@ -303,14 +376,14 @@ func WriteCgroupProc(dir string, pid int) error { return nil } - cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700) + file, err := fscommon.OpenFile(dir, CgroupProcesses, os.O_WRONLY) if err != nil { return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) } - defer cgroupProcessesFile.Close() + defer file.Close() for i := 0; i < 5; i++ { - _, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid)) + _, err = file.WriteString(strconv.Itoa(pid)) if err == nil { return nil } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go index f8487b0a9..95ec9dff0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go @@ -1,13 +1,17 @@ package cgroups import ( - "bufio" "errors" "fmt" - "io" "os" "path/filepath" "strings" + "sync" + "syscall" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/moby/sys/mountinfo" + "golang.org/x/sys/unix" ) // Code in this source file are specific to cgroup v1, @@ -15,10 +19,16 @@ import ( const ( CgroupNamePrefix = "name=" + defaultPrefix = "/sys/fs/cgroup" ) var ( - errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") + errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") + ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1") + + readMountinfoOnce sync.Once + readMountinfoErr error + cgroupMountinfo []*mountinfo.Info ) type NotFoundError struct { @@ -43,11 +53,74 @@ func IsNotFound(err error) bool { return ok } +func tryDefaultPath(cgroupPath, subsystem string) string { + if !strings.HasPrefix(defaultPrefix, cgroupPath) { + return "" + } + + // remove possible prefix + subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix) + + // Make sure we're still under defaultPrefix, and resolve + // a possible symlink (like cpu -> cpu,cpuacct). + path, err := securejoin.SecureJoin(defaultPrefix, subsystem) + if err != nil { + return "" + } + + // (1) path should be a directory. + st, err := os.Lstat(path) + if err != nil || !st.IsDir() { + return "" + } + + // (2) path should be a mount point. + pst, err := os.Lstat(filepath.Dir(path)) + if err != nil { + return "" + } + + if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev { + // parent dir has the same dev -- path is not a mount point + return "" + } + + // (3) path should have 'cgroup' fs type. + fst := unix.Statfs_t{} + err = unix.Statfs(path, &fst) + if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { + return "" + } + + return path +} + +// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones +// with fstype of "cgroup") for the current running process. +// +// The results are cached (to avoid re-reading mountinfo which is relatively +// expensive), so it is assumed that cgroup mounts are not being changed. +func readCgroupMountinfo() ([]*mountinfo.Info, error) { + readMountinfoOnce.Do(func() { + cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( + mountinfo.FSTypeFilter("cgroup"), + ) + }) + + return cgroupMountinfo, readMountinfoErr +} + // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { if IsCgroup2UnifiedMode() { return "", errUnified } + + // Avoid parsing mountinfo by trying the default path first, if possible. + if path := tryDefaultPath(cgroupPath, subsystem); path != "" { + return path, nil + } + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) return mnt, err } @@ -57,58 +130,28 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, return "", "", errUnified } - // We are not using mount.GetMounts() because it's super-inefficient, - // parsing it directly sped up x10 times because of not using Sscanf. - // It was one of two major performance drawbacks in container start. - if !isSubsystemAvailable(subsystem) { - return "", "", NewNotFoundError(subsystem) - } - - f, err := os.Open("/proc/self/mountinfo") + mi, err := readCgroupMountinfo() if err != nil { return "", "", err } - defer f.Close() - return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) + return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) } -func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { - scanner := bufio.NewScanner(reader) - for scanner.Scan() { - txt := scanner.Text() - fields := strings.Fields(txt) - if len(fields) < 9 { - continue - } - if strings.HasPrefix(fields[4], cgroupPath) { - for _, opt := range strings.Split(fields[len(fields)-1], ",") { +func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) { + for _, mi := range mounts { + if strings.HasPrefix(mi.Mountpoint, cgroupPath) { + for _, opt := range strings.Split(mi.VFSOptions, ",") { if opt == subsystem { - return fields[4], fields[3], nil + return mi.Mountpoint, mi.Root, nil } } } } - if err := scanner.Err(); err != nil { - return "", "", err - } return "", "", NewNotFoundError(subsystem) } -func isSubsystemAvailable(subsystem string) bool { - if IsCgroup2UnifiedMode() { - panic("don't call isSubsystemAvailable from cgroupv2 code") - } - - cgroups, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return false - } - _, avail := cgroups[subsystem] - return avail -} - func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { if len(m.Subsystems) == 0 { return "", fmt.Errorf("no subsystem for mount") @@ -117,25 +160,15 @@ func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { return getControllerPath(m.Subsystems[0], cgroups) } -func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { +func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) { res := make([]Mount, 0, len(ss)) - scanner := bufio.NewScanner(mi) numFound := 0 - for scanner.Scan() && numFound < len(ss) { - txt := scanner.Text() - sepIdx := strings.Index(txt, " - ") - if sepIdx == -1 { - return nil, fmt.Errorf("invalid mountinfo format") - } - if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { - continue - } - fields := strings.Split(txt, " ") + for _, mi := range mounts { m := Mount{ - Mountpoint: fields[4], - Root: fields[3], + Mountpoint: mi.Mountpoint, + Root: mi.Root, } - for _, opt := range strings.Split(fields[len(fields)-1], ",") { + for _, opt := range strings.Split(mi.VFSOptions, ",") { seen, known := ss[opt] if !known || (!all && seen) { continue @@ -148,19 +181,18 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, if len(m.Subsystems) > 0 || all { res = append(res, m) } - } - if err := scanner.Err(); err != nil { - return nil, err + if !all && numFound >= len(ss) { + break + } } return res, nil } func getCgroupMountsV1(all bool) ([]Mount, error) { - f, err := os.Open("/proc/self/mountinfo") + mi, err := readCgroupMountinfo() if err != nil { return nil, err } - defer f.Close() allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { @@ -171,7 +203,8 @@ func getCgroupMountsV1(all bool) ([]Mount, error) { for s := range allSubsystems { allMap[s] = false } - return getCgroupMountsHelper(allMap, f, all) + + return getCgroupMountsHelper(allMap, mi, all) } // GetOwnCgroup returns the relative path to the cgroup docker is running in. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go index 6e90ae16b..aada5d62f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go @@ -2,6 +2,7 @@ package configs import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/opencontainers/runc/libcontainer/devices" ) type FreezerState string @@ -42,7 +43,7 @@ type Cgroup struct { type Resources struct { // Devices is the set of access rules for devices in the container. - Devices []*DeviceRule `json:"devices"` + Devices []*devices.Rule `json:"devices"` // Memory limit (in bytes) Memory int64 `json:"memory"` @@ -127,6 +128,9 @@ type Resources struct { // CpuWeight sets a proportional bandwidth limit. CpuWeight uint64 `json:"cpu_weight"` + // Unified is cgroupv2-only key-value map. + Unified map[string]string `json:"unified"` + // SkipDevices allows to skip configuring device permissions. // Used by e.g. kubelet while creating a parent cgroup (kubepods) // common for many containers. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index ac523b417..e1cd16265 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -7,6 +7,7 @@ import ( "os/exec" "time" + "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -92,6 +93,9 @@ type Config struct { // Path to a directory containing the container's root filesystem. Rootfs string `json:"rootfs"` + // Umask is the umask to use inside of the container. + Umask *uint32 `json:"umask"` + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted // bind mounts are writtable. Readonlyfs bool `json:"readonlyfs"` @@ -104,7 +108,7 @@ type Config struct { Mounts []*Mount `json:"mounts"` // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! - Devices []*Device `json:"devices"` + Devices []*devices.Device `json:"devices"` MountLabel string `json:"mount_label"` @@ -239,15 +243,6 @@ const ( Poststop = "poststop" ) -// TODO move this to runtime-spec -// See: https://github.com/opencontainers/runtime-spec/pull/1046 -const ( - Creating = "creating" - Created = "created" - Running = "running" - Stopped = "stopped" -) - type Capabilities struct { // Bounding is the set of capabilities checked by the kernel. Bounding []string diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_windows.go deleted file mode 100644 index 729289393..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_windows.go +++ /dev/null @@ -1,5 +0,0 @@ -package configs - -func (d *DeviceRule) Mkdev() (uint64, error) { - return 0, nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/devices.go new file mode 100644 index 000000000..b9e3664ce --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/devices.go @@ -0,0 +1,17 @@ +package configs + +import "github.com/opencontainers/runc/libcontainer/devices" + +type ( + // Deprecated: use libcontainer/devices.Device + Device = devices.Device + + // Deprecated: use libcontainer/devices.Rule + DeviceRule = devices.Rule + + // Deprecated: use libcontainer/devices.Type + DeviceType = devices.Type + + // Deprecated: use libcontainer/devices.Permissions + DevicePermissions = devices.Permissions +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go index 1bbaef9bd..d52d6fcd1 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -56,7 +56,7 @@ func IsNamespaceSupported(ns NamespaceType) bool { if nsFile == "" { return false } - _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) + _, err := os.Stat("/proc/self/ns/" + nsFile) // a namespace is supported if it exists and we have permissions to read it supported = err == nil supportedNamespaces[ns] = supported diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/device.go index 632bf6ac4..3eb73cc7c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device.go @@ -1,4 +1,4 @@ -package configs +package devices import ( "fmt" @@ -11,7 +11,7 @@ const ( ) type Device struct { - DeviceRule + Rule // Path to the device. Path string `json:"path"` @@ -26,10 +26,10 @@ type Device struct { Gid uint32 `json:"gid"` } -// DevicePermissions is a cgroupv1-style string to represent device access. It +// Permissions is a cgroupv1-style string to represent device access. It // has to be a string for backward compatibility reasons, hence why it has // methods to do set operations. -type DevicePermissions string +type Permissions string const ( deviceRead uint = (1 << iota) @@ -37,7 +37,7 @@ const ( deviceMknod ) -func (p DevicePermissions) toSet() uint { +func (p Permissions) toSet() uint { var set uint for _, perm := range p { switch perm { @@ -52,7 +52,7 @@ func (p DevicePermissions) toSet() uint { return set } -func fromSet(set uint) DevicePermissions { +func fromSet(set uint) Permissions { var perm string if set&deviceRead == deviceRead { perm += "r" @@ -63,53 +63,53 @@ func fromSet(set uint) DevicePermissions { if set&deviceMknod == deviceMknod { perm += "m" } - return DevicePermissions(perm) + return Permissions(perm) } -// Union returns the union of the two sets of DevicePermissions. -func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions { +// Union returns the union of the two sets of Permissions. +func (p Permissions) Union(o Permissions) Permissions { lhs := p.toSet() rhs := o.toSet() return fromSet(lhs | rhs) } -// Difference returns the set difference of the two sets of DevicePermissions. +// Difference returns the set difference of the two sets of Permissions. // In set notation, A.Difference(B) gives you A\B. -func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions { +func (p Permissions) Difference(o Permissions) Permissions { lhs := p.toSet() rhs := o.toSet() return fromSet(lhs &^ rhs) } -// Intersection computes the intersection of the two sets of DevicePermissions. -func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions { +// Intersection computes the intersection of the two sets of Permissions. +func (p Permissions) Intersection(o Permissions) Permissions { lhs := p.toSet() rhs := o.toSet() return fromSet(lhs & rhs) } -// IsEmpty returns whether the set of permissions in a DevicePermissions is +// IsEmpty returns whether the set of permissions in a Permissions is // empty. -func (p DevicePermissions) IsEmpty() bool { - return p == DevicePermissions("") +func (p Permissions) IsEmpty() bool { + return p == Permissions("") } // IsValid returns whether the set of permissions is a subset of valid // permissions (namely, {r,w,m}). -func (p DevicePermissions) IsValid() bool { +func (p Permissions) IsValid() bool { return p == fromSet(p.toSet()) } -type DeviceType rune +type Type rune const ( - WildcardDevice DeviceType = 'a' - BlockDevice DeviceType = 'b' - CharDevice DeviceType = 'c' // or 'u' - FifoDevice DeviceType = 'p' + WildcardDevice Type = 'a' + BlockDevice Type = 'b' + CharDevice Type = 'c' // or 'u' + FifoDevice Type = 'p' ) -func (t DeviceType) IsValid() bool { +func (t Type) IsValid() bool { switch t { case WildcardDevice, BlockDevice, CharDevice, FifoDevice: return true @@ -118,7 +118,7 @@ func (t DeviceType) IsValid() bool { } } -func (t DeviceType) CanMknod() bool { +func (t Type) CanMknod() bool { switch t { case BlockDevice, CharDevice, FifoDevice: return true @@ -127,7 +127,7 @@ func (t DeviceType) CanMknod() bool { } } -func (t DeviceType) CanCgroup() bool { +func (t Type) CanCgroup() bool { switch t { case WildcardDevice, BlockDevice, CharDevice: return true @@ -136,10 +136,10 @@ func (t DeviceType) CanCgroup() bool { } } -type DeviceRule struct { +type Rule struct { // Type of device ('c' for char, 'b' for block). If set to 'a', this rule // acts as a wildcard and all fields other than Allow are ignored. - Type DeviceType `json:"type"` + Type Type `json:"type"` // Major is the device's major number. Major int64 `json:"major"` @@ -149,13 +149,13 @@ type DeviceRule struct { // Permissions is the set of permissions that this rule applies to (in the // cgroupv1 format -- any combination of "rwm"). - Permissions DevicePermissions `json:"permissions"` + Permissions Permissions `json:"permissions"` // Allow specifies whether this rule is allowed. Allow bool `json:"allow"` } -func (d *DeviceRule) CgroupString() string { +func (d *Rule) CgroupString() string { var ( major = strconv.FormatInt(d.Major, 10) minor = strconv.FormatInt(d.Minor, 10) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go index 650c46848..a400341e4 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go @@ -1,6 +1,6 @@ // +build !windows -package configs +package devices import ( "errors" @@ -8,7 +8,7 @@ import ( "golang.org/x/sys/unix" ) -func (d *DeviceRule) Mkdev() (uint64, error) { +func (d *Rule) Mkdev() (uint64, error) { if d.Major == Wildcard || d.Minor == Wildcard { return 0, errors.New("cannot mkdev() device with wildcards") } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_windows.go new file mode 100644 index 000000000..8511bf00e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_windows.go @@ -0,0 +1,5 @@ +package devices + +func (d *Rule) Mkdev() (uint64, error) { + return 0, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go index 702f913ec..5011f373d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" - "github.com/opencontainers/runc/libcontainer/configs" "golang.org/x/sys/unix" ) @@ -23,7 +22,7 @@ var ( // Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the // information about a linux device and return that information as a Device struct. -func DeviceFromPath(path, permissions string) (*configs.Device, error) { +func DeviceFromPath(path, permissions string) (*Device, error) { var stat unix.Stat_t err := unixLstat(path, &stat) if err != nil { @@ -31,28 +30,28 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) { } var ( - devType configs.DeviceType + devType Type mode = stat.Mode devNumber = uint64(stat.Rdev) major = unix.Major(devNumber) minor = unix.Minor(devNumber) ) - switch { - case mode&unix.S_IFBLK == unix.S_IFBLK: - devType = configs.BlockDevice - case mode&unix.S_IFCHR == unix.S_IFCHR: - devType = configs.CharDevice - case mode&unix.S_IFIFO == unix.S_IFIFO: - devType = configs.FifoDevice + switch mode & unix.S_IFMT { + case unix.S_IFBLK: + devType = BlockDevice + case unix.S_IFCHR: + devType = CharDevice + case unix.S_IFIFO: + devType = FifoDevice default: return nil, ErrNotADevice } - return &configs.Device{ - DeviceRule: configs.DeviceRule{ + return &Device{ + Rule: Rule{ Type: devType, Major: int64(major), Minor: int64(minor), - Permissions: configs.DevicePermissions(permissions), + Permissions: Permissions(permissions), }, Path: path, FileMode: os.FileMode(mode), @@ -62,18 +61,18 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) { } // HostDevices returns all devices that can be found under /dev directory. -func HostDevices() ([]*configs.Device, error) { +func HostDevices() ([]*Device, error) { return GetDevices("/dev") } // GetDevices recursively traverses a directory specified by path // and returns all devices found there. -func GetDevices(path string) ([]*configs.Device, error) { +func GetDevices(path string) ([]*Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } - var out []*configs.Device + var out []*Device for _, f := range files { switch { case f.IsDir(): @@ -104,6 +103,9 @@ func GetDevices(path string) ([]*configs.Device, error) { } return nil, err } + if device.Type == FifoDevice { + continue + } out = append(out, device) } return out, nil diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go index 79232a437..b73cf70b4 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go @@ -71,16 +71,6 @@ func Stat(pid int) (stat Stat_t, err error) { return parseStat(string(bytes)) } -// GetProcessStartTime is deprecated. Use Stat(pid) and -// Stat_t.StartTime instead. -func GetProcessStartTime(pid int) (string, error) { - stat, err := Stat(pid) - if err != nil { - return "", err - } - return fmt.Sprintf("%d", stat.StartTime), nil -} - func parseStat(data string) (stat Stat_t, err error) { // From proc(5), field 2 could contain space and is inside `(` and `)`. // The following is an example: diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go index 65cd40e92..f19333e61 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go @@ -3,8 +3,8 @@ package user import ( - "fmt" "os/user" + "strconv" ) func lookupUser(username string) (User, error) { @@ -16,7 +16,7 @@ func lookupUser(username string) (User, error) { } func lookupUid(uid int) (User, error) { - u, err := user.LookupId(fmt.Sprintf("%d", uid)) + u, err := user.LookupId(strconv.Itoa(uid)) if err != nil { return User{}, err } @@ -32,7 +32,7 @@ func lookupGroup(groupname string) (Group, error) { } func lookupGid(gid int) (Group, error) { - g, err := user.LookupGroupId(fmt.Sprintf("%d", gid)) + g, err := user.LookupGroupId(strconv.Itoa(gid)) if err != nil { return Group{}, err } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index 4b89dad73..a533bf5e6 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -466,7 +466,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err // we asked for a group but didn't find it. let's check to see // if we wanted a numeric group if !found { - gid, err := strconv.Atoi(ag) + gid, err := strconv.ParseInt(ag, 10, 64) if err != nil { return nil, fmt.Errorf("Unable to find group %s", ag) } @@ -474,7 +474,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err if gid < minId || gid > maxId { return nil, ErrRange } - gidMap[gid] = struct{}{} + gidMap[int(gid)] = struct{}{} } } gids := []int{} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go index 40ccfaa1a..1b72b7a1c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -1,6 +1,7 @@ package utils import ( + "encoding/binary" "encoding/json" "io" "os" @@ -15,6 +16,20 @@ const ( exitSignalOffset = 128 ) +// NativeEndian is the native byte order of the host system. +var NativeEndian binary.ByteOrder + +func init() { + // Copied from <golang.org/x/net/internal/socket/sys.go>. + i := uint32(1) + b := (*[4]byte)(unsafe.Pointer(&i)) + if b[0] == 1 { + NativeEndian = binary.LittleEndian + } else { + NativeEndian = binary.BigEndian + } +} + // ResolveRootfs ensures that the current working directory is // not a symlink and returns the absolute path to the rootfs func ResolveRootfs(uncleanRootfs string) (string, error) { @@ -106,7 +121,3 @@ func Annotations(labels []string) (bundle string, userAnnotations map[string]str } return } - -func GetIntSize() int { - return int(unsafe.Sizeof(1)) -} |