summary refs log tree commit diff
path: root/vendor/github.com/opencontainers/runc/libcontainer
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go 10
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go 58
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go 5
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go 8
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/configs/config.go 1
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go 2
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go 93
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go 112
-rw-r--r-- vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go 68
9 files changed, 351 insertions, 6 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
index 7fff0627f..debfc1e48 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
@@ -6,6 +6,8 @@ import (
"fmt"
"io/ioutil"
"os"
+
+ "github.com/opencontainers/runc/libcontainer/utils"
)
// IsEnabled returns true if apparmor is enabled for the host.
@@ -19,7 +21,7 @@ func IsEnabled() bool {
return false
}
-func setprocattr(attr, value string) error {
+func setProcAttr(attr, value string) error {
// Under AppArmor you can only change your own attr, so use /proc/self/
// instead of /proc/<tid>/ like libapparmor does
path := fmt.Sprintf("/proc/self/attr/%s", attr)
@@ -30,6 +32,10 @@ func setprocattr(attr, value string) error {
}
defer f.Close()
+ if err := utils.EnsureProcHandle(f); err != nil {
+ return err
+ }
+
_, err = fmt.Fprintf(f, "%s", value)
return err
}
@@ -37,7 +43,7 @@ func setprocattr(attr, value string) error {
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
func changeOnExec(name string) error {
value := "exec " + name
- if err := setprocattr("exec", value); err != nil {
+ if err := setProcAttr("exec", value); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
index ec79ae767..60790f83b 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -11,6 +11,8 @@ import (
"path/filepath"
"strconv"
"strings"
+ "sync"
+ "syscall"
"time"
units "github.com/docker/go-units"
@@ -22,6 +24,11 @@ const (
CgroupProcesses = "cgroup.procs"
)
+var (
+ isUnifiedOnce sync.Once
+ isUnified bool
+)
+
// HugePageSizeUnitList is a list of the units used by the linux kernel when
// naming the HugePage control files.
// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
@@ -29,6 +36,18 @@ const (
// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393
var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
+// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
+func IsCgroup2UnifiedMode() bool {
+ isUnifiedOnce.Do(func() {
+ var st syscall.Statfs_t
+ if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
+ panic("cannot statfs cgroup root")
+ }
+ isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
+ })
+ return isUnified
+}
+
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
@@ -49,6 +68,10 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string,
}
defer f.Close()
+ if IsCgroup2UnifiedMode() {
+ subsystem = ""
+ }
+
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
@@ -57,12 +80,12 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Fields(txt)
- if len(fields) < 5 {
+ if len(fields) < 9 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
- if opt == subsystem {
+ if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
return fields[4], fields[3], nil
}
}
@@ -76,6 +99,19 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst
}
func isSubsystemAvailable(subsystem string) bool {
+ if IsCgroup2UnifiedMode() {
+ controllers, err := GetAllSubsystems()
+ if err != nil {
+ return false
+ }
+ for _, c := range controllers {
+ if c == subsystem {
+ return true
+ }
+ }
+ return false
+ }
+
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
@@ -120,7 +156,7 @@ func FindCgroupMountpointDir() (string, error) {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
- if postSeparatorFields[0] == "cgroup" {
+ if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
// Check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
@@ -193,6 +229,19 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
func GetCgroupMounts(all bool) ([]Mount, error) {
+ if IsCgroup2UnifiedMode() {
+ availableControllers, err := GetAllSubsystems()
+ if err != nil {
+ return nil, err
+ }
+ m := Mount{
+ Mountpoint: "/sys/fs/cgroup",
+ Root: "/sys/fs/cgroup",
+ Subsystems: availableControllers,
+ }
+ return []Mount{m}, nil
+ }
+
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
@@ -356,6 +405,9 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
+ if IsCgroup2UnifiedMode() {
+ return "/", nil
+ }
if p, ok := cgroups[subsystem]; ok {
return p, nil
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
index e0f3ca165..fa195bf90 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
@@ -59,3 +59,8 @@ func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
func (td *ThrottleDevice) String() string {
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
}
+
+// StringName formats the struct to be writable to the cgroup specific file
+func (td *ThrottleDevice) StringName(name string) string {
+ return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
index e15a662f5..58ed19c9e 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@@ -119,4 +119,12 @@ type Resources struct {
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`
+
+ // Used on cgroups v2:
+
+ // CpuWeight sets a proportional bandwidth limit.
+ CpuWeight uint64 `json:"cpu_weight"`
+
+ // CpuMax sets the maximum bandwidth limit (format: max period).
+ CpuMax string `json:"cpu_max"`
}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
index 7728522fe..24989e9f5 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -44,6 +44,7 @@ const (
Trap
Allow
Trace
+ Log
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
index 11c3faafb..e05e30adc 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
@@ -1,5 +1,5 @@
// +build linux
-// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le s390x
+// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x
package system
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
new file mode 100644
index 000000000..c8a9364d5
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@@ -0,0 +1,93 @@
+// +build linux
+
+package utils
+
+/*
+ * Copyright 2016, 2017 SUSE LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import (
+ "fmt"
+ "os"
+
+ "golang.org/x/sys/unix"
+)
+
+// MaxNameLen is the maximum length of the name of a file descriptor being
+// sent using SendFd. The name of the file handle returned by RecvFd will never
+// be larger than this value.
+const MaxNameLen = 4096
+
+// oobSpace is the size of the oob slice required to store a single FD. Note
+// that unix.UnixRights appears to make the assumption that fd is always int32,
+// so sizeof(fd) = 4.
+var oobSpace = unix.CmsgSpace(4)
+
+// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
+// socket. The file name of the remote file descriptor will be recreated
+// locally (it is sent as non-auxiliary data in the same payload).
+func RecvFd(socket *os.File) (*os.File, error) {
+ // For some reason, unix.Recvmsg uses the length rather than the capacity
+ // when passing the msg_controllen and other attributes to recvmsg. So we
+ // have to actually set the length.
+ name := make([]byte, MaxNameLen)
+ oob := make([]byte, oobSpace)
+
+ sockfd := socket.Fd()
+ n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
+ if err != nil {
+ return nil, err
+ }
+
+ if n >= MaxNameLen || oobn != oobSpace {
+ return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
+ }
+
+ // Truncate.
+ name = name[:n]
+ oob = oob[:oobn]
+
+ scms, err := unix.ParseSocketControlMessage(oob)
+ if err != nil {
+ return nil, err
+ }
+ if len(scms) != 1 {
+ return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
+ }
+ scm := scms[0]
+
+ fds, err := unix.ParseUnixRights(&scm)
+ if err != nil {
+ return nil, err
+ }
+ if len(fds) != 1 {
+ return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
+ }
+ fd := uintptr(fds[0])
+
+ return os.NewFile(fd, string(name)), nil
+}
+
+// SendFd sends a file descriptor over the given AF_UNIX socket. In
+// addition, the given name will also be sent as non-auxiliary data in
+// the same payload (allowing contextual information to be sent along
+// with the file descriptor).
+func SendFd(socket *os.File, name string, fd uintptr) error {
+ if len(name) >= MaxNameLen {
+ return fmt.Errorf("sendfd: filename too long: %s", name)
+ }
+ oob := unix.UnixRights(int(fd))
+ return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
new file mode 100644
index 000000000..40ccfaa1a
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@@ -0,0 +1,112 @@
+package utils
+
+import (
+ "encoding/json"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+const (
+ exitSignalOffset = 128
+)
+
+// ResolveRootfs converts the given rootfs path to an absolute path and
+// returns it with all symlinks resolved.
+func ResolveRootfs(uncleanRootfs string) (string, error) {
+ rootfs, err := filepath.Abs(uncleanRootfs)
+ if err != nil {
+ return "", err
+ }
+ return filepath.EvalSymlinks(rootfs)
+}
+
+// ExitStatus returns the correct exit status for a process based on if it
+// was signaled or exited cleanly
+func ExitStatus(status unix.WaitStatus) int {
+ if status.Signaled() {
+ return exitSignalOffset + int(status.Signal())
+ }
+ return status.ExitStatus()
+}
+
+// WriteJSON writes the provided struct v to w using standard json marshaling
+func WriteJSON(w io.Writer, v interface{}) error {
+ data, err := json.Marshal(v)
+ if err != nil {
+ return err
+ }
+ _, err = w.Write(data)
+ return err
+}
+
+// CleanPath makes a path safe for use with filepath.Join. This is done by not
+// only cleaning the path, but also (if the path is relative) adding a leading
+// '/' and cleaning it (then removing the leading '/'). This ensures that a
+// path resulting from prepending another path will always resolve to lexically
+// be a subdirectory of the prefixed path. This is all done lexically, so paths
+// that include symlinks won't be safe as a result of using CleanPath.
+func CleanPath(path string) string {
+ // Deal with empty strings nicely.
+ if path == "" {
+ return ""
+ }
+
+ // Ensure that all paths are cleaned (especially problematic ones like
+ // "/../../../../../" which can cause lots of issues).
+ path = filepath.Clean(path)
+
+ // If the path isn't absolute, we need to do more processing to fix paths
+ // such as "../../../../<etc>/some/path". We also shouldn't convert absolute
+ // paths to relative ones.
+ if !filepath.IsAbs(path) {
+ path = filepath.Clean(string(os.PathSeparator) + path)
+ // This can't fail, as (by definition) all paths are relative to root.
+ path, _ = filepath.Rel(string(os.PathSeparator), path)
+ }
+
+ // Clean the path again for good measure.
+ return filepath.Clean(path)
+}
+
+// SearchLabels searches a list of key-value pairs for the provided key and
+// returns the corresponding value. The pairs must be separated with '='.
+func SearchLabels(labels []string, query string) string {
+ for _, l := range labels {
+ parts := strings.SplitN(l, "=", 2)
+ if len(parts) < 2 {
+ continue
+ }
+ if parts[0] == query {
+ return parts[1]
+ }
+ }
+ return ""
+}
+
+// Annotations returns the bundle path and user defined annotations from the
+// libcontainer state. We need to remove the bundle because that is a label
+// added by libcontainer.
+func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
+ userAnnotations = make(map[string]string)
+ for _, l := range labels {
+ parts := strings.SplitN(l, "=", 2)
+ if len(parts) < 2 {
+ continue
+ }
+ if parts[0] == "bundle" {
+ bundle = parts[1]
+ } else {
+ userAnnotations[parts[0]] = parts[1]
+ }
+ }
+ return
+}
+
+func GetIntSize() int {
+ return int(unsafe.Sizeof(1))
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
new file mode 100644
index 000000000..1576f2d4a
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
@@ -0,0 +1,68 @@
+// +build !windows
+
+package utils
+
+import (
+ "fmt"
+ "os"
+ "strconv"
+
+ "golang.org/x/sys/unix"
+)
+
+// EnsureProcHandle returns an error unless the given file handle is on procfs.
+func EnsureProcHandle(fh *os.File) error {
+ var buf unix.Statfs_t
+ if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
+ return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err)
+ }
+ if buf.Type != unix.PROC_SUPER_MAGIC {
+ return fmt.Errorf("%s is not on procfs", fh.Name())
+ }
+ return nil
+}
+
+// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
+// the process (except for those below the given fd value).
+func CloseExecFrom(minFd int) error {
+ fdDir, err := os.Open("/proc/self/fd")
+ if err != nil {
+ return err
+ }
+ defer fdDir.Close()
+
+ if err := EnsureProcHandle(fdDir); err != nil {
+ return err
+ }
+
+ fdList, err := fdDir.Readdirnames(-1)
+ if err != nil {
+ return err
+ }
+ for _, fdStr := range fdList {
+ fd, err := strconv.Atoi(fdStr)
+ // Ignore non-numeric file names.
+ if err != nil {
+ continue
+ }
+ // Ignore descriptors lower than our specified minimum.
+ if fd < minFd {
+ continue
+ }
+ // Intentionally ignore errors from unix.CloseOnExec -- the cases where
+ // this might fail are basically file descriptors that have already
+ // been closed (including and especially the one that was created when
+ // ioutil.ReadDir did the "opendir" syscall).
+ unix.CloseOnExec(fd)
+ }
+ return nil
+}
+
+// NewSockPair returns a new unix socket pair
+func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
+ fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
+ if err != nil {
+ return nil, nil, err
+ }
+ return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
+}