diff options
37 files changed, 780 insertions, 171 deletions
@@ -47,7 +47,7 @@ require ( github.com/onsi/gomega v1.17.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 - github.com/opencontainers/runc v1.0.3 + github.com/opencontainers/runc v1.1.0 github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f github.com/opencontainers/selinux v1.10.0 @@ -204,6 +204,7 @@ github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLI github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= +github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= @@ -240,6 +241,7 @@ github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ= +github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= github.com/containerd/containerd v1.2.10/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= github.com/containerd/containerd v1.3.0-beta.2.0.20190828155532-0293cbd26c69/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= github.com/containerd/containerd v1.3.0/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= @@ -987,8 +989,9 @@ github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= -github.com/opencontainers/runc v1.0.3 h1:1hbqejyQWCJBvtKAfdO0b1FmaEf2z/bxnjqbARass5k= github.com/opencontainers/runc v1.0.3/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= +github.com/opencontainers/runc v1.1.0 h1:O9+X96OcDjkmmZyfaG996kV7yq8HsoU2h1XRRQcefG8= +github.com/opencontainers/runc v1.1.0/go.mod h1:Tj1hFw6eFWp/o33uxGf5yF2BX5yz2Z6iptFpuvbbKqc= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= @@ -1114,8 +1117,9 @@ github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= -github.com/seccomp/libseccomp-golang v0.9.2-0.20200616122406-847368b35ebf h1:b0+ZBD3rohnkQ4q5duD1+RyTXTg9yk+qTOPMSQtapO0= github.com/seccomp/libseccomp-golang v0.9.2-0.20200616122406-847368b35ebf/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= +github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y= +github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/securego/gosec/v2 v2.9.1/go.mod h1:oDcDLcatOJxkCGaCaq8lua1jTnYf6Sou4wdiJ1n4iHc= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c/go.mod h1:/PevMnwAxekIXwN8qQyfc5gl2NlkB3CQlkizAbOkeBs= @@ -1582,6 +1586,7 @@ golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210817190340-bfb29a6856f2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210820121016-41cdb8703e55/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210915083310-ed5796bab164/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210917161153-d61c044b1678/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1590,6 +1595,7 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211107104306-e0b2ad06fe42/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211214234402-4825e8c3871d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go index 744d4e570..8b1483c7d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go @@ -3,7 +3,6 @@ package apparmor import ( "errors" "fmt" - "io/ioutil" "os" "sync" @@ -19,7 +18,7 @@ var ( func isEnabled() bool { checkAppArmor.Do(func() { if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { - buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") + buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled") appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y' } }) @@ -52,7 +51,7 @@ func setProcAttr(attr, value string) error { // changeOnExec reimplements aa_change_onexec from libapparmor in Go func changeOnExec(name string) error { if err := setProcAttr("exec", "exec "+name); err != nil { - return fmt.Errorf("apparmor failed to apply profile: %s", err) + return fmt.Errorf("apparmor failed to apply profile: %w", err) } return nil } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go index 1adadafec..684248f25 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package apparmor diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go index 68a346ca5..ba2b2266c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -1,5 +1,3 @@ -// +build linux - package cgroups import ( diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go deleted file mode 100644 index 278d507e2..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux - -package cgroups diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go index 5f6ab9fd6..0cdaf7478 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go @@ -2,20 +2,27 @@ package cgroups import ( "bytes" + "errors" + "fmt" "os" + "path" + "strconv" "strings" "sync" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) // OpenFile opens a cgroup file in a given dir with given flags. -// It is supposed to be used for cgroup files only. +// It is supposed to be used for cgroup files only, and returns +// an error if the file is not a cgroup file. +// +// Arguments dir and file are joined together to form an absolute path +// to a file being opened. func OpenFile(dir, file string, flags int) (*os.File, error) { if dir == "" { - return nil, errors.Errorf("no directory specified for %s", file) + return nil, fmt.Errorf("no directory specified for %s", file) } return openFile(dir, file, flags) } @@ -43,7 +50,8 @@ func WriteFile(dir, file, data string) error { } defer fd.Close() if err := retryingWriteFile(fd, data); err != nil { - return errors.Wrapf(err, "failed to write %q", data) + // Having data in the error message helps in debugging. + return fmt.Errorf("failed to write %q: %w", data, err) } return nil } @@ -81,7 +89,7 @@ func prepareOpenat2() error { }) if err != nil { prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} - if err != unix.ENOSYS { + if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare logrus.Warnf("falling back to securejoin: %s", prepErr) } else { logrus.Debug("openat2 not available, falling back to securejoin") @@ -107,8 +115,6 @@ func prepareOpenat2() error { return prepErr } -// OpenFile opens a cgroup file in a given dir with given flags. -// It is supposed to be used for cgroup files only. func openFile(dir, file string, flags int) (*os.File, error) { mode := os.FileMode(0) if TestMode && flags&os.O_WRONLY != 0 { @@ -116,34 +122,52 @@ func openFile(dir, file string, flags int) (*os.File, error) { flags |= os.O_TRUNC | os.O_CREATE mode = 0o600 } + path := path.Join(dir, file) if prepareOpenat2() != nil { - return openFallback(dir, file, flags, mode) + return openFallback(path, flags, mode) } - reldir := strings.TrimPrefix(dir, cgroupfsPrefix) - if len(reldir) == len(dir) { // non-standard path, old system? - return openFallback(dir, file, flags, mode) + relPath := strings.TrimPrefix(path, cgroupfsPrefix) + if len(relPath) == len(path) { // non-standard path, old system? + return openFallback(path, flags, mode) } - relname := reldir + "/" + file - fd, err := unix.Openat2(cgroupFd, relname, + fd, err := unix.Openat2(cgroupFd, relPath, &unix.OpenHow{ Resolve: resolveFlags, Flags: uint64(flags) | unix.O_CLOEXEC, Mode: uint64(mode), }) if err != nil { - return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err} + err = &os.PathError{Op: "openat2", Path: path, Err: err} + // Check if cgroupFd is still opened to cgroupfsDir + // (happens when this package is incorrectly used + // across the chroot/pivot_root/mntns boundary, or + // when /sys/fs/cgroup is remounted). + // + // TODO: if such usage will ever be common, amend this + // to reopen cgroupFd and retry openat2. + fdStr := strconv.Itoa(cgroupFd) + fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) + if fdDest != cgroupfsDir { + // Wrap the error so it is clear that cgroupFd + // is opened to an unexpected/wrong directory. + err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w", + fdStr, fdDest, cgroupfsDir, err) + } + return nil, err } - return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil + return os.NewFile(uintptr(fd), path), nil } var errNotCgroupfs = errors.New("not a cgroup file") -// openFallback is used when openat2(2) is not available. It checks the opened +// Can be changed by unit tests. +var openFallback = openAndCheck + +// openAndCheck is used when openat2(2) is not available. It checks the opened // file is on cgroupfs, returning an error otherwise. -func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) { - path := dir + "/" + file +func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) { fd, err := os.OpenFile(path, flags, mode) if err != nil { return nil, err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go new file mode 100644 index 000000000..1355a5101 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go @@ -0,0 +1,27 @@ +package cgroups + +import ( + "io/fs" + "path/filepath" +) + +// GetAllPids returns all pids from the cgroup identified by path, and all its +// sub-cgroups. +func GetAllPids(path string) ([]int, error) { + var pids []int + err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error { + if iErr != nil { + return iErr + } + if !d.IsDir() { + return nil + } + cPids, err := readProcsFile(p) + if err != nil { + return err + } + pids = append(pids, cPids...) + return nil + }) + return pids, err +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go index e7f9c4626..40a81dd5a 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -1,5 +1,3 @@ -// +build linux - package cgroups type ThrottlingData struct { @@ -126,7 +124,7 @@ type BlkioStatEntry struct { } type BlkioStats struct { - // number of bytes tranferred to and from the block device + // number of bytes transferred to and from the block device IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` @@ -146,6 +144,17 @@ type HugetlbStats struct { Failcnt uint64 `json:"failcnt"` } +type RdmaEntry struct { + Device string `json:"device,omitempty"` + HcaHandles uint32 `json:"hca_handles,omitempty"` + HcaObjects uint32 `json:"hca_objects,omitempty"` +} + +type RdmaStats struct { + RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"` + RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` +} + type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` @@ -154,6 +163,7 @@ type Stats struct { BlkioStats BlkioStats `json:"blkio_stats,omitempty"` // the map is in the format "size of hugepage: stats of the hugepage" HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` + RdmaStats RdmaStats `json:"rdma_stats,omitempty"` } func NewStats() *Stats { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 92606525b..13ebf52ab 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -1,5 +1,3 @@ -// +build linux - package cgroups import ( @@ -7,7 +5,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "path/filepath" "strconv" @@ -23,11 +20,14 @@ import ( const ( CgroupProcesses = "cgroup.procs" unifiedMountpoint = "/sys/fs/cgroup" + hybridMountpoint = "/sys/fs/cgroup/unified" ) var ( isUnifiedOnce sync.Once isUnified bool + isHybridOnce sync.Once + isHybrid bool ) // IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. @@ -49,6 +49,24 @@ func IsCgroup2UnifiedMode() bool { return isUnified } +// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode. +func IsCgroup2HybridMode() bool { + isHybridOnce.Do(func() { + var st unix.Statfs_t + err := unix.Statfs(hybridMountpoint, &st) + if err != nil { + if os.IsNotExist(err) { + // ignore the "not found" error + isHybrid = false + return + } + panic(fmt.Sprintf("cannot statfs cgroup root: %s", err)) + } + isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC + }) + return isHybrid +} + type Mount struct { Mountpoint string Root string @@ -118,8 +136,8 @@ func GetAllSubsystems() ([]string, error) { return subsystems, nil } -func readProcsFile(file string) ([]int, error) { - f, err := os.Open(file) +func readProcsFile(dir string) ([]int, error) { + f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY) if err != nil { return nil, err } @@ -210,7 +228,7 @@ func EnterPid(cgroupPaths map[string]string, pid int) error { func rmdir(path string) error { err := unix.Rmdir(path) - if err == nil || err == unix.ENOENT { + if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare return nil } return &os.PathError{Op: "rmdir", Path: path, Err: err} @@ -224,7 +242,7 @@ func RemovePath(path string) error { return nil } - infos, err := ioutil.ReadDir(path) + infos, err := os.ReadDir(path) if err != nil { if os.IsNotExist(err) { err = nil @@ -284,40 +302,61 @@ func RemovePaths(paths map[string]string) (err error) { return fmt.Errorf("Failed to remove paths: %v", paths) } -func GetHugePageSize() ([]string, error) { - dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return nil, err - } - files, err := dir.Readdirnames(0) - dir.Close() - if err != nil { - return nil, err - } +var ( + hugePageSizes []string + initHPSOnce sync.Once +) - return getHugePageSizeFromFilenames(files) +func HugePageSizes() []string { + initHPSOnce.Do(func() { + dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return + } + files, err := dir.Readdirnames(0) + dir.Close() + if err != nil { + return + } + + hugePageSizes, err = getHugePageSizeFromFilenames(files) + if err != nil { + logrus.Warn("HugePageSizes: ", err) + } + }) + + return hugePageSizes } func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { pageSizes := make([]string, 0, len(fileNames)) + var warn error for _, file := range fileNames { // example: hugepages-1048576kB val := strings.TrimPrefix(file, "hugepages-") if len(val) == len(file) { - // unexpected file name: no prefix found + // Unexpected file name: no prefix found, ignore it. continue } - // The suffix is always "kB" (as of Linux 5.9) + // The suffix is always "kB" (as of Linux 5.13). If we find + // something else, produce an error but keep going. eLen := len(val) - 2 val = strings.TrimSuffix(val, "kB") if len(val) != eLen { - logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file) + // Highly unlikely. + if warn == nil { + warn = errors.New(file + `: invalid suffix (expected "kB")`) + } continue } size, err := strconv.Atoi(val) if err != nil { - return nil, err + // Highly unlikely. + if warn == nil { + warn = fmt.Errorf("%s: %w", file, err) + } + continue } // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 // but in our case the size is in KB already. @@ -331,34 +370,12 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { pageSizes = append(pageSizes, val) } - return pageSizes, nil + return pageSizes, warn } // GetPids returns all pids, that were added to cgroup at path. func GetPids(dir string) ([]int, error) { - return readProcsFile(filepath.Join(dir, CgroupProcesses)) -} - -// GetAllPids returns all pids, that were added to cgroup at path and to all its -// subcgroups. -func GetAllPids(path string) ([]int, error) { - var pids []int - // collect pids from all sub-cgroups - err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error { - if iErr != nil { - return iErr - } - if info.IsDir() || info.Name() != CgroupProcesses { - return nil - } - cPids, err := readProcsFile(p) - if err != nil { - return err - } - pids = append(pids, cPids...) - return nil - }) - return pids, err + return readProcsFile(dir) } // WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file @@ -376,7 +393,7 @@ func WriteCgroupProc(dir string, pid int) error { file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) if err != nil { - return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + return fmt.Errorf("failed to write %v: %w", pid, err) } defer file.Close() @@ -393,7 +410,7 @@ func WriteCgroupProc(dir string, pid int) error { continue } - return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + return fmt.Errorf("failed to write %v: %w", pid, err) } return err } @@ -446,5 +463,5 @@ func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 { if blkIoWeight == 0 { return 0 } - return uint64(1 + (uint64(blkIoWeight)-10)*9999/990) + return 1 + (uint64(blkIoWeight)-10)*9999/990 } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go index 95ec9dff0..47c75f22b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go @@ -46,11 +46,8 @@ func NewNotFoundError(sub string) error { } func IsNotFound(err error) bool { - if err == nil { - return false - } - _, ok := err.(*NotFoundError) - return ok + var nfErr *NotFoundError + return errors.As(err, &nfErr) } func tryDefaultPath(cgroupPath, subsystem string) string { @@ -116,6 +113,11 @@ func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { return "", errUnified } + // If subsystem is empty, we look for the cgroupv2 hybrid path. + if len(subsystem) == 0 { + return hybridMountpoint, nil + } + // Avoid parsing mountinfo by trying the default path first, if possible. if path := tryDefaultPath(cgroupPath, subsystem); path != "" { return path, nil @@ -154,7 +156,7 @@ func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, sub func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { if len(m.Subsystems) == 0 { - return "", fmt.Errorf("no subsystem for mount") + return "", errors.New("no subsystem for mount") } return getControllerPath(m.Subsystems[0], cgroups) @@ -226,6 +228,11 @@ func GetOwnCgroupPath(subsystem string) (string, error) { return "", err } + // If subsystem is empty, we look for the cgroupv2 hybrid path. + if len(subsystem) == 0 { + return hybridMountpoint, nil + } + return getCgroupPathHelper(subsystem, cgroup) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go index 5ea9d940c..2d4a89871 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go @@ -28,17 +28,26 @@ type Cgroup struct { // ScopePrefix describes prefix for the scope name ScopePrefix string `json:"scope_prefix"` - // Paths represent the absolute cgroups paths to join. - // This takes precedence over Path. - Paths map[string]string - // Resources contains various cgroups settings to apply *Resources + // Systemd tells if systemd should be used to manage cgroups. + Systemd bool + // SystemdProps are any additional properties for systemd, // derived from org.systemd.property.xxx annotations. // Ignored unless systemd is used for managing cgroups. SystemdProps []systemdDbus.Property `json:"-"` + + // Rootless tells if rootless cgroups should be used. + Rootless bool + + // The host UID that should own the cgroup, or nil to accept + // the default ownership. This should only be set when the + // cgroupfs is to be mounted read/write. + // Not all cgroup manager implementations support changing + // the ownership. + OwnerUID *int `json:"owner_uid,omitempty"` } type Resources struct { @@ -117,6 +126,9 @@ type Resources struct { // Set class identifier for container's network packets NetClsClassid uint32 `json:"net_cls_classid_u"` + // Rdma resource restriction configuration + Rdma map[string]LinuxRdma `json:"rdma"` + // Used on cgroups v2: // CpuWeight sets a proportional bandwidth limit. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go index 2a519f582..7e383020f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index 4281593f0..c1b4a0041 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -7,10 +7,10 @@ import ( "os/exec" "time" + "github.com/sirupsen/logrus" + "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" ) type Rlimit struct { @@ -31,10 +31,12 @@ type IDMap struct { // for syscalls. Additional architectures can be added by specifying them in // Architectures. type Seccomp struct { - DefaultAction Action `json:"default_action"` - Architectures []string `json:"architectures"` - Syscalls []*Syscall `json:"syscalls"` - DefaultErrnoRet *uint `json:"default_errno_ret"` + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Syscalls []*Syscall `json:"syscalls"` + DefaultErrnoRet *uint `json:"default_errno_ret"` + ListenerPath string `json:"listener_path,omitempty"` + ListenerMetadata string `json:"listener_metadata,omitempty"` } // Action is taken upon rule match in Seccomp @@ -47,6 +49,9 @@ const ( Allow Trace Log + Notify + KillThread + KillProcess ) // Operator is a comparison operator to be used when matching syscall arguments in Seccomp @@ -246,6 +251,19 @@ const ( Poststop HookName = "poststop" ) +// KnownHookNames returns the known hook names. +// Used by `runc features`. +func KnownHookNames() []string { + return []string{ + string(Prestart), // deprecated + string(CreateRuntime), + string(CreateContainer), + string(StartContainer), + string(Poststart), + string(Poststop), + } +} + type Capabilities struct { // Bounding is the set of capabilities checked by the kernel. Bounding []string @@ -262,7 +280,7 @@ type Capabilities struct { func (hooks HookList) RunHooks(state *specs.State) error { for i, h := range hooks { if err := h.Run(state); err != nil { - return errors.Wrapf(err, "Running hook #%d:", i) + return fmt.Errorf("error running hook #%d: %w", i, err) } } @@ -375,7 +393,7 @@ func (c Command) Run(s *specs.State) error { go func() { err := cmd.Wait() if err != nil { - err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) + err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) } errC <- err }() diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go index 07da10804..8c02848b7 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go @@ -1,17 +1,24 @@ package configs -import "fmt" +import "errors" + +var ( + errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.") + errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.") + errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.") + errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.") +) // HostUID gets the translated uid for the process on host which could be // different when user namespaces are enabled. func (c Config) HostUID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") + return -1, errNoUIDMap } id, found := c.hostIDFromMapping(containerId, c.UidMappings) if !found { - return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") + return -1, errNoUserMap } return id, nil } @@ -30,11 +37,11 @@ func (c Config) HostRootUID() (int, error) { func (c Config) HostGID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + return -1, errNoGIDMap } id, found := c.hostIDFromMapping(containerId, c.GidMappings) if !found { - return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") + return -1, errNoGroupMap } return id, nil } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go index 93bf41c8d..bce829e29 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go @@ -1,3 +1,4 @@ +//go:build gofuzz // +build gofuzz package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go index 57e9f037d..f8d951ab8 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go @@ -1,6 +1,9 @@ package configs type IntelRdt struct { + // The identity for RDT Class of Service + ClosID string `json:"closID,omitempty"` + // The schema for L3 cache id and capacity bitmask (CBM) // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." L3CacheSchema string `json:"l3_cache_schema,omitempty"` diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go index a75ff10ec..784c61820 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -1,5 +1,7 @@ package configs +import "golang.org/x/sys/unix" + const ( // EXT_COPYUP is a directive to copy up the contents of a directory when // a tmpfs is mounted over it. @@ -28,6 +30,9 @@ type Mount struct { // Relabel source if set, "z" indicates shared, "Z" indicates unshared. Relabel string `json:"relabel"` + // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). + RecAttr *unix.MountAttr `json:"rec_attr"` + // Extensions are additional flags that are specific to runc. Extensions int `json:"extensions"` @@ -37,3 +42,7 @@ type Mount struct { // Optional Command to be run after Source is mounted. PostmountCmds []Command `json:"postmount_cmds"` } + +func (m *Mount) IsBind() bool { + return m.Flags&unix.MS_BIND != 0 +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go index 2dc7adfc9..0516dba8d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go index 5d9a5c81f..fbb0d4907 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -1,3 +1,4 @@ +//go:build !linux && !windows // +build !linux,!windows package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go index cc76e2f58..946db30a5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go new file mode 100644 index 000000000..c69f2c802 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go @@ -0,0 +1,9 @@ +package configs + +// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11) +type LinuxRdma struct { + // Maximum number of HCA handles that can be opened. Default is "no limit". + HcaHandles *uint32 `json:"hca_handles,omitempty"` + // Maximum number of HCA objects that can be created. Default is "no limit". + HcaObjects *uint32 `json:"hca_objects,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go index 6d5b3d09d..7d8e9fc31 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go @@ -1,10 +1,10 @@ +//go:build !windows // +build !windows package devices import ( "errors" - "io/ioutil" "os" "path/filepath" @@ -16,8 +16,8 @@ var ErrNotADevice = errors.New("not a device node") // Testing dependencies var ( - unixLstat = unix.Lstat - ioutilReadDir = ioutil.ReadDir + unixLstat = unix.Lstat + osReadDir = os.ReadDir ) func mkDev(d *Rule) (uint64, error) { @@ -40,7 +40,7 @@ func DeviceFromPath(path, permissions string) (*Device, error) { var ( devType Type mode = stat.Mode - devNumber = uint64(stat.Rdev) + devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS. major = unix.Major(devNumber) minor = unix.Minor(devNumber) ) @@ -76,7 +76,7 @@ func HostDevices() ([]*Device, error) { // GetDevices recursively traverses a directory specified by path // and returns all devices found there. func GetDevices(path string) ([]*Device, error) { - files, err := ioutilReadDir(path) + files, err := osReadDir(path) if err != nil { return nil, err } @@ -103,7 +103,7 @@ func GetDevices(path string) ([]*Device, error) { } device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { - if err == ErrNotADevice { + if errors.Is(err, ErrNotADevice) { continue } if os.IsNotExist(err) { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index 967717a1b..f95c1409f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -1,3 +1,4 @@ +//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris // +build darwin dragonfly freebsd linux netbsd openbsd solaris package user diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index cc7a106be..2473c5ead 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -120,7 +120,7 @@ func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { if r == nil { - return nil, fmt.Errorf("nil source for passwd-formatted data") + return nil, errors.New("nil source for passwd-formatted data") } var ( @@ -178,7 +178,7 @@ func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { if r == nil { - return nil, fmt.Errorf("nil source for group-formatted data") + return nil, errors.New("nil source for group-formatted data") } rd := bufio.NewReader(r) out := []Group{} @@ -339,7 +339,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( if userArg == "" { userArg = strconv.Itoa(user.Uid) } - return nil, fmt.Errorf("unable to find user %s: %v", userArg, err) + return nil, fmt.Errorf("unable to find user %s: %w", userArg, err) } var matchedUserName string @@ -355,7 +355,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( if uidErr != nil { // Not numeric. - return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries) + return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries) } user.Uid = uidArg @@ -390,7 +390,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( return g.Name == groupArg }) if err != nil && group != nil { - return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err) + return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err) } // Only start modifying user.Gid if it is in explicit form. @@ -404,7 +404,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( if gidErr != nil { // Not numeric. - return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries) + return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries) } user.Gid = gidArg @@ -445,7 +445,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err return false }) if err != nil { - return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) + return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err) } } @@ -468,7 +468,8 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err if !found { gid, err := strconv.ParseInt(ag, 10, 64) if err != nil { - return nil, fmt.Errorf("Unable to find group %s", ag) + // Not a numeric ID either. + return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries) } // Ensure gid is inside gid range. if gid < minID || gid > maxID { @@ -521,7 +522,7 @@ func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { if r == nil { - return nil, fmt.Errorf("nil source for subid-formatted data") + return nil, errors.New("nil source for subid-formatted data") } var ( @@ -574,7 +575,7 @@ func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) { if r == nil { - return nil, fmt.Errorf("nil source for idmap-formatted data") + return nil, errors.New("nil source for idmap-formatted data") } var ( diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go index 8c9bb5df3..e018eae61 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go @@ -1,3 +1,4 @@ +//go:build gofuzz // +build gofuzz package user diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go index 529f8eaea..1e00ab8b5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go @@ -1,3 +1,4 @@ +//go:build gofuzz // +build gofuzz package userns diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go index f45bb0c31..f35c13a10 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package userns diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go index c8a9364d5..7ef9da21f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -1,5 +1,3 @@ -// +build linux - package utils /* @@ -88,6 +86,11 @@ func SendFd(socket *os.File, name string, fd uintptr) error { if len(name) >= MaxNameLen { return fmt.Errorf("sendfd: filename too long: %s", name) } - oob := unix.UnixRights(int(fd)) - return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) + return SendFds(socket, []byte(name), int(fd)) +} + +// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket. +func SendFds(socket *os.File, msg []byte, fds ...int) error { + oob := unix.UnixRights(fds...) + return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go index cd78f23e1..6b9fc3435 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -11,7 +11,7 @@ import ( "strings" "unsafe" - "github.com/cyphar/filepath-securejoin" + securejoin "github.com/cyphar/filepath-securejoin" "golang.org/x/sys/unix" ) @@ -33,16 +33,6 @@ func init() { } } -// ResolveRootfs ensures that the current working directory is -// not a symlink and returns the absolute path to the rootfs -func ResolveRootfs(uncleanRootfs string) (string, error) { - rootfs, err := filepath.Abs(uncleanRootfs) - if err != nil { - return "", err - } - return filepath.EvalSymlinks(rootfs) -} - // ExitStatus returns the correct exit status for a process based on if it // was signaled or exited cleanly func ExitStatus(status unix.WaitStatus) int { @@ -120,7 +110,7 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { unsafePath = stripRoot(root, unsafePath) path, err := securejoin.SecureJoin(root, unsafePath) if err != nil { - return fmt.Errorf("resolving path inside rootfs failed: %v", err) + return fmt.Errorf("resolving path inside rootfs failed: %w", err) } // Open the target path. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go index 1576f2d4a..220d0b439 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -1,3 +1,4 @@ +//go:build !windows // +build !windows package utils @@ -14,7 +15,7 @@ import ( func EnsureProcHandle(fh *os.File) error { var buf unix.Statfs_t if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil { - return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err) + return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err) } if buf.Type != unix.PROC_SUPER_MAGIC { return fmt.Errorf("%s is not on procfs", fh.Name()) @@ -52,7 +53,7 @@ func CloseExecFrom(minFd int) error { // Intentionally ignore errors from unix.CloseOnExec -- the cases where // this might fail are basically file descriptors that have already // been closed (including and especially the one that was created when - // ioutil.ReadDir did the "opendir" syscall). + // os.ReadDir did the "opendir" syscall). unix.CloseOnExec(fd) } return nil diff --git a/vendor/github.com/seccomp/libseccomp-golang/.travis.yml b/vendor/github.com/seccomp/libseccomp-golang/.travis.yml index feef144d1..5240d4622 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/.travis.yml +++ b/vendor/github.com/seccomp/libseccomp-golang/.travis.yml @@ -19,19 +19,39 @@ os: language: go +jobs: + include: + - name: "last libseccomp 2.5.0" + env: + - SECCOMP_VER=2.5.0 + - SECCOMP_SHA256SUM=1ffa7038d2720ad191919816db3479295a4bcca1ec14e02f672539f4983014f3 + - name: "compat libseccomp 2.4.4" + env: + - SECCOMP_VER=2.4.4 + - SECCOMP_SHA256SUM=4e79738d1ef3c9b7ca9769f1f8b8d84fc17143c2c1c432e53b9c64787e0ff3eb + - name: "compat libseccomp 2.2.1" + env: + - SECCOMP_VER=2.2.1 + - SECCOMP_SHA256SUM=0ba1789f54786c644af54cdffc9fd0dd0a8bb2b2ee153933f658855d2851a740 + addons: apt: packages: - build-essential - # TODO: use the main libseccomp git repo instead of a distro package - - libseccomp2 - - libseccomp-dev + - astyle + - golint + - gperf install: - go get -u golang.org/x/lint/golint # run all of the tests independently, fail if any of the tests error script: + - wget https://github.com/seccomp/libseccomp/releases/download/v$SECCOMP_VER/libseccomp-$SECCOMP_VER.tar.gz + - echo $SECCOMP_SHA256SUM libseccomp-$SECCOMP_VER.tar.gz | sha256sum -c + - tar xf libseccomp-$SECCOMP_VER.tar.gz + - pushd libseccomp-$SECCOMP_VER && ./configure --prefix=/opt/libseccomp-$SECCOMP_VER && make && sudo make install && popd - make check-syntax - make lint - - make check + - PKG_CONFIG_PATH=/opt/libseccomp-$SECCOMP_VER/lib/pkgconfig LD_LIBRARY_PATH=/opt/libseccomp-$SECCOMP_VER/lib make vet + - PKG_CONFIG_PATH=/opt/libseccomp-$SECCOMP_VER/lib/pkgconfig LD_LIBRARY_PATH=/opt/libseccomp-$SECCOMP_VER/lib make test diff --git a/vendor/github.com/seccomp/libseccomp-golang/Makefile b/vendor/github.com/seccomp/libseccomp-golang/Makefile index 1ff4cc898..38cfa852c 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/Makefile +++ b/vendor/github.com/seccomp/libseccomp-golang/Makefile @@ -18,8 +18,14 @@ fix-syntax: vet: go vet -v +# Previous bugs have made the tests freeze until the timeout. Golang default +# timeout for tests is 10 minutes, which is too long, considering current tests +# can be executed in less than 1 second. Reduce the timeout, so problems can +# be noticed earlier in the CI. +TEST_TIMEOUT=10s + test: - go test -v + go test -v -timeout $(TEST_TIMEOUT) lint: @$(if $(shell which golint),true,$(error "install golint and include it in your PATH")) diff --git a/vendor/github.com/seccomp/libseccomp-golang/README.md b/vendor/github.com/seccomp/libseccomp-golang/README.md index 27423f2d9..806a5ddf2 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/README.md +++ b/vendor/github.com/seccomp/libseccomp-golang/README.md @@ -2,7 +2,7 @@ =============================================================================== https://github.com/seccomp/libseccomp-golang -[![Build Status](https://img.shields.io/travis/seccomp/libseccomp-golang/master.svg)](https://travis-ci.org/seccomp/libseccomp-golang) +[![Build Status](https://img.shields.io/travis/seccomp/libseccomp-golang/main.svg)](https://travis-ci.org/seccomp/libseccomp-golang) The libseccomp library provides an easy to use, platform independent, interface to the Linux Kernel's syscall filtering mechanism. The libseccomp API is diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go index e489b9ebd..e9b92e221 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go @@ -20,6 +20,13 @@ import ( // C wrapping code +// To compile libseccomp-golang against a specific version of libseccomp: +// cd ../libseccomp && mkdir -p prefix +// ./configure --prefix=$PWD/prefix && make && make install +// cd ../libseccomp-golang +// PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make +// LD_PRELOAD=$PWD/../libseccomp/prefix/lib/libseccomp.so.2.5.0 PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make test + // #cgo pkg-config: libseccomp // #include <stdlib.h> // #include <seccomp.h> @@ -34,19 +41,25 @@ type VersionError struct { minimum string } +func init() { + // This forces the cgo libseccomp to initialize its internal API support state, + // which is necessary on older versions of libseccomp in order to work + // correctly. + GetAPI() +} + func (e VersionError) Error() string { - format := "Libseccomp version too low: " + messageStr := "" if e.message != "" { - format += e.message + ": " + messageStr = e.message + ": " } - format += "minimum supported is " + minimumStr := "" if e.minimum != "" { - format += e.minimum + ": " + minimumStr = e.minimum } else { - format += "2.2.0: " + minimumStr = "2.2.0" } - format += "detected %d.%d.%d" - return fmt.Sprintf(format, verMajor, verMinor, verMicro) + return fmt.Sprintf("Libseccomp version too low: %sminimum supported is %s: detected %d.%d.%d", messageStr, minimumStr, verMajor, verMinor, verMicro) } // ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a @@ -69,9 +82,61 @@ type ScmpCondition struct { Operand2 uint64 `json:"operand_two,omitempty"` } -// ScmpSyscall represents a Linux System Call +// Seccomp userspace notification structures associated with filters that use the ActNotify action. + +// ScmpSyscall identifies a Linux System Call by its number. type ScmpSyscall int32 +// ScmpFd represents a file-descriptor used for seccomp userspace notifications. +type ScmpFd int32 + +// ScmpNotifData describes the system call context that triggered a notification. +// +// Syscall: the syscall number +// Arch: the filter architecture +// InstrPointer: address of the instruction that triggered a notification +// Args: arguments (up to 6) for the syscall +// +type ScmpNotifData struct { + Syscall ScmpSyscall `json:"syscall,omitempty"` + Arch ScmpArch `json:"arch,omitempty"` + InstrPointer uint64 `json:"instr_pointer,omitempty"` + Args []uint64 `json:"args,omitempty"` +} + +// ScmpNotifReq represents a seccomp userspace notification. See NotifReceive() for +// info on how to pull such a notification. +// +// ID: notification ID +// Pid: process that triggered the notification event +// Flags: filter flags (see seccomp(2)) +// Data: system call context that triggered the notification +// +type ScmpNotifReq struct { + ID uint64 `json:"id,omitempty"` + Pid uint32 `json:"pid,omitempty"` + Flags uint32 `json:"flags,omitempty"` + Data ScmpNotifData `json:"data,omitempty"` +} + +// ScmpNotifResp represents a seccomp userspace notification response. See NotifRespond() +// for info on how to push such a response. +// +// ID: notification ID (must match the corresponding ScmpNotifReq ID) +// Error: must be 0 if no error occurred, or an error constant from package +// syscall (e.g., syscall.EPERM, etc). In the latter case, it's used +// as an error return from the syscall that created the notification. +// Val: return value for the syscall that created the notification. Only +// relevant if Error is 0. +// Flags: userspace notification response flag (e.g., NotifRespFlagContinue) +// +type ScmpNotifResp struct { + ID uint64 `json:"id,omitempty"` + Error int32 `json:"error,omitempty"` + Val uint64 `json:"val,omitempty"` + Flags uint32 `json:"flags,omitempty"` +} + // Exported Constants const ( @@ -117,6 +182,10 @@ const ( ArchS390 ScmpArch = iota // ArchS390X represents 64-bit System z/390 syscalls ArchS390X ScmpArch = iota + // ArchPARISC represents 32-bit PA-RISC + ArchPARISC ScmpArch = iota + // ArchPARISC64 represents 64-bit PA-RISC + ArchPARISC64 ScmpArch = iota ) const ( @@ -130,6 +199,9 @@ const ( ActKill ScmpAction = iota // ActTrap throws SIGSYS ActTrap ScmpAction = iota + // ActNotify triggers a userspace notification. This action is only usable when + // libseccomp API level 6 or higher is supported. + ActNotify ScmpAction = iota // ActErrno causes the syscall to return a negative error code. This // code can be set with the SetReturnCode method ActErrno ScmpAction = iota @@ -181,6 +253,21 @@ const ( CompareMaskedEqual ScmpCompareOp = iota ) +var ( + // ErrSyscallDoesNotExist represents an error condition where + // libseccomp is unable to resolve the syscall + ErrSyscallDoesNotExist = fmt.Errorf("could not resolve syscall name") +) + +const ( + // Userspace notification response flags + + // NotifRespFlagContinue tells the kernel to continue executing the system + // call that triggered the notification. Must only be used when the notication + // response's error is 0. + NotifRespFlagContinue uint32 = 1 +) + // Helpers for types // GetArchFromString returns an ScmpArch constant from a string representing an @@ -223,6 +310,10 @@ func GetArchFromString(arch string) (ScmpArch, error) { return ArchS390, nil case "s390x": return ArchS390X, nil + case "parisc": + return ArchPARISC, nil + case "parisc64": + return ArchPARISC64, nil default: return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch) } @@ -263,6 +354,10 @@ func (a ScmpArch) String() string { return "s390" case ArchS390X: return "s390x" + case ArchPARISC: + return "parisc" + case ArchPARISC64: + return "parisc64" case ArchNative: return "native" case ArchInvalid: @@ -310,6 +405,8 @@ func (a ScmpAction) String() string { case ActTrace: return fmt.Sprintf("Action: Notify tracing processes with code %d", (a >> 16)) + case ActNotify: + return "Action: Notify userspace" case ActLog: return "Action: Log system call" case ActAllow: @@ -349,7 +446,7 @@ func GetLibraryVersion() (major, minor, micro uint) { // Returns a positive int containing the API level, or 0 with an error if the // API level could not be detected due to the library being older than v2.4.0. // See the seccomp_api_get(3) man page for details on available API levels: -// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 +// https://github.com/seccomp/libseccomp/blob/main/doc/man/man3/seccomp_api_get.3 func GetAPI() (uint, error) { return getAPI() } @@ -359,7 +456,7 @@ func GetAPI() (uint, error) { // Returns an error if the API level could not be set. An error is always // returned if the library is older than v2.4.0 // See the seccomp_api_get(3) man page for details on available API levels: -// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 +// https://github.com/seccomp/libseccomp/blob/main/doc/man/man3/seccomp_api_get.3 func SetAPI(api uint) error { return setAPI(api) } @@ -386,7 +483,7 @@ func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) if cString == nil { - return "", fmt.Errorf("could not resolve syscall name for %#x", int32(s)) + return "", ErrSyscallDoesNotExist } defer C.free(unsafe.Pointer(cString)) @@ -409,7 +506,7 @@ func GetSyscallFromName(name string) (ScmpSyscall, error) { result := C.seccomp_syscall_resolve_name(cString) if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall: %q", name) + return 0, ErrSyscallDoesNotExist } return ScmpSyscall(result), nil @@ -433,7 +530,7 @@ func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall: %q on %v", name, arch) + return 0, ErrSyscallDoesNotExist } return ScmpSyscall(result), nil @@ -506,11 +603,10 @@ type ScmpFilter struct { lock sync.Mutex } -// NewFilter creates and returns a new filter context. -// Accepts a default action to be taken for syscalls which match no rules in -// the filter. -// Returns a reference to a valid filter context, or nil and an error if the -// filter context could not be created or an invalid default action was given. +// NewFilter creates and returns a new filter context. Accepts a default action to be +// taken for syscalls which match no rules in the filter. +// Returns a reference to a valid filter context, or nil and an error +// if the filter context could not be created or an invalid default action was given. func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { if err := ensureSupportedVersion(); err != nil { return nil, err @@ -530,8 +626,8 @@ func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { filter.valid = true runtime.SetFinalizer(filter, filterFinalizer) - // Enable TSync so all goroutines will receive the same rules - // If the kernel does not support TSYNC, allow us to continue without error + // Enable TSync so all goroutines will receive the same rules. + // If the kernel does not support TSYNC, allow us to continue without error. if err := filter.setFilterAttr(filterAttrTsync, 0x1); err != nil && err != syscall.ENOTSUP { filter.Release() return nil, fmt.Errorf("could not create filter - error setting tsync bit: %v", err) @@ -778,8 +874,9 @@ func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { func (f *ScmpFilter) GetLogBit() (bool, error) { log, err := f.getFilterAttr(filterAttrLog) if err != nil { - api, apiErr := getAPI() - if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 3 { return false, fmt.Errorf("getting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") } @@ -793,6 +890,30 @@ func (f *ScmpFilter) GetLogBit() (bool, error) { return true, nil } +// GetSSB returns the current state the SSB bit will be set to on the filter +// being loaded, or an error if an issue was encountered retrieving the value. +// The SSB bit tells the kernel that a seccomp user is not interested in enabling +// Speculative Store Bypass mitigation. +// The SSB bit is only usable when libseccomp API level 4 or higher is +// supported. +func (f *ScmpFilter) GetSSB() (bool, error) { + ssb, err := f.getFilterAttr(filterAttrSSB) + if err != nil { + api, apiErr := getAPI() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 4) { + return false, fmt.Errorf("getting the SSB flag is only supported in libseccomp 2.5.0 and newer with API level 4 or higher") + } + + return false, err + } + + if ssb == 0 { + return false, nil + } + + return true, nil +} + // SetBadArchAction sets the default action taken on a syscall for an // architecture not in the filter, or an error if an issue was encountered // setting the value. @@ -832,8 +953,9 @@ func (f *ScmpFilter) SetLogBit(state bool) error { err := f.setFilterAttr(filterAttrLog, toSet) if err != nil { - api, apiErr := getAPI() - if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 3 { return fmt.Errorf("setting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") } } @@ -841,6 +963,28 @@ func (f *ScmpFilter) SetLogBit(state bool) error { return err } +// SetSSB sets the state of the SSB bit, which will be applied on filter +// load, or an error if an issue was encountered setting the value. +// The SSB bit is only usable when libseccomp API level 4 or higher is +// supported. +func (f *ScmpFilter) SetSSB(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + err := f.setFilterAttr(filterAttrSSB, toSet) + if err != nil { + api, apiErr := getAPI() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 4) { + return fmt.Errorf("setting the SSB flag is only supported in libseccomp 2.5.0 and newer with API level 4 or higher") + } + } + + return err +} + // SetSyscallPriority sets a syscall's priority. // This provides a hint to the filter generator in libseccomp about the // importance of this syscall. High-priority syscalls are placed @@ -947,3 +1091,36 @@ func (f *ScmpFilter) ExportBPF(file *os.File) error { return nil } + +// Userspace Notification API + +// GetNotifFd returns the userspace notification file descriptor associated with the given +// filter context. Such a file descriptor is only valid after the filter has been loaded +// and only when the filter uses the ActNotify action. The file descriptor can be used to +// retrieve and respond to notifications associated with the filter (see NotifReceive(), +// NotifRespond(), and NotifIDValid()). +func (f *ScmpFilter) GetNotifFd() (ScmpFd, error) { + return f.getNotifFd() +} + +// NotifReceive retrieves a seccomp userspace notification from a filter whose ActNotify +// action has triggered. The caller is expected to process the notification and return a +// response via NotifRespond(). Each invocation of this function returns one +// notification. As multiple notifications may be pending at any time, this function is +// normally called within a polling loop. +func NotifReceive(fd ScmpFd) (*ScmpNotifReq, error) { + return notifReceive(fd) +} + +// NotifRespond responds to a notification retrieved via NotifReceive(). The response Id +// must match that of the corresponding notification retrieved via NotifReceive(). +func NotifRespond(fd ScmpFd, scmpResp *ScmpNotifResp) error { + return notifRespond(fd, scmpResp) +} + +// NotifIDValid checks if a notification is still valid. An return value of nil means the +// notification is still valid. Otherwise the notification is not valid. This can be used +// to mitigate time-of-check-time-of-use (TOCTOU) attacks as described in seccomp_notify_id_valid(2). +func NotifIDValid(fd ScmpFd, id uint64) error { + return notifIDValid(fd, id) +} diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go index 0982e930f..8dc7b296f 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go @@ -14,6 +14,13 @@ import ( // Get the seccomp header in scope // Need stdlib.h for free() on cstrings +// To compile libseccomp-golang against a specific version of libseccomp: +// cd ../libseccomp && mkdir -p prefix +// ./configure --prefix=$PWD/prefix && make && make install +// cd ../libseccomp-golang +// PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make +// LD_PRELOAD=$PWD/../libseccomp/prefix/lib/libseccomp.so.2.5.0 PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make test + // #cgo pkg-config: libseccomp /* #include <errno.h> @@ -50,6 +57,14 @@ const uint32_t C_ARCH_BAD = ARCH_BAD; #define SCMP_ARCH_S390X ARCH_BAD #endif +#ifndef SCMP_ARCH_PARISC +#define SCMP_ARCH_PARISC ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PARISC64 +#define SCMP_ARCH_PARISC64 ARCH_BAD +#endif + const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; @@ -67,6 +82,8 @@ const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64; const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE; const uint32_t C_ARCH_S390 = SCMP_ARCH_S390; const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; +const uint32_t C_ARCH_PARISC = SCMP_ARCH_PARISC; +const uint32_t C_ARCH_PARISC64 = SCMP_ARCH_PARISC64; #ifndef SCMP_ACT_LOG #define SCMP_ACT_LOG 0x7ffc0000U @@ -80,6 +97,10 @@ const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; #define SCMP_ACT_KILL_THREAD 0x00000000U #endif +#ifndef SCMP_ACT_NOTIFY +#define SCMP_ACT_NOTIFY 0x7fc00000U +#endif + const uint32_t C_ACT_KILL = SCMP_ACT_KILL; const uint32_t C_ACT_KILL_PROCESS = SCMP_ACT_KILL_PROCESS; const uint32_t C_ACT_KILL_THREAD = SCMP_ACT_KILL_THREAD; @@ -88,6 +109,7 @@ const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0); const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0); const uint32_t C_ACT_LOG = SCMP_ACT_LOG; const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; +const uint32_t C_ACT_NOTIFY = SCMP_ACT_NOTIFY; // The libseccomp SCMP_FLTATR_CTL_LOG member of the scmp_filter_attr enum was // added in v2.4.0 @@ -95,12 +117,16 @@ const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) #define SCMP_FLTATR_CTL_LOG _SCMP_FLTATR_MIN #endif +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5 +#define SCMP_FLTATR_CTL_SSB _SCMP_FLTATR_MIN +#endif const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG; +const uint32_t C_ATTRIBUTE_SSB = (uint32_t)SCMP_FLTATR_CTL_SSB; const int C_CMP_NE = (int)SCMP_CMP_NE; const int C_CMP_LT = (int)SCMP_CMP_LT; @@ -189,6 +215,51 @@ void add_struct_arg_cmp( return; } + +// The seccomp notify API functions were added in v2.5.0 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +int seccomp_notify_alloc(struct seccomp_notif **req, struct seccomp_notif_resp **resp) { + return -EOPNOTSUPP; +} +int seccomp_notify_fd(const scmp_filter_ctx ctx) { + return -EOPNOTSUPP; +} +void seccomp_notify_free(struct seccomp_notif *req, struct seccomp_notif_resp *resp) { +} +int seccomp_notify_id_valid(int fd, uint64_t id) { + return -EOPNOTSUPP; +} +int seccomp_notify_receive(int fd, struct seccomp_notif *req) { + return -EOPNOTSUPP; +} +int seccomp_notify_respond(int fd, struct seccomp_notif_resp *resp) { + return -EOPNOTSUPP; +} + +#endif */ import "C" @@ -203,6 +274,7 @@ const ( filterAttrNNP scmpFilterAttr = iota filterAttrTsync scmpFilterAttr = iota filterAttrLog scmpFilterAttr = iota + filterAttrSSB scmpFilterAttr = iota ) const ( @@ -210,7 +282,7 @@ const ( scmpError C.int = -1 // Comparison boundaries to check for architecture validity archStart ScmpArch = ArchNative - archEnd ScmpArch = ArchS390X + archEnd ScmpArch = ArchPARISC64 // Comparison boundaries to check for action validity actionStart ScmpAction = ActKill actionEnd ScmpAction = ActKillProcess @@ -460,6 +532,10 @@ func archFromNative(a C.uint32_t) (ScmpArch, error) { return ArchS390, nil case C.C_ARCH_S390X: return ArchS390X, nil + case C.C_ARCH_PARISC: + return ArchPARISC, nil + case C.C_ARCH_PARISC64: + return ArchPARISC64, nil default: return 0x0, fmt.Errorf("unrecognized architecture %#x", uint32(a)) } @@ -500,6 +576,10 @@ func (a ScmpArch) toNative() C.uint32_t { return C.C_ARCH_S390 case ArchS390X: return C.C_ARCH_S390X + case ArchPARISC: + return C.C_ARCH_PARISC + case ArchPARISC64: + return C.C_ARCH_PARISC64 case ArchNative: return C.C_ARCH_NATIVE default: @@ -548,6 +628,8 @@ func actionFromNative(a C.uint32_t) (ScmpAction, error) { return ActLog, nil case C.C_ACT_ALLOW: return ActAllow, nil + case C.C_ACT_NOTIFY: + return ActNotify, nil default: return 0x0, fmt.Errorf("unrecognized action %#x", uint32(a)) } @@ -572,6 +654,8 @@ func (a ScmpAction) toNative() C.uint32_t { return C.C_ACT_LOG case ActAllow: return C.C_ACT_ALLOW + case ActNotify: + return C.C_ACT_NOTIFY default: return 0x0 } @@ -590,7 +674,181 @@ func (a scmpFilterAttr) toNative() uint32 { return uint32(C.C_ATTRIBUTE_TSYNC) case filterAttrLog: return uint32(C.C_ATTRIBUTE_LOG) + case filterAttrSSB: + return uint32(C.C_ATTRIBUTE_SSB) default: return 0x0 } } + +func (a ScmpSyscall) toNative() C.uint32_t { + return C.uint32_t(a) +} + +func syscallFromNative(a C.int) ScmpSyscall { + return ScmpSyscall(a) +} + +func notifReqFromNative(req *C.struct_seccomp_notif) (*ScmpNotifReq, error) { + scmpArgs := make([]uint64, 6) + for i := 0; i < len(scmpArgs); i++ { + scmpArgs[i] = uint64(req.data.args[i]) + } + + arch, err := archFromNative(req.data.arch) + if err != nil { + return nil, err + } + + scmpData := ScmpNotifData{ + Syscall: syscallFromNative(req.data.nr), + Arch: arch, + InstrPointer: uint64(req.data.instruction_pointer), + Args: scmpArgs, + } + + scmpReq := &ScmpNotifReq{ + ID: uint64(req.id), + Pid: uint32(req.pid), + Flags: uint32(req.flags), + Data: scmpData, + } + + return scmpReq, nil +} + +func (scmpResp *ScmpNotifResp) toNative(resp *C.struct_seccomp_notif_resp) { + resp.id = C.__u64(scmpResp.ID) + resp.val = C.__s64(scmpResp.Val) + resp.error = (C.__s32(scmpResp.Error) * -1) // kernel requires a negated value + resp.flags = C.__u32(scmpResp.Flags) +} + +// Userspace Notification API +// Calls to C.seccomp_notify* hidden from seccomp.go + +func (f *ScmpFilter) getNotifFd() (ScmpFd, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return -1, errBadFilter + } + + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return -1, fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) + } + + fd := C.seccomp_notify_fd(f.filterCtx) + + return ScmpFd(fd), nil +} + +func notifReceive(fd ScmpFd) (*ScmpNotifReq, error) { + var req *C.struct_seccomp_notif + var resp *C.struct_seccomp_notif_resp + + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return nil, fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) + } + + // we only use the request here; the response is unused + if retCode := C.seccomp_notify_alloc(&req, &resp); retCode != 0 { + return nil, errRc(retCode) + } + + defer func() { + C.seccomp_notify_free(req, resp) + }() + + for { + retCode, errno := C.seccomp_notify_receive(C.int(fd), req) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return nil, errno + } + + return nil, errRc(retCode) + } + + return notifReqFromNative(req) +} + +func notifRespond(fd ScmpFd, scmpResp *ScmpNotifResp) error { + var req *C.struct_seccomp_notif + var resp *C.struct_seccomp_notif_resp + + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) + } + + // we only use the reponse here; the request is discarded + if retCode := C.seccomp_notify_alloc(&req, &resp); retCode != 0 { + return errRc(retCode) + } + + defer func() { + C.seccomp_notify_free(req, resp) + }() + + scmpResp.toNative(resp) + + for { + retCode, errno := C.seccomp_notify_respond(C.int(fd), resp) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return errno + } + + return errRc(retCode) + } + + return nil +} + +func notifIDValid(fd ScmpFd, id uint64) error { + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) + } + + for { + retCode, errno := C.seccomp_notify_id_valid(C.int(fd), C.uint64_t(id)) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return errno + } + + return errRc(retCode) + } + + return nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 1837a01dd..ebe683486 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -562,7 +562,7 @@ github.com/opencontainers/go-digest ## explicit github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.0.3 +# github.com/opencontainers/runc v1.1.0 ## explicit github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/cgroups @@ -633,7 +633,7 @@ github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/tcp github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp github.com/rootless-containers/rootlesskit/pkg/port/builtin/parent/udp/udpproxy github.com/rootless-containers/rootlesskit/pkg/port/portutil -# github.com/seccomp/libseccomp-golang v0.9.2-0.20200616122406-847368b35ebf +# github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 github.com/seccomp/libseccomp-golang # github.com/sirupsen/logrus v1.8.1 ## explicit |