summaryrefslogtreecommitdiff
path: root/vendor/github.com/opencontainers/runc/libcontainer
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/README.md8
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go61
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/config.go30
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go6
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go16
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go13
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md6
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c77
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go22
9 files changed, 142 insertions, 97 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md
index 42f3efe56..1d7fa04c0 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/README.md
+++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md
@@ -148,6 +148,7 @@ config := &configs.Config{
{Type: configs.NEWPID},
{Type: configs.NEWUSER},
{Type: configs.NEWNET},
+ {Type: configs.NEWCGROUP},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
@@ -323,6 +324,7 @@ generated when building libcontainer with docker.
## Copyright and license
-Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license.
-Docs released under Creative commons.
-
+Code and documentation copyright 2014 Docker, inc.
+The code and documentation are released under the [Apache 2.0 license](../LICENSE).
+The documentation is also released under Creative Commons Attribution 4.0 International License.
+You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/.
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
index 7c995efee..ea571ad93 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -13,40 +13,50 @@ import (
"strings"
"time"
- "github.com/docker/go-units"
+ units "github.com/docker/go-units"
)
const (
- cgroupNamePrefix = "name="
+ CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
-func FindCgroupMountpoint(subsystem string) (string, error) {
- mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
+func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
+ mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
-func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
+func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
+
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
- scanner := bufio.NewScanner(f)
+ return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
+}
+
+func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
+ scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
- fields := strings.Split(txt, " ")
- for _, opt := range strings.Split(fields[len(fields)-1], ",") {
- if opt == subsystem {
- return fields[4], fields[3], nil
+ fields := strings.Fields(txt)
+ if len(fields) < 5 {
+ continue
+ }
+ if strings.HasPrefix(fields[4], cgroupPath) {
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+ if opt == subsystem {
+ return fields[4], fields[3], nil
+ }
}
}
}
@@ -103,7 +113,7 @@ func FindCgroupMountpointDir() (string, error) {
}
if postSeparatorFields[0] == "cgroup" {
- // Check that the mount is properly formated.
+ // Check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
@@ -151,19 +161,20 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
- if !ss[opt] {
+ seen, known := ss[opt]
+ if !known || (!all && seen) {
continue
}
- if strings.HasPrefix(opt, cgroupNamePrefix) {
- m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
- } else {
- m.Subsystems = append(m.Subsystems, opt)
- }
- if !all {
- numFound++
+ ss[opt] = true
+ if strings.HasPrefix(opt, CgroupNamePrefix) {
+ opt = opt[len(CgroupNamePrefix):]
}
+ m.Subsystems = append(m.Subsystems, opt)
+ numFound++
+ }
+ if len(m.Subsystems) > 0 || all {
+ res = append(res, m)
}
- res = append(res, m)
}
if err := scanner.Err(); err != nil {
return nil, err
@@ -187,7 +198,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
allMap := make(map[string]bool)
for s := range allSubsystems {
- allMap[s] = true
+ allMap[s] = false
}
return getCgroupMountsHelper(allMap, f, all)
}
@@ -256,13 +267,13 @@ func GetInitCgroupPath(subsystem string) (string, error) {
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
- mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
+ mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
- // see pathes from host, which don't exist in container.
+ // see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
@@ -342,7 +353,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err
return p, nil
}
- if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
+ if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
@@ -452,7 +463,7 @@ func WriteCgroupProc(dir string, pid int) error {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}
- // Dont attach any pid to the cgroup if -1 is specified as a pid
+ // Don't attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
index b1c4762fe..7728522fe 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -186,12 +186,19 @@ type Config struct {
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
- // Rootless specifies whether the container is a rootless container.
- Rootless bool `json:"rootless"`
-
- // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
- // to limit the resources (e.g., L3 cache) the container has available
+ // IntelRdt specifies settings for Intel RDT group that the container is placed into
+ // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
+
+ // RootlessEUID is set when the runc was launched with non-zero EUID.
+ // Note that RootlessEUID is set to false when launched with EUID=0 in userns.
+ // When RootlessEUID is set, runc creates a new userns for the container.
+ // (config.json needs to contain userns settings)
+ RootlessEUID bool `json:"rootless_euid,omitempty"`
+
+ // RootlessCgroups is set when unlikely to have the full access to cgroups.
+ // When RootlessCgroups is set, cgroups errors are ignored.
+ RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
type Hooks struct {
@@ -265,26 +272,23 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
})
}
-// HookState is the payload provided to a hook on execution.
-type HookState specs.State
-
type Hook interface {
// Run executes the hook with the provided state.
- Run(HookState) error
+ Run(*specs.State) error
}
// NewFunctionHook will call the provided function when the hook is run.
-func NewFunctionHook(f func(HookState) error) FuncHook {
+func NewFunctionHook(f func(*specs.State) error) FuncHook {
return FuncHook{
run: f,
}
}
type FuncHook struct {
- run func(HookState) error
+ run func(*specs.State) error
}
-func (f FuncHook) Run(s HookState) error {
+func (f FuncHook) Run(s *specs.State) error {
return f.run(s)
}
@@ -307,7 +311,7 @@ type CommandHook struct {
Command
}
-func (c Command) Run(s HookState) error {
+func (c Command) Run(s *specs.State) error {
b, err := json.Marshal(s)
if err != nil {
return err
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
index 36bd5f96a..57e9f037d 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
@@ -4,4 +4,10 @@ type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+
+ // The schema of memory bandwidth per L3 cache id
+ // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
+ // The unit of memory bandwidth is specified in "percentages" by
+ // default, and in "MBps" if MBA Software Controller is enabled.
+ MemBwSchema string `json:"memBwSchema,omitempty"`
}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
index 5fc171a57..1bbaef9bd 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@@ -7,12 +7,13 @@ import (
)
const (
- NEWNET NamespaceType = "NEWNET"
- NEWPID NamespaceType = "NEWPID"
- NEWNS NamespaceType = "NEWNS"
- NEWUTS NamespaceType = "NEWUTS"
- NEWIPC NamespaceType = "NEWIPC"
- NEWUSER NamespaceType = "NEWUSER"
+ NEWNET NamespaceType = "NEWNET"
+ NEWPID NamespaceType = "NEWPID"
+ NEWNS NamespaceType = "NEWNS"
+ NEWUTS NamespaceType = "NEWUTS"
+ NEWIPC NamespaceType = "NEWIPC"
+ NEWUSER NamespaceType = "NEWUSER"
+ NEWCGROUP NamespaceType = "NEWCGROUP"
)
var (
@@ -35,6 +36,8 @@ func NsName(ns NamespaceType) string {
return "user"
case NEWUTS:
return "uts"
+ case NEWCGROUP:
+ return "cgroup"
}
return ""
}
@@ -68,6 +71,7 @@ func NamespaceTypes() []NamespaceType {
NEWNET,
NEWPID,
NEWNS,
+ NEWCGROUP,
}
}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
index 4ce6813d2..2dc7adfc9 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@@ -9,12 +9,13 @@ func (n *Namespace) Syscall() int {
}
var namespaceInfo = map[NamespaceType]int{
- NEWNET: unix.CLONE_NEWNET,
- NEWNS: unix.CLONE_NEWNS,
- NEWUSER: unix.CLONE_NEWUSER,
- NEWIPC: unix.CLONE_NEWIPC,
- NEWUTS: unix.CLONE_NEWUTS,
- NEWPID: unix.CLONE_NEWPID,
+ NEWNET: unix.CLONE_NEWNET,
+ NEWNS: unix.CLONE_NEWNS,
+ NEWUSER: unix.CLONE_NEWUSER,
+ NEWIPC: unix.CLONE_NEWIPC,
+ NEWUTS: unix.CLONE_NEWUTS,
+ NEWPID: unix.CLONE_NEWPID,
+ NEWCGROUP: unix.CLONE_NEWCGROUP,
}
// CloneFlags parses the container's Namespaces options to set the correct
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md
index 575701371..9ec6c3931 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md
@@ -10,8 +10,8 @@ The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd
package. In cgo, if the import of "C" is immediately preceded by a comment, that comment,
called the preamble, is used as a header when compiling the C parts of the package.
So every time we import package `nsenter`, the C code function `nsexec()` would be
-called. And package `nsenter` is now only imported in `main_unix.go`, so every time
-before we call `cmd.Start` on linux, that C code would run.
+called. And package `nsenter` is only imported in `init.go`, so every time the runc
+`init` command is invoked, that C code is run.
Because `nsexec()` must be run before the Go runtime in order to use the
Linux kernel namespace, you must `import` this library into a package if
@@ -37,7 +37,7 @@ the parent `nsexec()` will exit and the child `nsexec()` process will
return to allow the Go runtime take over.
NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any
-CLONE_NEW* clone flags because we must fork a new process in order to
+`CLONE_NEW*` clone flags because we must fork a new process in order to
enter the PID namespace.
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
index a4cd1399d..28269dfc0 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
@@ -42,6 +42,12 @@ enum sync_t {
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
};
+/*
+ * Synchronisation value for cgroup namespace setup.
+ * The same constant is defined in process_linux.go as "createCgroupns".
+ */
+#define CREATECGROUPNS 0x80
+
/* longjmp() arguments. */
#define JUMP_PARENT 0x00
#define JUMP_CHILD 0xA0
@@ -82,7 +88,7 @@ struct nlconfig_t {
uint8_t is_setgroup;
/* Rootless container settings. */
- uint8_t is_rootless;
+ uint8_t is_rootless_euid; /* boolean */
char *uidmappath;
size_t uidmappath_len;
char *gidmappath;
@@ -100,7 +106,7 @@ struct nlconfig_t {
#define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286
-#define ROOTLESS_ATTR 27287
+#define ROOTLESS_EUID_ATTR 27287
#define UIDMAPPATH_ATTR 27288
#define GIDMAPPATH_ATTR 27289
@@ -211,7 +217,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len)
/*
* If @app is NULL, execve will segfault. Just check it here and bail (if
- * we're in this path, the caller is already getting desparate and there
+ * we're in this path, the caller is already getting desperate and there
* isn't a backup to this failing). This usually would be a configuration
* or programming issue.
*/
@@ -419,8 +425,8 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current);
break;
- case ROOTLESS_ATTR:
- config->is_rootless = readint8(current);
+ case ROOTLESS_EUID_ATTR:
+ config->is_rootless_euid = readint8(current); /* boolean */
break;
case OOM_SCORE_ADJ_ATTR:
config->oom_score_adj = current;
@@ -640,7 +646,6 @@ void nsexec(void)
case JUMP_PARENT:{
int len;
pid_t child, first_child = -1;
- char buf[JSON_MAX];
bool ready = false;
/* For debugging. */
@@ -687,7 +692,7 @@ void nsexec(void)
* newuidmap/newgidmap shall be used.
*/
- if (config.is_rootless && !config.is_setgroup)
+ if (config.is_rootless_euid && !config.is_setgroup)
update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */
@@ -716,6 +721,18 @@ void nsexec(void)
kill(child, SIGKILL);
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
}
+
+ /* Send the init_func pid back to our parent.
+ *
+ * Send the init_func pid and the pid of the first child back to our parent.
+ * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
+ * It becomes the responsibility of our parent to reap the first child.
+ */
+ len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
+ if (len < 0) {
+ kill(child, SIGKILL);
+ bail("unable to generate JSON for child pid");
+ }
}
break;
case SYNC_CHILD_READY:
@@ -759,23 +776,6 @@ void nsexec(void)
bail("unexpected sync value: %u", s);
}
}
-
- /*
- * Send the init_func pid and the pid of the first child back to our parent.
- *
- * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
- * It becomes the responsibility of our parent to reap the first child.
- */
- len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
- if (len < 0) {
- kill(child, SIGKILL);
- bail("unable to generate JSON for child pid");
- }
- if (write(pipenum, buf, len) != len) {
- kill(child, SIGKILL);
- bail("unable to send child pid to bootstrapper");
- }
-
exit(0);
}
@@ -862,14 +862,17 @@ void nsexec(void)
if (setresuid(0, 0, 0) < 0)
bail("failed to become root in user namespace");
}
-
/*
- * Unshare all of the namespaces. Note that we don't merge this
- * with clone() because there were some old kernel versions where
- * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do
- * it the long way.
+ * Unshare all of the namespaces. Now, it should be noted that this
+ * ordering might break in the future (especially with rootless
+ * containers). But for now, it's not possible to split this into
+ * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
+ *
+ * Note that we don't merge this with clone() because there were
+ * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
+ * was broken, so we'll just do it the long way anyway.
*/
- if (unshare(config.cloneflags) < 0)
+ if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
bail("failed to unshare namespaces");
/*
@@ -953,11 +956,23 @@ void nsexec(void)
if (setgid(0) < 0)
bail("setgid failed");
- if (!config.is_rootless && config.is_setgroup) {
+ if (!config.is_rootless_euid && config.is_setgroup) {
if (setgroups(0, NULL) < 0)
bail("setgroups failed");
}
+ /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
+ if (config.cloneflags & CLONE_NEWCGROUP) {
+ uint8_t value;
+ if (read(pipenum, &value, sizeof(value)) != sizeof(value))
+ bail("read synchronisation value failed");
+ if (value == CREATECGROUPNS) {
+ if (unshare(CLONE_NEWCGROUP) < 0)
+ bail("failed to unshare cgroup namespace");
+ } else
+ bail("received unknown synchronisation value");
+ }
+
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
index c1e634c94..92b5ae8de 100644
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
@@ -5,6 +5,7 @@ package user
import (
"io"
"os"
+ "strconv"
"golang.org/x/sys/unix"
)
@@ -115,22 +116,23 @@ func CurrentGroup() (Group, error) {
return LookupGid(unix.Getgid())
}
-func CurrentUserSubUIDs() ([]SubID, error) {
+func currentUserSubIDs(fileName string) ([]SubID, error) {
u, err := CurrentUser()
if err != nil {
return nil, err
}
- return ParseSubIDFileFilter("/etc/subuid",
- func(entry SubID) bool { return entry.Name == u.Name })
+ filter := func(entry SubID) bool {
+ return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid)
+ }
+ return ParseSubIDFileFilter(fileName, filter)
}
-func CurrentGroupSubGIDs() ([]SubID, error) {
- g, err := CurrentGroup()
- if err != nil {
- return nil, err
- }
- return ParseSubIDFileFilter("/etc/subgid",
- func(entry SubID) bool { return entry.Name == g.Name })
+func CurrentUserSubUIDs() ([]SubID, error) {
+ return currentUserSubIDs("/etc/subuid")
+}
+
+func CurrentUserSubGIDs() ([]SubID, error) {
+ return currentUserSubIDs("/etc/subgid")
}
func CurrentProcessUIDMap() ([]IDMap, error) {