diff options
author | Daniel J Walsh <dwalsh@redhat.com> | 2019-01-04 10:15:38 -0500 |
---|---|---|
committer | Daniel J Walsh <dwalsh@redhat.com> | 2019-01-04 14:54:59 -0500 |
commit | 43686072d3ff559abb0aea865509ae85f8a301de (patch) | |
tree | 54946ccc3cbe4cf01355253f3d8b91194fba4a1b /vendor/github.com/opencontainers/runc/libcontainer | |
parent | 9ffd4806163e410d51d0f0cbece45b7405ff9fee (diff) | |
download | podman-43686072d3ff559abb0aea865509ae85f8a301de.tar.gz podman-43686072d3ff559abb0aea865509ae85f8a301de.tar.bz2 podman-43686072d3ff559abb0aea865509ae85f8a301de.zip |
Update vendor of runc
Updating the vendor or runc to pull in some fixes that we need.
In order to get this vendor to work, we needed to update the vendor
of docker/docker, which causes all sorts of issues, just to fix
the docker/pkg/sysinfo. Rather then doing this, I pulled in pkg/sysinfo
into libpod and fixed the code locally.
I then switched the use of docker/pkg/sysinfo to libpod/pkg/sysinfo.
I also switched out the docker/pkg/mount to containers/storage/pkg/mount
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
9 files changed, 142 insertions, 97 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md index 42f3efe56..1d7fa04c0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -148,6 +148,7 @@ config := &configs.Config{ {Type: configs.NEWPID}, {Type: configs.NEWUSER}, {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, }), Cgroups: &configs.Cgroup{ Name: "test-container", @@ -323,6 +324,7 @@ generated when building libcontainer with docker. ## Copyright and license -Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license. -Docs released under Creative commons. - +Code and documentation copyright 2014 Docker, inc. +The code and documentation are released under the [Apache 2.0 license](../LICENSE). +The documentation is also released under Creative Commons Attribution 4.0 International License. +You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 7c995efee..ea571ad93 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -13,40 +13,50 @@ import ( "strings" "time" - "github.com/docker/go-units" + units "github.com/docker/go-units" ) const ( - cgroupNamePrefix = "name=" + CgroupNamePrefix = "name=" CgroupProcesses = "cgroup.procs" ) // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt -func FindCgroupMountpoint(subsystem string) (string, error) { - mnt, _, err := FindCgroupMountpointAndRoot(subsystem) +func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) return mnt, err } -func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) { +func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { // We are not using mount.GetMounts() because it's super-inefficient, // parsing it directly sped up x10 times because of not using Sscanf. // It was one of two major performance drawbacks in container start. if !isSubsystemAvailable(subsystem) { return "", "", NewNotFoundError(subsystem) } + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", "", err } defer f.Close() - scanner := bufio.NewScanner(f) + return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) +} + +func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { + scanner := bufio.NewScanner(reader) for scanner.Scan() { txt := scanner.Text() - fields := strings.Split(txt, " ") - for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if opt == subsystem { - return fields[4], fields[3], nil + fields := strings.Fields(txt) + if len(fields) < 5 { + continue + } + if strings.HasPrefix(fields[4], cgroupPath) { + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } } } } @@ -103,7 +113,7 @@ func FindCgroupMountpointDir() (string, error) { } if postSeparatorFields[0] == "cgroup" { - // Check that the mount is properly formated. + // Check that the mount is properly formatted. if numPostFields < 3 { return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) } @@ -151,19 +161,20 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, Root: fields[3], } for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if !ss[opt] { + seen, known := ss[opt] + if !known || (!all && seen) { continue } - if strings.HasPrefix(opt, cgroupNamePrefix) { - m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) - } else { - m.Subsystems = append(m.Subsystems, opt) - } - if !all { - numFound++ + ss[opt] = true + if strings.HasPrefix(opt, CgroupNamePrefix) { + opt = opt[len(CgroupNamePrefix):] } + m.Subsystems = append(m.Subsystems, opt) + numFound++ + } + if len(m.Subsystems) > 0 || all { + res = append(res, m) } - res = append(res, m) } if err := scanner.Err(); err != nil { return nil, err @@ -187,7 +198,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) { allMap := make(map[string]bool) for s := range allSubsystems { - allMap[s] = true + allMap[s] = false } return getCgroupMountsHelper(allMap, f, all) } @@ -256,13 +267,13 @@ func GetInitCgroupPath(subsystem string) (string, error) { } func getCgroupPathHelper(subsystem, cgroup string) (string, error) { - mnt, root, err := FindCgroupMountpointAndRoot(subsystem) + mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) if err != nil { return "", err } // This is needed for nested containers, because in /proc/self/cgroup we - // see pathes from host, which don't exist in container. + // see paths from host, which don't exist in container. relCgroup, err := filepath.Rel(root, cgroup) if err != nil { return "", err @@ -342,7 +353,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err return p, nil } - if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok { + if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { return p, nil } @@ -452,7 +463,7 @@ func WriteCgroupProc(dir string, pid int) error { return fmt.Errorf("no such directory for %s", CgroupProcesses) } - // Dont attach any pid to the cgroup if -1 is specified as a pid + // Don't attach any pid to the cgroup if -1 is specified as a pid if pid != -1 { if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil { return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index b1c4762fe..7728522fe 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -186,12 +186,19 @@ type Config struct { // callers keyring in this case. NoNewKeyring bool `json:"no_new_keyring"` - // Rootless specifies whether the container is a rootless container. - Rootless bool `json:"rootless"` - - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into - // to limit the resources (e.g., L3 cache) the container has available + // IntelRdt specifies settings for Intel RDT group that the container is placed into + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` + + // RootlessEUID is set when the runc was launched with non-zero EUID. + // Note that RootlessEUID is set to false when launched with EUID=0 in userns. + // When RootlessEUID is set, runc creates a new userns for the container. + // (config.json needs to contain userns settings) + RootlessEUID bool `json:"rootless_euid,omitempty"` + + // RootlessCgroups is set when unlikely to have the full access to cgroups. + // When RootlessCgroups is set, cgroups errors are ignored. + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` } type Hooks struct { @@ -265,26 +272,23 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) { }) } -// HookState is the payload provided to a hook on execution. -type HookState specs.State - type Hook interface { // Run executes the hook with the provided state. - Run(HookState) error + Run(*specs.State) error } // NewFunctionHook will call the provided function when the hook is run. -func NewFunctionHook(f func(HookState) error) FuncHook { +func NewFunctionHook(f func(*specs.State) error) FuncHook { return FuncHook{ run: f, } } type FuncHook struct { - run func(HookState) error + run func(*specs.State) error } -func (f FuncHook) Run(s HookState) error { +func (f FuncHook) Run(s *specs.State) error { return f.run(s) } @@ -307,7 +311,7 @@ type CommandHook struct { Command } -func (c Command) Run(s HookState) error { +func (c Command) Run(s *specs.State) error { b, err := json.Marshal(s) if err != nil { return err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go index 36bd5f96a..57e9f037d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go @@ -4,4 +4,10 @@ type IntelRdt struct { // The schema for L3 cache id and capacity bitmask (CBM) // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The schema of memory bandwidth per L3 cache id + // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. + MemBwSchema string `json:"memBwSchema,omitempty"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go index 5fc171a57..1bbaef9bd 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -7,12 +7,13 @@ import ( ) const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" + NEWCGROUP NamespaceType = "NEWCGROUP" ) var ( @@ -35,6 +36,8 @@ func NsName(ns NamespaceType) string { return "user" case NEWUTS: return "uts" + case NEWCGROUP: + return "cgroup" } return "" } @@ -68,6 +71,7 @@ func NamespaceTypes() []NamespaceType { NEWNET, NEWPID, NEWNS, + NEWCGROUP, } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go index 4ce6813d2..2dc7adfc9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -9,12 +9,13 @@ func (n *Namespace) Syscall() int { } var namespaceInfo = map[NamespaceType]int{ - NEWNET: unix.CLONE_NEWNET, - NEWNS: unix.CLONE_NEWNS, - NEWUSER: unix.CLONE_NEWUSER, - NEWIPC: unix.CLONE_NEWIPC, - NEWUTS: unix.CLONE_NEWUTS, - NEWPID: unix.CLONE_NEWPID, + NEWNET: unix.CLONE_NEWNET, + NEWNS: unix.CLONE_NEWNS, + NEWUSER: unix.CLONE_NEWUSER, + NEWIPC: unix.CLONE_NEWIPC, + NEWUTS: unix.CLONE_NEWUTS, + NEWPID: unix.CLONE_NEWPID, + NEWCGROUP: unix.CLONE_NEWCGROUP, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md index 575701371..9ec6c3931 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md @@ -10,8 +10,8 @@ The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, called the preamble, is used as a header when compiling the C parts of the package. So every time we import package `nsenter`, the C code function `nsexec()` would be -called. And package `nsenter` is now only imported in `main_unix.go`, so every time -before we call `cmd.Start` on linux, that C code would run. +called. And package `nsenter` is only imported in `init.go`, so every time the runc +`init` command is invoked, that C code is run. Because `nsexec()` must be run before the Go runtime in order to use the Linux kernel namespace, you must `import` this library into a package if @@ -37,7 +37,7 @@ the parent `nsexec()` will exit and the child `nsexec()` process will return to allow the Go runtime take over. NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any -CLONE_NEW* clone flags because we must fork a new process in order to +`CLONE_NEW*` clone flags because we must fork a new process in order to enter the PID namespace. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index a4cd1399d..28269dfc0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -42,6 +42,12 @@ enum sync_t { SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ }; +/* + * Synchronisation value for cgroup namespace setup. + * The same constant is defined in process_linux.go as "createCgroupns". + */ +#define CREATECGROUPNS 0x80 + /* longjmp() arguments. */ #define JUMP_PARENT 0x00 #define JUMP_CHILD 0xA0 @@ -82,7 +88,7 @@ struct nlconfig_t { uint8_t is_setgroup; /* Rootless container settings. */ - uint8_t is_rootless; + uint8_t is_rootless_euid; /* boolean */ char *uidmappath; size_t uidmappath_len; char *gidmappath; @@ -100,7 +106,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 -#define ROOTLESS_ATTR 27287 +#define ROOTLESS_EUID_ATTR 27287 #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 @@ -211,7 +217,7 @@ static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) /* * If @app is NULL, execve will segfault. Just check it here and bail (if - * we're in this path, the caller is already getting desparate and there + * we're in this path, the caller is already getting desperate and there * isn't a backup to this failing). This usually would be a configuration * or programming issue. */ @@ -419,8 +425,8 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; - case ROOTLESS_ATTR: - config->is_rootless = readint8(current); + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; @@ -640,7 +646,6 @@ void nsexec(void) case JUMP_PARENT:{ int len; pid_t child, first_child = -1; - char buf[JSON_MAX]; bool ready = false; /* For debugging. */ @@ -687,7 +692,7 @@ void nsexec(void) * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && !config.is_setgroup) + if (config.is_rootless_euid && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -716,6 +721,18 @@ void nsexec(void) kill(child, SIGKILL); bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } + + /* Send the init_func pid back to our parent. + * + * Send the init_func pid and the pid of the first child back to our parent. + * We need to send both back because we can't reap the first child we created (CLONE_PARENT). + * It becomes the responsibility of our parent to reap the first child. + */ + len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } } break; case SYNC_CHILD_READY: @@ -759,23 +776,6 @@ void nsexec(void) bail("unexpected sync value: %u", s); } } - - /* - * Send the init_func pid and the pid of the first child back to our parent. - * - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. - */ - len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } - if (write(pipenum, buf, len) != len) { - kill(child, SIGKILL); - bail("unable to send child pid to bootstrapper"); - } - exit(0); } @@ -862,14 +862,17 @@ void nsexec(void) if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } - /* - * Unshare all of the namespaces. Note that we don't merge this - * with clone() because there were some old kernel versions where - * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do - * it the long way. + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. + * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. */ - if (unshare(config.cloneflags) < 0) + if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) bail("failed to unshare namespaces"); /* @@ -953,11 +956,23 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (!config.is_rootless && config.is_setgroup) { + if (!config.is_rootless_euid && config.is_setgroup) { if (setgroups(0, NULL) < 0) bail("setgroups failed"); } + /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + if (config.cloneflags & CLONE_NEWCGROUP) { + uint8_t value; + if (read(pipenum, &value, sizeof(value)) != sizeof(value)) + bail("read synchronisation value failed"); + if (value == CREATECGROUPNS) { + if (unshare(CLONE_NEWCGROUP) < 0) + bail("failed to unshare cgroup namespace"); + } else + bail("received unknown synchronisation value"); + } + s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index c1e634c94..92b5ae8de 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -5,6 +5,7 @@ package user import ( "io" "os" + "strconv" "golang.org/x/sys/unix" ) @@ -115,22 +116,23 @@ func CurrentGroup() (Group, error) { return LookupGid(unix.Getgid()) } -func CurrentUserSubUIDs() ([]SubID, error) { +func currentUserSubIDs(fileName string) ([]SubID, error) { u, err := CurrentUser() if err != nil { return nil, err } - return ParseSubIDFileFilter("/etc/subuid", - func(entry SubID) bool { return entry.Name == u.Name }) + filter := func(entry SubID) bool { + return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) + } + return ParseSubIDFileFilter(fileName, filter) } -func CurrentGroupSubGIDs() ([]SubID, error) { - g, err := CurrentGroup() - if err != nil { - return nil, err - } - return ParseSubIDFileFilter("/etc/subgid", - func(entry SubID) bool { return entry.Name == g.Name }) +func CurrentUserSubUIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subuid") +} + +func CurrentUserSubGIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subgid") } func CurrentProcessUIDMap() ([]IDMap, error) { |