aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/opencontainers/runc/libcontainer
diff options
context:
space:
mode:
authorValentin Rothberg <rothberg@redhat.com>2019-06-24 11:29:13 +0200
committerValentin Rothberg <rothberg@redhat.com>2019-06-24 13:20:59 +0200
commitd697456dc90adbaf68224ed7c115b38d5855e582 (patch)
tree5fd88c48b34e7bead0028fa97e39f43f03880642 /vendor/github.com/opencontainers/runc/libcontainer
parenta3211b73c62a9fcc13f09305bf629ef507b26d34 (diff)
downloadpodman-d697456dc90adbaf68224ed7c115b38d5855e582.tar.gz
podman-d697456dc90adbaf68224ed7c115b38d5855e582.tar.bz2
podman-d697456dc90adbaf68224ed7c115b38d5855e582.zip
migrate to go-modules
Signed-off-by: Valentin Rothberg <rothberg@redhat.com>
Diffstat (limited to 'vendor/github.com/opencontainers/runc/libcontainer')
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/README.md330
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md44
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h32
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go12
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go25
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go5
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c995
-rw-r--r--vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS2
8 files changed, 2 insertions, 1443 deletions
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md
deleted file mode 100644
index 1d7fa04c0..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/README.md
+++ /dev/null
@@ -1,330 +0,0 @@
-# libcontainer
-
-[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
-
-Libcontainer provides a native Go implementation for creating containers
-with namespaces, cgroups, capabilities, and filesystem access controls.
-It allows you to manage the lifecycle of the container performing additional operations
-after the container is created.
-
-
-#### Container
-A container is a self contained execution environment that shares the kernel of the
-host system and which is (optionally) isolated from other containers in the system.
-
-#### Using libcontainer
-
-Because containers are spawned in a two step process you will need a binary that
-will be executed as the init process for the container. In libcontainer, we use
-the current binary (/proc/self/exe) to be executed as the init process, and use
-arg "init", we call the first step process "bootstrap", so you always need a "init"
-function as the entry of "bootstrap".
-
-In addition to the go init function the early stage bootstrap is handled by importing
-[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
-
-```go
-import (
- _ "github.com/opencontainers/runc/libcontainer/nsenter"
-)
-
-func init() {
- if len(os.Args) > 1 && os.Args[1] == "init" {
- runtime.GOMAXPROCS(1)
- runtime.LockOSThread()
- factory, _ := libcontainer.New("")
- if err := factory.StartInitialization(); err != nil {
- logrus.Fatal(err)
- }
- panic("--this line should have never been executed, congratulations--")
- }
-}
-```
-
-Then to create a container you first have to initialize an instance of a factory
-that will handle the creation and initialization for a container.
-
-```go
-factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
-if err != nil {
- logrus.Fatal(err)
- return
-}
-```
-
-Once you have an instance of the factory created we can create a configuration
-struct describing how the container is to be created. A sample would look similar to this:
-
-```go
-defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
-config := &configs.Config{
- Rootfs: "/your/path/to/rootfs",
- Capabilities: &configs.Capabilities{
- Bounding: []string{
- "CAP_CHOWN",
- "CAP_DAC_OVERRIDE",
- "CAP_FSETID",
- "CAP_FOWNER",
- "CAP_MKNOD",
- "CAP_NET_RAW",
- "CAP_SETGID",
- "CAP_SETUID",
- "CAP_SETFCAP",
- "CAP_SETPCAP",
- "CAP_NET_BIND_SERVICE",
- "CAP_SYS_CHROOT",
- "CAP_KILL",
- "CAP_AUDIT_WRITE",
- },
- Effective: []string{
- "CAP_CHOWN",
- "CAP_DAC_OVERRIDE",
- "CAP_FSETID",
- "CAP_FOWNER",
- "CAP_MKNOD",
- "CAP_NET_RAW",
- "CAP_SETGID",
- "CAP_SETUID",
- "CAP_SETFCAP",
- "CAP_SETPCAP",
- "CAP_NET_BIND_SERVICE",
- "CAP_SYS_CHROOT",
- "CAP_KILL",
- "CAP_AUDIT_WRITE",
- },
- Inheritable: []string{
- "CAP_CHOWN",
- "CAP_DAC_OVERRIDE",
- "CAP_FSETID",
- "CAP_FOWNER",
- "CAP_MKNOD",
- "CAP_NET_RAW",
- "CAP_SETGID",
- "CAP_SETUID",
- "CAP_SETFCAP",
- "CAP_SETPCAP",
- "CAP_NET_BIND_SERVICE",
- "CAP_SYS_CHROOT",
- "CAP_KILL",
- "CAP_AUDIT_WRITE",
- },
- Permitted: []string{
- "CAP_CHOWN",
- "CAP_DAC_OVERRIDE",
- "CAP_FSETID",
- "CAP_FOWNER",
- "CAP_MKNOD",
- "CAP_NET_RAW",
- "CAP_SETGID",
- "CAP_SETUID",
- "CAP_SETFCAP",
- "CAP_SETPCAP",
- "CAP_NET_BIND_SERVICE",
- "CAP_SYS_CHROOT",
- "CAP_KILL",
- "CAP_AUDIT_WRITE",
- },
- Ambient: []string{
- "CAP_CHOWN",
- "CAP_DAC_OVERRIDE",
- "CAP_FSETID",
- "CAP_FOWNER",
- "CAP_MKNOD",
- "CAP_NET_RAW",
- "CAP_SETGID",
- "CAP_SETUID",
- "CAP_SETFCAP",
- "CAP_SETPCAP",
- "CAP_NET_BIND_SERVICE",
- "CAP_SYS_CHROOT",
- "CAP_KILL",
- "CAP_AUDIT_WRITE",
- },
- },
- Namespaces: configs.Namespaces([]configs.Namespace{
- {Type: configs.NEWNS},
- {Type: configs.NEWUTS},
- {Type: configs.NEWIPC},
- {Type: configs.NEWPID},
- {Type: configs.NEWUSER},
- {Type: configs.NEWNET},
- {Type: configs.NEWCGROUP},
- }),
- Cgroups: &configs.Cgroup{
- Name: "test-container",
- Parent: "system",
- Resources: &configs.Resources{
- MemorySwappiness: nil,
- AllowAllDevices: nil,
- AllowedDevices: configs.DefaultAllowedDevices,
- },
- },
- MaskPaths: []string{
- "/proc/kcore",
- "/sys/firmware",
- },
- ReadonlyPaths: []string{
- "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
- },
- Devices: configs.DefaultAutoCreatedDevices,
- Hostname: "testing",
- Mounts: []*configs.Mount{
- {
- Source: "proc",
- Destination: "/proc",
- Device: "proc",
- Flags: defaultMountFlags,
- },
- {
- Source: "tmpfs",
- Destination: "/dev",
- Device: "tmpfs",
- Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
- Data: "mode=755",
- },
- {
- Source: "devpts",
- Destination: "/dev/pts",
- Device: "devpts",
- Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
- Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
- },
- {
- Device: "tmpfs",
- Source: "shm",
- Destination: "/dev/shm",
- Data: "mode=1777,size=65536k",
- Flags: defaultMountFlags,
- },
- {
- Source: "mqueue",
- Destination: "/dev/mqueue",
- Device: "mqueue",
- Flags: defaultMountFlags,
- },
- {
- Source: "sysfs",
- Destination: "/sys",
- Device: "sysfs",
- Flags: defaultMountFlags | unix.MS_RDONLY,
- },
- },
- UidMappings: []configs.IDMap{
- {
- ContainerID: 0,
- HostID: 1000,
- Size: 65536,
- },
- },
- GidMappings: []configs.IDMap{
- {
- ContainerID: 0,
- HostID: 1000,
- Size: 65536,
- },
- },
- Networks: []*configs.Network{
- {
- Type: "loopback",
- Address: "127.0.0.1/0",
- Gateway: "localhost",
- },
- },
- Rlimits: []configs.Rlimit{
- {
- Type: unix.RLIMIT_NOFILE,
- Hard: uint64(1025),
- Soft: uint64(1025),
- },
- },
-}
-```
-
-Once you have the configuration populated you can create a container:
-
-```go
-container, err := factory.Create("container-id", config)
-if err != nil {
- logrus.Fatal(err)
- return
-}
-```
-
-To spawn bash as the initial process inside the container and have the
-processes pid returned in order to wait, signal, or kill the process:
-
-```go
-process := &libcontainer.Process{
- Args: []string{"/bin/bash"},
- Env: []string{"PATH=/bin"},
- User: "daemon",
- Stdin: os.Stdin,
- Stdout: os.Stdout,
- Stderr: os.Stderr,
-}
-
-err := container.Run(process)
-if err != nil {
- container.Destroy()
- logrus.Fatal(err)
- return
-}
-
-// wait for the process to finish.
-_, err := process.Wait()
-if err != nil {
- logrus.Fatal(err)
-}
-
-// destroy the container.
-container.Destroy()
-```
-
-Additional ways to interact with a running container are:
-
-```go
-// return all the pids for all processes running inside the container.
-processes, err := container.Processes()
-
-// get detailed cpu, memory, io, and network statistics for the container and
-// it's processes.
-stats, err := container.Stats()
-
-// pause all processes inside the container.
-container.Pause()
-
-// resume all paused processes.
-container.Resume()
-
-// send signal to container's init process.
-container.Signal(signal)
-
-// update container resource constraints.
-container.Set(config)
-
-// get current status of the container.
-status, err := container.Status()
-
-// get current container's state information.
-state, err := container.State()
-```
-
-
-#### Checkpoint & Restore
-
-libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
-This let's you save the state of a process running inside a container to disk, and then restore
-that state into a new process, on the same machine or on another machine.
-
-`criu` version 1.5.2 or higher is required to use checkpoint and restore.
-If you don't already have `criu` installed, you can build it from source, following the
-[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
-generated when building libcontainer with docker.
-
-
-## Copyright and license
-
-Code and documentation copyright 2014 Docker, inc.
-The code and documentation are released under the [Apache 2.0 license](../LICENSE).
-The documentation is also released under Creative Commons Attribution 4.0 International License.
-You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/.
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md
deleted file mode 100644
index 9ec6c3931..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-## nsenter
-
-The `nsenter` package registers a special init constructor that is called before
-the Go runtime has a chance to boot. This provides us the ability to `setns` on
-existing namespaces and avoid the issues that the Go runtime has with multiple
-threads. This constructor will be called if this package is registered,
-imported, in your go application.
-
-The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/)
-package. In cgo, if the import of "C" is immediately preceded by a comment, that comment,
-called the preamble, is used as a header when compiling the C parts of the package.
-So every time we import package `nsenter`, the C code function `nsexec()` would be
-called. And package `nsenter` is only imported in `init.go`, so every time the runc
-`init` command is invoked, that C code is run.
-
-Because `nsexec()` must be run before the Go runtime in order to use the
-Linux kernel namespace, you must `import` this library into a package if
-you plan to use `libcontainer` directly. Otherwise Go will not execute
-the `nsexec()` constructor, which means that the re-exec will not cause
-the namespaces to be joined. You can import it like this:
-
-```go
-import _ "github.com/opencontainers/runc/libcontainer/nsenter"
-```
-
-`nsexec()` will first get the file descriptor number for the init pipe
-from the environment variable `_LIBCONTAINER_INITPIPE` (which was opened
-by the parent and kept open across the fork-exec of the `nsexec()` init
-process). The init pipe is used to read bootstrap data (namespace paths,
-clone flags, uid and gid mappings, and the console path) from the parent
-process. `nsexec()` will then call `setns(2)` to join the namespaces
-provided in the bootstrap data (if available), `clone(2)` a child process
-with the provided clone flags, update the user and group ID mappings, do
-some further miscellaneous setup steps, and then send the PID of the
-child process to the parent of the `nsexec()` "caller". Finally,
-the parent `nsexec()` will exit and the child `nsexec()` process will
-return to allow the Go runtime take over.
-
-NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any
-`CLONE_NEW*` clone flags because we must fork a new process in order to
-enter the PID namespace.
-
-
-
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
deleted file mode 100644
index 9e9bdca05..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef NSENTER_NAMESPACE_H
-#define NSENTER_NAMESPACE_H
-
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE
-#endif
-#include <sched.h>
-
-/* All of these are taken from include/uapi/linux/sched.h */
-#ifndef CLONE_NEWNS
-# define CLONE_NEWNS 0x00020000 /* New mount namespace group */
-#endif
-#ifndef CLONE_NEWCGROUP
-# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
-#endif
-#ifndef CLONE_NEWUTS
-# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
-#endif
-#ifndef CLONE_NEWIPC
-# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
-#endif
-#ifndef CLONE_NEWUSER
-# define CLONE_NEWUSER 0x10000000 /* New user namespace */
-#endif
-#ifndef CLONE_NEWPID
-# define CLONE_NEWPID 0x20000000 /* New pid namespace */
-#endif
-#ifndef CLONE_NEWNET
-# define CLONE_NEWNET 0x40000000 /* New network namespace */
-#endif
-
-#endif /* NSENTER_NAMESPACE_H */
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
deleted file mode 100644
index 07f4d63e4..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
+++ /dev/null
@@ -1,12 +0,0 @@
-// +build linux,!gccgo
-
-package nsenter
-
-/*
-#cgo CFLAGS: -Wall
-extern void nsexec();
-void __attribute__((constructor)) init(void) {
- nsexec();
-}
-*/
-import "C"
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
deleted file mode 100644
index 63c7a3ec2..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
+++ /dev/null
@@ -1,25 +0,0 @@
-// +build linux,gccgo
-
-package nsenter
-
-/*
-#cgo CFLAGS: -Wall
-extern void nsexec();
-void __attribute__((constructor)) init(void) {
- nsexec();
-}
-*/
-import "C"
-
-// AlwaysFalse is here to stay false
-// (and be exported so the compiler doesn't optimize out its reference)
-var AlwaysFalse bool
-
-func init() {
- if AlwaysFalse {
- // by referencing this C init() in a noop test, it will ensure the compiler
- // links in the C function.
- // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
- C.init()
- }
-}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
deleted file mode 100644
index ac701ca39..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
+++ /dev/null
@@ -1,5 +0,0 @@
-// +build !linux !cgo
-
-package nsenter
-
-import "C"
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
deleted file mode 100644
index 28269dfc0..000000000
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ /dev/null
@@ -1,995 +0,0 @@
-
-#define _GNU_SOURCE
-#include <endian.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <grp.h>
-#include <sched.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/prctl.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <linux/limits.h>
-#include <linux/netlink.h>
-#include <linux/types.h>
-
-/* Get all of the CLONE_NEW* flags. */
-#include "namespace.h"
-
-/* Synchronisation values. */
-enum sync_t {
- SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
- SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
- SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
- SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
- SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
- SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
-
- /* XXX: This doesn't help with segfaults and other such issues. */
- SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
-};
-
-/*
- * Synchronisation value for cgroup namespace setup.
- * The same constant is defined in process_linux.go as "createCgroupns".
- */
-#define CREATECGROUPNS 0x80
-
-/* longjmp() arguments. */
-#define JUMP_PARENT 0x00
-#define JUMP_CHILD 0xA0
-#define JUMP_INIT 0xA1
-
-/* JSON buffer. */
-#define JSON_MAX 4096
-
-/* Assume the stack grows down, so arguments should be above it. */
-struct clone_t {
- /*
- * Reserve some space for clone() to locate arguments
- * and retcode in this place
- */
- char stack[4096] __attribute__ ((aligned(16)));
- char stack_ptr[0];
-
- /* There's two children. This is used to execute the different code. */
- jmp_buf *env;
- int jmpval;
-};
-
-struct nlconfig_t {
- char *data;
-
- /* Process settings. */
- uint32_t cloneflags;
- char *oom_score_adj;
- size_t oom_score_adj_len;
-
- /* User namespace settings. */
- char *uidmap;
- size_t uidmap_len;
- char *gidmap;
- size_t gidmap_len;
- char *namespaces;
- size_t namespaces_len;
- uint8_t is_setgroup;
-
- /* Rootless container settings. */
- uint8_t is_rootless_euid; /* boolean */
- char *uidmappath;
- size_t uidmappath_len;
- char *gidmappath;
- size_t gidmappath_len;
-};
-
-/*
- * List of netlink message types sent to us as part of bootstrapping the init.
- * These constants are defined in libcontainer/message_linux.go.
- */
-#define INIT_MSG 62000
-#define CLONE_FLAGS_ATTR 27281
-#define NS_PATHS_ATTR 27282
-#define UIDMAP_ATTR 27283
-#define GIDMAP_ATTR 27284
-#define SETGROUP_ATTR 27285
-#define OOM_SCORE_ADJ_ATTR 27286
-#define ROOTLESS_EUID_ATTR 27287
-#define UIDMAPPATH_ATTR 27288
-#define GIDMAPPATH_ATTR 27289
-
-/*
- * Use the raw syscall for versions of glibc which don't include a function for
- * it, namely (glibc 2.12).
- */
-#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
-# define _GNU_SOURCE
-# include "syscall.h"
-# if !defined(SYS_setns) && defined(__NR_setns)
-# define SYS_setns __NR_setns
-# endif
-
-#ifndef SYS_setns
-# error "setns(2) syscall not supported by glibc version"
-#endif
-
-int setns(int fd, int nstype)
-{
- return syscall(SYS_setns, fd, nstype);
-}
-#endif
-
-/* XXX: This is ugly. */
-static int syncfd = -1;
-
-/* TODO(cyphar): Fix this so it correctly deals with syncT. */
-#define bail(fmt, ...) \
- do { \
- int ret = __COUNTER__ + 1; \
- fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
- if (syncfd >= 0) { \
- enum sync_t s = SYNC_ERR; \
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) \
- fprintf(stderr, "nsenter: failed: write(s)"); \
- if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret)) \
- fprintf(stderr, "nsenter: failed: write(ret)"); \
- } \
- exit(ret); \
- } while(0)
-
-static int write_file(char *data, size_t data_len, char *pathfmt, ...)
-{
- int fd, len, ret = 0;
- char path[PATH_MAX];
-
- va_list ap;
- va_start(ap, pathfmt);
- len = vsnprintf(path, PATH_MAX, pathfmt, ap);
- va_end(ap);
- if (len < 0)
- return -1;
-
- fd = open(path, O_RDWR);
- if (fd < 0) {
- return -1;
- }
-
- len = write(fd, data, data_len);
- if (len != data_len) {
- ret = -1;
- goto out;
- }
-
- out:
- close(fd);
- return ret;
-}
-
-enum policy_t {
- SETGROUPS_DEFAULT = 0,
- SETGROUPS_ALLOW,
- SETGROUPS_DENY,
-};
-
-/* This *must* be called before we touch gid_map. */
-static void update_setgroups(int pid, enum policy_t setgroup)
-{
- char *policy;
-
- switch (setgroup) {
- case SETGROUPS_ALLOW:
- policy = "allow";
- break;
- case SETGROUPS_DENY:
- policy = "deny";
- break;
- case SETGROUPS_DEFAULT:
- default:
- /* Nothing to do. */
- return;
- }
-
- if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) {
- /*
- * If the kernel is too old to support /proc/pid/setgroups,
- * open(2) or write(2) will return ENOENT. This is fine.
- */
- if (errno != ENOENT)
- bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
- }
-}
-
-static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len)
-{
- int child;
-
- /*
- * If @app is NULL, execve will segfault. Just check it here and bail (if
- * we're in this path, the caller is already getting desperate and there
- * isn't a backup to this failing). This usually would be a configuration
- * or programming issue.
- */
- if (!app)
- bail("mapping tool not present");
-
- child = fork();
- if (child < 0)
- bail("failed to fork");
-
- if (!child) {
-#define MAX_ARGV 20
- char *argv[MAX_ARGV];
- char *envp[] = { NULL };
- char pid_fmt[16];
- int argc = 0;
- char *next;
-
- snprintf(pid_fmt, 16, "%d", pid);
-
- argv[argc++] = (char *)app;
- argv[argc++] = pid_fmt;
- /*
- * Convert the map string into a list of argument that
- * newuidmap/newgidmap can understand.
- */
-
- while (argc < MAX_ARGV) {
- if (*map == '\0') {
- argv[argc++] = NULL;
- break;
- }
- argv[argc++] = map;
- next = strpbrk(map, "\n ");
- if (next == NULL)
- break;
- *next++ = '\0';
- map = next + strspn(next, "\n ");
- }
-
- execve(app, argv, envp);
- bail("failed to execv");
- } else {
- int status;
-
- while (true) {
- if (waitpid(child, &status, 0) < 0) {
- if (errno == EINTR)
- continue;
- bail("failed to waitpid");
- }
- if (WIFEXITED(status) || WIFSIGNALED(status))
- return WEXITSTATUS(status);
- }
- }
-
- return -1;
-}
-
-static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
-{
- if (map == NULL || map_len <= 0)
- return;
-
- if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
- if (errno != EPERM)
- bail("failed to update /proc/%d/uid_map", pid);
- if (try_mapping_tool(path, pid, map, map_len))
- bail("failed to use newuid map on %d", pid);
- }
-}
-
-static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
-{
- if (map == NULL || map_len <= 0)
- return;
-
- if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
- if (errno != EPERM)
- bail("failed to update /proc/%d/gid_map", pid);
- if (try_mapping_tool(path, pid, map, map_len))
- bail("failed to use newgid map on %d", pid);
- }
-}
-
-static void update_oom_score_adj(char *data, size_t len)
-{
- if (data == NULL || len <= 0)
- return;
-
- if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
- bail("failed to update /proc/self/oom_score_adj");
-}
-
-/* A dummy function that just jumps to the given jumpval. */
-static int child_func(void *arg) __attribute__ ((noinline));
-static int child_func(void *arg)
-{
- struct clone_t *ca = (struct clone_t *)arg;
- longjmp(*ca->env, ca->jmpval);
-}
-
-static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
-static int clone_parent(jmp_buf *env, int jmpval)
-{
- struct clone_t ca = {
- .env = env,
- .jmpval = jmpval,
- };
-
- return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
-}
-
-/*
- * Gets the init pipe fd from the environment, which is used to read the
- * bootstrap data and tell the parent what the new pid is after we finish
- * setting up the environment.
- */
-static int initpipe(void)
-{
- int pipenum;
- char *initpipe, *endptr;
-
- initpipe = getenv("_LIBCONTAINER_INITPIPE");
- if (initpipe == NULL || *initpipe == '\0')
- return -1;
-
- pipenum = strtol(initpipe, &endptr, 10);
- if (*endptr != '\0')
- bail("unable to parse _LIBCONTAINER_INITPIPE");
-
- return pipenum;
-}
-
-/* Returns the clone(2) flag for a namespace, given the name of a namespace. */
-static int nsflag(char *name)
-{
- if (!strcmp(name, "cgroup"))
- return CLONE_NEWCGROUP;
- else if (!strcmp(name, "ipc"))
- return CLONE_NEWIPC;
- else if (!strcmp(name, "mnt"))
- return CLONE_NEWNS;
- else if (!strcmp(name, "net"))
- return CLONE_NEWNET;
- else if (!strcmp(name, "pid"))
- return CLONE_NEWPID;
- else if (!strcmp(name, "user"))
- return CLONE_NEWUSER;
- else if (!strcmp(name, "uts"))
- return CLONE_NEWUTS;
-
- /* If we don't recognise a name, fallback to 0. */
- return 0;
-}
-
-static uint32_t readint32(char *buf)
-{
- return *(uint32_t *) buf;
-}
-
-static uint8_t readint8(char *buf)
-{
- return *(uint8_t *) buf;
-}
-
-static void nl_parse(int fd, struct nlconfig_t *config)
-{
- size_t len, size;
- struct nlmsghdr hdr;
- char *data, *current;
-
- /* Retrieve the netlink header. */
- len = read(fd, &hdr, NLMSG_HDRLEN);
- if (len != NLMSG_HDRLEN)
- bail("invalid netlink header length %zu", len);
-
- if (hdr.nlmsg_type == NLMSG_ERROR)
- bail("failed to read netlink message");
-
- if (hdr.nlmsg_type != INIT_MSG)
- bail("unexpected msg type %d", hdr.nlmsg_type);
-
- /* Retrieve data. */
- size = NLMSG_PAYLOAD(&hdr, 0);
- current = data = malloc(size);
- if (!data)
- bail("failed to allocate %zu bytes of memory for nl_payload", size);
-
- len = read(fd, data, size);
- if (len != size)
- bail("failed to read netlink payload, %zu != %zu", len, size);
-
- /* Parse the netlink payload. */
- config->data = data;
- while (current < data + size) {
- struct nlattr *nlattr = (struct nlattr *)current;
- size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
-
- /* Advance to payload. */
- current += NLA_HDRLEN;
-
- /* Handle payload. */
- switch (nlattr->nla_type) {
- case CLONE_FLAGS_ATTR:
- config->cloneflags = readint32(current);
- break;
- case ROOTLESS_EUID_ATTR:
- config->is_rootless_euid = readint8(current); /* boolean */
- break;
- case OOM_SCORE_ADJ_ATTR:
- config->oom_score_adj = current;
- config->oom_score_adj_len = payload_len;
- break;
- case NS_PATHS_ATTR:
- config->namespaces = current;
- config->namespaces_len = payload_len;
- break;
- case UIDMAP_ATTR:
- config->uidmap = current;
- config->uidmap_len = payload_len;
- break;
- case GIDMAP_ATTR:
- config->gidmap = current;
- config->gidmap_len = payload_len;
- break;
- case UIDMAPPATH_ATTR:
- config->uidmappath = current;
- config->uidmappath_len = payload_len;
- break;
- case GIDMAPPATH_ATTR:
- config->gidmappath = current;
- config->gidmappath_len = payload_len;
- break;
- case SETGROUP_ATTR:
- config->is_setgroup = readint8(current);
- break;
- default:
- bail("unknown netlink message type %d", nlattr->nla_type);
- }
-
- current += NLA_ALIGN(payload_len);
- }
-}
-
-void nl_free(struct nlconfig_t *config)
-{
- free(config->data);
-}
-
-void join_namespaces(char *nslist)
-{
- int num = 0, i;
- char *saveptr = NULL;
- char *namespace = strtok_r(nslist, ",", &saveptr);
- struct namespace_t {
- int fd;
- int ns;
- char type[PATH_MAX];
- char path[PATH_MAX];
- } *namespaces = NULL;
-
- if (!namespace || !strlen(namespace) || !strlen(nslist))
- bail("ns paths are empty");
-
- /*
- * We have to open the file descriptors first, since after
- * we join the mnt namespace we might no longer be able to
- * access the paths.
- */
- do {
- int fd;
- char *path;
- struct namespace_t *ns;
-
- /* Resize the namespace array. */
- namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
- if (!namespaces)
- bail("failed to reallocate namespace array");
- ns = &namespaces[num - 1];
-
- /* Split 'ns:path'. */
- path = strstr(namespace, ":");
- if (!path)
- bail("failed to parse %s", namespace);
- *path++ = '\0';
-
- fd = open(path, O_RDONLY);
- if (fd < 0)
- bail("failed to open %s", path);
-
- ns->fd = fd;
- ns->ns = nsflag(namespace);
- strncpy(ns->path, path, PATH_MAX - 1);
- ns->path[PATH_MAX - 1] = '\0';
- } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
-
- /*
- * The ordering in which we join namespaces is important. We should
- * always join the user namespace *first*. This is all guaranteed
- * from the container_linux.go side of this, so we're just going to
- * follow the order given to us.
- */
-
- for (i = 0; i < num; i++) {
- struct namespace_t ns = namespaces[i];
-
- if (setns(ns.fd, ns.ns) < 0)
- bail("failed to setns to %s", ns.path);
-
- close(ns.fd);
- }
-
- free(namespaces);
-}
-
-void nsexec(void)
-{
- int pipenum;
- jmp_buf env;
- int sync_child_pipe[2], sync_grandchild_pipe[2];
- struct nlconfig_t config = { 0 };
-
- /*
- * If we don't have an init pipe, just return to the go routine.
- * We'll only get an init pipe for start or exec.
- */
- pipenum = initpipe();
- if (pipenum == -1)
- return;
-
- /* Parse all of the netlink configuration. */
- nl_parse(pipenum, &config);
-
- /* Set oom_score_adj. This has to be done before !dumpable because
- * /proc/self/oom_score_adj is not writeable unless you're an privileged
- * user (if !dumpable is set). All children inherit their parent's
- * oom_score_adj value on fork(2) so this will always be propagated
- * properly.
- */
- update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
-
- /*
- * Make the process non-dumpable, to avoid various race conditions that
- * could cause processes in namespaces we're joining to access host
- * resources (or potentially execute code).
- *
- * However, if the number of namespaces we are joining is 0, we are not
- * going to be switching to a different security context. Thus setting
- * ourselves to be non-dumpable only breaks things (like rootless
- * containers), which is the recommendation from the kernel folks.
- */
- if (config.namespaces) {
- if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
- bail("failed to set process as non-dumpable");
- }
-
- /* Pipe so we can tell the child when we've finished setting up. */
- if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
- bail("failed to setup sync pipe between parent and child");
-
- /*
- * We need a new socketpair to sync with grandchild so we don't have
- * race condition with child.
- */
- if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
- bail("failed to setup sync pipe between parent and grandchild");
-
- /* TODO: Currently we aren't dealing with child deaths properly. */
-
- /*
- * Okay, so this is quite annoying.
- *
- * In order for this unsharing code to be more extensible we need to split
- * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case
- * would be if we did clone(CLONE_NEWUSER) and the other namespaces
- * separately, but because of SELinux issues we cannot really do that. But
- * we cannot just dump the namespace flags into clone(...) because several
- * usecases (such as rootless containers) require more granularity around
- * the namespace setup. In addition, some older kernels had issues where
- * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot
- * handle this while also dealing with SELinux so we choose SELinux support
- * over broken kernel support).
- *
- * However, if we unshare(2) the user namespace *before* we clone(2), then
- * all hell breaks loose.
- *
- * The parent no longer has permissions to do many things (unshare(2) drops
- * all capabilities in your old namespace), and the container cannot be set
- * up to have more than one {uid,gid} mapping. This is obviously less than
- * ideal. In order to fix this, we have to first clone(2) and then unshare.
- *
- * Unfortunately, it's not as simple as that. We have to fork to enter the
- * PID namespace (the PID namespace only applies to children). Since we'll
- * have to double-fork, this clone_parent() call won't be able to get the
- * PID of the _actual_ init process (without doing more synchronisation than
- * I can deal with at the moment). So we'll just get the parent to send it
- * for us, the only job of this process is to update
- * /proc/pid/{setgroups,uid_map,gid_map}.
- *
- * And as a result of the above, we also need to setns(2) in the first child
- * because if we join a PID namespace in the topmost parent then our child
- * will be in that namespace (and it will not be able to give us a PID value
- * that makes sense without resorting to sending things with cmsg).
- *
- * This also deals with an older issue caused by dumping cloneflags into
- * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so
- * we have to unshare(2) before clone(2) in order to do this. This was fixed
- * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was
- * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're
- * aware, the last mainline kernel which had this bug was Linux 3.12.
- * However, we cannot comment on which kernels the broken patch was
- * backported to.
- *
- * -- Aleksa "what has my life come to?" Sarai
- */
-
- switch (setjmp(env)) {
- /*
- * Stage 0: We're in the parent. Our job is just to create a new child
- * (stage 1: JUMP_CHILD) process and write its uid_map and
- * gid_map. That process will go on to create a new process, then
- * it will send us its PID which we will send to the bootstrap
- * process.
- */
- case JUMP_PARENT:{
- int len;
- pid_t child, first_child = -1;
- bool ready = false;
-
- /* For debugging. */
- prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
-
- /* Start the process of getting a container. */
- child = clone_parent(&env, JUMP_CHILD);
- if (child < 0)
- bail("unable to fork: child_func");
-
- /*
- * State machine for synchronisation with the children.
- *
- * Father only return when both child and grandchild are
- * ready, so we can receive all possible error codes
- * generated by children.
- */
- while (!ready) {
- enum sync_t s;
- int ret;
-
- syncfd = sync_child_pipe[1];
- close(sync_child_pipe[0]);
-
- if (read(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with child: next state");
-
- switch (s) {
- case SYNC_ERR:
- /* We have to mirror the error code of the child. */
- if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
- bail("failed to sync with child: read(error code)");
-
- exit(ret);
- case SYNC_USERMAP_PLS:
- /*
- * Enable setgroups(2) if we've been asked to. But we also
- * have to explicitly disable setgroups(2) if we're
- * creating a rootless container for single-entry mapping.
- * i.e. config.is_setgroup == false.
- * (this is required since Linux 3.19).
- *
- * For rootless multi-entry mapping, config.is_setgroup shall be true and
- * newuidmap/newgidmap shall be used.
- */
-
- if (config.is_rootless_euid && !config.is_setgroup)
- update_setgroups(child, SETGROUPS_DENY);
-
- /* Set up mappings. */
- update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
- update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
-
- s = SYNC_USERMAP_ACK;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(child, SIGKILL);
- bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
- }
- break;
- case SYNC_RECVPID_PLS:{
- first_child = child;
-
- /* Get the init_func pid. */
- if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
- kill(first_child, SIGKILL);
- bail("failed to sync with child: read(childpid)");
- }
-
- /* Send ACK. */
- s = SYNC_RECVPID_ACK;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(first_child, SIGKILL);
- kill(child, SIGKILL);
- bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
- }
-
- /* Send the init_func pid back to our parent.
- *
- * Send the init_func pid and the pid of the first child back to our parent.
- * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
- * It becomes the responsibility of our parent to reap the first child.
- */
- len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
- if (len < 0) {
- kill(child, SIGKILL);
- bail("unable to generate JSON for child pid");
- }
- }
- break;
- case SYNC_CHILD_READY:
- ready = true;
- break;
- default:
- bail("unexpected sync value: %u", s);
- }
- }
-
- /* Now sync with grandchild. */
-
- ready = false;
- while (!ready) {
- enum sync_t s;
- int ret;
-
- syncfd = sync_grandchild_pipe[1];
- close(sync_grandchild_pipe[0]);
-
- s = SYNC_GRANDCHILD;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(child, SIGKILL);
- bail("failed to sync with child: write(SYNC_GRANDCHILD)");
- }
-
- if (read(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with child: next state");
-
- switch (s) {
- case SYNC_ERR:
- /* We have to mirror the error code of the child. */
- if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
- bail("failed to sync with child: read(error code)");
-
- exit(ret);
- case SYNC_CHILD_READY:
- ready = true;
- break;
- default:
- bail("unexpected sync value: %u", s);
- }
- }
- exit(0);
- }
-
- /*
- * Stage 1: We're in the first child process. Our job is to join any
- * provided namespaces in the netlink payload and unshare all
- * of the requested namespaces. If we've been asked to
- * CLONE_NEWUSER, we will ask our parent (stage 0) to set up
- * our user mappings for us. Then, we create a new child
- * (stage 2: JUMP_INIT) for PID namespace. We then send the
- * child's PID to our parent (stage 0).
- */
- case JUMP_CHILD:{
- pid_t child;
- enum sync_t s;
-
- /* We're in a child and thus need to tell the parent if we die. */
- syncfd = sync_child_pipe[0];
- close(sync_child_pipe[1]);
-
- /* For debugging. */
- prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
-
- /*
- * We need to setns first. We cannot do this earlier (in stage 0)
- * because of the fact that we forked to get here (the PID of
- * [stage 2: JUMP_INIT]) would be meaningless). We could send it
- * using cmsg(3) but that's just annoying.
- */
- if (config.namespaces)
- join_namespaces(config.namespaces);
-
- /*
- * Deal with user namespaces first. They are quite special, as they
- * affect our ability to unshare other namespaces and are used as
- * context for privilege checks.
- *
- * We don't unshare all namespaces in one go. The reason for this
- * is that, while the kernel documentation may claim otherwise,
- * there are certain cases where unsharing all namespaces at once
- * will result in namespace objects being owned incorrectly.
- * Ideally we should just fix these kernel bugs, but it's better to
- * be safe than sorry, and fix them separately.
- *
- * A specific case of this is that the SELinux label of the
- * internal kern-mount that mqueue uses will be incorrect if the
- * UTS namespace is cloned before the USER namespace is mapped.
- * I've also heard of similar problems with the network namespace
- * in some scenarios. This also mirrors how LXC deals with this
- * problem.
- */
- if (config.cloneflags & CLONE_NEWUSER) {
- if (unshare(CLONE_NEWUSER) < 0)
- bail("failed to unshare user namespace");
- config.cloneflags &= ~CLONE_NEWUSER;
-
- /*
- * We don't have the privileges to do any mapping here (see the
- * clone_parent rant). So signal our parent to hook us up.
- */
-
- /* Switching is only necessary if we joined namespaces. */
- if (config.namespaces) {
- if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
- bail("failed to set process as dumpable");
- }
- s = SYNC_USERMAP_PLS;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
-
- /* ... wait for mapping ... */
-
- if (read(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
- if (s != SYNC_USERMAP_ACK)
- bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
- /* Switching is only necessary if we joined namespaces. */
- if (config.namespaces) {
- if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
- bail("failed to set process as dumpable");
- }
-
- /* Become root in the namespace proper. */
- if (setresuid(0, 0, 0) < 0)
- bail("failed to become root in user namespace");
- }
- /*
- * Unshare all of the namespaces. Now, it should be noted that this
- * ordering might break in the future (especially with rootless
- * containers). But for now, it's not possible to split this into
- * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
- *
- * Note that we don't merge this with clone() because there were
- * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
- * was broken, so we'll just do it the long way anyway.
- */
- if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
- bail("failed to unshare namespaces");
-
- /*
- * TODO: What about non-namespace clone flags that we're dropping here?
- *
- * We fork again because of PID namespace, setns(2) or unshare(2) don't
- * change the PID namespace of the calling process, because doing so
- * would change the caller's idea of its own PID (as reported by getpid()),
- * which would break many applications and libraries, so we must fork
- * to actually enter the new PID namespace.
- */
- child = clone_parent(&env, JUMP_INIT);
- if (child < 0)
- bail("unable to fork: init_func");
-
- /* Send the child to our parent, which knows what it's doing. */
- s = SYNC_RECVPID_PLS;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(child, SIGKILL);
- bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
- }
- if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
- kill(child, SIGKILL);
- bail("failed to sync with parent: write(childpid)");
- }
-
- /* ... wait for parent to get the pid ... */
-
- if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(child, SIGKILL);
- bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
- }
- if (s != SYNC_RECVPID_ACK) {
- kill(child, SIGKILL);
- bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
- }
-
- s = SYNC_CHILD_READY;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
- kill(child, SIGKILL);
- bail("failed to sync with parent: write(SYNC_CHILD_READY)");
- }
-
- /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
- exit(0);
- }
-
- /*
- * Stage 2: We're the final child process, and the only process that will
- * actually return to the Go runtime. Our job is to just do the
- * final cleanup steps and then return to the Go runtime to allow
- * init_linux.go to run.
- */
- case JUMP_INIT:{
- /*
- * We're inside the child now, having jumped from the
- * start_child() code after forking in the parent.
- */
- enum sync_t s;
-
- /* We're in a child and thus need to tell the parent if we die. */
- syncfd = sync_grandchild_pipe[0];
- close(sync_grandchild_pipe[1]);
- close(sync_child_pipe[0]);
- close(sync_child_pipe[1]);
-
- /* For debugging. */
- prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
-
- if (read(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
- if (s != SYNC_GRANDCHILD)
- bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
-
- if (setsid() < 0)
- bail("setsid failed");
-
- if (setuid(0) < 0)
- bail("setuid failed");
-
- if (setgid(0) < 0)
- bail("setgid failed");
-
- if (!config.is_rootless_euid && config.is_setgroup) {
- if (setgroups(0, NULL) < 0)
- bail("setgroups failed");
- }
-
- /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
- if (config.cloneflags & CLONE_NEWCGROUP) {
- uint8_t value;
- if (read(pipenum, &value, sizeof(value)) != sizeof(value))
- bail("read synchronisation value failed");
- if (value == CREATECGROUPNS) {
- if (unshare(CLONE_NEWCGROUP) < 0)
- bail("failed to unshare cgroup namespace");
- } else
- bail("received unknown synchronisation value");
- }
-
- s = SYNC_CHILD_READY;
- if (write(syncfd, &s, sizeof(s)) != sizeof(s))
- bail("failed to sync with patent: write(SYNC_CHILD_READY)");
-
- /* Close sync pipes. */
- close(sync_grandchild_pipe[0]);
-
- /* Free netlink data. */
- nl_free(&config);
-
- /* Finish executing, let the Go runtime take over. */
- return;
- }
- default:
- bail("unexpected jump value");
- }
-
- /* Should never be reached. */
- bail("should never be reached");
-}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS b/vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS
new file mode 100644
index 000000000..edbe20066
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/MAINTAINERS
@@ -0,0 +1,2 @@
+Tianon Gravi <admwiggin@gmail.com> (@tianon)
+Aleksa Sarai <cyphar@cyphar.com> (@cyphar)