From 7462ebe830b256e9e145d133c824de5dfd23045d Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Fri, 3 Aug 2018 07:27:33 -0400 Subject: Vendor in latest github.com/projectatomic/buildah This adds support for Dockerfile.in and fixes some limits issues on docker build Also adds support for podman build to read Dockerfile from stdin. cat Dockerfile | podman build -f - . Signed-off-by: Daniel J Walsh Closes: #1209 Approved by: mheon --- vendor/github.com/projectatomic/buildah/buildah.go | 4 +- .../github.com/projectatomic/buildah/chroot/run.go | 1248 ++++++++++++++++++++ .../projectatomic/buildah/chroot/seccomp.go | 142 +++ .../buildah/chroot/seccomp_unsupported.go | 15 + .../projectatomic/buildah/chroot/selinux.go | 22 + .../buildah/chroot/selinux_unsupported.go | 18 + .../projectatomic/buildah/chroot/unsupported.go | 15 + .../projectatomic/buildah/chroot/util.go | 15 + .../projectatomic/buildah/imagebuildah/build.go | 119 +- vendor/github.com/projectatomic/buildah/new.go | 9 + .../projectatomic/buildah/pkg/parse/parse.go | 30 +- vendor/github.com/projectatomic/buildah/run.go | 27 +- .../projectatomic/buildah/unshare/unshare.c | 110 ++ .../projectatomic/buildah/unshare/unshare.go | 273 +++++ .../projectatomic/buildah/unshare/unshare_cgo.go | 10 + .../projectatomic/buildah/unshare/unshare_gccgo.go | 25 + .../buildah/unshare/unshare_unsupported.go | 1 + .../github.com/projectatomic/buildah/util/types.go | 5 + .../github.com/projectatomic/buildah/vendor.conf | 6 +- 19 files changed, 2063 insertions(+), 31 deletions(-) create mode 100644 vendor/github.com/projectatomic/buildah/chroot/run.go create mode 100644 vendor/github.com/projectatomic/buildah/chroot/seccomp.go create mode 100644 vendor/github.com/projectatomic/buildah/chroot/seccomp_unsupported.go create mode 100644 vendor/github.com/projectatomic/buildah/chroot/selinux.go create mode 100644 vendor/github.com/projectatomic/buildah/chroot/selinux_unsupported.go create mode 100644 
vendor/github.com/projectatomic/buildah/chroot/unsupported.go create mode 100644 vendor/github.com/projectatomic/buildah/chroot/util.go create mode 100644 vendor/github.com/projectatomic/buildah/unshare/unshare.c create mode 100644 vendor/github.com/projectatomic/buildah/unshare/unshare.go create mode 100644 vendor/github.com/projectatomic/buildah/unshare/unshare_cgo.go create mode 100644 vendor/github.com/projectatomic/buildah/unshare/unshare_gccgo.go create mode 100644 vendor/github.com/projectatomic/buildah/unshare/unshare_unsupported.go (limited to 'vendor/github.com/projectatomic/buildah') diff --git a/vendor/github.com/projectatomic/buildah/buildah.go b/vendor/github.com/projectatomic/buildah/buildah.go index 1a103809e..1f5212362 100644 --- a/vendor/github.com/projectatomic/buildah/buildah.go +++ b/vendor/github.com/projectatomic/buildah/buildah.go @@ -360,7 +360,9 @@ type BuilderOptions struct { // after processing the AddCapabilities set, when running commands in the // container. If a capability appears in both lists, it will be dropped. DropCapabilities []string - + // ImageOnly is a boolean designating that we wish to only pull the image and + // to not create a container from it. Used by pull command. 
+ ImageOnly bool CommonBuildOpts *CommonBuildOptions } diff --git a/vendor/github.com/projectatomic/buildah/chroot/run.go b/vendor/github.com/projectatomic/buildah/chroot/run.go new file mode 100644 index 000000000..77709c52d --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/run.go @@ -0,0 +1,1248 @@ +// +build linux + +package chroot + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "syscall" + "unsafe" + + "github.com/containers/storage/pkg/ioutils" + "github.com/containers/storage/pkg/mount" + "github.com/containers/storage/pkg/reexec" + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/projectatomic/buildah/bind" + "github.com/projectatomic/buildah/unshare" + "github.com/projectatomic/buildah/util" + "github.com/sirupsen/logrus" + "github.com/syndtr/gocapability/capability" + "golang.org/x/crypto/ssh/terminal" + "golang.org/x/sys/unix" +) + +const ( + // runUsingChrootCommand is a command we use as a key for reexec + runUsingChrootCommand = "buildah-chroot-runtime" + // runUsingChrootExec is a command we use as a key for reexec + runUsingChrootExecCommand = "buildah-chroot-exec" +) + +var ( + rlimitsMap = map[string]int{ + "RLIMIT_AS": unix.RLIMIT_AS, + "RLIMIT_CORE": unix.RLIMIT_CORE, + "RLIMIT_CPU": unix.RLIMIT_CPU, + "RLIMIT_DATA": unix.RLIMIT_DATA, + "RLIMIT_FSIZE": unix.RLIMIT_FSIZE, + "RLIMIT_LOCKS": unix.RLIMIT_LOCKS, + "RLIMIT_MEMLOCK": unix.RLIMIT_MEMLOCK, + "RLIMIT_MSGQUEUE": unix.RLIMIT_MSGQUEUE, + "RLIMIT_NICE": unix.RLIMIT_NICE, + "RLIMIT_NOFILE": unix.RLIMIT_NOFILE, + "RLIMIT_NPROC": unix.RLIMIT_NPROC, + "RLIMIT_RSS": unix.RLIMIT_RSS, + "RLIMIT_RTPRIO": unix.RLIMIT_RTPRIO, + "RLIMIT_RTTIME": unix.RLIMIT_RTTIME, + "RLIMIT_SIGPENDING": unix.RLIMIT_SIGPENDING, + "RLIMIT_STACK": unix.RLIMIT_STACK, + } + rlimitsReverseMap = map[int]string{} +) + +func 
init() { + reexec.Register(runUsingChrootCommand, runUsingChrootMain) + reexec.Register(runUsingChrootExecCommand, runUsingChrootExecMain) + for limitName, limitNumber := range rlimitsMap { + rlimitsReverseMap[limitNumber] = limitName + } +} + +type runUsingChrootSubprocOptions struct { + Spec *specs.Spec + BundlePath string + UIDMappings []syscall.SysProcIDMap + GIDMappings []syscall.SysProcIDMap +} + +type runUsingChrootExecSubprocOptions struct { + Spec *specs.Spec + BundlePath string +} + +// RunUsingChroot runs a chrooted process, using some of the settings from the +// passed-in spec, and using the specified bundlePath to hold temporary files, +// directories, and mountpoints. +func RunUsingChroot(spec *specs.Spec, bundlePath string, stdin io.Reader, stdout, stderr io.Writer) (err error) { + var confwg sync.WaitGroup + + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + // Write the runtime configuration, mainly for debugging. + specbytes, err := json.Marshal(spec) + if err != nil { + return err + } + if err = ioutils.AtomicWriteFile(filepath.Join(bundlePath, "config.json"), specbytes, 0600); err != nil { + return errors.Wrapf(err, "error storing runtime configuration") + } + + // Run the grandparent subprocess in a user namespace that reuses the mappings that we have. + uidmap, gidmap, err := util.GetHostIDMappings("") + if err != nil { + return err + } + for i := range uidmap { + uidmap[i].HostID = uidmap[i].ContainerID + } + for i := range gidmap { + gidmap[i].HostID = gidmap[i].ContainerID + } + + // Default to using stdin/stdout/stderr if we weren't passed objects to use. + if stdin == nil { + stdin = os.Stdin + } + if stdout == nil { + stdout = os.Stdout + } + if stderr == nil { + stderr = os.Stderr + } + + // Create a pipe for passing configuration down to the next process. 
+ preader, pwriter, err := os.Pipe() + if err != nil { + return errors.Wrapf(err, "error creating configuration pipe") + } + config, conferr := json.Marshal(runUsingChrootSubprocOptions{ + Spec: spec, + BundlePath: bundlePath, + }) + if conferr != nil { + return errors.Wrapf(conferr, "error encoding configuration for %q", runUsingChrootCommand) + } + + // Set our terminal's mode to raw, to pass handling of special + // terminal input to the terminal in the container. + if spec.Process.Terminal && terminal.IsTerminal(unix.Stdin) { + state, err := terminal.MakeRaw(unix.Stdin) + if err != nil { + logrus.Warnf("error setting terminal state: %v", err) + } else { + defer func() { + if err = terminal.Restore(unix.Stdin, state); err != nil { + logrus.Errorf("unable to restore terminal state: %v", err) + } + }() + } + } + + // Raise any resource limits that are higher than they are now, before + // we drop any more privileges. + if err = setRlimits(spec, false, true); err != nil { + return err + } + + // Start the grandparent subprocess. + cmd := unshare.Command(runUsingChrootCommand) + cmd.Stdin, cmd.Stdout, cmd.Stderr = stdin, stdout, stderr + cmd.Dir = "/" + cmd.Env = append([]string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())}, os.Environ()...) + cmd.UnshareFlags = syscall.CLONE_NEWUSER + cmd.UidMappings = uidmap + cmd.GidMappings = gidmap + cmd.GidMappingsEnableSetgroups = true + + logrus.Debugf("Running %#v in %#v", cmd.Cmd, cmd) + confwg.Add(1) + go func() { + _, conferr = io.Copy(pwriter, bytes.NewReader(config)) + pwriter.Close() + confwg.Done() + }() + cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...) + err = cmd.Run() + confwg.Wait() + if err == nil { + return conferr + } + return err +} + +// main() for grandparent subprocess. Its main job is to shuttle stdio back +// and forth, managing a pseudo-terminal if we want one, for our child, the +// parent subprocess. 
+func runUsingChrootMain() { + var options runUsingChrootSubprocOptions + + runtime.LockOSThread() + + // Set logging. + if level := os.Getenv("LOGLEVEL"); level != "" { + if ll, err := strconv.Atoi(level); err == nil { + logrus.SetLevel(logrus.Level(ll)) + } + os.Unsetenv("LOGLEVEL") + } + + // Unpack our configuration. + confPipe := os.NewFile(3, "confpipe") + if confPipe == nil { + fmt.Fprintf(os.Stderr, "error reading options pipe\n") + os.Exit(1) + } + defer confPipe.Close() + if err := json.NewDecoder(confPipe).Decode(&options); err != nil { + fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err) + os.Exit(1) + } + + // Prepare to shuttle stdio back and forth. + rootUid32, rootGid32, err := util.GetHostRootIDs(options.Spec) + if err != nil { + logrus.Errorf("error determining ownership for container stdio") + os.Exit(1) + } + rootUid := int(rootUid32) + rootGid := int(rootGid32) + relays := make(map[int]int) + closeOnceRunning := []*os.File{} + var ctty *os.File + var stdin io.Reader + var stdinCopy io.WriteCloser + var stdout io.Writer + var stderr io.Writer + fdDesc := make(map[int]string) + deferred := func() {} + if options.Spec.Process.Terminal { + // Create a pseudo-terminal -- open a copy of the master side. + ptyMasterFd, err := unix.Open("/dev/ptmx", os.O_RDWR, 0600) + if err != nil { + logrus.Errorf("error opening PTY master using /dev/ptmx: %v", err) + os.Exit(1) + } + // Set the kernel's lock to "unlocked". + locked := 0 + if result, _, err := unix.Syscall(unix.SYS_IOCTL, uintptr(ptyMasterFd), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&locked))); int(result) == -1 { + logrus.Errorf("error locking PTY descriptor: %v", err) + os.Exit(1) + } + // Get a handle for the other end. 
+ ptyFd, _, err := unix.Syscall(unix.SYS_IOCTL, uintptr(ptyMasterFd), unix.TIOCGPTPEER, unix.O_RDWR|unix.O_NOCTTY) + if int(ptyFd) == -1 { + if errno, isErrno := err.(syscall.Errno); !isErrno || (errno != syscall.EINVAL && errno != syscall.ENOTTY) { + logrus.Errorf("error getting PTY descriptor: %v", err) + os.Exit(1) + } + // EINVAL means the kernel's too old to understand TIOCGPTPEER. Try TIOCGPTN. + ptyN, err := unix.IoctlGetInt(ptyMasterFd, unix.TIOCGPTN) + if err != nil { + logrus.Errorf("error getting PTY number: %v", err) + os.Exit(1) + } + ptyName := fmt.Sprintf("/dev/pts/%d", ptyN) + fd, err := unix.Open(ptyName, unix.O_RDWR|unix.O_NOCTTY, 0620) + if err != nil { + logrus.Errorf("error opening PTY %q: %v", ptyName, err) + os.Exit(1) + } + ptyFd = uintptr(fd) + } + // Make notes about what's going where. + relays[ptyMasterFd] = unix.Stdout + relays[unix.Stdin] = ptyMasterFd + fdDesc[ptyMasterFd] = "container terminal" + fdDesc[unix.Stdin] = "stdin" + fdDesc[unix.Stdout] = "stdout" + winsize := &unix.Winsize{} + // Set the pseudoterminal's size to the configured size, or our own. + if options.Spec.Process.ConsoleSize != nil { + // Use configured sizes. + winsize.Row = uint16(options.Spec.Process.ConsoleSize.Height) + winsize.Col = uint16(options.Spec.Process.ConsoleSize.Width) + } else { + if terminal.IsTerminal(unix.Stdin) { + // Use the size of our terminal. + winsize, err = unix.IoctlGetWinsize(unix.Stdin, unix.TIOCGWINSZ) + if err != nil { + logrus.Debugf("error reading current terminal's size") + winsize.Row = 0 + winsize.Col = 0 + } + } + } + if winsize.Row != 0 && winsize.Col != 0 { + if err = unix.IoctlSetWinsize(int(ptyFd), unix.TIOCSWINSZ, winsize); err != nil { + logrus.Warnf("error setting terminal size for pty") + } + // FIXME - if we're connected to a terminal, we should + // be passing the updated terminal size down when we + // receive a SIGWINCH. + } + // Open an *os.File object that we can pass to our child. 
+ ctty = os.NewFile(ptyFd, "/dev/tty") + // Set ownership for the PTY. + if err = ctty.Chown(rootUid, rootGid); err != nil { + var cttyInfo unix.Stat_t + err2 := unix.Fstat(int(ptyFd), &cttyInfo) + from := "" + op := "setting" + if err2 == nil { + op = "changing" + from = fmt.Sprintf("from %d/%d ", cttyInfo.Uid, cttyInfo.Gid) + } + logrus.Warnf("error %s ownership of container PTY %sto %d/%d: %v", op, from, rootUid, rootGid, err) + } + // Set permissions on the PTY. + if err = ctty.Chmod(0620); err != nil { + logrus.Errorf("error setting permissions of container PTY: %v", err) + os.Exit(1) + } + // Make a note that our child (the parent subprocess) should + // have the PTY connected to its stdio, and that we should + // close it once it's running. + stdin = ctty + stdout = ctty + stderr = ctty + closeOnceRunning = append(closeOnceRunning, ctty) + } else { + // Create pipes for stdio. + stdinRead, stdinWrite, err := os.Pipe() + if err != nil { + logrus.Errorf("error opening pipe for stdin: %v", err) + } + stdoutRead, stdoutWrite, err := os.Pipe() + if err != nil { + logrus.Errorf("error opening pipe for stdout: %v", err) + } + stderrRead, stderrWrite, err := os.Pipe() + if err != nil { + logrus.Errorf("error opening pipe for stderr: %v", err) + } + // Make notes about what's going where. + relays[unix.Stdin] = int(stdinWrite.Fd()) + relays[int(stdoutRead.Fd())] = unix.Stdout + relays[int(stderrRead.Fd())] = unix.Stderr + fdDesc[int(stdinWrite.Fd())] = "container stdin pipe" + fdDesc[int(stdoutRead.Fd())] = "container stdout pipe" + fdDesc[int(stderrRead.Fd())] = "container stderr pipe" + fdDesc[unix.Stdin] = "stdin" + fdDesc[unix.Stdout] = "stdout" + fdDesc[unix.Stderr] = "stderr" + // Set ownership for the pipes. 
+ if err = stdinRead.Chown(rootUid, rootGid); err != nil { + logrus.Errorf("error setting ownership of container stdin pipe: %v", err) + os.Exit(1) + } + if err = stdoutWrite.Chown(rootUid, rootGid); err != nil { + logrus.Errorf("error setting ownership of container stdout pipe: %v", err) + os.Exit(1) + } + if err = stderrWrite.Chown(rootUid, rootGid); err != nil { + logrus.Errorf("error setting ownership of container stderr pipe: %v", err) + os.Exit(1) + } + // Make a note that our child (the parent subprocess) should + // have the pipes connected to its stdio, and that we should + // close its ends of them once it's running. + stdin = stdinRead + stdout = stdoutWrite + stderr = stderrWrite + closeOnceRunning = append(closeOnceRunning, stdinRead, stdoutWrite, stderrWrite) + stdinCopy = stdinWrite + defer stdoutRead.Close() + defer stderrRead.Close() + } + // A helper that returns false if err is an error that would cause us + // to give up. + logIfNotRetryable := func(err error, what string) (retry bool) { + if err == nil { + return true + } + if errno, isErrno := err.(syscall.Errno); isErrno { + switch errno { + case syscall.EINTR, syscall.EAGAIN: + return true + } + } + logrus.Error(what) + return false + } + for readFd := range relays { + if err := unix.SetNonblock(readFd, true); err != nil { + logrus.Errorf("error setting descriptor %d (%s) non-blocking: %v", readFd, fdDesc[readFd], err) + return + } + } + go func() { + buffers := make(map[int]*bytes.Buffer) + for _, writeFd := range relays { + buffers[writeFd] = new(bytes.Buffer) + } + pollTimeout := -1 + for len(relays) > 0 { + fds := make([]unix.PollFd, 0, len(relays)) + for fd := range relays { + fds = append(fds, unix.PollFd{Fd: int32(fd), Events: unix.POLLIN | unix.POLLHUP}) + } + _, err := unix.Poll(fds, pollTimeout) + if !logIfNotRetryable(err, fmt.Sprintf("poll: %v", err)) { + return + } + removeFds := make(map[int]struct{}) + for _, rfd := range fds { + if rfd.Revents&unix.POLLHUP == unix.POLLHUP { + 
removeFds[int(rfd.Fd)] = struct{}{} + } + if rfd.Revents&unix.POLLNVAL == unix.POLLNVAL { + logrus.Debugf("error polling descriptor %s: closed?", fdDesc[int(rfd.Fd)]) + removeFds[int(rfd.Fd)] = struct{}{} + } + if rfd.Revents&unix.POLLIN == 0 { + continue + } + b := make([]byte, 8192) + nread, err := unix.Read(int(rfd.Fd), b) + logIfNotRetryable(err, fmt.Sprintf("read %s: %v", fdDesc[int(rfd.Fd)], err)) + if nread > 0 { + if wfd, ok := relays[int(rfd.Fd)]; ok { + nwritten, err := buffers[wfd].Write(b[:nread]) + if err != nil { + logrus.Debugf("buffer: %v", err) + continue + } + if nwritten != nread { + logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", nread, nwritten) + continue + } + } + } + if nread == 0 { + removeFds[int(rfd.Fd)] = struct{}{} + } + } + pollTimeout = -1 + for wfd, buffer := range buffers { + if buffer.Len() > 0 { + nwritten, err := unix.Write(wfd, buffer.Bytes()) + logIfNotRetryable(err, fmt.Sprintf("write %s: %v", fdDesc[wfd], err)) + if nwritten >= 0 { + _ = buffer.Next(nwritten) + } + } + if buffer.Len() > 0 { + pollTimeout = 100 + } + } + for rfd := range removeFds { + if !options.Spec.Process.Terminal && rfd == unix.Stdin { + stdinCopy.Close() + } + delete(relays, rfd) + } + } + }() + + // Set up mounts and namespaces, and run the parent subprocess. + status, err := runUsingChroot(options.Spec, options.BundlePath, ctty, stdin, stdout, stderr, closeOnceRunning) + deferred() + if err != nil { + fmt.Fprintf(os.Stderr, "error running subprocess: %v\n", err) + os.Exit(1) + } + + // Pass the process's exit status back to the caller by exiting with the same status. 
+ if status.Exited() { + if status.ExitStatus() != 0 { + fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", status.ExitStatus()) + } + os.Exit(status.ExitStatus()) + } else if status.Signaled() { + fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", status.Signal()) + os.Exit(1) + } +} + +// runUsingChroot, still in the grandparent process, sets up various bind +// mounts and then runs the parent process in its own user namespace with the +// necessary ID mappings. +func runUsingChroot(spec *specs.Spec, bundlePath string, ctty *os.File, stdin io.Reader, stdout, stderr io.Writer, closeOnceRunning []*os.File) (wstatus unix.WaitStatus, err error) { + var confwg sync.WaitGroup + + // Create a new mount namespace for ourselves and bind mount everything to a new location. + undoIntermediates, err := bind.SetupIntermediateMountNamespace(spec, bundlePath) + if err != nil { + return 1, err + } + defer func() { + undoIntermediates() + }() + + // Bind mount in our filesystems. + undoChroots, err := setupChrootBindMounts(spec, bundlePath) + if err != nil { + return 1, err + } + defer func() { + undoChroots() + }() + + // Create a pipe for passing configuration down to the next process. + preader, pwriter, err := os.Pipe() + if err != nil { + return 1, errors.Wrapf(err, "error creating configuration pipe") + } + config, conferr := json.Marshal(runUsingChrootExecSubprocOptions{ + Spec: spec, + BundlePath: bundlePath, + }) + if conferr != nil { + fmt.Fprintf(os.Stderr, "error re-encoding configuration for %q", runUsingChrootExecCommand) + os.Exit(1) + } + + // Apologize for the namespace configuration that we're about to ignore. + logNamespaceDiagnostics(spec) + + // If we have configured ID mappings, set them here so that they can apply to the child. 
+ hostUidmap, hostGidmap, err := util.GetHostIDMappings("") + if err != nil { + return 1, err + } + uidmap, gidmap := spec.Linux.UIDMappings, spec.Linux.GIDMappings + if len(uidmap) == 0 { + // No UID mappings are configured for the container. Borrow our parent's mappings. + uidmap = append([]specs.LinuxIDMapping{}, hostUidmap...) + for i := range uidmap { + uidmap[i].HostID = uidmap[i].ContainerID + } + } + if len(gidmap) == 0 { + // No GID mappings are configured for the container. Borrow our parent's mappings. + gidmap = append([]specs.LinuxIDMapping{}, hostGidmap...) + for i := range gidmap { + gidmap[i].HostID = gidmap[i].ContainerID + } + } + + // Start the parent subprocess. + cmd := unshare.Command(append([]string{runUsingChrootExecCommand}, spec.Process.Args...)...) + cmd.Stdin, cmd.Stdout, cmd.Stderr = stdin, stdout, stderr + cmd.Dir = "/" + cmd.Env = append([]string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())}, os.Environ()...) + cmd.UnshareFlags = syscall.CLONE_NEWUSER | syscall.CLONE_NEWUTS | syscall.CLONE_NEWNS + cmd.UidMappings = uidmap + cmd.GidMappings = gidmap + cmd.GidMappingsEnableSetgroups = true + if ctty != nil { + cmd.Setsid = true + cmd.Ctty = ctty + } + if spec.Process.OOMScoreAdj != nil { + cmd.OOMScoreAdj = *spec.Process.OOMScoreAdj + } + cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...) 
+ cmd.Hook = func(int) error { + for _, f := range closeOnceRunning { + f.Close() + } + return nil + } + + logrus.Debugf("Running %#v in %#v", cmd.Cmd, cmd) + confwg.Add(1) + go func() { + _, conferr = io.Copy(pwriter, bytes.NewReader(config)) + pwriter.Close() + confwg.Done() + }() + err = cmd.Run() + confwg.Wait() + if err != nil { + if exitError, ok := err.(*exec.ExitError); ok { + if waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus); ok { + if waitStatus.Exited() { + if waitStatus.ExitStatus() != 0 { + fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", waitStatus.ExitStatus()) + } + os.Exit(waitStatus.ExitStatus()) + } else if waitStatus.Signaled() { + fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", waitStatus.Signal()) + os.Exit(1) + } + } + } + fmt.Fprintf(os.Stderr, "process exited with error: %v", err) + os.Exit(1) + } + + return 0, nil +} + +// main() for parent subprocess. Its main job is to try to make our +// environment look like the one described by the runtime configuration blob, +// and then launch the intended command as a child, since we can't exec() +// directly. +func runUsingChrootExecMain() { + args := os.Args[1:] + var options runUsingChrootExecSubprocOptions + var err error + + runtime.LockOSThread() + + // Set logging. + if level := os.Getenv("LOGLEVEL"); level != "" { + if ll, err := strconv.Atoi(level); err == nil { + logrus.SetLevel(logrus.Level(ll)) + } + os.Unsetenv("LOGLEVEL") + } + + // Unpack our configuration. + confPipe := os.NewFile(3, "confpipe") + if confPipe == nil { + fmt.Fprintf(os.Stderr, "error reading options pipe\n") + os.Exit(1) + } + defer confPipe.Close() + if err := json.NewDecoder(confPipe).Decode(&options); err != nil { + fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err) + os.Exit(1) + } + + // Set the hostname. 
We're already in a distinct UTS namespace and are admins in the user + // namespace which created it, so we shouldn't get a permissions error, but seccomp policy + // might deny our attempt to call sethostname() anyway, so log a debug message for that. + if options.Spec.Hostname != "" { + if err := unix.Sethostname([]byte(options.Spec.Hostname)); err != nil { + logrus.Debugf("failed to set hostname %q for process: %v", options.Spec.Hostname, err) + } + } + + // not doing because it's still shared: creating devices + // not doing because it's not applicable: setting annotations + // not doing because it's still shared: setting sysctl settings + // not doing because cgroupfs is read only: configuring control groups + // -> this means we can use the freezer to make sure there aren't any lingering processes + // -> this means we ignore cgroups-based controls + // not doing because we don't set any in the config: running hooks + // not doing because we don't set it in the config: setting rootfs read-only + // not doing because we don't set it in the config: setting rootfs propagation + logrus.Debugf("setting apparmor profile") + if err = setApparmorProfile(options.Spec); err != nil { + fmt.Fprintf(os.Stderr, "error setting apparmor profile for process: %v\n", err) + os.Exit(1) + } + if err = setSelinuxLabel(options.Spec); err != nil { + fmt.Fprintf(os.Stderr, "error setting SELinux label for process: %v\n", err) + os.Exit(1) + } + logrus.Debugf("setting capabilities") + if err := setCapabilities(options.Spec); err != nil { + fmt.Fprintf(os.Stderr, "error setting capabilities for process %v\n", err) + os.Exit(1) + } + if err = setSeccomp(options.Spec); err != nil { + fmt.Fprintf(os.Stderr, "error setting seccomp filter for process: %v\n", err) + os.Exit(1) + } + logrus.Debugf("setting resource limits") + if err = setRlimits(options.Spec, false, false); err != nil { + fmt.Fprintf(os.Stderr, "error setting process resource limits for process: %v\n", err) + os.Exit(1) + } + 
+ // Try to chroot into the root. + if err := unix.Chroot(options.Spec.Root.Path); err != nil { + fmt.Fprintf(os.Stderr, "error chroot()ing into directory %q: %v\n", options.Spec.Root.Path, err) + os.Exit(1) + } + cwd := options.Spec.Process.Cwd + if !filepath.IsAbs(cwd) { + cwd = "/" + cwd + } + if err := unix.Chdir(cwd); err != nil { + fmt.Fprintf(os.Stderr, "error chdir()ing into directory %q: %v\n", cwd, err) + os.Exit(1) + } + logrus.Debugf("chrooted into %q, changed working directory to %q", options.Spec.Root.Path, cwd) + + // Drop privileges. + user := options.Spec.Process.User + if len(user.AdditionalGids) > 0 { + gids := make([]int, len(user.AdditionalGids)) + for i := range user.AdditionalGids { + gids[i] = int(user.AdditionalGids[i]) + } + logrus.Debugf("setting supplemental groups") + if err = syscall.Setgroups(gids); err != nil { + fmt.Fprintf(os.Stderr, "error setting supplemental groups list: %v", err) + os.Exit(1) + } + } else { + logrus.Debugf("clearing supplemental groups") + if err = syscall.Setgroups([]int{}); err != nil { + fmt.Fprintf(os.Stderr, "error clearing supplemental groups list: %v", err) + os.Exit(1) + } + } + logrus.Debugf("setting gid") + if err = syscall.Setresgid(int(user.GID), int(user.GID), int(user.GID)); err != nil { + fmt.Fprintf(os.Stderr, "error setting GID: %v", err) + os.Exit(1) + } + logrus.Debugf("setting uid") + if err = syscall.Setresuid(int(user.UID), int(user.UID), int(user.UID)); err != nil { + fmt.Fprintf(os.Stderr, "error setting UID: %v", err) + os.Exit(1) + } + + // Actually run the specified command. + cmd := exec.Command(args[0], args[1:]...) 
+ cmd.Env = options.Spec.Process.Env + cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr + cmd.Dir = cwd + logrus.Debugf("Running %#v (PATH = %q)", cmd, os.Getenv("PATH")) + if err = cmd.Run(); err != nil { + if exitError, ok := err.(*exec.ExitError); ok { + if waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus); ok { + if waitStatus.Exited() { + if waitStatus.ExitStatus() != 0 { + fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", waitStatus.ExitStatus()) + } + os.Exit(waitStatus.ExitStatus()) + } else if waitStatus.Signaled() { + fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", waitStatus.Signal()) + os.Exit(1) + } + } + } + fmt.Fprintf(os.Stderr, "process exited with error: %v", err) + os.Exit(1) + } +} + +// logNamespaceDiagnostics knows which namespaces we want to create. +// Output debug messages when that differs from what we're being asked to do. +func logNamespaceDiagnostics(spec *specs.Spec) { + sawMountNS := false + sawUserNS := false + sawUTSNS := false + for _, ns := range spec.Linux.Namespaces { + switch ns.Type { + case specs.CgroupNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join cgroup namespace, sorry about that") + } else { + logrus.Debugf("unable to create cgroup namespace, sorry about that") + } + case specs.IPCNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join IPC namespace, sorry about that") + } else { + logrus.Debugf("unable to create IPC namespace, sorry about that") + } + case specs.MountNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join mount namespace %q, creating a new one", ns.Path) + } + sawMountNS = true + case specs.NetworkNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join network namespace, sorry about that") + } else { + logrus.Debugf("unable to create network namespace, sorry about that") + } + case specs.PIDNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join PID namespace, sorry about that") + } else { + 
logrus.Debugf("unable to create PID namespace, sorry about that") + } + case specs.UserNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join user namespace %q, creating a new one", ns.Path) + } + sawUserNS = true + case specs.UTSNamespace: + if ns.Path != "" { + logrus.Debugf("unable to join UTS namespace %q, creating a new one", ns.Path) + } + sawUTSNS = true + } + } + if !sawMountNS { + logrus.Debugf("mount namespace not requested, but creating a new one anyway") + } + if !sawUserNS { + logrus.Debugf("user namespace not requested, but creating a new one anyway") + } + if !sawUTSNS { + logrus.Debugf("UTS namespace not requested, but creating a new one anyway") + } +} + +// setApparmorProfile sets the apparmor profile for ourselves, and hopefully any child processes that we'll start. +func setApparmorProfile(spec *specs.Spec) error { + if !apparmor.IsEnabled() || spec.Process.ApparmorProfile == "" { + return nil + } + if err := apparmor.ApplyProfile(spec.Process.ApparmorProfile); err != nil { + return errors.Wrapf(err, "error setting apparmor profile to %q", spec.Process.ApparmorProfile) + } + return nil +} + +// setCapabilities sets capabilities for ourselves, to be more or less inherited by any processes that we'll start. 
+func setCapabilities(spec *specs.Spec) error { + caps, err := capability.NewPid(0) + if err != nil { + return errors.Wrapf(err, "error reading capabilities of current process") + } + capMap := map[capability.CapType][]string{ + capability.BOUNDING: spec.Process.Capabilities.Bounding, + capability.EFFECTIVE: spec.Process.Capabilities.Effective, + capability.INHERITABLE: spec.Process.Capabilities.Inheritable, + capability.PERMITTED: spec.Process.Capabilities.Permitted, + capability.AMBIENT: spec.Process.Capabilities.Ambient, + } + knownCaps := capability.List() + for capType, capList := range capMap { + caps.Clear(capType) + for _, capToSet := range capList { + cap := capability.CAP_LAST_CAP + for _, c := range knownCaps { + if strings.EqualFold("CAP_"+c.String(), capToSet) { + cap = c + break + } + } + if cap == capability.CAP_LAST_CAP { + return errors.Errorf("error mapping capability %q to a number", capToSet) + } + caps.Set(capType, cap) + } + } + for capType := range capMap { + if err = caps.Apply(capType); err != nil { + return errors.Wrapf(err, "error setting %s capabilities to %#v", capType.String(), capMap[capType]) + } + } + return nil +} + +// parses the resource limits for ourselves and any processes that +// we'll start into a format that's more in line with the kernel APIs +func parseRlimits(spec *specs.Spec) (map[int]unix.Rlimit, error) { + if spec.Process == nil { + return nil, nil + } + parsed := make(map[int]unix.Rlimit) + for _, limit := range spec.Process.Rlimits { + resource, recognized := rlimitsMap[strings.ToUpper(limit.Type)] + if !recognized { + return nil, errors.Errorf("error parsing limit type %q", limit.Type) + } + parsed[resource] = unix.Rlimit{Cur: limit.Soft, Max: limit.Hard} + } + return parsed, nil +} + +// setRlimits sets any resource limits that we want to apply to processes that +// we'll start. 
+func setRlimits(spec *specs.Spec, onlyLower, onlyRaise bool) error { + limits, err := parseRlimits(spec) + if err != nil { + return err + } + for resource, desired := range limits { + var current unix.Rlimit + if err := unix.Getrlimit(resource, &current); err != nil { + return errors.Wrapf(err, "error reading %q limit", rlimitsReverseMap[resource]) + } + if desired.Max > current.Max && onlyLower { + // this would raise a hard limit, and we're only here to lower them + continue + } + if desired.Max < current.Max && onlyRaise { + // this would lower a hard limit, and we're only here to raise them + continue + } + if err := unix.Setrlimit(resource, &desired); err != nil { + return errors.Wrapf(err, "error setting %q limit to soft=%d,hard=%d (was soft=%d,hard=%d)", rlimitsReverseMap[resource], desired.Cur, desired.Max, current.Cur, current.Max) + } + } + return nil +} + +// setupChrootBindMounts actually bind mounts things under the rootfs, and returns a +// callback that will clean up its work. +func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func() error, err error) { + var fs unix.Statfs_t + removes := []string{} + undoBinds = func() error { + if err2 := bind.UnmountMountpoints(spec.Root.Path, removes); err2 != nil { + logrus.Warnf("pkg/chroot: error unmounting %q: %v", spec.Root.Path, err2) + if err == nil { + err = err2 + } + } + return err + } + + // Now bind mount all of those things to be under the rootfs's location in this + // mount namespace. + commonFlags := uintptr(unix.MS_BIND | unix.MS_REC | unix.MS_PRIVATE) + bindFlags := commonFlags | unix.MS_NODEV + devFlags := commonFlags | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY + procFlags := devFlags | unix.MS_NODEV + sysFlags := devFlags | unix.MS_NODEV | unix.MS_RDONLY + + // Bind /dev read-only. 
+ subDev := filepath.Join(spec.Root.Path, "/dev") + if err := unix.Mount("/dev", subDev, "bind", devFlags, ""); err != nil { + if os.IsNotExist(err) { + err = os.Mkdir(subDev, 0700) + if err == nil { + err = unix.Mount("/dev", subDev, "bind", devFlags, "") + } + } + if err != nil { + return undoBinds, errors.Wrapf(err, "error bind mounting /dev from host into mount namespace") + } + } + // Make sure it's read-only. + if err = unix.Statfs(subDev, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was bound read-only", subDev) + } + if fs.Flags&unix.ST_RDONLY == 0 { + if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error remounting /dev in mount namespace read-only") + } + } + logrus.Debugf("bind mounted %q to %q", "/dev", filepath.Join(spec.Root.Path, "/dev")) + + // Bind /proc read-write. + subProc := filepath.Join(spec.Root.Path, "/proc") + if err := unix.Mount("/proc", subProc, "bind", procFlags, ""); err != nil { + if os.IsNotExist(err) { + err = os.Mkdir(subProc, 0700) + if err == nil { + err = unix.Mount("/proc", subProc, "bind", procFlags, "") + } + } + if err != nil { + return undoBinds, errors.Wrapf(err, "error bind mounting /proc from host into mount namespace") + } + } + logrus.Debugf("bind mounted %q to %q", "/proc", filepath.Join(spec.Root.Path, "/proc")) + + // Bind /sys read-only. + subSys := filepath.Join(spec.Root.Path, "/sys") + if err := unix.Mount("/sys", subSys, "bind", sysFlags, ""); err != nil { + if os.IsNotExist(err) { + err = os.Mkdir(subSys, 0700) + if err == nil { + err = unix.Mount("/sys", subSys, "bind", sysFlags, "") + } + } + if err != nil { + return undoBinds, errors.Wrapf(err, "error bind mounting /sys from host into mount namespace") + } + } + // Make sure it's read-only. 
+ if err = unix.Statfs(subSys, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was bound read-only", subSys) + } + if fs.Flags&unix.ST_RDONLY == 0 { + if err := unix.Mount(subSys, subSys, "bind", sysFlags|unix.MS_REMOUNT, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error remounting /sys in mount namespace read-only") + } + } + logrus.Debugf("bind mounted %q to %q", "/sys", filepath.Join(spec.Root.Path, "/sys")) + + // Add /sys/fs/selinux to the set of masked paths, to ensure that we don't have processes + // attempting to interact with labeling, when they aren't allowed to do so. + spec.Linux.MaskedPaths = append(spec.Linux.MaskedPaths, "/sys/fs/selinux") + // Add /sys/fs/cgroup to the set of masked paths, to ensure that we don't have processes + // attempting to mess with cgroup configuration, when they aren't allowed to do so. + spec.Linux.MaskedPaths = append(spec.Linux.MaskedPaths, "/sys/fs/cgroup") + + // Bind mount in everything we've been asked to mount. + for _, m := range spec.Mounts { + // Skip anything that we just mounted. + switch m.Destination { + case "/dev", "/proc", "/sys": + logrus.Debugf("already bind mounted %q on %q", m.Destination, filepath.Join(spec.Root.Path, m.Destination)) + continue + default: + if strings.HasPrefix(m.Destination, "/dev/") { + continue + } + if strings.HasPrefix(m.Destination, "/proc/") { + continue + } + if strings.HasPrefix(m.Destination, "/sys/") { + continue + } + } + // Skip anything that isn't a bind or tmpfs mount. + if m.Type != "bind" && m.Type != "tmpfs" { + logrus.Debugf("skipping mount of type %q on %q", m.Type, m.Destination) + continue + } + // If the target is there, we can just mount it. 
+ var srcinfo os.FileInfo + switch m.Type { + case "bind": + srcinfo, err = os.Stat(m.Source) + if err != nil { + return undoBinds, errors.Wrapf(err, "error examining %q for mounting in mount namespace", m.Source) + } + case "tmpfs": + srcinfo, err = os.Stat("/") + if err != nil { + return undoBinds, errors.Wrapf(err, "error examining / to use as a template for a tmpfs") + } + } + target := filepath.Join(spec.Root.Path, m.Destination) + if _, err := os.Stat(target); err != nil { + // If the target can't be stat()ted, check the error. + if !os.IsNotExist(err) { + return undoBinds, errors.Wrapf(err, "error examining %q for mounting in mount namespace", target) + } + // The target isn't there yet, so create it, and make a + // note to remove it later. + if srcinfo.IsDir() { + if err = os.Mkdir(target, 0111); err != nil { + return undoBinds, errors.Wrapf(err, "error creating mountpoint %q in mount namespace", target) + } + removes = append(removes, target) + } else { + var file *os.File + if file, err = os.OpenFile(target, os.O_WRONLY|os.O_CREATE, 0); err != nil { + return undoBinds, errors.Wrapf(err, "error creating mountpoint %q in mount namespace", target) + } + file.Close() + removes = append(removes, target) + } + } + requestFlags := bindFlags + expectedFlags := uintptr(0) + if util.StringInSlice("nodev", m.Options) { + requestFlags |= unix.MS_NODEV + expectedFlags |= unix.ST_NODEV + } + if util.StringInSlice("noexec", m.Options) { + requestFlags |= unix.MS_NOEXEC + expectedFlags |= unix.ST_NOEXEC + } + if util.StringInSlice("nosuid", m.Options) { + requestFlags |= unix.MS_NOSUID + expectedFlags |= unix.ST_NOSUID + } + if util.StringInSlice("ro", m.Options) { + requestFlags |= unix.MS_RDONLY + expectedFlags |= unix.ST_RDONLY + } + switch m.Type { + case "bind": + // Do the bind mount. 
+ if err := unix.Mount(m.Source, target, "", requestFlags, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error bind mounting %q from host to %q in mount namespace (%q)", m.Source, m.Destination, target) + } + logrus.Debugf("bind mounted %q to %q", m.Source, target) + case "tmpfs": + // Mount a tmpfs. + if err := mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil { + return undoBinds, errors.Wrapf(err, "error mounting tmpfs to %q in mount namespace (%q, %q)", m.Destination, target, strings.Join(m.Options, ",")) + } + logrus.Debugf("mounted a tmpfs to %q", target) + } + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was bound read-only", subSys) + } + if uintptr(fs.Flags)&expectedFlags != expectedFlags { + if err := unix.Mount(target, target, "bind", requestFlags|unix.MS_REMOUNT, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error remounting %q in mount namespace with expected flags") + } + } + } + + // Set up any read-only paths that we need to. If we're running inside + // of a container, some of these locations will already be read-only. + for _, roPath := range spec.Linux.ReadonlyPaths { + r := filepath.Join(spec.Root.Path, roPath) + target, err := filepath.EvalSymlinks(r) + if err != nil { + if os.IsNotExist(err) { + // No target, no problem. + continue + } + return undoBinds, errors.Wrapf(err, "error checking %q for symlinks before marking it read-only", r) + } + // Check if the location is already read-only. + var fs unix.Statfs_t + if err = unix.Statfs(target, &fs); err != nil { + if os.IsNotExist(err) { + // No target, no problem. + continue + } + return undoBinds, errors.Wrapf(err, "error checking if directory %q is already read-only", target) + } + if fs.Flags&unix.ST_RDONLY != 0 { + continue + } + // Mount the location over itself, so that we can remount it as read-only. 
+ roFlags := uintptr(unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY) + if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REC, ""); err != nil { + if os.IsNotExist(err) { + // No target, no problem. + continue + } + return undoBinds, errors.Wrapf(err, "error bind mounting %q onto itself in preparation for making it read-only", target) + } + // Remount the location read-only. + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was bound read-only", target) + } + if fs.Flags&unix.ST_RDONLY == 0 { + if err := unix.Mount(target, target, "", roFlags|unix.MS_BIND|unix.MS_REMOUNT, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error remounting %q in mount namespace read-only", target) + } + } + // Check again. + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was remounted read-only", target) + } + if fs.Flags&unix.ST_RDONLY == 0 { + return undoBinds, errors.Wrapf(err, "error verifying that %q in mount namespace was remounted read-only", target) + } + } + + // Set up any masked paths that we need to. If we're running inside of + // a container, some of these locations will already be read-only tmpfs + // filesystems or bind mounted to os.DevNull. If we're not running + // inside of a container, and nobody else has done that, we'll do it. + for _, masked := range spec.Linux.MaskedPaths { + t := filepath.Join(spec.Root.Path, masked) + target, err := filepath.EvalSymlinks(t) + if err != nil { + target = t + } + // Get some info about the null device. + nullinfo, err := os.Stat(os.DevNull) + if err != nil { + return undoBinds, errors.Wrapf(err, "error examining %q for masking in mount namespace", os.DevNull) + } + // Get some info about the target. + targetinfo, err := os.Stat(target) + if err != nil { + if os.IsNotExist(err) { + // No target, no problem. 
+ continue + } + return undoBinds, errors.Wrapf(err, "error examining %q for masking in mount namespace", target) + } + if targetinfo.IsDir() { + // The target's a directory. Check if it's a read-only filesystem. + var statfs unix.Statfs_t + if err = unix.Statfs(target, &statfs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q is a mountpoint", target) + } + isReadOnly := statfs.Flags&unix.MS_RDONLY != 0 + // Check if any of the IDs we're mapping could read it. + isAccessible := true + var stat unix.Stat_t + if err = unix.Stat(target, &stat); err != nil { + return undoBinds, errors.Wrapf(err, "error checking permissions on directory %q", target) + } + isAccessible = false + if stat.Mode&unix.S_IROTH|unix.S_IXOTH != 0 { + isAccessible = true + } + if !isAccessible && stat.Mode&unix.S_IROTH|unix.S_IXOTH != 0 { + if len(spec.Linux.GIDMappings) > 0 { + for _, mapping := range spec.Linux.GIDMappings { + if stat.Gid >= mapping.ContainerID && stat.Gid < mapping.ContainerID+mapping.Size { + isAccessible = true + break + } + } + } + } + if !isAccessible && stat.Mode&unix.S_IRUSR|unix.S_IXUSR != 0 { + if len(spec.Linux.UIDMappings) > 0 { + for _, mapping := range spec.Linux.UIDMappings { + if stat.Uid >= mapping.ContainerID && stat.Uid < mapping.ContainerID+mapping.Size { + isAccessible = true + break + } + } + } + } + // Check if it's empty. + hasContent := false + directory, err := os.Open(target) + if err != nil { + if !os.IsPermission(err) { + return undoBinds, errors.Wrapf(err, "error opening directory %q", target) + } + } else { + names, err := directory.Readdirnames(0) + directory.Close() + if err != nil { + return undoBinds, errors.Wrapf(err, "error reading contents of directory %q", target) + } + hasContent = false + for _, name := range names { + switch name { + case ".", "..": + continue + default: + hasContent = true + } + if hasContent { + break + } + } + } + // The target's a directory, so mount a read-only tmpfs on it. 
+ roFlags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY) + if !isReadOnly || (hasContent && isAccessible) { + if err = unix.Mount("none", target, "tmpfs", roFlags, "size=0"); err != nil { + return undoBinds, errors.Wrapf(err, "error masking directory %q in mount namespace", target) + } + if err = unix.Statfs(target, &fs); err != nil { + return undoBinds, errors.Wrapf(err, "error checking if directory %q was mounted read-only in mount namespace", target) + } + if fs.Flags&unix.ST_RDONLY == 0 { + if err = unix.Mount(target, target, "", roFlags|syscall.MS_REMOUNT, ""); err != nil { + return undoBinds, errors.Wrapf(err, "error making sure directory %q in mount namespace is read only", target) + } + } + } + } else { + // The target's not a directory, so bind mount os.DevNull over it, unless it's already os.DevNull. + if !os.SameFile(nullinfo, targetinfo) { + if err = unix.Mount(os.DevNull, target, "", uintptr(syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_PRIVATE), ""); err != nil { + return undoBinds, errors.Wrapf(err, "error masking non-directory %q in mount namespace", target) + } + } + } + } + return undoBinds, nil +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/seccomp.go b/vendor/github.com/projectatomic/buildah/chroot/seccomp.go new file mode 100644 index 000000000..d5c5842d4 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/seccomp.go @@ -0,0 +1,142 @@ +// +build linux,seccomp + +package chroot + +import ( + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + libseccomp "github.com/seccomp/libseccomp-golang" + "github.com/sirupsen/logrus" +) + +// setSeccomp sets the seccomp filter for ourselves and any processes that we'll start. 
+func setSeccomp(spec *specs.Spec) error { + logrus.Debugf("setting seccomp configuration") + if spec.Linux.Seccomp == nil { + return nil + } + mapAction := func(specAction specs.LinuxSeccompAction) libseccomp.ScmpAction { + switch specAction { + case specs.ActKill: + return libseccomp.ActKill + case specs.ActTrap: + return libseccomp.ActTrap + case specs.ActErrno: + return libseccomp.ActErrno + case specs.ActTrace: + return libseccomp.ActTrace + case specs.ActAllow: + return libseccomp.ActAllow + } + return libseccomp.ActInvalid + } + mapArch := func(specArch specs.Arch) libseccomp.ScmpArch { + switch specArch { + case specs.ArchX86: + return libseccomp.ArchX86 + case specs.ArchX86_64: + return libseccomp.ArchAMD64 + case specs.ArchX32: + return libseccomp.ArchX32 + case specs.ArchARM: + return libseccomp.ArchARM + case specs.ArchAARCH64: + return libseccomp.ArchARM64 + case specs.ArchMIPS: + return libseccomp.ArchMIPS + case specs.ArchMIPS64: + return libseccomp.ArchMIPS64 + case specs.ArchMIPS64N32: + return libseccomp.ArchMIPS64N32 + case specs.ArchMIPSEL: + return libseccomp.ArchMIPSEL + case specs.ArchMIPSEL64: + return libseccomp.ArchMIPSEL64 + case specs.ArchMIPSEL64N32: + return libseccomp.ArchMIPSEL64N32 + case specs.ArchPPC: + return libseccomp.ArchPPC + case specs.ArchPPC64: + return libseccomp.ArchPPC64 + case specs.ArchPPC64LE: + return libseccomp.ArchPPC64LE + case specs.ArchS390: + return libseccomp.ArchS390 + case specs.ArchS390X: + return libseccomp.ArchS390X + case specs.ArchPARISC: + /* fallthrough */ /* for now */ + case specs.ArchPARISC64: + /* fallthrough */ /* for now */ + } + return libseccomp.ArchInvalid + } + mapOp := func(op specs.LinuxSeccompOperator) libseccomp.ScmpCompareOp { + switch op { + case specs.OpNotEqual: + return libseccomp.CompareNotEqual + case specs.OpLessThan: + return libseccomp.CompareLess + case specs.OpLessEqual: + return libseccomp.CompareLessOrEqual + case specs.OpEqualTo: + return libseccomp.CompareEqual + case 
specs.OpGreaterEqual: + return libseccomp.CompareGreaterEqual + case specs.OpGreaterThan: + return libseccomp.CompareGreater + case specs.OpMaskedEqual: + return libseccomp.CompareMaskedEqual + } + return libseccomp.CompareInvalid + } + + filter, err := libseccomp.NewFilter(mapAction(spec.Linux.Seccomp.DefaultAction)) + if err != nil { + return errors.Wrapf(err, "error creating seccomp filter with default action %q", spec.Linux.Seccomp.DefaultAction) + } + for _, arch := range spec.Linux.Seccomp.Architectures { + if err = filter.AddArch(mapArch(arch)); err != nil { + return errors.Wrapf(err, "error adding architecture %q(%q) to seccomp filter", arch, mapArch(arch)) + } + } + for _, rule := range spec.Linux.Seccomp.Syscalls { + scnames := make(map[libseccomp.ScmpSyscall]string) + for _, name := range rule.Names { + scnum, err := libseccomp.GetSyscallFromName(name) + if err != nil { + logrus.Debugf("error mapping syscall %q to a syscall, ignoring %q rule for %q", name, rule.Action) + continue + } + scnames[scnum] = name + } + for scnum := range scnames { + if len(rule.Args) == 0 { + if err = filter.AddRule(scnum, mapAction(rule.Action)); err != nil { + return errors.Wrapf(err, "error adding a rule (%q:%q) to seccomp filter", scnames[scnum], rule.Action) + } + continue + } + var conditions []libseccomp.ScmpCondition + for _, arg := range rule.Args { + condition, err := libseccomp.MakeCondition(arg.Index, mapOp(arg.Op), arg.Value, arg.ValueTwo) + if err != nil { + return errors.Wrapf(err, "error building a seccomp condition %d:%v:%d:%d", arg.Index, arg.Op, arg.Value, arg.ValueTwo) + } + conditions = append(conditions, condition) + } + if err = filter.AddRuleConditional(scnum, mapAction(rule.Action), conditions); err != nil { + return errors.Wrapf(err, "error adding a conditional rule (%q:%q) to seccomp filter", scnames[scnum], rule.Action) + } + } + } + if err = filter.SetNoNewPrivsBit(spec.Process.NoNewPrivileges); err != nil { + return errors.Wrapf(err, "error 
setting no-new-privileges bit to %v", spec.Process.NoNewPrivileges) + } + err = filter.Load() + filter.Release() + if err != nil { + return errors.Wrapf(err, "error activating seccomp filter") + } + return nil +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/seccomp_unsupported.go b/vendor/github.com/projectatomic/buildah/chroot/seccomp_unsupported.go new file mode 100644 index 000000000..a5b74bf09 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/seccomp_unsupported.go @@ -0,0 +1,15 @@ +// +build !linux !seccomp + +package chroot + +import ( + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +func setSeccomp(spec *specs.Spec) error { + if spec.Linux.Seccomp != nil { + return errors.New("configured a seccomp filter without seccomp support?") + } + return nil +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/selinux.go b/vendor/github.com/projectatomic/buildah/chroot/selinux.go new file mode 100644 index 000000000..3e62d743d --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/selinux.go @@ -0,0 +1,22 @@ +// +build linux,selinux + +package chroot + +import ( + "github.com/opencontainers/runtime-spec/specs-go" + selinux "github.com/opencontainers/selinux/go-selinux" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// setSelinuxLabel sets the process label for child processes that we'll start. 
+func setSelinuxLabel(spec *specs.Spec) error { + logrus.Debugf("setting selinux label") + if spec.Process.SelinuxLabel != "" && selinux.EnforceMode() != selinux.Disabled { + if err := label.SetProcessLabel(spec.Process.SelinuxLabel); err != nil { + return errors.Wrapf(err, "error setting process label to %q", spec.Process.SelinuxLabel) + } + } + return nil +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/selinux_unsupported.go b/vendor/github.com/projectatomic/buildah/chroot/selinux_unsupported.go new file mode 100644 index 000000000..1c6f48912 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/selinux_unsupported.go @@ -0,0 +1,18 @@ +// +build !linux !selinux + +package chroot + +import ( + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +func setSelinuxLabel(spec *specs.Spec) error { + if spec.Linux.MountLabel != "" { + return errors.New("configured an SELinux mount label without SELinux support?") + } + if spec.Process.SelinuxLabel != "" { + return errors.New("configured an SELinux process label without SELinux support?") + } + return nil +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/unsupported.go b/vendor/github.com/projectatomic/buildah/chroot/unsupported.go new file mode 100644 index 000000000..5312c0024 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/unsupported.go @@ -0,0 +1,15 @@ +// +build !linux + +package chroot + +import ( + "io" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +// RunUsingChroot is not supported. 
+func RunUsingChroot(spec *specs.Spec, bundlePath string, stdin io.Reader, stdout, stderr io.Writer) (err error) { + return errors.Errorf("--isolation chroot is not supported on this platform") +} diff --git a/vendor/github.com/projectatomic/buildah/chroot/util.go b/vendor/github.com/projectatomic/buildah/chroot/util.go new file mode 100644 index 000000000..34cc77260 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/chroot/util.go @@ -0,0 +1,15 @@ +// +build linux + +package chroot + +func dedupeStringSlice(slice []string) []string { + done := make([]string, 0, len(slice)) + m := make(map[string]struct{}) + for _, s := range slice { + if _, present := m[s]; !present { + m[s] = struct{}{} + done = append(done, s) + } + } + return done +} diff --git a/vendor/github.com/projectatomic/buildah/imagebuildah/build.go b/vendor/github.com/projectatomic/buildah/imagebuildah/build.go index 672d6e94d..42e51878e 100644 --- a/vendor/github.com/projectatomic/buildah/imagebuildah/build.go +++ b/vendor/github.com/projectatomic/buildah/imagebuildah/build.go @@ -1,11 +1,14 @@ package imagebuildah import ( + "bytes" "context" "fmt" "io" + "io/ioutil" "net/http" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -215,6 +218,7 @@ type Executor struct { noCache bool removeIntermediateCtrs bool forceRmIntermediateCtrs bool + containerIDs []string // Stores the IDs of the successful intermediate containers used during layer build } // withName creates a new child executor that will be used whenever a COPY statement uses --from=NAME. @@ -684,6 +688,7 @@ func (b *Executor) Prepare(ctx context.Context, ib *imagebuilder.Builder, node * // Add the top layer of this image to b.topLayers so we can keep track of them // when building with cached images. 
b.topLayers = append(b.topLayers, builder.TopLayer) + logrus.Debugln("Container ID:", builder.ContainerID) return nil } @@ -811,12 +816,8 @@ func (b *Executor) Execute(ctx context.Context, ib *imagebuilder.Builder, node * // it is used to create the container for the next step. imgID = cacheID } - // Delete the intermediate container if b.removeIntermediateCtrs is true. - if b.removeIntermediateCtrs { - if err := b.Delete(); err != nil { - return errors.Wrap(err, "error deleting intermediate container") - } - } + // Add container ID of successful intermediate container to b.containerIDs + b.containerIDs = append(b.containerIDs, b.builder.ContainerID) // Prepare for the next step with imgID as the new base image. if i != len(children)-1 { if err := b.Prepare(ctx, ib, node, imgID); err != nil { @@ -1122,11 +1123,14 @@ func (b *Executor) Build(ctx context.Context, stages imagebuilder.Stages) error if len(stages) == 0 { errors.New("error building: no stages to build") } - var stageExecutor *Executor + var ( + stageExecutor *Executor + lastErr error + ) for _, stage := range stages { stageExecutor = b.withName(stage.Name, stage.Position) if err := stageExecutor.Prepare(ctx, stage.Builder, stage.Node, ""); err != nil { - return err + lastErr = err } // Always remove the intermediate/build containers, even if the build was unsuccessful. // If building with layers, remove all intermediate/build containers if b.forceRmIntermediateCtrs @@ -1135,8 +1139,18 @@ func (b *Executor) Build(ctx context.Context, stages imagebuilder.Stages) error defer stageExecutor.Delete() } if err := stageExecutor.Execute(ctx, stage.Builder, stage.Node); err != nil { - return err + lastErr = err } + + // Delete the successful intermediate containers if an error in the build + // process occurs and b.removeIntermediateCtrs is true. 
+ if lastErr != nil { + if b.removeIntermediateCtrs { + stageExecutor.deleteSuccessfulIntermediateCtrs() + } + return lastErr + } + b.containerIDs = append(b.containerIDs, stageExecutor.containerIDs...) } if !b.layers && !b.noCache { @@ -1154,7 +1168,9 @@ func (b *Executor) Build(ctx context.Context, stages imagebuilder.Stages) error // the removal of intermediate/build containers will be handled by the // defer statement above. if b.removeIntermediateCtrs && (b.layers || b.noCache) { - return stageExecutor.Delete() + if err := b.deleteSuccessfulIntermediateCtrs(); err != nil { + return errors.Errorf("Failed to cleanup intermediate containers") + } } return nil } @@ -1173,6 +1189,8 @@ func BuildDockerfiles(ctx context.Context, store storage.Store, options BuildOpt } }(dockerfiles...) for _, dfile := range paths { + var data io.ReadCloser + if strings.HasPrefix(dfile, "http://") || strings.HasPrefix(dfile, "https://") { logrus.Debugf("reading remote Dockerfile %q", dfile) resp, err := http.Get(dfile) @@ -1183,7 +1201,7 @@ func BuildDockerfiles(ctx context.Context, store storage.Store, options BuildOpt resp.Body.Close() return errors.Errorf("no contents in %q", dfile) } - dockerfiles = append(dockerfiles, resp.Body) + data = resp.Body } else { if !filepath.IsAbs(dfile) { logrus.Debugf("resolving local Dockerfile %q", dfile) @@ -1199,12 +1217,23 @@ func BuildDockerfiles(ctx context.Context, store storage.Store, options BuildOpt contents.Close() return errors.Wrapf(err, "error reading info about %q", dfile) } - if dinfo.Size() == 0 { + if dinfo.Mode().IsRegular() && dinfo.Size() == 0 { contents.Close() return errors.Wrapf(err, "no contents in %q", dfile) } - dockerfiles = append(dockerfiles, contents) + data = contents + } + + // pre-process Dockerfiles with ".in" suffix + if strings.HasSuffix(dfile, ".in") { + pData, err := preprocessDockerfileContents(data, options.ContextDirectory) + if err != nil { + return err + } + data = *pData } + + dockerfiles = 
append(dockerfiles, data) } mainNode, err := imagebuilder.ParseDockerfile(dockerfiles[0]) if err != nil { @@ -1225,3 +1254,67 @@ func BuildDockerfiles(ctx context.Context, store storage.Store, options BuildOpt stages := imagebuilder.NewStages(mainNode, b) return exec.Build(ctx, stages) } + +// deleteSuccessfulIntermediateCtrs goes through the container IDs in b.containerIDs +// and deletes the containers associated with that ID. +func (b *Executor) deleteSuccessfulIntermediateCtrs() error { + var lastErr error + for _, ctr := range b.containerIDs { + if err := b.store.DeleteContainer(ctr); err != nil { + logrus.Errorf("error deleting build container %q: %v\n", ctr, err) + lastErr = err + } + } + return lastErr +} + +// preprocessDockerfileContents runs CPP(1) in preprocess-only mode on the input +// dockerfile content and will use ctxDir as the base include path. +// +// Note: we cannot use cmd.StdoutPipe() as cmd.Wait() closes it. +func preprocessDockerfileContents(r io.ReadCloser, ctxDir string) (rdrCloser *io.ReadCloser, err error) { + cppPath := "/usr/bin/cpp" + if _, err = os.Stat(cppPath); err != nil { + if os.IsNotExist(err) { + err = errors.Errorf("error: Dockerfile.in support requires %s to be installed", cppPath) + } + return nil, err + } + + stdout := bytes.Buffer{} + stderr := bytes.Buffer{} + + cmd := exec.Command(cppPath, "-E", "-iquote", ctxDir, "-") + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + pipe, err := cmd.StdinPipe() + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + pipe.Close() + } + }() + + if err = cmd.Start(); err != nil { + return nil, err + } + + if _, err = io.Copy(pipe, r); err != nil { + return nil, err + } + + pipe.Close() + if err = cmd.Wait(); err != nil { + if stderr.Len() > 0 { + err = fmt.Errorf("%v: %s", err, strings.TrimSpace(stderr.String())) + } + return nil, errors.Wrapf(err, "error pre-processing Dockerfile") + } + + rc := ioutil.NopCloser(bytes.NewReader(stdout.Bytes())) + return &rc, nil 
+} diff --git a/vendor/github.com/projectatomic/buildah/new.go b/vendor/github.com/projectatomic/buildah/new.go index 1a1e89c46..60d217552 100644 --- a/vendor/github.com/projectatomic/buildah/new.go +++ b/vendor/github.com/projectatomic/buildah/new.go @@ -248,6 +248,15 @@ func newBuilder(ctx context.Context, store storage.Store, options BuilderOptions defer src.Close() } + // If the pull command was used, we only pull the image, + // we don't create a container. + if options.ImageOnly { + imgBuilder := &Builder{ + FromImageID: imageID, + } + return imgBuilder, nil + } + name := "working-container" if options.Container != "" { name = options.Container diff --git a/vendor/github.com/projectatomic/buildah/pkg/parse/parse.go b/vendor/github.com/projectatomic/buildah/pkg/parse/parse.go index 26831c7a2..bd333a2cc 100644 --- a/vendor/github.com/projectatomic/buildah/pkg/parse/parse.go +++ b/vendor/github.com/projectatomic/buildah/pkg/parse/parse.go @@ -24,6 +24,7 @@ import ( "github.com/sirupsen/logrus" "github.com/urfave/cli" "golang.org/x/crypto/ssh/terminal" + "golang.org/x/sys/unix" ) const ( @@ -40,6 +41,14 @@ func CommonBuildOptions(c *cli.Context) (*buildah.CommonBuildOptions, error) { memorySwap int64 err error ) + rlim := unix.Rlimit{Cur: 1048576, Max: 1048576} + defaultLimits := []string{} + if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rlim); err == nil { + defaultLimits = append(defaultLimits, fmt.Sprintf("nofile=%d:%d", rlim.Cur, rlim.Max)) + } + if err := unix.Setrlimit(unix.RLIMIT_NPROC, &rlim); err == nil { + defaultLimits = append(defaultLimits, fmt.Sprintf("nproc=%d:%d", rlim.Cur, rlim.Max)) + } if c.String("memory") != "" { memoryLimit, err = units.RAMInBytes(c.String("memory")) if err != nil { @@ -77,7 +86,7 @@ func CommonBuildOptions(c *cli.Context) (*buildah.CommonBuildOptions, error) { Memory: memoryLimit, MemorySwap: memorySwap, ShmSize: c.String("shm-size"), - Ulimit: c.StringSlice("ulimit"), + Ulimit: append(defaultLimits, 
c.StringSlice("ulimit")...), Volumes: c.StringSlice("volume"), } if err := parseSecurityOpts(c.StringSlice("security-opt"), commonOpts); err != nil { @@ -531,12 +540,17 @@ func NamespaceOptions(c *cli.Context) (namespaceOptions buildah.NamespaceOptions return options, policy, nil } -func defaultIsolation() buildah.Isolation { - isolation := os.Getenv("BUILDAH_ISOLATION") - if strings.HasPrefix(strings.ToLower(isolation), "oci") { - return buildah.IsolationOCI +func defaultIsolation() (buildah.Isolation, error) { + isolation, isSet := os.LookupEnv("BUILDAH_ISOLATION") + if isSet { + if strings.HasPrefix(strings.ToLower(isolation), "oci") { + return buildah.IsolationOCI, nil + } else if strings.HasPrefix(strings.ToLower(isolation), "chroot") { + return buildah.IsolationChroot, nil + } + return 0, errors.Errorf("unrecognized $BUILDAH_ISOLATION value %q", isolation) } - return buildah.IsolationDefault + return buildah.IsolationDefault, nil } // IsolationOption parses the --isolation flag. @@ -544,9 +558,11 @@ func IsolationOption(c *cli.Context) (buildah.Isolation, error) { if c.String("isolation") != "" { if strings.HasPrefix(strings.ToLower(c.String("isolation")), "oci") { return buildah.IsolationOCI, nil + } else if strings.HasPrefix(strings.ToLower(c.String("isolation")), "chroot") { + return buildah.IsolationChroot, nil } else { return buildah.IsolationDefault, errors.Errorf("unrecognized isolation type %q", c.String("isolation")) } } - return defaultIsolation(), nil + return defaultIsolation() } diff --git a/vendor/github.com/projectatomic/buildah/run.go b/vendor/github.com/projectatomic/buildah/run.go index b9a7b4e9e..b6a21cdad 100644 --- a/vendor/github.com/projectatomic/buildah/run.go +++ b/vendor/github.com/projectatomic/buildah/run.go @@ -29,6 +29,7 @@ import ( "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/projectatomic/buildah/bind" + "github.com/projectatomic/buildah/chroot" "github.com/projectatomic/buildah/util" 
"github.com/projectatomic/libpod/pkg/secrets" "github.com/sirupsen/logrus" @@ -40,7 +41,7 @@ const ( // DefaultWorkingDir is used if none was specified. DefaultWorkingDir = "/" // runUsingRuntimeCommand is a command we use as a key for reexec - runUsingRuntimeCommand = Package + "-runtime" + runUsingRuntimeCommand = Package + "-oci-runtime" ) // TerminalPolicy takes the value DefaultTerminal, WithoutTerminal, or WithTerminal. @@ -112,6 +113,9 @@ const ( IsolationDefault Isolation = iota // IsolationOCI is a proper OCI runtime. IsolationOCI + // IsolationChroot is a more chroot-like environment: less isolation, + // but with fewer requirements. + IsolationChroot ) // String converts a Isolation into a string. @@ -121,6 +125,8 @@ func (i Isolation) String() string { return "IsolationDefault" case IsolationOCI: return "IsolationOCI" + case IsolationChroot: + return "IsolationChroot" } return fmt.Sprintf("unrecognized isolation type %d", i) } @@ -129,10 +135,10 @@ func (i Isolation) String() string { type RunOptions struct { // Hostname is the hostname we set for the running container. Hostname string - // Isolation is either IsolationDefault or IsolationOCI. + // Isolation is either IsolationDefault, IsolationOCI, or IsolationChroot. Isolation Isolation - // Runtime is the name of the command to run. It should accept the same arguments - // that runc does, and produce similar output. + // Runtime is the name of the runtime to run. It should accept the + // same arguments that runc does, and produce similar output. Runtime string // Args adds global arguments for the runtime. 
Args []string @@ -792,6 +798,11 @@ func setupNamespaces(g *generate.Generator, namespaceOptions NamespaceOptions, i } } } + if configureNetwork { + for name, val := range util.DefaultNetworkSysctl { + g.AddLinuxSysctl(name, val) + } + } return configureNetwork, configureNetworks, configureUTS, nil } @@ -969,8 +980,8 @@ func (b *Builder) Run(command []string, options RunOptions) error { return err } defer func() { - if err2 := b.Unmount(); err2 != nil { - logrus.Errorf("error unmounting container: %v", err2) + if err := b.Unmount(); err != nil { + logrus.Errorf("error unmounting container: %v", err) } }() g.SetRootPath(mountPoint) @@ -1069,6 +1080,8 @@ func (b *Builder) Run(command []string, options RunOptions) error { switch isolation { case IsolationOCI: err = b.runUsingRuntimeSubproc(options, configureNetwork, configureNetworks, spec, mountPoint, path, Package+"-"+filepath.Base(path)) + case IsolationChroot: + err = chroot.RunUsingChroot(spec, path, options.Stdin, options.Stdout, options.Stderr) default: err = errors.Errorf("don't know how to run this command") } @@ -1677,7 +1690,7 @@ func runCopyStdio(stdio *sync.WaitGroup, copyPipes bool, stdioPipe [][]int, copy } // If the descriptor was closed elsewhere, remove it from our list. if pollFd.Revents&unix.POLLNVAL != 0 { - logrus.Debugf("error polling descriptor %d: closed?", pollFd.Fd) + logrus.Debugf("error polling descriptor %s: closed?", readDesc[int(pollFd.Fd)]) removes[int(pollFd.Fd)] = struct{}{} } // If the POLLIN flag isn't set, then there's no data to be read from this descriptor. 
diff --git a/vendor/github.com/projectatomic/buildah/unshare/unshare.c b/vendor/github.com/projectatomic/buildah/unshare/unshare.c new file mode 100644 index 000000000..83864359b --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/unshare/unshare.c @@ -0,0 +1,110 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int _buildah_unshare_parse_envint(const char *envname) { + char *p, *q; + long l; + + p = getenv(envname); + if (p == NULL) { + return -1; + } + q = NULL; + l = strtol(p, &q, 10); + if ((q == NULL) || (*q != '\0')) { + fprintf(stderr, "Error parsing \"%s\"=\"%s\"!\n", envname, p); + _exit(1); + } + unsetenv(envname); + return l; +} + +void _buildah_unshare(void) +{ + int flags, pidfd, continuefd, n, pgrp, sid, ctty, allow_setgroups; + char buf[2048]; + + flags = _buildah_unshare_parse_envint("_Buildah-unshare"); + if (flags == -1) { + return; + } + if ((flags & CLONE_NEWUSER) != 0) { + if (unshare(CLONE_NEWUSER) == -1) { + fprintf(stderr, "Error during unshare(CLONE_NEWUSER): %m\n"); + _exit(1); + } + } + pidfd = _buildah_unshare_parse_envint("_Buildah-pid-pipe"); + if (pidfd != -1) { + snprintf(buf, sizeof(buf), "%llu", (unsigned long long) getpid()); + if (write(pidfd, buf, strlen(buf)) != strlen(buf)) { + fprintf(stderr, "Error writing PID to pipe on fd %d: %m\n", pidfd); + _exit(1); + } + close(pidfd); + } + continuefd = _buildah_unshare_parse_envint("_Buildah-continue-pipe"); + if (continuefd != -1) { + n = read(continuefd, buf, sizeof(buf)); + if (n > 0) { + fprintf(stderr, "Error: %.*s\n", n, buf); + _exit(1); + } + close(continuefd); + } + sid = _buildah_unshare_parse_envint("_Buildah-setsid"); + if (sid == 1) { + if (setsid() == -1) { + fprintf(stderr, "Error during setsid: %m\n"); + _exit(1); + } + } + pgrp = _buildah_unshare_parse_envint("_Buildah-setpgrp"); + if (pgrp == 1) { + if (setpgrp() == -1) { + fprintf(stderr, "Error during setpgrp: 
%m\n"); + _exit(1); + } + } + ctty = _buildah_unshare_parse_envint("_Buildah-ctty"); + if (ctty != -1) { + if (ioctl(ctty, TIOCSCTTY, 0) == -1) { + fprintf(stderr, "Error while setting controlling terminal to %d: %m\n", ctty); + _exit(1); + } + } + allow_setgroups = _buildah_unshare_parse_envint("_Buildah-allow-setgroups"); + if ((flags & CLONE_NEWUSER) != 0) { + if (allow_setgroups == 1) { + if (setgroups(0, NULL) != 0) { + fprintf(stderr, "Error during setgroups(0, NULL): %m\n"); + _exit(1); + } + } + if (setresgid(0, 0, 0) != 0) { + fprintf(stderr, "Error during setresgid(0): %m\n"); + _exit(1); + } + if (setresuid(0, 0, 0) != 0) { + fprintf(stderr, "Error during setresuid(0): %m\n"); + _exit(1); + } + } + if ((flags & ~CLONE_NEWUSER) != 0) { + if (unshare(flags & ~CLONE_NEWUSER) == -1) { + fprintf(stderr, "Error during unshare(...): %m\n"); + _exit(1); + } + } + return; +} diff --git a/vendor/github.com/projectatomic/buildah/unshare/unshare.go b/vendor/github.com/projectatomic/buildah/unshare/unshare.go new file mode 100644 index 000000000..ed2a97934 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/unshare/unshare.go @@ -0,0 +1,273 @@ +// +build linux + +package unshare + +import ( + "bytes" + "fmt" + "io" + "os" + "os/exec" + "runtime" + "strconv" + "strings" + "syscall" + + "github.com/containers/storage/pkg/reexec" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/projectatomic/buildah/util" +) + +// Cmd wraps an exec.Cmd created by the reexec package in unshare(), and +// handles setting ID maps and other related settings by triggering +// initialization code in the child. 
+type Cmd struct { + *exec.Cmd + UnshareFlags int + UseNewuidmap bool + UidMappings []specs.LinuxIDMapping + UseNewgidmap bool + GidMappings []specs.LinuxIDMapping + GidMappingsEnableSetgroups bool + Setsid bool + Setpgrp bool + Ctty *os.File + OOMScoreAdj int + Hook func(pid int) error +} + +// Command creates a new Cmd which can be customized. +func Command(args ...string) *Cmd { + cmd := reexec.Command(args...) + return &Cmd{ + Cmd: cmd, + } +} + +func (c *Cmd) Start() error { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + // Set an environment variable to tell the child to synchronize its startup. + if c.Env == nil { + c.Env = os.Environ() + } + c.Env = append(c.Env, fmt.Sprintf("_Buildah-unshare=%d", c.UnshareFlags)) + + // Create the pipe for reading the child's PID. + pidRead, pidWrite, err := os.Pipe() + if err != nil { + return errors.Wrapf(err, "error creating pid pipe") + } + c.Env = append(c.Env, fmt.Sprintf("_Buildah-pid-pipe=%d", len(c.ExtraFiles)+3)) + c.ExtraFiles = append(c.ExtraFiles, pidWrite) + + // Create the pipe for letting the child know to proceed. + continueRead, continueWrite, err := os.Pipe() + if err != nil { + pidRead.Close() + pidWrite.Close() + return errors.Wrapf(err, "error creating pid pipe") + } + c.Env = append(c.Env, fmt.Sprintf("_Buildah-continue-pipe=%d", len(c.ExtraFiles)+3)) + c.ExtraFiles = append(c.ExtraFiles, continueRead) + + // Pass along other instructions. + if c.Setsid { + c.Env = append(c.Env, "_Buildah-setsid=1") + } + if c.Setpgrp { + c.Env = append(c.Env, "_Buildah-setpgrp=1") + } + if c.Ctty != nil { + c.Env = append(c.Env, fmt.Sprintf("_Buildah-ctty=%d", len(c.ExtraFiles)+3)) + c.ExtraFiles = append(c.ExtraFiles, c.Ctty) + } + if c.GidMappingsEnableSetgroups { + c.Env = append(c.Env, "_Buildah-allow-setgroups=1") + } else { + c.Env = append(c.Env, "_Buildah-allow-setgroups=0") + } + + // Make sure we clean up our pipes. 
+ defer func() { + if pidRead != nil { + pidRead.Close() + } + if pidWrite != nil { + pidWrite.Close() + } + if continueRead != nil { + continueRead.Close() + } + if continueWrite != nil { + continueWrite.Close() + } + }() + + // Start the new process. + err = c.Cmd.Start() + if err != nil { + return err + } + + // Close the ends of the pipes that the parent doesn't need. + continueRead.Close() + continueRead = nil + pidWrite.Close() + pidWrite = nil + + // Read the child's PID from the pipe. + pidString := "" + b := new(bytes.Buffer) + io.Copy(b, pidRead) + pidString = b.String() + pid, err := strconv.Atoi(pidString) + if err != nil { + fmt.Fprintf(continueWrite, "error parsing PID %q: %v", pidString, err) + return errors.Wrapf(err, "error parsing PID %q", pidString) + } + pidString = fmt.Sprintf("%d", pid) + + // If we created a new user namespace, set any specified mappings. + if c.UnshareFlags&syscall.CLONE_NEWUSER != 0 { + // Always set "setgroups". + setgroups, err := os.OpenFile(fmt.Sprintf("/proc/%s/setgroups", pidString), os.O_TRUNC|os.O_WRONLY, 0) + if err != nil { + fmt.Fprintf(continueWrite, "error opening setgroups: %v", err) + return errors.Wrapf(err, "error opening /proc/%s/setgroups", pidString) + } + defer setgroups.Close() + if c.GidMappingsEnableSetgroups { + if _, err := fmt.Fprintf(setgroups, "allow"); err != nil { + fmt.Fprintf(continueWrite, "error writing \"allow\" to setgroups: %v", err) + return errors.Wrapf(err, "error opening \"allow\" to /proc/%s/setgroups", pidString) + } + } else { + if _, err := fmt.Fprintf(setgroups, "deny"); err != nil { + fmt.Fprintf(continueWrite, "error writing \"deny\" to setgroups: %v", err) + return errors.Wrapf(err, "error writing \"deny\" to /proc/%s/setgroups", pidString) + } + } + + if len(c.UidMappings) == 0 || len(c.GidMappings) == 0 { + uidmap, gidmap, err := util.GetHostIDMappings("") + if err != nil { + fmt.Fprintf(continueWrite, "error reading ID mappings in parent: %v", err) + return 
errors.Wrapf(err, "error reading ID mappings in parent") + } + if len(c.UidMappings) == 0 { + c.UidMappings = uidmap + for i := range c.UidMappings { + c.UidMappings[i].HostID = c.UidMappings[i].ContainerID + } + } + if len(c.GidMappings) == 0 { + c.GidMappings = gidmap + for i := range c.GidMappings { + c.GidMappings[i].HostID = c.GidMappings[i].ContainerID + } + } + } + + if len(c.GidMappings) > 0 { + // Build the GID map, since writing to the proc file has to be done all at once. + g := new(bytes.Buffer) + for _, m := range c.GidMappings { + fmt.Fprintf(g, "%d %d %d\n", m.ContainerID, m.HostID, m.Size) + } + // Set the GID map. + if c.UseNewgidmap { + cmd := exec.Command("newgidmap", append([]string{pidString}, strings.Fields(strings.Replace(g.String(), "\n", " ", -1))...)...) + g.Reset() + cmd.Stdout = g + cmd.Stderr = g + err := cmd.Run() + if err != nil { + fmt.Fprintf(continueWrite, "error running newgidmap: %v: %s", err, g.String()) + return errors.Wrapf(err, "error running newgidmap: %s", g.String()) + } + } else { + gidmap, err := os.OpenFile(fmt.Sprintf("/proc/%s/gid_map", pidString), os.O_TRUNC|os.O_WRONLY, 0) + if err != nil { + fmt.Fprintf(continueWrite, "error opening /proc/%s/gid_map: %v", pidString, err) + return errors.Wrapf(err, "error opening /proc/%s/gid_map", pidString) + } + defer gidmap.Close() + if _, err := fmt.Fprintf(gidmap, "%s", g.String()); err != nil { + fmt.Fprintf(continueWrite, "error writing /proc/%s/gid_map: %v", pidString, err) + return errors.Wrapf(err, "error writing /proc/%s/gid_map", pidString) + } + } + } + + if len(c.UidMappings) > 0 { + // Build the UID map, since writing to the proc file has to be done all at once. + u := new(bytes.Buffer) + for _, m := range c.UidMappings { + fmt.Fprintf(u, "%d %d %d\n", m.ContainerID, m.HostID, m.Size) + } + // Set the GID map. + if c.UseNewuidmap { + cmd := exec.Command("newuidmap", append([]string{pidString}, strings.Fields(strings.Replace(u.String(), "\n", " ", -1))...)...) 
+ u.Reset() + cmd.Stdout = u + cmd.Stderr = u + err := cmd.Run() + if err != nil { + fmt.Fprintf(continueWrite, "error running newuidmap: %v: %s", err, u.String()) + return errors.Wrapf(err, "error running newuidmap: %s", u.String()) + } + } else { + uidmap, err := os.OpenFile(fmt.Sprintf("/proc/%s/uid_map", pidString), os.O_TRUNC|os.O_WRONLY, 0) + if err != nil { + fmt.Fprintf(continueWrite, "error opening /proc/%s/uid_map: %v", pidString, err) + return errors.Wrapf(err, "error opening /proc/%s/uid_map", pidString) + } + defer uidmap.Close() + if _, err := fmt.Fprintf(uidmap, "%s", u.String()); err != nil { + fmt.Fprintf(continueWrite, "error writing /proc/%s/uid_map: %v", pidString, err) + return errors.Wrapf(err, "error writing /proc/%s/uid_map", pidString) + } + } + } + } + + // Adjust the process's OOM score. + oomScoreAdj, err := os.OpenFile(fmt.Sprintf("/proc/%s/oom_score_adj", pidString), os.O_TRUNC|os.O_WRONLY, 0) + if err != nil { + fmt.Fprintf(continueWrite, "error opening oom_score_adj: %v", err) + return errors.Wrapf(err, "error opening /proc/%s/oom_score_adj", pidString) + } + if _, err := fmt.Fprintf(oomScoreAdj, "%d\n", c.OOMScoreAdj); err != nil { + fmt.Fprintf(continueWrite, "error writing \"%d\" to oom_score_adj: %v", c.OOMScoreAdj, err) + return errors.Wrapf(err, "error writing \"%d\" to /proc/%s/oom_score_adj", c.OOMScoreAdj, pidString) + } + defer oomScoreAdj.Close() + + // Run any additional setup that we want to do before the child starts running proper. 
+ if c.Hook != nil { + if err = c.Hook(pid); err != nil { + fmt.Fprintf(continueWrite, "hook error: %v", err) + return err + } + } + + return nil +} + +func (c *Cmd) Run() error { + if err := c.Start(); err != nil { + return err + } + return c.Wait() +} + +func (c *Cmd) CombinedOutput() ([]byte, error) { + return nil, errors.New("unshare: CombinedOutput() not implemented") +} + +func (c *Cmd) Output() ([]byte, error) { + return nil, errors.New("unshare: Output() not implemented") +} diff --git a/vendor/github.com/projectatomic/buildah/unshare/unshare_cgo.go b/vendor/github.com/projectatomic/buildah/unshare/unshare_cgo.go new file mode 100644 index 000000000..26a0b2c20 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/unshare/unshare_cgo.go @@ -0,0 +1,10 @@ +// +build linux,cgo,!gccgo + +package unshare + +// #cgo CFLAGS: -Wall +// extern void _buildah_unshare(void); +// void __attribute__((constructor)) init(void) { +// _buildah_unshare(); +// } +import "C" diff --git a/vendor/github.com/projectatomic/buildah/unshare/unshare_gccgo.go b/vendor/github.com/projectatomic/buildah/unshare/unshare_gccgo.go new file mode 100644 index 000000000..c4811782a --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/unshare/unshare_gccgo.go @@ -0,0 +1,25 @@ +// +build linux,cgo,gccgo + +package unshare + +// #cgo CFLAGS: -Wall -Wextra +// extern void _buildah_unshare(void); +// void __attribute__((constructor)) init(void) { +// _buildah_unshare(); +// } +import "C" + +// This next bit is straight out of libcontainer. + +// AlwaysFalse is here to stay false +// (and be exported so the compiler doesn't optimize out its reference) +var AlwaysFalse bool + +func init() { + if AlwaysFalse { + // by referencing this C init() in a noop test, it will ensure the compiler + // links in the C function. 
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 + C.init() + } +} diff --git a/vendor/github.com/projectatomic/buildah/unshare/unshare_unsupported.go b/vendor/github.com/projectatomic/buildah/unshare/unshare_unsupported.go new file mode 100644 index 000000000..feeceae66 --- /dev/null +++ b/vendor/github.com/projectatomic/buildah/unshare/unshare_unsupported.go @@ -0,0 +1 @@ +package unshare diff --git a/vendor/github.com/projectatomic/buildah/util/types.go b/vendor/github.com/projectatomic/buildah/util/types.go index 974b707fb..dc5f4b6c8 100644 --- a/vendor/github.com/projectatomic/buildah/util/types.go +++ b/vendor/github.com/projectatomic/buildah/util/types.go @@ -27,4 +27,9 @@ var ( "CAP_SETUID", "CAP_SYS_CHROOT", } + // DefaultNetworkSysctl is the list of Kernel parameters which we + // grant by default to containers which are running under UID 0. + DefaultNetworkSysctl = map[string]string{ + "net.ipv4.ping_group_range": "0 0", + } ) diff --git a/vendor/github.com/projectatomic/buildah/vendor.conf b/vendor/github.com/projectatomic/buildah/vendor.conf index 94e5ebb10..5a99b0cbf 100644 --- a/vendor/github.com/projectatomic/buildah/vendor.conf +++ b/vendor/github.com/projectatomic/buildah/vendor.conf @@ -4,8 +4,8 @@ github.com/BurntSushi/toml master github.com/containerd/continuity master github.com/containernetworking/cni v0.6.0 github.com/seccomp/containers-golang master -github.com/containers/image master -github.com/containers/storage afdedba2d2ad573350aee35033d4e0c58fdbd57b +github.com/containers/image 134f99bed228d6297dc01d152804f6f09f185418 +github.com/containers/storage 17c7d1fee5603ccf6dd97edc14162fc1510e7e23 github.com/docker/distribution 5f6282db7d65e6d72ad7c2cc66310724a57be716 github.com/docker/docker b8571fd81c7d2223c9ecbf799c693e3ef1daaea9 github.com/docker/docker-credential-helpers d68f9aeca33f5fd3f08eeae5e9d175edf4e731d1 @@ -46,7 +46,7 @@ github.com/projectatomic/libpod master github.com/sirupsen/logrus master github.com/syndtr/gocapability 
master github.com/tchap/go-patricia master -github.com/urfave/cli master +github.com/urfave/cli fix-short-opts-parsing https://github.com/vrothberg/cli github.com/vbatts/tar-split v0.10.2 github.com/xeipuuv/gojsonpointer master github.com/xeipuuv/gojsonreference master -- cgit v1.2.3-54-g00ecf