summaryrefslogtreecommitdiff
path: root/libpod/oci_linux.go
diff options
context:
space:
mode:
Diffstat (limited to 'libpod/oci_linux.go')
-rw-r--r--libpod/oci_linux.go447
1 files changed, 447 insertions, 0 deletions
diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go
index 1f5411c1f..1c1e4a203 100644
--- a/libpod/oci_linux.go
+++ b/libpod/oci_linux.go
@@ -3,6 +3,8 @@
package libpod
import (
+ "bufio"
+ "bytes"
"fmt"
"os"
"os/exec"
@@ -10,12 +12,17 @@ import (
"runtime"
"strings"
"syscall"
+ "time"
"github.com/containerd/cgroups"
"github.com/containers/libpod/pkg/rootless"
+ "github.com/containers/libpod/pkg/util"
"github.com/containers/libpod/utils"
pmount "github.com/containers/storage/pkg/mount"
+ "github.com/coreos/go-systemd/activation"
spec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@@ -179,3 +186,443 @@ func (r *OCIRuntime) conmonPackage() string {
}
return dpkgVersion(r.conmonPath)
}
+
+func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
+ var stderrBuf bytes.Buffer
+
+ runtimeDir, err := util.GetRootlessRuntimeDir()
+ if err != nil {
+ return err
+ }
+
+ parentPipe, childPipe, err := newPipe()
+ if err != nil {
+ return errors.Wrapf(err, "error creating socket pair")
+ }
+
+ childStartPipe, parentStartPipe, err := newPipe()
+ if err != nil {
+ return errors.Wrapf(err, "error creating socket pair for start pipe")
+ }
+
+ defer parentPipe.Close()
+ defer parentStartPipe.Close()
+
+ args := []string{}
+ if r.cgroupManager == SystemdCgroupsManager {
+ args = append(args, "-s")
+ }
+ args = append(args, "-c", ctr.ID())
+ args = append(args, "-u", ctr.ID())
+ args = append(args, "-r", r.path)
+ args = append(args, "-b", ctr.bundlePath())
+ args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile"))
+ args = append(args, "-l", ctr.LogPath())
+ args = append(args, "--exit-dir", r.exitsDir)
+ if ctr.config.ConmonPidFile != "" {
+ args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
+ }
+ if len(ctr.config.ExitCommand) > 0 {
+ args = append(args, "--exit-command", ctr.config.ExitCommand[0])
+ for _, arg := range ctr.config.ExitCommand[1:] {
+ args = append(args, []string{"--exit-command-arg", arg}...)
+ }
+ }
+ args = append(args, "--socket-dir-path", r.socketsDir)
+ if ctr.config.Spec.Process.Terminal {
+ args = append(args, "-t")
+ } else if ctr.config.Stdin {
+ args = append(args, "-i")
+ }
+ if r.logSizeMax >= 0 {
+ args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax))
+ }
+ if r.noPivot {
+ args = append(args, "--no-pivot")
+ }
+
+ logLevel := logrus.GetLevel()
+ args = append(args, "--log-level", logLevel.String())
+
+ if logLevel == logrus.DebugLevel {
+ logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
+ args = append(args, "--syslog")
+ }
+
+ if restoreOptions != nil {
+ args = append(args, "--restore", ctr.CheckpointPath())
+ if restoreOptions.TCPEstablished {
+ args = append(args, "--restore-arg", "--tcp-established")
+ }
+ }
+
+ logrus.WithFields(logrus.Fields{
+ "args": args,
+ }).Debugf("running conmon: %s", r.conmonPath)
+
+ cmd := exec.Command(r.conmonPath, args...)
+ cmd.Dir = ctr.bundlePath()
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Setpgid: true,
+ }
+ // TODO this is probably a really bad idea for some uses
+ // Make this configurable
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if ctr.config.Spec.Process.Terminal {
+ cmd.Stderr = &stderrBuf
+ }
+
+ cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
+ // 0, 1 and 2 are stdin, stdout and stderr
+ cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
+ cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
+ cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
+ cmd.Env = append(cmd.Env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
+ cmd.Env = append(cmd.Env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
+ cmd.Env = append(cmd.Env, fmt.Sprintf("HOME=%s", os.Getenv("HOME")))
+
+ if r.reservePorts && !ctr.config.NetMode.IsSlirp4netns() {
+ ports, err := bindPorts(ctr.config.PortMappings)
+ if err != nil {
+ return err
+ }
+
+ // Leak the port we bound in the conmon process. These fd's won't be used
+ // by the container and conmon will keep the ports busy so that another
+ // process cannot use them.
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
+ }
+
+ if ctr.config.NetMode.IsSlirp4netns() {
+ ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
+ if err != nil {
+ return errors.Wrapf(err, "failed to create rootless network sync pipe")
+ }
+ // Leak one end in conmon, the other one will be leaked into slirp4netns
+ cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
+ }
+
+ if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
+ cmd.Env = append(cmd.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
+ }
+ if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok {
+ cmd.Env = append(cmd.Env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1")
+ fds := activation.Files(false)
+ cmd.ExtraFiles = append(cmd.ExtraFiles, fds...)
+ }
+ if selinux.GetEnabled() {
+ // Set the label of the conmon process to be level :s0
+ // This will allow the container processes to talk to fifo-files
+ // passed into the container by conmon
+ var (
+ plabel string
+ con selinux.Context
+ )
+ plabel, err = selinux.CurrentLabel()
+ if err != nil {
+ childPipe.Close()
+ return errors.Wrapf(err, "Failed to get current SELinux label")
+ }
+
+ con, err = selinux.NewContext(plabel)
+ if err != nil {
+ return errors.Wrapf(err, "Failed to get new context from SELinux label")
+ }
+
+ runtime.LockOSThread()
+ if con["level"] != "s0" && con["level"] != "" {
+ con["level"] = "s0"
+ if err = label.SetProcessLabel(con.Get()); err != nil {
+ runtime.UnlockOSThread()
+ return err
+ }
+ }
+ err = cmd.Start()
+ // Ignore error returned from SetProcessLabel("") call,
+ // can't recover.
+ label.SetProcessLabel("")
+ runtime.UnlockOSThread()
+ } else {
+ err = cmd.Start()
+ }
+ if err != nil {
+ childPipe.Close()
+ return err
+ }
+ defer cmd.Wait()
+
+ // We don't need childPipe on the parent side
+ childPipe.Close()
+ childStartPipe.Close()
+
+ // Move conmon to specified cgroup
+ if err := r.moveConmonToCgroup(ctr, cgroupParent, cmd); err != nil {
+ return err
+ }
+
+ /* We set the cgroup, now the child can start creating children */
+ someData := []byte{0}
+ _, err = parentStartPipe.Write(someData)
+ if err != nil {
+ return err
+ }
+
+ /* Wait for initial setup and fork, and reap child */
+ err = cmd.Wait()
+ if err != nil {
+ return err
+ }
+
+ defer func() {
+ if err != nil {
+ if err2 := r.deleteContainer(ctr); err2 != nil {
+ logrus.Errorf("Error removing container %s from runtime after creation failed", ctr.ID())
+ }
+ }
+ }()
+
+ // Wait to get container pid from conmon
+ type syncStruct struct {
+ si *syncInfo
+ err error
+ }
+ ch := make(chan syncStruct)
+ go func() {
+ var si *syncInfo
+ rdr := bufio.NewReader(parentPipe)
+ b, err := rdr.ReadBytes('\n')
+ if err != nil {
+ ch <- syncStruct{err: err}
+ }
+ if err := json.Unmarshal(b, &si); err != nil {
+ ch <- syncStruct{err: err}
+ return
+ }
+ ch <- syncStruct{si: si}
+ }()
+
+ select {
+ case ss := <-ch:
+ if ss.err != nil {
+ return errors.Wrapf(ss.err, "error reading container (probably exited) json message")
+ }
+ logrus.Debugf("Received container pid: %d", ss.si.Pid)
+ if ss.si.Pid == -1 {
+ if ss.si.Message != "" {
+ return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message)
+ }
+ return errors.Wrapf(ErrInternal, "container create failed")
+ }
+ ctr.state.PID = ss.si.Pid
+ case <-time.After(ContainerCreateTimeout):
+ return errors.Wrapf(ErrInternal, "container creation timeout")
+ }
+ return nil
+}
+
+// Wait for a container which has been sent a signal to stop
+func waitContainerStop(ctr *Container, timeout time.Duration) error {
+ done := make(chan struct{})
+ chControl := make(chan struct{})
+ go func() {
+ for {
+ select {
+ case <-chControl:
+ return
+ default:
+ // Check if the process is still around
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ close(done)
+ return
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ }
+ }()
+ select {
+ case <-done:
+ return nil
+ case <-time.After(timeout):
+ close(chControl)
+ logrus.Debugf("container %s did not die within timeout %d", ctr.ID(), timeout)
+ return errors.Errorf("container %s did not die within timeout", ctr.ID())
+ }
+}
+
+// Wait for a set of given PIDs to stop
+func waitPidsStop(pids []int, timeout time.Duration) error {
+ done := make(chan struct{})
+ chControl := make(chan struct{})
+ go func() {
+ for {
+ select {
+ case <-chControl:
+ return
+ default:
+ allClosed := true
+ for _, pid := range pids {
+ if err := unix.Kill(pid, 0); err != unix.ESRCH {
+ allClosed = false
+ break
+ }
+ }
+ if allClosed {
+ close(done)
+ return
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ }
+ }()
+ select {
+ case <-done:
+ return nil
+ case <-time.After(timeout):
+ close(chControl)
+ return errors.Errorf("given PIDs did not die within timeout")
+ }
+}
+
+// stopContainer stops a container, first using its given stop signal (or
+// SIGTERM if no signal was specified), then using SIGKILL
+// Timeout is given in seconds. If timeout is 0, the container will be
+// immediately kill with SIGKILL
+// Does not set finished time for container, assumes you will run updateStatus
+// after to pull the exit code
+func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error {
+ logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
+
+ // Ping the container to see if it's alive
+ // If it's not, it's already stopped, return
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ return nil
+ }
+
+ stopSignal := ctr.config.StopSignal
+ if stopSignal == 0 {
+ stopSignal = uint(syscall.SIGTERM)
+ }
+
+ if timeout > 0 {
+ if err := r.killContainer(ctr, stopSignal); err != nil {
+ // Is the container gone?
+ // If so, it probably died between the first check and
+ // our sending the signal
+ // The container is stopped, so exit cleanly
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ return nil
+ }
+
+ return err
+ }
+
+ if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
+ logrus.Warnf("Timed out stopping container %s, resorting to SIGKILL", ctr.ID())
+ } else {
+ // No error, the container is dead
+ return nil
+ }
+ }
+
+ var args []string
+ if rootless.IsRootless() {
+ // we don't use --all for rootless containers as the OCI runtime might use
+ // the cgroups to determine the PIDs, but for rootless containers there is
+ // not any.
+ args = []string{"kill", ctr.ID(), "KILL"}
+ } else {
+ args = []string{"kill", "--all", ctr.ID(), "KILL"}
+ }
+
+ runtimeDir, err := util.GetRootlessRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+ if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
+ // Again, check if the container is gone. If it is, exit cleanly.
+ err := unix.Kill(ctr.state.PID, 0)
+ if err == unix.ESRCH {
+ return nil
+ }
+
+ return errors.Wrapf(err, "error sending SIGKILL to container %s", ctr.ID())
+ }
+
+ // Give runtime a few seconds to make it happen
+ if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// execStopContainer stops all active exec sessions in a container
+// It will also stop all other processes in the container. It is only intended
+// to be used to assist in cleanup when removing a container.
+// SIGTERM is used by default to stop processes. If SIGTERM fails, SIGKILL will be used.
+func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
+ // Do we have active exec sessions?
+ if len(ctr.state.ExecSessions) == 0 {
+ return nil
+ }
+
+ // Get a list of active exec sessions
+ execSessions := []int{}
+ for _, session := range ctr.state.ExecSessions {
+ pid := session.PID
+ // Ping the PID with signal 0 to see if it still exists
+ if err := unix.Kill(pid, 0); err == unix.ESRCH {
+ continue
+ }
+
+ execSessions = append(execSessions, pid)
+ }
+
+ // All the sessions may be dead
+ // If they are, just return
+ if len(execSessions) == 0 {
+ return nil
+ }
+ runtimeDir, err := util.GetRootlessRuntimeDir()
+ if err != nil {
+ return err
+ }
+ env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
+
+ // If timeout is 0, just use SIGKILL
+ if timeout > 0 {
+ // Stop using SIGTERM by default
+ // Use SIGSTOP after a timeout
+ logrus.Debugf("Killing all processes in container %s with SIGTERM", ctr.ID())
+ if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "kill", "--all", ctr.ID(), "TERM"); err != nil {
+ return errors.Wrapf(err, "error sending SIGTERM to container %s processes", ctr.ID())
+ }
+
+ // Wait for all processes to stop
+ if err := waitPidsStop(execSessions, time.Duration(timeout)*time.Second); err != nil {
+ logrus.Warnf("Timed out stopping container %s exec sessions", ctr.ID())
+ } else {
+ // No error, all exec sessions are dead
+ return nil
+ }
+ }
+
+ // Send SIGKILL
+ logrus.Debugf("Killing all processes in container %s with SIGKILL", ctr.ID())
+ if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "kill", "--all", ctr.ID(), "KILL"); err != nil {
+ return errors.Wrapf(err, "error sending SIGKILL to container %s processes", ctr.ID())
+ }
+
+ // Give the processes a few seconds to go down
+ if err := waitPidsStop(execSessions, killContainerTimeout); err != nil {
+ return errors.Wrapf(err, "failed to kill container %s exec sessions", ctr.ID())
+ }
+
+ return nil
+}