diff options
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_api.go | 130 | ||||
-rw-r--r-- | libpod/container_attach_linux.go | 179 | ||||
-rw-r--r-- | libpod/container_internal.go | 92 | ||||
-rw-r--r-- | libpod/container_internal_linux.go | 2 | ||||
-rw-r--r-- | libpod/define/errors.go | 10 | ||||
-rw-r--r-- | libpod/healthcheck.go | 12 | ||||
-rw-r--r-- | libpod/oci.go | 104 | ||||
-rw-r--r-- | libpod/oci_attach_linux.go | 258 | ||||
-rw-r--r-- | libpod/oci_attach_linux_cgo.go (renamed from libpod/container_attach_linux_cgo.go) | 0 | ||||
-rw-r--r-- | libpod/oci_attach_linux_nocgo.go (renamed from libpod/container_attach_linux_nocgo.go) | 0 | ||||
-rw-r--r-- | libpod/oci_attach_unsupported.go (renamed from libpod/container_attach_unsupported.go) | 6 | ||||
-rw-r--r-- | libpod/oci_internal_linux.go | 493 | ||||
-rw-r--r-- | libpod/oci_linux.go | 414 | ||||
-rw-r--r-- | libpod/oci_unsupported.go | 7 |
14 files changed, 1043 insertions, 664 deletions
diff --git a/libpod/container_api.go b/libpod/container_api.go index 50fa1759d..0cce6ca22 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -2,16 +2,13 @@ package libpod import ( "context" - "fmt" "io" "io/ioutil" "os" - "strconv" "time" "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/libpod/events" - "github.com/containers/libpod/pkg/lookup" "github.com/containers/storage/pkg/stringid" "github.com/docker/docker/oci/caps" "github.com/opentracing/opentracing-go" @@ -21,6 +18,11 @@ import ( "k8s.io/client-go/tools/remotecommand" ) +const ( + defaultExecExitCode = 125 + defaultExecExitCodeCannotInvoke = 126 +) + // Init creates a container in the OCI runtime func (c *Container) Init(ctx context.Context) (err error) { span, _ := opentracing.StartSpanFromContext(ctx, "containerInit") @@ -220,23 +222,19 @@ func (c *Container) Kill(signal uint) error { } // Exec starts a new process inside the container +// Returns an exit code and an error. If Exec was not able to exec in the container before a failure, an exit code of 126 is returned. +// If another generic error happens, an exit code of 125 is returned. +// Sometimes, the $RUNTIME exec call errors, and if that is the case, the exit code is the exit code of the call. +// Otherwise, the exit code will be the exit code of the executed call inside of the container. // TODO investigate allowing exec without attaching -func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs int) error { +func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, error) { var capList []string - - locked := false if !c.batched { - locked = true - c.lock.Lock() - defer func() { - if locked { - c.lock.Unlock() - } - }() + defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return err + return defaultExecExitCodeCannotInvoke, err } } @@ -244,25 +242,13 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir // TODO can probably relax this once we track exec sessions if conState != define.ContainerStateRunning { - return errors.Wrapf(define.ErrCtrStateInvalid, "cannot exec into container that is not running") + return defaultExecExitCodeCannotInvoke, errors.Wrapf(define.ErrCtrStateInvalid, "cannot exec into container that is not running") } + if privileged || c.config.Privileged { capList = caps.GetAllCapabilities() } - // If user was set, look it up in the container to get a UID to use on - // the host - hostUser := "" - if user != "" { - execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, nil) - if err != nil { - return err - } - - // runc expects user formatted as uid:gid - hostUser = fmt.Sprintf("%d:%d", execUser.Uid, execUser.Gid) - } - // Generate exec session ID // Ensure we don't conflict with an existing session ID sessionID := stringid.GenerateNonCryptoID() @@ -282,55 +268,27 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir } logrus.Debugf("Creating new exec session in container %s with session id %s", c.ID(), sessionID) - - execCmd, err := c.ociRuntime.execContainer(c, cmd, capList, env, tty, workDir, hostUser, sessionID, streams, preserveFDs) - if err != nil { - return errors.Wrapf(err, "error exec %s", c.ID()) + if err := c.createExecBundle(sessionID); err != nil { + return defaultExecExitCodeCannotInvoke, err } - chWait := make(chan error) - go func() { - chWait <- execCmd.Wait() - close(chWait) - }() - pidFile := c.execPidPath(sessionID) - // 60 second seems a reasonable time to wait - // https://github.com/containers/libpod/issues/1495 - // https://github.com/containers/libpod/issues/1816 - const pidWaitTimeout = 60000 - - // Wait until the runtime makes the pidfile - exited, err := WaitForFile(pidFile, chWait, pidWaitTimeout*time.Millisecond) - if err != nil { - if exited { - // If the runtime exited, propagate the error we got from the process. - // We need to remove PID files to ensure no memory leaks - if err2 := os.Remove(pidFile); err2 != nil { - logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) - } - - return err + defer func() { + // cleanup exec bundle + if err := c.cleanupExecBundle(sessionID); err != nil { + logrus.Errorf("Error removing exec session %s bundle path for container %s: %v", sessionID, c.ID(), err) } - return errors.Wrapf(err, "timed out waiting for runtime to create pidfile for exec session in container %s", c.ID()) - } + }() - // Pidfile exists, read it - contents, err := ioutil.ReadFile(pidFile) - // We need to remove PID files to ensure no memory leaks - if err2 := os.Remove(pidFile); err2 != nil { - logrus.Errorf("Error removing exit file for container %s exec session %s: %v", c.ID(), sessionID, err2) - } - if err != nil { - // We don't know the PID of the exec session - // However, it may still be alive - // TODO handle this better - return errors.Wrapf(err, "could not read pidfile for exec session %s in container %s", sessionID, c.ID()) - } - pid, err := strconv.ParseInt(string(contents), 10, 32) + pid, attachChan, err := c.ociRuntime.execContainer(c, cmd, capList, env, tty, workDir, user, sessionID, streams, preserveFDs, resize, detachKeys) if err != nil { - // As above, we don't have a valid PID, but the exec session is likely still alive - // TODO handle this better - return errors.Wrapf(err, "error parsing PID of exec session %s in container %s", sessionID, c.ID()) + ec := defaultExecExitCode + // Conmon will pass a non-zero exit code from the runtime as a pid here. + // we differentiate a pid with an exit code by sending it as negative, so reverse + // that change and return the exit code the runtime failed with. + if pid < 0 { + ec = -1 * pid + } + return ec, err } // We have the PID, add it to state @@ -340,12 +298,12 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir session := new(ExecSession) session.ID = sessionID session.Command = cmd - session.PID = int(pid) + session.PID = pid c.state.ExecSessions[sessionID] = session if err := c.save(); err != nil { // Now we have a PID but we can't save it in the DB // TODO handle this better - return errors.Wrapf(err, "error saving exec sessions %s for container %s", sessionID, c.ID()) + return defaultExecExitCode, errors.Wrapf(err, "error saving exec sessions %s for container %s", sessionID, c.ID()) } c.newContainerEvent(events.Exec) logrus.Debugf("Successfully started exec session %s in container %s", sessionID, c.ID()) @@ -353,23 +311,33 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir // Unlock so other processes can use the container if !c.batched { c.lock.Unlock() - locked = false } - var waitErr error - if !exited { - waitErr = <-chWait + lastErr := <-attachChan + + exitCode, err := c.readExecExitCode(sessionID) + if err != nil { + if lastErr != nil { + logrus.Errorf(lastErr.Error()) + } + lastErr = err + } + if exitCode != 0 { + if lastErr != nil { + logrus.Errorf(lastErr.Error()) + } + lastErr = errors.Wrapf(define.ErrOCIRuntime, "non zero exit code: %d", exitCode) } // Lock again if !c.batched { - locked = true c.lock.Lock() } // Sync the container again to pick up changes in state if err := c.syncContainer(); err != nil { - return errors.Wrapf(err, "error syncing container %s state to remove exec session %s", c.ID(), sessionID) + logrus.Errorf("error syncing container %s state to remove exec session %s", c.ID(), sessionID) + return exitCode, lastErr } // Remove the exec session from state @@ -377,7 +345,7 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir if err := c.save(); err != nil { logrus.Errorf("Error removing exec session %s from container %s state: %v", sessionID, c.ID(), err) } - return waitErr + return exitCode, lastErr } // AttachStreams contains streams that will be attached to the container diff --git a/libpod/container_attach_linux.go b/libpod/container_attach_linux.go deleted file mode 100644 index 837cbf916..000000000 --- a/libpod/container_attach_linux.go +++ /dev/null @@ -1,179 +0,0 @@ -//+build linux - -package libpod - -import ( - "fmt" - "io" - "net" - "os" - "path/filepath" - - "github.com/containers/libpod/libpod/define" - "github.com/containers/libpod/pkg/errorhandling" - "github.com/containers/libpod/pkg/kubeutils" - "github.com/containers/libpod/utils" - "github.com/docker/docker/pkg/term" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - "k8s.io/client-go/tools/remotecommand" -) - -/* Sync with stdpipe_t in conmon.c */ -const ( - AttachPipeStdin = 1 - AttachPipeStdout = 2 - AttachPipeStderr = 3 -) - -// Attach to the given container -// Does not check if state is appropriate -func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool, started chan bool) error { - if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { - return errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") - } - - logrus.Debugf("Attaching to container %s", c.ID()) - - return c.attachContainerSocket(resize, keys, streams, startContainer, started) -} - -// attachContainerSocket connects to the container's attach socket and deals with the IO. -// started is only required if startContainer is true -// TODO add a channel to allow interrupting -func (c *Container) attachContainerSocket(resize <-chan remotecommand.TerminalSize, keys string, streams *AttachStreams, startContainer bool, started chan bool) error { - if startContainer && started == nil { - return errors.Wrapf(define.ErrInternal, "started chan not passed when startContainer set") - } - - // Use default detach keys when keys aren't passed or specified in libpod.conf - if len(keys) == 0 { - keys = DefaultDetachKeys - } - - // Check the validity of the provided keys - detachKeys := []byte{} - var err error - detachKeys, err = term.ToBytes(keys) - if err != nil { - return errors.Wrapf(err, "invalid detach keys") - } - - kubeutils.HandleResizing(resize, func(size remotecommand.TerminalSize) { - controlPath := filepath.Join(c.bundlePath(), "ctl") - controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) - if err != nil { - logrus.Debugf("Could not open ctl file: %v", err) - return - } - defer errorhandling.CloseQuiet(controlFile) - - logrus.Debugf("Received a resize event: %+v", size) - if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil { - logrus.Warnf("Failed to write to control file to resize terminal: %v", err) - } - }) - - socketPath := c.AttachSocketPath() - - maxUnixLength := unixPathLength() - if maxUnixLength < len(socketPath) { - socketPath = socketPath[0:maxUnixLength] - } - - logrus.Debug("connecting to socket ", socketPath) - - conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: socketPath, Net: "unixpacket"}) - if err != nil { - return errors.Wrapf(err, "failed to connect to container's attach socket: %v", socketPath) - } - defer func() { - if err := conn.Close(); err != nil { - logrus.Errorf("unable to close socket: %q", err) - } - }() - - // If starting was requested, start the container and notify when that's - // done. - if startContainer { - if err := c.start(); err != nil { - return err - } - started <- true - } - - receiveStdoutError := make(chan error) - go func() { - receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn) - }() - - stdinDone := make(chan error) - go func() { - var err error - if streams.AttachInput { - _, err = utils.CopyDetachable(conn, streams.InputStream, detachKeys) - if err := conn.CloseWrite(); err != nil { - logrus.Error("failed to close write in attach") - } - } - stdinDone <- err - }() - - select { - case err := <-receiveStdoutError: - return err - case err := <-stdinDone: - if err == define.ErrDetach { - return err - } - if streams.AttachOutput || streams.AttachError { - return <-receiveStdoutError - } - } - return nil -} - -func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error { - var err error - buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */ - for { - nr, er := conn.Read(buf) - if nr > 0 { - var dst io.Writer - var doWrite bool - switch buf[0] { - case AttachPipeStdout: - dst = outputStream - doWrite = writeOutput - case AttachPipeStderr: - dst = errorStream - doWrite = writeError - default: - logrus.Infof("Received unexpected attach type %+d", buf[0]) - } - if dst == nil { - return errors.New("output destination cannot be nil") - } - if doWrite { - nw, ew := dst.Write(buf[1:nr]) - if ew != nil { - err = ew - break - } - if nr != nw+1 { - err = io.ErrShortWrite - break - } - } - } - if er == io.EOF { - break - } - if er != nil { - err = er - break - } - } - return err -} diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ca27f5814..36732685b 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -31,7 +31,8 @@ import ( const ( // name of the directory holding the artifacts - artifactsDir = "artifacts" + artifactsDir = "artifacts" + execDirPermission = 0755 ) // rootFsSize gets the size of the container's root filesystem @@ -132,14 +133,91 @@ func (c *Container) AttachSocketPath() string { return filepath.Join(c.ociRuntime.socketsDir, c.ID(), "attach") } +// exitFilePath gets the path to the container's exit file +func (c *Container) exitFilePath() string { + return filepath.Join(c.ociRuntime.exitsDir, c.ID()) +} + +// create a bundle path and associated files for an exec session +func (c *Container) createExecBundle(sessionID string) (err error) { + bundlePath := c.execBundlePath(sessionID) + if createErr := os.MkdirAll(bundlePath, execDirPermission); createErr != nil { + return createErr + } + defer func() { + if err != nil { + if err2 := os.RemoveAll(bundlePath); err != nil { + logrus.Warnf("error removing exec bundle after creation caused another error: %v", err2) + } + } + }() + if err2 := os.MkdirAll(c.execExitFileDir(sessionID), execDirPermission); err2 != nil { + // The directory is allowed to exist + if !os.IsExist(err2) { + err = errors.Wrapf(err2, "error creating OCI runtime exit file path %s", c.execExitFileDir(sessionID)) + } + } + return +} + +// cleanup an exec session after its done +func (c *Container) cleanupExecBundle(sessionID string) error { + return os.RemoveAll(c.execBundlePath(sessionID)) +} + +// the path to a containers exec session bundle +func (c *Container) execBundlePath(sessionID string) string { + return filepath.Join(c.bundlePath(), sessionID) +} + // Get PID file path for a container's exec session func (c *Container) execPidPath(sessionID string) string { - return filepath.Join(c.state.RunDir, "exec_pid_"+sessionID) + return filepath.Join(c.execBundlePath(sessionID), "exec_pid") } -// exitFilePath gets the path to the container's exit file -func (c *Container) exitFilePath() string { - return filepath.Join(c.ociRuntime.exitsDir, c.ID()) +// the log path for an exec session +func (c *Container) execLogPath(sessionID string) string { + return filepath.Join(c.execBundlePath(sessionID), "exec_log") +} + +// the socket conmon creates for an exec session +func (c *Container) execAttachSocketPath(sessionID string) string { + return filepath.Join(c.ociRuntime.socketsDir, sessionID, "attach") +} + +// execExitFileDir gets the path to the container's exit file +func (c *Container) execExitFileDir(sessionID string) string { + return filepath.Join(c.execBundlePath(sessionID), "exit") +} + +// execOCILog returns the file path for the exec sessions oci log +func (c *Container) execOCILog(sessionID string) string { + if !c.ociRuntime.supportsJSON { + return "" + } + return filepath.Join(c.execBundlePath(sessionID), "oci-log") +} + +// readExecExitCode reads the exit file for an exec session and returns +// the exit code +func (c *Container) readExecExitCode(sessionID string) (int, error) { + exitFile := filepath.Join(c.execExitFileDir(sessionID), c.ID()) + chWait := make(chan error) + defer close(chWait) + + _, err := WaitForFile(exitFile, chWait, time.Second*5) + if err != nil { + return -1, err + } + ec, err := ioutil.ReadFile(exitFile) + if err != nil { + return -1, err + } + ecInt, err := strconv.Atoi(string(ec)) + if err != nil { + return -1, err + } + return ecInt, nil } // Wait for the container's exit file to appear. @@ -849,8 +927,8 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { return err } - // With the newSpec complete, do an OCI create - if err := c.ociRuntime.createContainer(c, c.config.CgroupParent, nil); err != nil { + // With the spec complete, do an OCI create + if err := c.ociRuntime.createContainer(c, nil); err != nil { return err } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 3dfd4c9e9..6e775cd28 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -834,7 +834,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } } - if err := c.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil { + if err := c.ociRuntime.createContainer(c, &options); err != nil { return err } diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 3b8313855..a4368a9aa 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -97,6 +97,14 @@ var ( // OS. ErrOSNotSupported = errors.New("no support for this OS yet") - // ErrOCIRuntime indicates an error from the OCI runtime + // ErrOCIRuntime indicates a generic error from the OCI runtime ErrOCIRuntime = errors.New("OCI runtime error") + + // ErrOCIRuntimePermissionDenied indicates the OCI runtime attempted to invoke a command that returned + // a permission denied error + ErrOCIRuntimePermissionDenied = errors.New("OCI runtime permission denied error") + + // ErrOCIRuntimeNotFound indicates the OCI runtime attempted to invoke a command + // that was not found + ErrOCIRuntimeNotFound = errors.New("OCI runtime command not found error") ) diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 1a19b88bb..0338828e4 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -141,10 +141,18 @@ func (c *Container) runHealthCheck() (HealthCheckStatus, error) { logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID()) timeStart := time.Now() hcResult := HealthCheckSuccess - hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0) + _, hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0, nil, "") if hcErr != nil { + errCause := errors.Cause(hcErr) hcResult = HealthCheckFailure - returnCode = 1 + if errCause == define.ErrOCIRuntimeNotFound || + errCause == define.ErrOCIRuntimePermissionDenied || + errCause == define.ErrOCIRuntime { + returnCode = 1 + hcErr = nil + } else { + returnCode = 125 + } } timeEnd := time.Now() if c.HealthCheckConfig().StartPeriod > 0 { diff --git a/libpod/oci.go b/libpod/oci.go index 3daf9f834..193e66aaf 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -62,12 +62,6 @@ type OCIRuntime struct { supportsJSON bool } -// syncInfo is used to return data from monitor process to daemon -type syncInfo struct { - Pid int `json:"pid"` - Message string `json:"message,omitempty"` -} - // ociError is used to parse the OCI runtime JSON log. It is not part of the // OCI runtime specifications, it follows what runc does type ociError struct { @@ -245,6 +239,7 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container, useRuntime bool) erro cmd := exec.Command(r.path, "state", ctr.ID()) cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) + outPipe, err := cmd.StdoutPipe() if err != nil { return errors.Wrapf(err, "getting stdout pipe") @@ -390,103 +385,6 @@ func (r *OCIRuntime) unpauseContainer(ctr *Container) error { return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "resume", ctr.ID()) } -// execContainer executes a command in a running container -// TODO: Add --detach support -// TODO: Convert to use conmon -// TODO: add --pid-file and use that to generate exec session tracking -func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string, streams *AttachStreams, preserveFDs int) (*exec.Cmd, error) { - if len(cmd) == 0 { - return nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute") - } - - if sessionID == "" { - return nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec") - } - - runtimeDir, err := util.GetRootlessRuntimeDir() - if err != nil { - return nil, err - } - - args := []string{} - - // TODO - should we maintain separate logpaths for exec sessions? - args = append(args, "exec") - - if cwd != "" { - args = append(args, "--cwd", cwd) - } - - args = append(args, "--pid-file", c.execPidPath(sessionID)) - - if tty { - args = append(args, "--tty") - } else { - args = append(args, "--tty=false") - } - - if user != "" { - args = append(args, "--user", user) - } - - if preserveFDs > 0 { - args = append(args, fmt.Sprintf("--preserve-fds=%d", preserveFDs)) - } - if c.config.Spec.Process.NoNewPrivileges { - args = append(args, "--no-new-privs") - } - - for _, capabilityAdd := range capAdd { - args = append(args, "--cap", capabilityAdd) - } - - for _, envVar := range env { - args = append(args, "--env", envVar) - } - - // Append container ID, name and command - args = append(args, c.ID()) - args = append(args, cmd...) - - logrus.Debugf("Starting runtime %s with following arguments: %v", r.path, args) - - execCmd := exec.Command(r.path, args...) - - if streams.AttachOutput { - execCmd.Stdout = streams.OutputStream - } - if streams.AttachInput { - execCmd.Stdin = streams.InputStream - } - if streams.AttachError { - execCmd.Stderr = streams.ErrorStream - } - - execCmd.Env = append(execCmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) - - if preserveFDs > 0 { - for fd := 3; fd < 3+preserveFDs; fd++ { - execCmd.ExtraFiles = append(execCmd.ExtraFiles, os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))) - } - } - - if err := execCmd.Start(); err != nil { - return nil, errors.Wrapf(err, "cannot start container %s", c.ID()) - } - - if preserveFDs > 0 { - for fd := 3; fd < 3+preserveFDs; fd++ { - // These fds were passed down to the runtime. Close them - // and not interfere - if err := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close(); err != nil { - logrus.Debugf("unable to close file fd-%d", fd) - } - } - } - - return execCmd, nil -} - // checkpointContainer checkpoints the given container func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckpointOptions) error { if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { diff --git a/libpod/oci_attach_linux.go b/libpod/oci_attach_linux.go new file mode 100644 index 000000000..7157ee2f7 --- /dev/null +++ b/libpod/oci_attach_linux.go @@ -0,0 +1,258 @@ +//+build linux + +package libpod + +import ( + "fmt" + "io" + "net" + "os" + "path/filepath" + + "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/pkg/errorhandling" + "github.com/containers/libpod/pkg/kubeutils" + "github.com/containers/libpod/utils" + "github.com/docker/docker/pkg/term" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + "k8s.io/client-go/tools/remotecommand" +) + +/* Sync with stdpipe_t in conmon.c */ +const ( + AttachPipeStdin = 1 + AttachPipeStdout = 2 + AttachPipeStderr = 3 +) + +// Attach to the given container +// Does not check if state is appropriate +// started is only required if startContainer is true +func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool, started chan bool) error { + if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { + return errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") + } + if startContainer && started == nil { + return errors.Wrapf(define.ErrInternal, "started chan not passed when startContainer set") + } + + detachKeys, err := processDetachKeys(keys) + if err != nil { + return err + } + + logrus.Debugf("Attaching to container %s", c.ID()) + + registerResizeFunc(resize, c.bundlePath()) + + socketPath := buildSocketPath(c.AttachSocketPath()) + + conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: socketPath, Net: "unixpacket"}) + if err != nil { + return errors.Wrapf(err, "failed to connect to container's attach socket: %v", socketPath) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("unable to close socket: %q", err) + } + }() + + // If starting was requested, start the container and notify when that's + // done. + if startContainer { + if err := c.start(); err != nil { + return err + } + started <- true + } + + receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys) + return readStdio(streams, receiveStdoutError, stdinDone) +} + +// Attach to the given container's exec session +// attachFd and startFd must be open file descriptors +// attachFd must be the output side of the fd. attachFd is used for two things: +// conmon will first send a nonse value across the pipe indicating it has set up its side of the console socket +// this ensures attachToExec gets all of the output of the called process +// conmon will then send the exit code of the exec process, or an error in the exec session +// startFd must be the input side of the fd. +// conmon will wait to start the exec session until the parent process has setup the console socket. +// Once attachToExec successfully attaches to the console socket, the child conmon process responsible for calling runtime exec +// will read from the output side of start fd, thus learning to start the child process. +// Thus, the order goes as follow: +// 1. conmon parent process sets up its console socket. sends on attachFd +// 2. attachToExec attaches to the console socket after reading on attachFd +// 3. child waits on startFd for attachToExec to attach to said console socket +// 4. attachToExec sends on startFd, signalling it has attached to the socket and child is ready to go +// 5. child receives on startFd, runs the runtime exec command +// attachToExec is responsible for closing startFd and attachFd +func (c *Container) attachToExec(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, sessionID string, startFd, attachFd *os.File) error { + if !streams.AttachOutput && !streams.AttachError && !streams.AttachInput { + return errors.Wrapf(define.ErrInvalidArg, "must provide at least one stream to attach to") + } + if startFd == nil || attachFd == nil { + return errors.Wrapf(define.ErrInvalidArg, "start sync pipe and attach sync pipe must be defined for exec attach") + } + + defer errorhandling.CloseQuiet(startFd) + defer errorhandling.CloseQuiet(attachFd) + + detachKeys, err := processDetachKeys(keys) + if err != nil { + return err + } + + logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID) + + registerResizeFunc(resize, c.execBundlePath(sessionID)) + + // set up the socket path, such that it is the correct length and location for exec + socketPath := buildSocketPath(c.execAttachSocketPath(sessionID)) + + // 2: read from attachFd that the parent process has set up the console socket + if _, err := readConmonPipeData(attachFd, ""); err != nil { + return err + } + // 2: then attach + conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: socketPath, Net: "unixpacket"}) + if err != nil { + return errors.Wrapf(err, "failed to connect to container's attach socket: %v", socketPath) + } + defer func() { + if err := conn.Close(); err != nil { + logrus.Errorf("unable to close socket: %q", err) + } + }() + + // start listening on stdio of the process + receiveStdoutError, stdinDone := setupStdioChannels(streams, conn, detachKeys) + + // 4: send start message to child + if err := writeConmonPipeData(startFd); err != nil { + return err + } + + return readStdio(streams, receiveStdoutError, stdinDone) +} + +func processDetachKeys(keys string) ([]byte, error) { + // Check the validity of the provided keys first + if len(keys) == 0 { + keys = DefaultDetachKeys + } + detachKeys, err := term.ToBytes(keys) + if err != nil { + return nil, errors.Wrapf(err, "invalid detach keys") + } + return detachKeys, nil +} + +func registerResizeFunc(resize <-chan remotecommand.TerminalSize, bundlePath string) { + kubeutils.HandleResizing(resize, func(size remotecommand.TerminalSize) { + controlPath := filepath.Join(bundlePath, "ctl") + controlFile, err := os.OpenFile(controlPath, unix.O_WRONLY, 0) + if err != nil { + logrus.Debugf("Could not open ctl file: %v", err) + return + } + defer controlFile.Close() + + logrus.Debugf("Received a resize event: %+v", size) + if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 1, size.Height, size.Width); err != nil { + logrus.Warnf("Failed to write to control file to resize terminal: %v", err) + } + }) +} + +func buildSocketPath(socketPath string) string { + maxUnixLength := unixPathLength() + if maxUnixLength < len(socketPath) { + socketPath = socketPath[0:maxUnixLength] + } + + logrus.Debug("connecting to socket ", socketPath) + return socketPath +} + +func setupStdioChannels(streams *AttachStreams, conn *net.UnixConn, detachKeys []byte) (chan error, chan error) { + receiveStdoutError := make(chan error) + go func() { + receiveStdoutError <- redirectResponseToOutputStreams(streams.OutputStream, streams.ErrorStream, streams.AttachOutput, streams.AttachError, conn) + }() + + stdinDone := make(chan error) + go func() { + var err error + if streams.AttachInput { + _, err = utils.CopyDetachable(conn, streams.InputStream, detachKeys) + conn.CloseWrite() + } + stdinDone <- err + }() + + return receiveStdoutError, stdinDone +} + +func redirectResponseToOutputStreams(outputStream, errorStream io.Writer, writeOutput, writeError bool, conn io.Reader) error { + var err error + buf := make([]byte, 8192+1) /* Sync with conmon STDIO_BUF_SIZE */ + for { + nr, er := conn.Read(buf) + if nr > 0 { + var dst io.Writer + var doWrite bool + switch buf[0] { + case AttachPipeStdout: + dst = outputStream + doWrite = writeOutput + case AttachPipeStderr: + dst = errorStream + doWrite = writeError + default: + logrus.Infof("Received unexpected attach type %+d", buf[0]) + } + if dst == nil { + return errors.New("output destination cannot be nil") + } + + if doWrite { + nw, ew := dst.Write(buf[1:nr]) + if ew != nil { + err = ew + break + } + if nr != nw+1 { + err = io.ErrShortWrite + break + } + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + return err +} + +func readStdio(streams *AttachStreams, receiveStdoutError, stdinDone chan error) error { + var err error + select { + case err = <-receiveStdoutError: + return err + case err = <-stdinDone: + if err == define.ErrDetach { + return err + } + if streams.AttachOutput || streams.AttachError { + return <-receiveStdoutError + } + } + return nil +} diff --git a/libpod/container_attach_linux_cgo.go b/libpod/oci_attach_linux_cgo.go index d81243360..d81243360 100644 --- a/libpod/container_attach_linux_cgo.go +++ b/libpod/oci_attach_linux_cgo.go diff --git a/libpod/container_attach_linux_nocgo.go b/libpod/oci_attach_linux_nocgo.go index a514a555d..a514a555d 100644 --- a/libpod/container_attach_linux_nocgo.go +++ b/libpod/oci_attach_linux_nocgo.go diff --git a/libpod/container_attach_unsupported.go b/libpod/oci_attach_unsupported.go index c27ce0799..987d2c973 100644 --- a/libpod/container_attach_unsupported.go +++ b/libpod/oci_attach_unsupported.go @@ -3,6 +3,8 @@ package libpod import ( + "os" + "github.com/containers/libpod/libpod/define" "k8s.io/client-go/tools/remotecommand" ) @@ -10,3 +12,7 @@ import ( func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, startContainer bool, started chan bool) error { return define.ErrNotImplemented } + +func (c *Container) attachToExec(streams *AttachStreams, keys string, resize <-chan remotecommand.TerminalSize, sessionID string, startFd *os.File, attachFd *os.File) error { + return define.ErrNotImplemented +} diff --git a/libpod/oci_internal_linux.go b/libpod/oci_internal_linux.go new file mode 100644 index 000000000..1d8654eca --- /dev/null +++ b/libpod/oci_internal_linux.go @@ -0,0 +1,493 @@ +// +build linux + +package libpod + +import ( + "bufio" + "bytes" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "regexp" + "runtime" + "strconv" + "strings" + "syscall" + "time" + + "github.com/containers/libpod/libpod/define" + "github.com/containers/libpod/pkg/cgroups" + "github.com/containers/libpod/pkg/lookup" + "github.com/containers/libpod/pkg/util" + "github.com/containers/libpod/utils" + "github.com/coreos/go-systemd/activation" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// createOCIContainer generates this container's main conmon instance and prepares it for starting +func (r *OCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) { + var stderrBuf bytes.Buffer + + runtimeDir, err := util.GetRootlessRuntimeDir() + if err != nil { + return err + } + + parentSyncPipe, childSyncPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair") + } + defer parentSyncPipe.Close() + + childStartPipe, parentStartPipe, err := newPipe() + if err != nil { + return errors.Wrapf(err, "error creating socket pair for start pipe") + } + + defer parentStartPipe.Close() + + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = filepath.Join(ctr.state.RunDir, "oci-log") + } + args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog) + + if ctr.config.Spec.Process.Terminal { + args = append(args, "-t") + } else if ctr.config.Stdin { + args = append(args, "-i") + } + + if ctr.config.ConmonPidFile != "" { + args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile) + } + + if r.noPivot { + args = append(args, "--no-pivot") + } + + if len(ctr.config.ExitCommand) > 0 { + args = append(args, "--exit-command", ctr.config.ExitCommand[0]) + for _, arg := range ctr.config.ExitCommand[1:] { + args = append(args, []string{"--exit-command-arg", arg}...) + } + } + + if restoreOptions != nil { + args = append(args, "--restore", ctr.CheckpointPath()) + if restoreOptions.TCPEstablished { + args = append(args, "--runtime-opt", "--tcp-established") + } + } + + logrus.WithFields(logrus.Fields{ + "args": args, + }).Debugf("running conmon: %s", r.conmonPath) + + cmd := exec.Command(r.conmonPath, args...) + cmd.Dir = ctr.bundlePath() + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + // TODO this is probably a really bad idea for some uses + // Make this configurable + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if ctr.config.Spec.Process.Terminal { + cmd.Stderr = &stderrBuf + } + + // 0, 1 and 2 are stdin, stdout and stderr + conmonEnv, envFiles, err := r.configureConmonEnv(runtimeDir) + if err != nil { + return err + } + + cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3), fmt.Sprintf("_OCI_STARTPIPE=%d", 4)) + cmd.Env = append(cmd.Env, conmonEnv...) + cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe) + cmd.ExtraFiles = append(cmd.ExtraFiles, envFiles...) + + if r.reservePorts && !ctr.config.NetMode.IsSlirp4netns() { + ports, err := bindPorts(ctr.config.PortMappings) + if err != nil { + return err + } + + // Leak the port we bound in the conmon process. These fd's won't be used + // by the container and conmon will keep the ports busy so that another + // process cannot use them. + cmd.ExtraFiles = append(cmd.ExtraFiles, ports...) + } + + if ctr.config.NetMode.IsSlirp4netns() { + ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() + if err != nil { + return errors.Wrapf(err, "failed to create rootless network sync pipe") + } + // Leak one end in conmon, the other one will be leaked into slirp4netns + cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW) + } + + err = startCommandGivenSelinux(cmd) + // regardless of whether we errored or not, we no longer need the children pipes + childSyncPipe.Close() + childStartPipe.Close() + if err != nil { + return err + } + if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe, ctr.ID()); err != nil { + return err + } + /* Wait for initial setup and fork, and reap child */ + err = cmd.Wait() + if err != nil { + return err + } + + pid, err := readConmonPipeData(parentSyncPipe, ociLog) + if err != nil { + if err2 := r.deleteContainer(ctr); err2 != nil { + logrus.Errorf("Error removing container %s from runtime after creation failed", ctr.ID()) + } + return err + } + ctr.state.PID = pid + + conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile) + if err != nil { + logrus.Warnf("error reading conmon pid file for container %s: %s", ctr.ID(), err.Error()) + } else if conmonPID > 0 { + // conmon not having a pid file is a valid state, so don't set it if we don't have it + logrus.Infof("Got Conmon PID as %d", conmonPID) + ctr.state.ConmonPID = conmonPID + } + + return nil +} + +// prepareProcessExec returns the path of the process.json used in runc exec -p +// caller is responsible to close the returned *os.File if needed. +func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string) (*os.File, error) { + f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-") + if err != nil { + return nil, err + } + + pspec := c.config.Spec.Process + pspec.Args = cmd + // We need to default this to false else it will inherit terminal as true + // from the container. + pspec.Terminal = false + if tty { + pspec.Terminal = true + } + if len(env) > 0 { + pspec.Env = append(pspec.Env, env...) + } + + if cwd != "" { + pspec.Cwd = cwd + + } + // If user was set, look it up in the container to get a UID to use on + // the host + if user != "" { + execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, nil) + if err != nil { + return nil, err + } + sgids := make([]uint32, 0, len(execUser.Sgids)) + for _, sgid := range execUser.Sgids { + sgids = append(sgids, uint32(sgid)) + } + processUser := spec.User{ + UID: uint32(execUser.Uid), + GID: uint32(execUser.Gid), + AdditionalGids: sgids, + } + + pspec.User = processUser + } + + processJSON, err := json.Marshal(pspec) + if err != nil { + return nil, err + } + + if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil { + return nil, err + } + return f, nil +} + +// configureConmonEnv gets the environment values to add to conmon's exec struct +// TODO this may want to be less hardcoded/more configurable in the future +func (r *OCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File, error) { + env := make([]string, 0, 6) + env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) + env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED"))) + env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID"))) + home, err := homeDir() + if err != nil { + return nil, nil, err + } + env = append(env, fmt.Sprintf("HOME=%s", home)) + + extraFiles := make([]*os.File, 0) + if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { + env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) + } + if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok { + env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1") + fds := activation.Files(false) + extraFiles = append(extraFiles, fds...) + } + return env, extraFiles, nil +} + +// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI +func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath string) []string { + // set the conmon API version to be able to use the correct sync struct keys + args := []string{"--api-version", "1"} + if r.cgroupManager == SystemdCgroupsManager { + args = append(args, "-s") + } + args = append(args, "-c", ctr.ID()) + args = append(args, "-u", cuuid) + args = append(args, "-r", r.path) + args = append(args, "-b", bundlePath) + args = append(args, "-p", pidPath) + + var logDriver string + switch ctr.LogDriver() { + case JournaldLogging: + logDriver = JournaldLogging + case JSONLogging: + fallthrough + default: + // No case here should happen except JSONLogging, but keep this here in case the options are extended + logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver()) + fallthrough + case KubernetesLogging: + logDriver = fmt.Sprintf("%s:%s", KubernetesLogging, logPath) + } + + args = append(args, "-l", logDriver) + args = append(args, "--exit-dir", exitDir) + args = append(args, "--socket-dir-path", r.socketsDir) + if r.logSizeMax >= 0 { + args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax)) + } + + logLevel := logrus.GetLevel() + args = append(args, "--log-level", logLevel.String()) + + if logLevel == logrus.DebugLevel { + logrus.Debugf("%s messages will be logged to syslog", r.conmonPath) + args = append(args, "--syslog") + } + if ociLogPath != "" { + args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath)) + } + return args +} + +// startCommandGivenSelinux starts a container ensuring to set the labels of +// the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled +func startCommandGivenSelinux(cmd *exec.Cmd) error { + if !selinux.GetEnabled() { + return cmd.Start() + } + // Set the label of the conmon process to be level :s0 + // This will allow the container processes to talk to fifo-files + // passed into the container by conmon + var ( + plabel string + con selinux.Context + err error + ) + plabel, err = selinux.CurrentLabel() + if err != nil { + return errors.Wrapf(err, "Failed to get current SELinux label") + } + + con, err = selinux.NewContext(plabel) + if err != nil { + return errors.Wrapf(err, "Failed to get new context from SELinux label") + } + + runtime.LockOSThread() + if con["level"] != "s0" && con["level"] != "" { + con["level"] = "s0" + if err = label.SetProcessLabel(con.Get()); err != nil { + runtime.UnlockOSThread() + return err + } + } + err = cmd.Start() + // Ignore error returned from SetProcessLabel("") call, + // can't recover. + label.SetProcessLabel("") + runtime.UnlockOSThread() + return err +} + +// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup +// it then signals for conmon to start by sending nonse data down the start fd +func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error { + cgroupParent := ctr.CgroupParent() + if os.Geteuid() == 0 { + if r.cgroupManager == SystemdCgroupsManager { + unitName := createUnitName("libpod-conmon", ctr.ID()) + + realCgroupParent := cgroupParent + splitParent := strings.Split(cgroupParent, "/") + if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { + realCgroupParent = splitParent[len(splitParent)-1] + } + + logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) + if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { + logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) + } + } else { + cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") + control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) + if err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } else { + // we need to remove this defer and delete the cgroup once conmon exits + // maybe need a conmon monitor? + if err := control.AddPid(cmd.Process.Pid); err != nil { + logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) + } + } + } + } + + /* We set the cgroup, now the child can start creating children */ + if err := writeConmonPipeData(startFd); err != nil { + return err + } + return nil +} + +// newPipe creates a unix socket pair for communication +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// readConmonPidFile attempts to read conmon's pid from its pid file +func readConmonPidFile(pidFile string) (int, error) { + // Let's try reading the Conmon pid at the same time. + if pidFile != "" { + contents, err := ioutil.ReadFile(pidFile) + if err != nil { + return -1, err + } + // Convert it to an int + conmonPID, err := strconv.Atoi(string(contents)) + if err != nil { + return -1, err + } + return conmonPID, nil + } + return 0, nil +} + +// readConmonPipeData attempts to read a syncInfo struct from the pipe +func readConmonPipeData(pipe *os.File, ociLog string) (int, error) { + // syncInfo is used to return data from monitor process to daemon + type syncInfo struct { + Data int `json:"data"` + Message string `json:"message,omitempty"` + } + + // Wait to get container pid from conmon + type syncStruct struct { + si *syncInfo + err error + } + ch := make(chan syncStruct) + go func() { + var si *syncInfo + rdr := bufio.NewReader(pipe) + b, err := rdr.ReadBytes('\n') + if err != nil { + ch <- syncStruct{err: err} + } + if err := json.Unmarshal(b, &si); err != nil { + ch <- syncStruct{err: err} + return + } + ch <- syncStruct{si: si} + }() + + data := -1 + select { + case ss := <-ch: + if ss.err != nil { + return -1, errors.Wrapf(ss.err, "error reading container (probably exited) json message") + } + logrus.Debugf("Received: %d", ss.si.Data) + if ss.si.Data < 0 { + if ociLog != "" { + ociLogData, err := ioutil.ReadFile(ociLog) + if err == nil { + var ociErr ociError + if err := json.Unmarshal(ociLogData, &ociErr); err == nil { + return ss.si.Data, getOCIRuntimeError(ociErr.Msg) + } + } + } + // If we failed to parse the JSON errors, then print the output as it is + if ss.si.Message != "" { + return ss.si.Data, getOCIRuntimeError(ss.si.Message) + } + return ss.si.Data, errors.Wrapf(define.ErrInternal, "container create failed") + } + data = ss.si.Data + case <-time.After(ContainerCreateTimeout): + return -1, errors.Wrapf(define.ErrInternal, "container creation timeout") + } + return data, nil +} + +func getOCIRuntimeError(runtimeMsg string) error { + if match, _ := regexp.MatchString(".*permission denied.*", runtimeMsg); match { + return errors.Wrapf(define.ErrOCIRuntimePermissionDenied, "%s", strings.Trim(runtimeMsg, "\n")) + } + if match, _ := regexp.MatchString(".*executable file not found in.*", runtimeMsg); match { + return errors.Wrapf(define.ErrOCIRuntimeNotFound, "%s", strings.Trim(runtimeMsg, "\n")) + } + return errors.Wrapf(define.ErrOCIRuntime, "%s", strings.Trim(runtimeMsg, "\n")) +} + +// writeConmonPipeData writes nonse data to a pipe +func writeConmonPipeData(pipe *os.File) error { + someData := []byte{0} + _, err := pipe.Write(someData) + return err +} + +// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon +func formatRuntimeOpts(opts ...string) []string { + args := make([]string, 0, len(opts)*2) + for _, o := range opts { + args = append(args, "--runtime-opt", o) + } + return args +} diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index 9ce836cb5..45365203e 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -3,78 +3,29 @@ package libpod import ( - "bufio" - "bytes" "fmt" - "io/ioutil" "os" "os/exec" "path/filepath" "runtime" - "strconv" "strings" "syscall" "time" "github.com/containers/libpod/libpod/define" - "github.com/containers/libpod/pkg/cgroups" "github.com/containers/libpod/pkg/errorhandling" "github.com/containers/libpod/pkg/rootless" "github.com/containers/libpod/pkg/util" "github.com/containers/libpod/utils" pmount "github.com/containers/storage/pkg/mount" - "github.com/coreos/go-systemd/activation" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux" - "github.com/opencontainers/selinux/go-selinux/label" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "k8s.io/client-go/tools/remotecommand" ) const unknownPackage = "Unknown" -func (r *OCIRuntime) moveConmonToCgroup(ctr *Container, cgroupParent string, cmd *exec.Cmd) error { - if os.Geteuid() == 0 { - if r.cgroupManager == SystemdCgroupsManager { - unitName := createUnitName("libpod-conmon", ctr.ID()) - - realCgroupParent := cgroupParent - splitParent := strings.Split(cgroupParent, "/") - if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 { - realCgroupParent = splitParent[len(splitParent)-1] - } - - logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName) - if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil { - logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err) - } - } else { - cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon") - control, err := cgroups.New(cgroupPath, &spec.LinuxResources{}) - if err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } else { - // we need to remove this defer and delete the cgroup once conmon exits - // maybe need a conmon monitor? - if err := control.AddPid(cmd.Process.Pid); err != nil { - logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err) - } - } - } - } - return nil -} - -// newPipe creates a unix socket pair for communication -func newPipe() (parent *os.File, child *os.File, err error) { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - // makeAccessible changes the path permission and each parent directory to have --x--x--x func makeAccessible(path string, uid, gid int) error { for ; path != "/"; path = filepath.Dir(path) { @@ -100,7 +51,7 @@ func makeAccessible(path string, uid, gid int) error { // CreateContainer creates a container in the OCI runtime // TODO terminal support for container // Presently just ignoring conmon opts related to it -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { +func (r *OCIRuntime) createContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) { if len(ctr.config.IDMappings.UIDMap) != 0 || len(ctr.config.IDMappings.GIDMap) != 0 { for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.VolumePath} { if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { @@ -152,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restor return errors.Wrapf(err, "cannot unmount %s", m.Mountpoint) } } - return r.createOCIContainer(ctr, cgroupParent, restoreOptions) + return r.createOCIContainer(ctr, restoreOptions) }() ch <- err }() @@ -160,7 +111,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restor return err } } - return r.createOCIContainer(ctr, cgroupParent, restoreOptions) + return r.createOCIContainer(ctr, restoreOptions) } func rpmVersion(path string) string { @@ -195,293 +146,178 @@ func (r *OCIRuntime) conmonPackage() string { return dpkgVersion(r.conmonPath) } -func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { - var stderrBuf bytes.Buffer +// execContainer executes a command in a running container +// TODO: Add --detach support +// TODO: Convert to use conmon +// TODO: add --pid-file and use that to generate exec session tracking +func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, chan error, error) { + if len(cmd) == 0 { + return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute") + } - runtimeDir, err := util.GetRootlessRuntimeDir() - if err != nil { - return err + if sessionID == "" { + return -1, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec") } - parentPipe, childPipe, err := newPipe() + // create sync pipe to receive the pid + parentSyncPipe, childSyncPipe, err := newPipe() if err != nil { - return errors.Wrapf(err, "error creating socket pair") + return -1, nil, errors.Wrapf(err, "error creating socket pair") } + defer errorhandling.CloseQuiet(parentSyncPipe) + + // create start pipe to set the cgroup before running + // attachToExec is responsible for closing parentStartPipe childStartPipe, parentStartPipe, err := newPipe() if err != nil { - return errors.Wrapf(err, "error creating socket pair for start pipe") + return -1, nil, errors.Wrapf(err, "error creating socket pair") } - defer errorhandling.CloseQuiet(parentPipe) - defer errorhandling.CloseQuiet(parentStartPipe) + // We want to make sure we close the parent{Start,Attach}Pipes if we fail + // but also don't want to close them after attach to exec is called + attachToExecCalled := false - ociLog := filepath.Join(ctr.state.RunDir, "oci-log") - logLevel := logrus.GetLevel() + defer func() { + if !attachToExecCalled { + errorhandling.CloseQuiet(parentStartPipe) + } + }() - args := []string{} - if r.cgroupManager == SystemdCgroupsManager { - args = append(args, "-s") - } - args = append(args, "-c", ctr.ID()) - args = append(args, "-u", ctr.ID()) - args = append(args, "-n", ctr.Name()) - args = append(args, "-r", r.path) - args = append(args, "-b", ctr.bundlePath()) - args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile")) - args = append(args, "--exit-dir", r.exitsDir) - if logLevel != logrus.DebugLevel && r.supportsJSON { - args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLog)) - } - if ctr.config.ConmonPidFile != "" { - args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile) + // create the attach pipe to allow attach socket to be created before + // $RUNTIME exec starts running. This is to make sure we can capture all output + // from the process through that socket, rather than half reading the log, half attaching to the socket + // attachToExec is responsible for closing parentAttachPipe + parentAttachPipe, childAttachPipe, err := newPipe() + if err != nil { + return -1, nil, errors.Wrapf(err, "error creating socket pair") } - if len(ctr.config.ExitCommand) > 0 { - args = append(args, "--exit-command", ctr.config.ExitCommand[0]) - for _, arg := range ctr.config.ExitCommand[1:] { - args = append(args, []string{"--exit-command-arg", arg}...) + + defer func() { + if !attachToExecCalled { + errorhandling.CloseQuiet(parentAttachPipe) } - } - args = append(args, "--socket-dir-path", r.socketsDir) - if ctr.config.Spec.Process.Terminal { - args = append(args, "-t") - } else if ctr.config.Stdin { - args = append(args, "-i") - } - if r.logSizeMax >= 0 { - args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax)) + }() + + childrenClosed := false + defer func() { + if !childrenClosed { + errorhandling.CloseQuiet(childSyncPipe) + errorhandling.CloseQuiet(childAttachPipe) + errorhandling.CloseQuiet(childStartPipe) + } + }() + + runtimeDir, err := util.GetRootlessRuntimeDir() + if err != nil { + return -1, nil, err } - logDriver := KubernetesLogging - if ctr.LogDriver() == JSONLogging { - logrus.Errorf("json-file logging specified but not supported. Choosing k8s-file logging instead") - } else if ctr.LogDriver() != "" { - logDriver = ctr.LogDriver() + processFile, err := prepareProcessExec(c, cmd, env, tty, cwd, user, sessionID) + if err != nil { + return -1, nil, err } - args = append(args, "-l", fmt.Sprintf("%s:%s", logDriver, ctr.LogPath())) - if r.noPivot { - args = append(args, "--no-pivot") + var ociLog string + if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON { + ociLog = c.execOCILog(sessionID) } + args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog) - args = append(args, "--log-level", logLevel.String()) + if preserveFDs > 0 { + args = append(args, formatRuntimeOpts("--preserve-fds", string(preserveFDs))...) + } - if logLevel == logrus.DebugLevel { - logrus.Debugf("%s messages will be logged to syslog", r.conmonPath) - args = append(args, "--syslog") + for _, capability := range capAdd { + args = append(args, formatRuntimeOpts("--cap", capability)...) } - if restoreOptions != nil { - args = append(args, "--restore", ctr.CheckpointPath()) - if restoreOptions.TCPEstablished { - args = append(args, "--restore-arg", "--tcp-established") - } + if tty { + args = append(args, "-t") } + // Append container ID and command + args = append(args, "-e") + // TODO make this optional when we can detach + args = append(args, "--exec-attach") + args = append(args, "--exec-process-spec", processFile.Name()) + logrus.WithFields(logrus.Fields{ "args": args, }).Debugf("running conmon: %s", r.conmonPath) + execCmd := exec.Command(r.conmonPath, args...) - cmd := exec.Command(r.conmonPath, args...) - cmd.Dir = ctr.bundlePath() - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, + if streams.AttachInput { + execCmd.Stdin = streams.InputStream } - // TODO this is probably a really bad idea for some uses - // Make this configurable - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if ctr.config.Spec.Process.Terminal { - cmd.Stderr = &stderrBuf - } - - cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe) - // 0, 1 and 2 are stdin, stdout and stderr - cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3)) - cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4)) - cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) - cmd.Env = append(cmd.Env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED"))) - cmd.Env = append(cmd.Env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID"))) - home, err := homeDir() - if err != nil { - return err + if streams.AttachOutput { + execCmd.Stdout = streams.OutputStream + } + if streams.AttachError { + execCmd.Stderr = streams.ErrorStream } - cmd.Env = append(cmd.Env, fmt.Sprintf("HOME=%s", home)) - - if r.reservePorts && !ctr.config.NetMode.IsSlirp4netns() { - ports, err := bindPorts(ctr.config.PortMappings) - if err != nil { - return err - } - // Leak the port we bound in the conmon process. These fd's won't be used - // by the container and conmon will keep the ports busy so that another - // process cannot use them. - cmd.ExtraFiles = append(cmd.ExtraFiles, ports...) + conmonEnv, extraFiles, err := r.configureConmonEnv(runtimeDir) + if err != nil { + return -1, nil, err } - if ctr.config.NetMode.IsSlirp4netns() { - ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe() - if err != nil { - return errors.Wrapf(err, "failed to create rootless network sync pipe") - } - // Leak one end in conmon, the other one will be leaked into slirp4netns - cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW) - } - - if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { - cmd.Env = append(cmd.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) - } - if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok { - cmd.Env = append(cmd.Env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1") - fds := activation.Files(false) - cmd.ExtraFiles = append(cmd.ExtraFiles, fds...) - } - if selinux.GetEnabled() { - // Set the label of the conmon process to be level :s0 - // This will allow the container processes to talk to fifo-files - // passed into the container by conmon - var ( - plabel string - con selinux.Context - ) - plabel, err = selinux.CurrentLabel() - if err != nil { - if err := childPipe.Close(); err != nil { - logrus.Errorf("failed to close child pipe: %q", err) - } - return errors.Wrapf(err, "Failed to get current SELinux label") - } + // we don't want to step on users fds they asked to preserve + // Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3 + execCmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", preserveFDs+5)) + execCmd.Env = append(execCmd.Env, conmonEnv...) - con, err = selinux.NewContext(plabel) - if err != nil { - return errors.Wrapf(err, "Failed to get new context from SELinux label") - } + execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe) + execCmd.ExtraFiles = append(execCmd.ExtraFiles, extraFiles...) + execCmd.Dir = c.execBundlePath(sessionID) + execCmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } - runtime.LockOSThread() - if con["level"] != "s0" && con["level"] != "" { - con["level"] = "s0" - if err = label.SetProcessLabel(con.Get()); err != nil { - runtime.UnlockOSThread() - return err - } + if preserveFDs > 0 { + for fd := 3; fd < 3+preserveFDs; fd++ { + execCmd.ExtraFiles = append(execCmd.ExtraFiles, os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd))) } - err = cmd.Start() - // Ignore error returned from SetProcessLabel("") call, - // can't recover. - if err := label.SetProcessLabel(""); err != nil { - _ = err - } - runtime.UnlockOSThread() - } else { - err = cmd.Start() } - if err != nil { - errorhandling.CloseQuiet(childPipe) - return err - } - defer func() { - _ = cmd.Wait() - }() - // We don't need childPipe on the parent side - if err := childPipe.Close(); err != nil { - return err - } - if err := childStartPipe.Close(); err != nil { - return err - } + err = startCommandGivenSelinux(execCmd) - // Move conmon to specified cgroup - if err := r.moveConmonToCgroup(ctr, cgroupParent, cmd); err != nil { - return err - } + // We don't need children pipes on the parent side + errorhandling.CloseQuiet(childSyncPipe) + errorhandling.CloseQuiet(childAttachPipe) + errorhandling.CloseQuiet(childStartPipe) + childrenClosed = true - /* We set the cgroup, now the child can start creating children */ - someData := []byte{0} - _, err = parentStartPipe.Write(someData) if err != nil { - return err + return -1, nil, errors.Wrapf(err, "cannot start container %s", c.ID()) } - - /* Wait for initial setup and fork, and reap child */ - err = cmd.Wait() - if err != nil { - return err + if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe, sessionID); err != nil { + return -1, nil, err } - defer func() { - if err != nil { - if err2 := r.deleteContainer(ctr); err2 != nil { - logrus.Errorf("Error removing container %s from runtime after creation failed", ctr.ID()) + if preserveFDs > 0 { + for fd := 3; fd < 3+preserveFDs; fd++ { + // These fds were passed down to the runtime. Close them + // and not interfere + if err := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close(); err != nil { + logrus.Debugf("unable to close file fd-%d", fd) } } - }() - - // Wait to get container pid from conmon - type syncStruct struct { - si *syncInfo - err error } - ch := make(chan syncStruct) + + // TODO Only create if !detach + // Attach to the container before starting it + attachChan := make(chan error) go func() { - var si *syncInfo - rdr := bufio.NewReader(parentPipe) - b, err := rdr.ReadBytes('\n') - if err != nil { - ch <- syncStruct{err: err} - } - if err := json.Unmarshal(b, &si); err != nil { - ch <- syncStruct{err: err} - return - } - ch <- syncStruct{si: si} + // attachToExec is responsible for closing pipes + attachChan <- c.attachToExec(streams, detachKeys, resize, sessionID, parentStartPipe, parentAttachPipe) + close(attachChan) }() + attachToExecCalled = true - select { - case ss := <-ch: - if ss.err != nil { - return errors.Wrapf(ss.err, "error reading container (probably exited) json message") - } - logrus.Debugf("Received container pid: %d", ss.si.Pid) - if ss.si.Pid == -1 { - if r.supportsJSON { - data, err := ioutil.ReadFile(ociLog) - if err == nil { - var ociErr ociError - if err := json.Unmarshal(data, &ociErr); err == nil { - return errors.Wrapf(define.ErrOCIRuntime, "%s", strings.Trim(ociErr.Msg, "\n")) - } - } - } - // If we failed to parse the JSON errors, then print the output as it is - if ss.si.Message != "" { - return errors.Wrapf(define.ErrOCIRuntime, "%s", ss.si.Message) - } - return errors.Wrapf(define.ErrInternal, "container create failed") - } - ctr.state.PID = ss.si.Pid - // Let's try reading the Conmon pid at the same time. - if ctr.config.ConmonPidFile != "" { - contents, err := ioutil.ReadFile(ctr.config.ConmonPidFile) - if err != nil { - logrus.Warnf("Error reading Conmon pidfile for container %s: %v", ctr.ID(), err) - } else { - // Convert it to an int - conmonPID, err := strconv.Atoi(string(contents)) - if err != nil { - logrus.Warnf("Error decoding Conmon PID %q for container %s: %v", string(contents), ctr.ID(), err) - } else { - ctr.state.ConmonPID = conmonPID - logrus.Infof("Got Conmon PID as %d", conmonPID) - } - } - } - case <-time.After(ContainerCreateTimeout): - return errors.Wrapf(define.ErrInternal, "container creation timeout") - } - return nil + pid, err := readConmonPipeData(parentSyncPipe, ociLog) + + return pid, attachChan, err } // Wait for a container which has been sent a signal to stop diff --git a/libpod/oci_unsupported.go b/libpod/oci_unsupported.go index cfdf70bc6..4a65d4d1d 100644 --- a/libpod/oci_unsupported.go +++ b/libpod/oci_unsupported.go @@ -7,6 +7,7 @@ import ( "os/exec" "github.com/containers/libpod/libpod/define" + "k8s.io/client-go/tools/remotecommand" ) func (r *OCIRuntime) moveConmonToCgroup(ctr *Container, cgroupParent string, cmd *exec.Cmd) error { @@ -17,7 +18,7 @@ func newPipe() (parent *os.File, child *os.File, err error) { return nil, nil, define.ErrNotImplemented } -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { +func (r *OCIRuntime) createContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) { return define.ErrNotImplemented } @@ -40,3 +41,7 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error { return define.ErrOSNotSupported } + +func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, chan error, error) { + return -1, nil, define.ErrOSNotSupported +} |