From 4b72f9e4013411208751df2a92ab9f322d4da5b2 Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Mon, 2 Mar 2020 14:42:48 -0500 Subject: exec: get the exit code from sync pipe instead of file Before, we were getting the exit code from the file, in which we waited an arbitrary amount of time (5 seconds) for the file, and segfaulted if we didn't find it. instead, we should be a bit more certain conmon has sent the exit code. Luckily, it sends the exit code along the sync pipe fd, so we can read it from there Adapt the ExecContainer interface to pass along a channel to get the pid and exit code from conmon, to be able to read both from the pipe Signed-off-by: Peter Hunt --- libpod/container_api.go | 25 +++++++++----- libpod/container_internal.go | 22 ------------ libpod/oci.go | 9 ++++- libpod/oci_attach_linux.go | 4 +-- libpod/oci_conmon_linux.go | 82 +++++++++++++++++++++++++++++--------------- libpod/oci_missing.go | 4 +-- 6 files changed, 83 insertions(+), 63 deletions(-) diff --git a/libpod/container_api.go b/libpod/container_api.go index ee879b69d..37a05bb75 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -282,7 +282,16 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri opts.Resize = resize opts.DetachKeys = detachKeys - pid, attachChan, err := c.ociRuntime.ExecContainer(c, sessionID, opts) + pid := 0 + pipeDataChan, attachChan, err := c.ociRuntime.ExecContainer(c, sessionID, opts) + // if pipeDataChan isn't nil, we should set the err + if pipeDataChan != nil { + pidData := <-pipeDataChan + if pidData.err != nil { + err = pidData.err + } + pid = pidData.data + } if err != nil { ec := define.ExecErrorCodeGeneric // Conmon will pass a non-zero exit code from the runtime as a pid here. @@ -318,18 +327,18 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri lastErr := <-attachChan - exitCode, err := c.readExecExitCode(sessionID) - if err != nil { + exitCodeData := <-pipeDataChan + if exitCodeData.err != nil { if lastErr != nil { logrus.Errorf(lastErr.Error()) } - lastErr = err + lastErr = exitCodeData.err } - if exitCode != 0 { + if exitCodeData.data != 0 { if lastErr != nil { logrus.Errorf(lastErr.Error()) } - lastErr = errors.Wrapf(define.ErrOCIRuntime, "non zero exit code: %d", exitCode) + lastErr = errors.Wrapf(define.ErrOCIRuntime, "non zero exit code: %d", exitCodeData.data) } // Lock again @@ -340,7 +349,7 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri // Sync the container again to pick up changes in state if err := c.syncContainer(); err != nil { logrus.Errorf("error syncing container %s state to remove exec session %s", c.ID(), sessionID) - return exitCode, lastErr + return exitCodeData.data, lastErr } // Remove the exec session from state @@ -348,7 +357,7 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri if err := c.save(); err != nil { logrus.Errorf("Error removing exec session %s from container %s state: %v", sessionID, c.ID(), err) } - return exitCode, lastErr + return exitCodeData.data, lastErr } // AttachStreams contains streams that will be attached to the container diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ff43bfc8f..67e02cc31 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -206,28 +206,6 @@ func (c *Container) execOCILog(sessionID string) string { return filepath.Join(c.execBundlePath(sessionID), "oci-log") } -// readExecExitCode reads the exit file for an exec session and returns -// the exit code -func (c *Container) readExecExitCode(sessionID string) (int, error) { - exitFile := filepath.Join(c.execExitFileDir(sessionID), c.ID()) - chWait := make(chan error) - defer close(chWait) - - _, err := WaitForFile(exitFile, chWait, time.Second*5) - if err != nil { - return -1, err - } - ec, err := ioutil.ReadFile(exitFile) - if err != nil { - return -1, err - } - ecInt, err := strconv.Atoi(string(ec)) - if err != nil { - return -1, err - } - return ecInt, nil -} - // Wait for the container's exit file to appear. // When it does, update our state based on it. func (c *Container) waitForExitFileAndSync() error { diff --git a/libpod/oci.go b/libpod/oci.go index 2ea61851f..e5f9b2135 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -70,7 +70,7 @@ type OCIRuntime interface { // ExecContainer executes a command in a running container. // Returns an int (exit code), error channel (errors from attach), and // error (errors that occurred attempting to start the exec session). - ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error) + ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (chan DataAndErr, chan error, error) // ExecStopContainer stops a given exec session in a running container. // SIGTERM with be sent initially, then SIGKILL after the given timeout. // If timeout is 0, SIGKILL will be sent immediately, and SIGTERM will @@ -159,3 +159,10 @@ type HTTPAttachStreams struct { Stdout bool Stderr bool } + +// DataAndErr is a generic structure for passing around an int and an error +// it is especially useful for getting information from conmon +type DataAndErr struct { + data int + err error +} diff --git a/libpod/oci_attach_linux.go b/libpod/oci_attach_linux.go index 46c70e7eb..5a8198d05 100644 --- a/libpod/oci_attach_linux.go +++ b/libpod/oci_attach_linux.go @@ -119,8 +119,8 @@ func (c *Container) attachToExec(streams *AttachStreams, keys string, resize <-c socketPath := buildSocketPath(sockPath) // 2: read from attachFd that the parent process has set up the console socket - if _, err := readConmonPipeData(attachFd, ""); err != nil { - return err + if pipeData := readConmonPipeData(attachFd, ""); pipeData.err != nil { + return pipeData.err } // 2: then attach diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 800f89603..8da6db09f 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -595,31 +595,29 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize remotecommand.Te // ExecContainer executes a command in a running container // TODO: Split into Create/Start/Attach/Wait -func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions) (int, chan error, error) { +func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options *ExecOptions) (chan DataAndErr, chan error, error) { if options == nil { - return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer") + return nil, nil, errors.Wrapf(define.ErrInvalidArg, "must provide an ExecOptions struct to ExecContainer") } if len(options.Cmd) == 0 { - return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute") + return nil, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute") } if sessionID == "" { - return -1, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec") + return nil, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec") } // create sync pipe to receive the pid parentSyncPipe, childSyncPipe, err := newPipe() if err != nil { - return -1, nil, errors.Wrapf(err, "error creating socket pair") + return nil, nil, errors.Wrapf(err, "error creating socket pair") } - defer errorhandling.CloseQuiet(parentSyncPipe) - // create start pipe to set the cgroup before running // attachToExec is responsible for closing parentStartPipe childStartPipe, parentStartPipe, err := newPipe() if err != nil { - return -1, nil, errors.Wrapf(err, "error creating socket pair") + return nil, nil, errors.Wrapf(err, "error creating socket pair") } // We want to make sure we close the parent{Start,Attach}Pipes if we fail @@ -638,7 +636,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options // attachToExec is responsible for closing parentAttachPipe parentAttachPipe, childAttachPipe, err := newPipe() if err != nil { - return -1, nil, errors.Wrapf(err, "error creating socket pair") + return nil, nil, errors.Wrapf(err, "error creating socket pair") } defer func() { @@ -658,7 +656,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options runtimeDir, err := util.GetRuntimeDir() if err != nil { - return -1, nil, err + return nil, nil, err } finalEnv := make([]string, 0, len(options.Env)) @@ -668,7 +666,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options processFile, err := prepareProcessExec(c, options.Cmd, finalEnv, options.Terminal, options.Cwd, options.User, sessionID) if err != nil { - return -1, nil, err + return nil, nil, err } var ociLog string @@ -717,7 +715,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options conmonEnv, extraFiles, err := r.configureConmonEnv(runtimeDir) if err != nil { - return -1, nil, err + return nil, nil, err } if options.PreserveFDs > 0 { @@ -748,10 +746,10 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options childrenClosed = true if err != nil { - return -1, nil, errors.Wrapf(err, "cannot start container %s", c.ID()) + return nil, nil, errors.Wrapf(err, "cannot start container %s", c.ID()) } if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe); err != nil { - return -1, nil, err + return nil, nil, err } if options.PreserveFDs > 0 { @@ -774,9 +772,16 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options }() attachToExecCalled = true - pid, err := readConmonPipeData(parentSyncPipe, ociLog) + dataChan := make(chan DataAndErr) + go func() { + // read the exec pid + dataChan <- readConmonPipeData(parentSyncPipe, ociLog) + // read the exec exit code + dataChan <- readConmonPipeData(parentSyncPipe, ociLog) + errorhandling.CloseQuiet(parentSyncPipe) + }() - return pid, attachChan, err + return dataChan, attachChan, err } // ExecStopContainer stops a given exec session in a running container. @@ -1206,14 +1211,14 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co return err } - pid, err := readConmonPipeData(parentSyncPipe, ociLog) - if err != nil { + pipeData := readConmonPipeData(parentSyncPipe, ociLog) + if pipeData.err != nil { if err2 := r.DeleteContainer(ctr); err2 != nil { logrus.Errorf("Error removing container %s from runtime after creation failed", ctr.ID()) } - return err + return pipeData.err } - ctr.state.PID = pid + ctr.state.PID = pipeData.data conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile) if err != nil { @@ -1525,7 +1530,7 @@ func readConmonPidFile(pidFile string) (int, error) { } // readConmonPipeData attempts to read a syncInfo struct from the pipe -func readConmonPipeData(pipe *os.File, ociLog string) (int, error) { +func readConmonPipeData(pipe *os.File, ociLog string) DataAndErr { // syncInfo is used to return data from monitor process to daemon type syncInfo struct { Data int `json:"data"` @@ -1561,11 +1566,17 @@ func readConmonPipeData(pipe *os.File, ociLog string) (int, error) { if err == nil { var ociErr ociError if err := json.Unmarshal(ociLogData, &ociErr); err == nil { - return -1, getOCIRuntimeError(ociErr.Msg) + return DataAndErr{ + data: -1, + err: getOCIRuntimeError(ociErr.Msg), + } } } } - return -1, errors.Wrapf(ss.err, "container create failed (no logs from conmon)") + return DataAndErr{ + data: -1, + err: errors.Wrapf(ss.err, "container create failed (no logs from conmon)"), + } } logrus.Debugf("Received: %d", ss.si.Data) if ss.si.Data < 0 { @@ -1574,21 +1585,36 @@ func readConmonPipeData(pipe *os.File, ociLog string) (int, error) { if err == nil { var ociErr ociError if err := json.Unmarshal(ociLogData, &ociErr); err == nil { - return ss.si.Data, getOCIRuntimeError(ociErr.Msg) + return DataAndErr{ + data: ss.si.Data, + err: getOCIRuntimeError(ociErr.Msg), + } } } } // If we failed to parse the JSON errors, then print the output as it is if ss.si.Message != "" { - return ss.si.Data, getOCIRuntimeError(ss.si.Message) + return DataAndErr{ + data: ss.si.Data, + err: getOCIRuntimeError(ss.si.Message), + } + } + return DataAndErr{ + data: ss.si.Data, + err: errors.Wrapf(define.ErrInternal, "container create failed"), } - return ss.si.Data, errors.Wrapf(define.ErrInternal, "container create failed") } data = ss.si.Data case <-time.After(define.ContainerCreateTimeout): - return -1, errors.Wrapf(define.ErrInternal, "container creation timeout") + return DataAndErr{ + data: -1, + err: errors.Wrapf(define.ErrInternal, "container creation timeout"), + } + } + return DataAndErr{ + data: data, + err: nil, } - return data, nil } // writeConmonPipeData writes nonse data to a pipe diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index ff7eea625..1b7c1979d 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -121,8 +121,8 @@ func (r *MissingRuntime) AttachResize(ctr *Container, newSize remotecommand.Term } // ExecContainer is not available as the runtime is missing -func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error) { - return -1, nil, r.printError() +func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (chan DataAndErr, chan error, error) { + return nil, nil, r.printError() } // ExecStopContainer is not available as the runtime is missing. -- cgit v1.2.3-54-g00ecf From d3d97a25e8c87cf741b2e24ac01ef84962137106 Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Tue, 3 Mar 2020 15:35:29 -0500 Subject: Exec: use ErrorConmonRead Before, we were using -1 as a bogus value in podman to signify something went wrong when reading from a conmon pipe. However, conmon uses negative values to indicate the runtime failed, and return the runtime's exit code. instead, we should use a bogus value that is actually bogus. Define that value in the define package as MinInt32 (-1<< 31 - 1), which is outside of the range of possible pids (-1 << 31) Signed-off-by: Peter Hunt --- libpod/container_api.go | 4 +++- libpod/define/exec_codes.go | 6 ++++++ libpod/oci_conmon_linux.go | 8 ++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/libpod/container_api.go b/libpod/container_api.go index 37a05bb75..356da12d0 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -297,7 +297,9 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri // Conmon will pass a non-zero exit code from the runtime as a pid here. // we differentiate a pid with an exit code by sending it as negative, so reverse // that change and return the exit code the runtime failed with. - if pid < 0 { + // Make sure the value is not ErrorConmonRead, as that is a podman set bogus value + // and not sent by conmon (and thus has no special meaning) + if pid < 0 && pid != define.ErrorConmonRead { ec = -1 * pid } return ec, err diff --git a/libpod/define/exec_codes.go b/libpod/define/exec_codes.go index f94616b33..c2ec08666 100644 --- a/libpod/define/exec_codes.go +++ b/libpod/define/exec_codes.go @@ -1,6 +1,7 @@ package define import ( + "math" "strings" "github.com/pkg/errors" @@ -17,6 +18,11 @@ const ( ExecErrorCodeCannotInvoke = 126 // ExecErrorCodeNotFound is the error code to return when a command cannot be found ExecErrorCodeNotFound = 127 + // ErrorConmonRead is a bogus value that can neither be a valid PID or exit code. It is + // used because conmon will send a negative value when sending a PID back over a pipe FD + // to signify something went wrong in the runtime. We need to differentiate between that + // value and a failure on the podman side of reading that value. Thus, we use ErrorConmonRead + ErrorConmonRead = math.MinInt32 - 1 ) // TranslateExecErrorToExitCode takes an error and checks whether it diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 8da6db09f..f260e3a39 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -1557,7 +1557,7 @@ func readConmonPipeData(pipe *os.File, ociLog string) DataAndErr { ch <- syncStruct{si: si} }() - data := -1 + data := define.ErrorConmonRead select { case ss := <-ch: if ss.err != nil { @@ -1567,14 +1567,14 @@ func readConmonPipeData(pipe *os.File, ociLog string) DataAndErr { var ociErr ociError if err := json.Unmarshal(ociLogData, &ociErr); err == nil { return DataAndErr{ - data: -1, + data: data, err: getOCIRuntimeError(ociErr.Msg), } } } } return DataAndErr{ - data: -1, + data: data, err: errors.Wrapf(ss.err, "container create failed (no logs from conmon)"), } } @@ -1607,7 +1607,7 @@ func readConmonPipeData(pipe *os.File, ociLog string) DataAndErr { data = ss.si.Data case <-time.After(define.ContainerCreateTimeout): return DataAndErr{ - data: -1, + data: data, err: errors.Wrapf(define.ErrInternal, "container creation timeout"), } } -- cgit v1.2.3-54-g00ecf