diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2020-06-09 10:09:14 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-09 10:09:14 +0200 |
commit | 79f30af384cf6c74f89c3b44c01707da22d867e5 (patch) | |
tree | 23743436c054a1de091ae7d0ecb008d066786360 /libpod | |
parent | a85e97952994a2803a17ee2a384b260115f18b43 (diff) | |
parent | 9d964ffb9fc98ed13f6fec974c917b84c175d39a (diff) | |
download | podman-79f30af384cf6c74f89c3b44c01707da22d867e5.tar.gz podman-79f30af384cf6c74f89c3b44c01707da22d867e5.tar.bz2 podman-79f30af384cf6c74f89c3b44c01707da22d867e5.zip |
Merge pull request #6520 from mheon/no_conmon_no_error
Ensure Conmon is alive before waiting for exit file
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_internal.go | 24 | ||||
-rw-r--r-- | libpod/define/errors.go | 3 | ||||
-rw-r--r-- | libpod/oci.go | 7 | ||||
-rw-r--r-- | libpod/oci_conmon_linux.go | 25 | ||||
-rw-r--r-- | libpod/oci_missing.go | 5 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 6 |
6 files changed, 66 insertions, 4 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 43e873bd6..f6fc3c1a4 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1209,13 +1209,35 @@ func (c *Container) stop(timeout uint) error { } } + // Check if conmon is still alive. + // If it is not, we won't be getting an exit file. + conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) + if err != nil { + return err + } + if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil { return err } + c.newContainerEvent(events.Stop) + c.state.PID = 0 c.state.ConmonPID = 0 c.state.StoppedByUser = true + + if !conmonAlive { + // Conmon is dead, so we can't epect an exit code. + c.state.ExitCode = -1 + c.state.FinishedTime = time.Now() + c.state.State = define.ContainerStateStopped + if err := c.save(); err != nil { + logrus.Errorf("Error saving container %s status: %v", c.ID(), err) + } + + return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID()) + } + if err := c.save(); err != nil { return errors.Wrapf(err, "error saving container %s state after stopping", c.ID()) } @@ -1225,8 +1247,6 @@ func (c *Container) stop(timeout uint) error { return err } - c.newContainerEvent(events.Stop) - return nil } diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 16df2a1cc..083553b7e 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -141,6 +141,9 @@ var ( // ErrConmonOutdated indicates the version of conmon found (whether via the configuration or $PATH) // is out of date for the current podman version ErrConmonOutdated = errors.New("outdated conmon version") + // ErrConmonDead indicates that the container's conmon process has been + // killed, preventing normal operation. + ErrConmonDead = errors.New("conmon process killed") // ErrImageInUse indicates the requested operation failed because the image was in use ErrImageInUse = errors.New("image is being used") diff --git a/libpod/oci.go b/libpod/oci.go index 684a7ba42..c2f0041b1 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -107,6 +107,13 @@ type OCIRuntime interface { // error. CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error + // CheckConmonRunning verifies that the given container's Conmon + // instance is still running. Runtimes without Conmon, or systems where + // the PID of conmon is not available, should mock this as True. + // True indicates that Conmon for the instance is running, False + // indicates it is not. + CheckConmonRunning(ctr *Container) (bool, error) + // SupportsCheckpoint returns whether this OCI runtime // implementation supports the CheckpointContainer() operation. SupportsCheckpoint() bool diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 9c92b036e..0921a532b 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -669,6 +669,31 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) } +func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { + if ctr.state.ConmonPID == 0 { + // If the container is running or paused, assume Conmon is + // running. We didn't record Conmon PID on some old versions, so + // that is likely what's going on... + // Unusual enough that we should print a warning message though. + if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) { + logrus.Warnf("Conmon PID is not set, but container is running!") + return true, nil + } + // Container's not running, so conmon PID being unset is + // expected. Conmon is not running. + return false, nil + } + + // We have a conmon PID. Ping it with signal 0. + if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil { + if err == unix.ESRCH { + return false, nil + } + return false, errors.Wrapf(err, "error pinging container %s conmon with signal 0", ctr.ID()) + } + return true, nil +} + // SupportsCheckpoint checks if the OCI runtime supports checkpointing // containers. func (r *ConmonOCIRuntime) SupportsCheckpoint() bool { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index 4da16876c..90e90cc6c 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -163,6 +163,11 @@ func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCh return r.printError() } +// CheckConmonRunning is not available as the runtime is missing +func (r *MissingRuntime) CheckConmonRunning(ctr *Container) (bool, error) { + return false, r.printError() +} + // SupportsCheckpoint returns false as checkpointing requires a working runtime func (r *MissingRuntime) SupportsCheckpoint() bool { return false diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 655b42e51..aa91dff03 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -464,9 +464,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } - // Check that the container's in a good state to be removed + // Check that the container's in a good state to be removed. if c.state.State == define.ContainerStateRunning { - if err := c.stop(c.StopTimeout()); err != nil { + // Ignore ErrConmonDead - we couldn't retrieve the container's + // exit code properly, but it's still stopped. + if err := c.stop(c.StopTimeout()); err != nil && errors.Cause(err) != define.ErrConmonDead { return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID()) } } |