summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2020-06-09 10:09:14 +0200
committerGitHub <noreply@github.com>2020-06-09 10:09:14 +0200
commit79f30af384cf6c74f89c3b44c01707da22d867e5 (patch)
tree23743436c054a1de091ae7d0ecb008d066786360
parenta85e97952994a2803a17ee2a384b260115f18b43 (diff)
parent9d964ffb9fc98ed13f6fec974c917b84c175d39a (diff)
downloadpodman-79f30af384cf6c74f89c3b44c01707da22d867e5.tar.gz
podman-79f30af384cf6c74f89c3b44c01707da22d867e5.tar.bz2
podman-79f30af384cf6c74f89c3b44c01707da22d867e5.zip
Merge pull request #6520 from mheon/no_conmon_no_error
Ensure Conmon is alive before waiting for exit file
-rw-r--r--libpod/container_internal.go24
-rw-r--r--libpod/define/errors.go3
-rw-r--r--libpod/oci.go7
-rw-r--r--libpod/oci_conmon_linux.go25
-rw-r--r--libpod/oci_missing.go5
-rw-r--r--libpod/runtime_ctr.go6
6 files changed, 66 insertions, 4 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 43e873bd6..f6fc3c1a4 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1209,13 +1209,35 @@ func (c *Container) stop(timeout uint) error {
}
}
+ // Check if conmon is still alive.
+ // If it is not, we won't be getting an exit file.
+ conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
+ if err != nil {
+ return err
+ }
+
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
return err
}
+ c.newContainerEvent(events.Stop)
+
c.state.PID = 0
c.state.ConmonPID = 0
c.state.StoppedByUser = true
+
+ if !conmonAlive {
+ // Conmon is dead, so we can't epect an exit code.
+ c.state.ExitCode = -1
+ c.state.FinishedTime = time.Now()
+ c.state.State = define.ContainerStateStopped
+ if err := c.save(); err != nil {
+ logrus.Errorf("Error saving container %s status: %v", c.ID(), err)
+ }
+
+ return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID())
+ }
+
if err := c.save(); err != nil {
return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
}
@@ -1225,8 +1247,6 @@ func (c *Container) stop(timeout uint) error {
return err
}
- c.newContainerEvent(events.Stop)
-
return nil
}
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index 16df2a1cc..083553b7e 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -141,6 +141,9 @@ var (
// ErrConmonOutdated indicates the version of conmon found (whether via the configuration or $PATH)
// is out of date for the current podman version
ErrConmonOutdated = errors.New("outdated conmon version")
+ // ErrConmonDead indicates that the container's conmon process has been
+ // killed, preventing normal operation.
+ ErrConmonDead = errors.New("conmon process killed")
// ErrImageInUse indicates the requested operation failed because the image was in use
ErrImageInUse = errors.New("image is being used")
diff --git a/libpod/oci.go b/libpod/oci.go
index 684a7ba42..c2f0041b1 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -107,6 +107,13 @@ type OCIRuntime interface {
// error.
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error
+ // CheckConmonRunning verifies that the given container's Conmon
+ // instance is still running. Runtimes without Conmon, or systems where
+ // the PID of conmon is not available, should mock this as True.
+ // True indicates that Conmon for the instance is running, False
+ // indicates it is not.
+ CheckConmonRunning(ctr *Container) (bool, error)
+
// SupportsCheckpoint returns whether this OCI runtime
// implementation supports the CheckpointContainer() operation.
SupportsCheckpoint() bool
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 9c92b036e..0921a532b 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -669,6 +669,31 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
}
+func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
+ if ctr.state.ConmonPID == 0 {
+ // If the container is running or paused, assume Conmon is
+ // running. We didn't record Conmon PID on some old versions, so
+ // that is likely what's going on...
+ // Unusual enough that we should print a warning message though.
+ if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
+ logrus.Warnf("Conmon PID is not set, but container is running!")
+ return true, nil
+ }
+ // Container's not running, so conmon PID being unset is
+ // expected. Conmon is not running.
+ return false, nil
+ }
+
+ // We have a conmon PID. Ping it with signal 0.
+ if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
+ if err == unix.ESRCH {
+ return false, nil
+ }
+ return false, errors.Wrapf(err, "error pinging container %s conmon with signal 0", ctr.ID())
+ }
+ return true, nil
+}
+
// SupportsCheckpoint checks if the OCI runtime supports checkpointing
// containers.
func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go
index 4da16876c..90e90cc6c 100644
--- a/libpod/oci_missing.go
+++ b/libpod/oci_missing.go
@@ -163,6 +163,11 @@ func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCh
return r.printError()
}
+// CheckConmonRunning is not available as the runtime is missing
+func (r *MissingRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
+ return false, r.printError()
+}
+
// SupportsCheckpoint returns false as checkpointing requires a working runtime
func (r *MissingRuntime) SupportsCheckpoint() bool {
return false
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 655b42e51..aa91dff03 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -464,9 +464,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
}
}
- // Check that the container's in a good state to be removed
+ // Check that the container's in a good state to be removed.
if c.state.State == define.ContainerStateRunning {
- if err := c.stop(c.StopTimeout()); err != nil {
+ // Ignore ErrConmonDead - we couldn't retrieve the container's
+ // exit code properly, but it's still stopped.
+ if err := c.stop(c.StopTimeout()); err != nil && errors.Cause(err) != define.ErrConmonDead {
return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
}
}