Add a MissingRuntime implementation

When a container is created with a given OCI runtime, but that runtime is later uninstalled or removed from the configuration file, Libpod presently reacts very poorly. The EvictContainer code can potentially remove these containers, but we still can't see them in `podman ps` (aside from the massive logrus.Errorf messages they create). Providing a minimal OCI runtime implementation for missing runtimes allows us to behave better. We'll be able to retrieve containers from the database, though we still log an error once for each missing runtime. For containers which are stopped, we can remove them as normal.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
author: Matthew Heon <matthew.heon@pm.me> 2019-10-15 15:11:26 -0400
committer: Matthew Heon <matthew.heon@pm.me> 2019-10-15 15:59:20 -0400
commit: cab7bfbb211f2496af9f86208588e26954fc9b2a (patch)
tree: d65c78bbac71bba5dc7a1b95b830a9d032243f87 /libpod
parent: 5f72e6ef2ef7b6941cefc3c655903aeacb53e115 (diff)
download: podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.tar.gz
podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.tar.bz2
podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.zip
6 files changed, 235 insertions, 35 deletions
diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index ed87373e9..3347a3648 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -396,7 +396,11 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt.
 
 		ociRuntime, ok := s.runtime.ociRuntimes[runtimeName]
 		if !ok {
-			return errors.Wrapf(define.ErrOCIRuntimeUnavailable, "cannot find OCI runtime %q for container %s", ctr.config.OCIRuntime, ctr.ID())
+			// Use a MissingRuntime implementation
+			ociRuntime, err = getMissingRuntime(runtimeName, s.runtime)
+			if err != nil {
+				return err
+			}
 		}
 		ctr.ociRuntime = ociRuntime
 	}
diff --git a/libpod/container_api.go b/libpod/container_api.go
index 04c796410..759a7067e 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -656,7 +656,7 @@ func (c *Container) Sync() error {
 		(c.state.State != define.ContainerStateConfigured) &&
 		(c.state.State != define.ContainerStateExited) {
 		oldState := c.state.State
-		if err := c.ociRuntime.UpdateContainerStatus(c, true); err != nil {
+		if err := c.ociRuntime.UpdateContainerStatus(c); err != nil {
 			return err
 		}
 		// Only save back to DB if state changed
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index a7ac23f73..0043c9651 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -252,7 +252,7 @@ func (c *Container) waitForExitFileAndSync() error {
 		return err
 	}
 
-	if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
+	if err := c.checkExitFile(); err != nil {
 		return err
 	}
 
@@ -386,10 +386,11 @@ func (c *Container) syncContainer() error {
 		(c.state.State != define.ContainerStateConfigured) &&
 		(c.state.State != define.ContainerStateExited) {
 		oldState := c.state.State
-		// TODO: optionally replace this with a stat for the exit file
-		if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
+
+		if err := c.checkExitFile(); err != nil {
 			return err
 		}
+
 		// Only save back to DB if state changed
 		if c.state.State != oldState {
 			// Check for a restart policy match
@@ -1811,3 +1812,35 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume
 	}
 	return namedUserVolumes, userMounts
 }
+
+// checkExitFile stats the exit file of a running or paused container and, if it exists, marks the container Stopped and passes the file to handleExitFile.
+func (c *Container) checkExitFile() error {
+	// If the container's not running, nothing to do.
+	if c.state.State != define.ContainerStateRunning && c.state.State != define.ContainerStatePaused {
+		return nil
+	}
+
+	exitFile, err := c.exitFilePath()
+	if err != nil {
+		return err
+	}
+
+	// Check for the exit file
+	info, err := os.Stat(exitFile)
+	if err != nil {
+		if os.IsNotExist(err) {
+			// Container is still running, no error
+			return nil
+		}
+
+		return errors.Wrapf(err, "error running stat on container %s exit file", c.ID())
+	}
+
+	// Alright, it exists. Transition to Stopped state and clear the recorded PIDs.
+	c.state.State = define.ContainerStateStopped
+	c.state.PID = 0
+	c.state.ConmonPID = 0
+
+	// Read the exit file to get our stopped time and exit code.
+	return c.handleExitFile(exitFile, info)
+}
diff --git a/libpod/oci.go b/libpod/oci.go
index 37d04349f..9e761788e 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -26,7 +26,7 @@ type OCIRuntime interface {
 	// It includes a switch for whether to perform a hard query of the
 	// runtime. If unset, the exit file (if supported by the implementation)
 	// will be used.
-	UpdateContainerStatus(ctr *Container, useRuntime bool) error
+	UpdateContainerStatus(ctr *Container) error
 	// StartContainer starts the given container.
 	StartContainer(ctr *Container) error
 	// KillContainer sends the given signal to the given container.
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 3606a9634..658a2fe4e 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -216,8 +216,8 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta
 // If useRuntime is false, we will not directly hit runc to see the container's
 // status, but will instead only check for the existence of the conmon exit file
 // and update state to stopped if it exists.
-func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container, useRuntime bool) error {
-	exitFile, err := ctr.exitFilePath()
+func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
+	exitFile, err := r.ExitFilePath(ctr)
 	if err != nil {
 		return err
 	}
@@ -227,33 +227,6 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container, useRuntime bool
 		return err
 	}
 
-	// If not using the OCI runtime, we don't need to do most of this.
-	if !useRuntime {
-		// If the container's not running, nothing to do.
-		if ctr.state.State != define.ContainerStateRunning && ctr.state.State != define.ContainerStatePaused {
-			return nil
-		}
-
-		// Check for the exit file conmon makes
-		info, err := os.Stat(exitFile)
-		if err != nil {
-			if os.IsNotExist(err) {
-				// Container is still running, no error
-				return nil
-			}
-
-			return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID())
-		}
-
-		// Alright, it exists. Transition to Stopped state.
-		ctr.state.State = define.ContainerStateStopped
-		ctr.state.PID = 0
-		ctr.state.ConmonPID = 0
-
-		// Read the exit file to get our stopped time and exit code.
-		return ctr.handleExitFile(exitFile, info)
-	}
-
 	// Store old state so we know if we were already stopped
 	oldState := ctr.state.State
 
@@ -825,6 +798,7 @@ func (r *ConmonOCIRuntime) RuntimeInfo() (map[string]interface{}, error) {
 		"version": conmonVersion,
 	}
 	info["OCIRuntime"] = map[string]interface{}{
+		"name":    r.name,
 		"path":    r.path,
 		"package": runtimePackage,
 		"version": runtimeVersion,
diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go
new file mode 100644
index 000000000..d4524cd34
--- /dev/null
+++ b/libpod/oci_missing.go
@@ -0,0 +1,189 @@
+package libpod
+
+import (
+	"fmt"
+	"path/filepath"
+	"sync"
+
+	"github.com/containers/libpod/libpod/define"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	// missingRuntimes caches one MissingRuntime per runtime name.
+	// Creation makes error messages we don't want to duplicate.
+	missingRuntimes map[string]*MissingRuntime
+	// missingRuntimesLock is held by getMissingRuntime while it uses missingRuntimes.
+	missingRuntimesLock sync.Mutex
+)
+
+// MissingRuntime is used when the OCI runtime requested by the container is
+// missing (not installed or not in the configuration file).
+type MissingRuntime struct {
+	// name is the name of the missing runtime. Will be used in errors.
+	name string
+	// exitsDir is the directory for exit files.
+	exitsDir string
+}
+
+// getMissingRuntime returns the cached MissingRuntime for name, creating it (and logging an error) on first use.
+// Requires a libpod Runtime so we can make a sane path for the exits dir.
+func getMissingRuntime(name string, r *Runtime) (OCIRuntime, error) {
+	missingRuntimesLock.Lock()
+	defer missingRuntimesLock.Unlock()
+
+	if missingRuntimes == nil {
+		missingRuntimes = make(map[string]*MissingRuntime)
+	}
+
+	runtime, ok := missingRuntimes[name]
+	if ok {
+		return runtime, nil
+	}
+
+	// Log an error only the first time a given missing runtime is requested.
+	logrus.Errorf("OCI Runtime %s is in use by a container, but is not available (not in configuration file or not installed)", name)
+
+	newRuntime := new(MissingRuntime)
+	newRuntime.name = name
+	newRuntime.exitsDir = filepath.Join(r.config.TmpDir, "exits")
+
+	missingRuntimes[name] = newRuntime
+
+	return newRuntime, nil
+}
+
+// Name returns the name of the missing runtime, suffixed with " (missing/not available)"
+func (r *MissingRuntime) Name() string {
+	return fmt.Sprintf("%s (missing/not available)", r.name)
+}
+
+// Path returns a fixed placeholder string, as the runtime is missing and no path is available
+func (r *MissingRuntime) Path() string {
+	return "(missing/not available)"
+}
+
+// CreateContainer is not available as the runtime is missing
+func (r *MissingRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error {
+	return r.printError()
+}
+
+// UpdateContainerStatus is not available as the runtime is missing
+func (r *MissingRuntime) UpdateContainerStatus(ctr *Container) error {
+	return r.printError()
+}
+
+// StartContainer is not available as the runtime is missing
+func (r *MissingRuntime) StartContainer(ctr *Container) error {
+	return r.printError()
+}
+
+// KillContainer is not available as the runtime is missing
+// TODO: We could attempt to unix.Kill() the PID as recorded in the state if we
+// really want to smooth things out? Won't be perfect, but if the container has
+// a PID namespace it could be enough?
+func (r *MissingRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
+	return r.printError()
+}
+
+// StopContainer is not available as the runtime is missing
+func (r *MissingRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
+	return r.printError()
+}
+
+// DeleteContainer is not available as the runtime is missing
+func (r *MissingRuntime) DeleteContainer(ctr *Container) error {
+	return r.printError()
+}
+
+// PauseContainer is not available as the runtime is missing
+func (r *MissingRuntime) PauseContainer(ctr *Container) error {
+	return r.printError()
+}
+
+// UnpauseContainer is not available as the runtime is missing
+func (r *MissingRuntime) UnpauseContainer(ctr *Container) error {
+	return r.printError()
+}
+
+// ExecContainer is not available as the runtime is missing
+func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error) {
+	return -1, nil, r.printError()
+}
+
+// ExecStopContainer is not available as the runtime is missing.
+// TODO: We can also investigate using unix.Kill() on the PID of the exec
+// session here if we want to make stopping containers possible. Won't be
+// perfect, though.
+func (r *MissingRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
+	return r.printError()
+}
+
+// ExecContainerCleanup is not available as the runtime is missing
+func (r *MissingRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error {
+	return r.printError()
+}
+
+// CheckpointContainer is not available as the runtime is missing
+func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
+	return r.printError()
+}
+
+// SupportsCheckpoint returns false as checkpointing requires a working runtime
+func (r *MissingRuntime) SupportsCheckpoint() bool {
+	return false
+}
+
+// SupportsJSONErrors returns false as there is no runtime to give errors
+func (r *MissingRuntime) SupportsJSONErrors() bool {
+	return false
+}
+
+// SupportsNoCgroups returns false as there is no runtime to create containers
+func (r *MissingRuntime) SupportsNoCgroups() bool {
+	return false
+}
+
+// AttachSocketPath does not work as there is no runtime to attach to.
+// (Theoretically we could follow ExitFilePath but there is no guarantee the
+// container is running and thus has an attach socket...)
+func (r *MissingRuntime) AttachSocketPath(ctr *Container) (string, error) {
+	return "", r.printError()
+}
+
+// ExecAttachSocketPath does not work as there is no runtime to attach to.
+// (Again, we could follow ExitFilePath, but no guarantee there is an existing
+// and running exec session)
+func (r *MissingRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
+	return "", r.printError()
+}
+
+// ExitFilePath returns the exit file path for the container: exitsDir joined
+// with the container ID. A nil container yields an ErrInvalidArg error.
+// Here, we mimic what ConmonOCIRuntime does, because there is a chance that the
+// container is still running happily (config file modified to remove a runtime,
+// for example). We can't find the runtime, but Conmon should still place an exit file.
+func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
+	if ctr == nil {
+		return "", errors.Wrapf(define.ErrInvalidArg, "must provide a valid container to get exit file path")
+	}
+	return filepath.Join(r.exitsDir, ctr.ID()), nil
+}
+
+// RuntimeInfo returns an "OCIRuntime" map holding the runtime's name, with "missing" as its path, package and version
+func (r *MissingRuntime) RuntimeInfo() (map[string]interface{}, error) {
+	info := make(map[string]interface{})
+	info["OCIRuntime"] = map[string]interface{}{
+		"name":    r.name,
+		"path":    "missing",
+		"package": "missing",
+		"version": "missing",
+	}
+	return info, nil
+}
+
+// printError returns (it does not print) an error wrapping define.ErrOCIRuntimeNotFound that names the missing runtime
+func (r *MissingRuntime) printError() error {
+	return errors.Wrapf(define.ErrOCIRuntimeNotFound, "runtime %s is missing", r.name)
+}
author	Matthew Heon <matthew.heon@pm.me>	2019-10-15 15:11:26 -0400
committer	Matthew Heon <matthew.heon@pm.me>	2019-10-15 15:59:20 -0400
commit	cab7bfbb211f2496af9f86208588e26954fc9b2a (patch)
tree	d65c78bbac71bba5dc7a1b95b830a9d032243f87 /libpod
parent	5f72e6ef2ef7b6941cefc3c655903aeacb53e115 (diff)
download	podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.tar.gz podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.tar.bz2 podman-cab7bfbb211f2496af9f86208588e26954fc9b2a.zip