13 files changed, 284 insertions, 136 deletions
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 0b4e5763e..fc33a1bbc 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -516,7 +516,7 @@ func (c *Container) refresh() error {
 	}
 
 	// We need to pick up a new lock
-	lock, err := c.runtime.lockManager.RetrieveLock(c.config.LockID)
+	lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
 	if err != nil {
 		return errors.Wrapf(err, "error acquiring lock for container %s", c.ID())
 	}
@@ -1490,3 +1490,25 @@ func (c *Container) copyWithTarFromImage(src, dest string) error {
 	}
 	return a.CopyWithTar(source, dest)
 }
+
+// checkReadyForRemoval reports whether the container can be removed without
+// force.
+// Force removal does not consult this check and removes the container
+// regardless.
+// It returns nil when removal is safe, otherwise an error saying why not.
+func (c *Container) checkReadyForRemoval() error {
+	switch c.state.State {
+	case ContainerStateUnknown:
+		return errors.Wrapf(ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
+	case ContainerStateRunning, ContainerStatePaused:
+		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
+	}
+
+	// Exec sessions still recorded in the container state block removal.
+	hasExecSessions := len(c.state.ExecSessions) != 0
+	if hasExecSessions {
+		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
+	}
+
+	return nil
+}
diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go
index 7c9605917..f3c842f89 100644
--- a/libpod/lock/in_memory_locks.go
+++ b/libpod/lock/in_memory_locks.go
@@ -90,6 +90,22 @@ func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
 	return m.locks[id], nil
 }
 
+// AllocateAndRetrieveLock marks the lock with the given ID as allocated and
+// returns it. It errors if the ID is out of range or already allocated.
+func (m *InMemoryManager) AllocateAndRetrieveLock(id uint32) (Locker, error) {
+	if id >= m.numLocks {
+		// Valid indexes run from 0 to numLocks-1, so report that as the max.
+		return nil, errors.Errorf("given lock ID %d is too large - this manager only supports lock indexes up to %d", id, m.numLocks-1)
+	}
+
+	if m.locks[id].allocated {
+		return nil, errors.Errorf("given lock ID %d is already in use, cannot reallocate", id)
+	}
+	m.locks[id].allocated = true
+
+	return m.locks[id], nil
+}
+
 // FreeAllLocks frees all locks.
 // This function is DANGEROUS. Please read the full comment in locks.go before
 // trying to use it.
diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go
index d6841646b..4e1e2e215 100644
--- a/libpod/lock/lock.go
+++ b/libpod/lock/lock.go
@@ -24,6 +24,13 @@ type Manager interface {
 	// The underlying lock MUST be the same as another other lock with the
 	// same UUID.
 	RetrieveLock(id uint32) (Locker, error)
+	// AllocateAndRetrieveLock marks the lock with the given UUID as in use
+	// and retrieves it.
+	// AllocateAndRetrieveLock will error if the lock in question has
+	// already been allocated.
+	// This is mostly used after a system restart to repopulate the list of
+	// locks in use.
+	AllocateAndRetrieveLock(id uint32) (Locker, error)
 	// PLEASE READ FULL DESCRIPTION BEFORE USING.
 	// FreeAllLocks frees all allocated locks, in preparation for lock
 	// reallocation.
diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c
index d11fce71a..047d3c417 100644
--- a/libpod/lock/shm/shm_lock.c
+++ b/libpod/lock/shm/shm_lock.c
@@ -354,6 +354,61 @@ int64_t allocate_semaphore(shm_struct_t *shm) {
   return -1 * ENOSPC;
 }
 
+// Allocate the semaphore with the given ID.
+// Returns 0 on success, or a negative errno value on failure, including
+// -EINVAL if shm is NULL or sem_index is out of range, -EEXIST if the semaphore
+// is already allocated, and the negated code from a failed mutex take/release.
+int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+  int bitmap_index, index_in_bitmap, ret_code;
+  bitmap_t test_map;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  // Check if the lock index is valid
+  if (sem_index >= shm->num_locks) {
+    return -1 * EINVAL;
+  }
+
+  bitmap_index = sem_index / BITMAP_SIZE;
+  index_in_bitmap = sem_index % BITMAP_SIZE;
+
+  // Defensive: should be unreachable once the sem_index check above passed
+  if (bitmap_index >= shm->num_bitmaps) {
+    return -1 * EFAULT;
+  }
+
+  // Do the shift in bitmap_t, not int: shifting a signed int 1 into or past
+  // its sign bit is undefined behavior in C
+  test_map = (bitmap_t)0x1 << index_in_bitmap;
+
+  // Lock the mutex controlling access to our shared memory
+  ret_code = take_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  // Already allocated: drop the mutex and report -EEXIST
+  if ((test_map & shm->locks[bitmap_index].bitmap) != 0) {
+    ret_code = release_mutex(&(shm->segment_lock));
+    if (ret_code != 0) {
+      return -1 * ret_code;
+    }
+    return -1 * EEXIST;
+  }
+
+  // The semaphore is not allocated, allocate it
+  shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap | test_map;
+
+  ret_code = release_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  return 0;
+}
+
 // Deallocate a given semaphore
 // Returns 0 on success, negative ERRNO values on failure
 int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go
index e70ea8743..c21e9a221 100644
--- a/libpod/lock/shm/shm_lock.go
+++ b/libpod/lock/shm/shm_lock.go
@@ -134,6 +134,23 @@ func (locks *SHMLocks) AllocateSemaphore() (uint32, error) {
 	return uint32(retCode), nil
 }
 
+// AllocateGivenSemaphore claims the semaphore at index sem in the
+// shared-memory segment.
+// A negative return code from the C helper is negated and returned as a
+// syscall.Errno; using closed locks yields a wrapped EINVAL.
+func (locks *SHMLocks) AllocateGivenSemaphore(sem uint32) error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	semIndex := C.uint32_t(sem)
+	if rc := C.allocate_given_semaphore(locks.lockStruct, semIndex); rc < 0 {
+		return syscall.Errno(-rc)
+	}
+
+	return nil
+}
+
 // DeallocateSemaphore frees a semaphore in a shared-memory segment so it can be
 // reallocated to another container or pod.
 // The given semaphore must be already allocated, or an error will be returned.
diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h
index 58e4297e2..759f8178a 100644
--- a/libpod/lock/shm/shm_lock.h
+++ b/libpod/lock/shm/shm_lock.h
@@ -39,6 +39,7 @@ shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code);
 shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code);
 int32_t close_lock_shm(shm_struct_t *shm);
 int64_t allocate_semaphore(shm_struct_t *shm);
+int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index);
 int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index);
 int32_t deallocate_all_semaphores(shm_struct_t *shm);
 int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index);
diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go
index 8678958ee..5f31939f8 100644
--- a/libpod/lock/shm_lock_manager_linux.go
+++ b/libpod/lock/shm_lock_manager_linux.go
@@ -57,6 +57,25 @@ func (m *SHMLockManager) AllocateLock() (Locker, error) {
 	return lock, nil
 }
 
+// AllocateAndRetrieveLock claims the lock with the given ID and returns a
+// handle to it.
+// It fails with EINVAL when the ID is at or above the manager's lock count,
+// and passes through any error from allocating the underlying semaphore.
+func (m *SHMLockManager) AllocateAndRetrieveLock(id uint32) (Locker, error) {
+	if id >= m.locks.GetMaxLocks() {
+		return nil, errors.Wrapf(syscall.EINVAL, "lock ID %d is too large - max lock size is %d",
+			id, m.locks.GetMaxLocks()-1)
+	}
+
+	err := m.locks.AllocateGivenSemaphore(id)
+	if err != nil {
+		return nil, err
+	}
+
+	// Build the handle only after the semaphore has been claimed.
+	return &SHMLock{lockID: id, manager: m}, nil
+}
+
 // RetrieveLock retrieves a lock from the manager given its ID.
 func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
 	lock := new(SHMLock)
diff --git a/libpod/pod_internal.go b/libpod/pod_internal.go
index 25e4e77d7..1fcb5b1a6 100644
--- a/libpod/pod_internal.go
+++ b/libpod/pod_internal.go
@@ -56,7 +56,7 @@ func (p *Pod) refresh() error {
 	}
 
 	// Retrieve the pod's lock
-	lock, err := p.runtime.lockManager.RetrieveLock(p.config.LockID)
+	lock, err := p.runtime.lockManager.AllocateAndRetrieveLock(p.config.LockID)
 	if err != nil {
 		return errors.Wrapf(err, "error retrieving lock for pod %s", p.ID())
 	}
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 34b6ac74f..e6b84014e 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -922,7 +922,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
 		if os.IsNotExist(errors.Cause(err)) {
 			manager, err = lock.NewSHMLockManager(lockPath, runtime.config.NumLocks)
 			if err != nil {
-				return err
+				return errors.Wrapf(err, "failed to get new shm lock manager")
 			}
 		} else if errors.Cause(err) == syscall.ERANGE && runtime.doRenumber {
 			logrus.Debugf("Number of locks does not match - removing old locks")
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 48c254c0f..c7758055f 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -238,12 +238,15 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
 func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
-	return r.removeContainer(ctx, c, force, removeVolume)
+	return r.removeContainer(ctx, c, force, removeVolume, false)
 }
 
-// Internal function to remove a container
-// Locks the container, but does not lock the runtime
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, removeVolume bool) error {
+// Internal function to remove a container.
+// Locks the container, but does not lock the runtime.
+// removePod is used only when removing pods. It instructs Podman to ignore
+// infra container protections, and *not* remove from the database (as pod
+// remove will handle that).
+func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, removeVolume bool, removePod bool) error {
 	span, _ := opentracing.StartSpanFromContext(ctx, "removeContainer")
 	span.SetTag("type", "runtime")
 	defer span.Finish()
@@ -256,12 +259,14 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		}
 	}
 
-	// We need to lock the pod before we lock the container
-	// To avoid races around removing a container and the pod it is in
+	// We need to lock the pod before we lock the container.
+	// To avoid races around removing a container and the pod it is in.
+	// Don't need to do this in pod removal case - we're evicting the entire
+	// pod.
 	var pod *Pod
 	var err error
 	runtime := c.runtime
-	if c.config.Pod != "" {
+	if c.config.Pod != "" && !removePod {
 		pod, err = r.state.Pod(c.config.Pod)
 		if err != nil {
 			return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), pod.ID())
@@ -280,8 +285,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		}
 	}
 
-	c.lock.Lock()
-	defer c.lock.Unlock()
+	// For pod removal, the container is already locked by the caller
+	if !removePod {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+	}
 
 	if !r.valid {
 		return ErrRuntimeStopped
@@ -292,10 +300,15 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		return err
 	}
 
-	if c.state.State == ContainerStatePaused {
-		if !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "container %s is paused, cannot remove until unpaused", c.ID())
+	// If we're not force-removing, we need to check if we're in a good
+	// state to remove.
+	if !force {
+		if err := c.checkReadyForRemoval(); err != nil {
+			return err
 		}
+	}
+
+	if c.state.State == ContainerStatePaused {
 		if err := c.runtime.ociRuntime.killContainer(c, 9); err != nil {
 			return err
 		}
@@ -309,7 +322,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 	}
 
 	// Check that the container's in a good state to be removed
-	if c.state.State == ContainerStateRunning && force {
+	if c.state.State == ContainerStateRunning {
 		if err := r.ociRuntime.stopContainer(c, c.StopTimeout()); err != nil {
 			return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
 		}
@@ -318,42 +331,41 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		if err := c.waitForExitFileAndSync(); err != nil {
 			return err
 		}
-	} else if !(c.state.State == ContainerStateConfigured ||
-		c.state.State == ContainerStateCreated ||
-		c.state.State == ContainerStateStopped ||
-		c.state.State == ContainerStateExited) {
-		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
 	}
 
 	// Check that all of our exec sessions have finished
 	if len(c.state.ExecSessions) != 0 {
-		if force {
-			if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
-				return err
-			}
-		} else {
-			return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
+		if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
+			return err
 		}
 	}
 
-	// Check that no other containers depend on the container
-	deps, err := r.state.ContainerInUse(c)
-	if err != nil {
-		return err
-	}
-	if len(deps) != 0 {
-		depsStr := strings.Join(deps, ", ")
-		return errors.Wrapf(ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr)
+	// Check that no other containers depend on the container.
+	// Only used if not removing a pod - pods guarantee that all
+	// deps will be evicted at the same time.
+	if !removePod {
+		deps, err := r.state.ContainerInUse(c)
+		if err != nil {
+			return err
+		}
+		if len(deps) != 0 {
+			depsStr := strings.Join(deps, ", ")
+			return errors.Wrapf(ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr)
+		}
 	}
 
 	var cleanupErr error
 	// Remove the container from the state
 	if c.config.Pod != "" {
-		if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
-			if cleanupErr == nil {
-				cleanupErr = err
-			} else {
-				logrus.Errorf("removing container from pod: %v", err)
+		// If we're removing the pod, the container will be evicted
+		// from the state elsewhere
+		if !removePod {
+			if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
+				if cleanupErr == nil {
+					cleanupErr = err
+				} else {
+					logrus.Errorf("removing container from pod: %v", err)
+				}
 			}
 		}
 	} else {
diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go
index 5e9f65acc..7cc7de270 100644
--- a/libpod/runtime_img.go
+++ b/libpod/runtime_img.go
@@ -48,7 +48,7 @@ func (r *Runtime) RemoveImage(ctx context.Context, img *image.Image, force bool)
 	if len(imageCtrs) > 0 && len(img.Names()) <= 1 {
 		if force {
 			for _, ctr := range imageCtrs {
-				if err := r.removeContainer(ctx, ctr, true, false); err != nil {
+				if err := r.removeContainer(ctx, ctr, true, false, false); err != nil {
 					return "", errors.Wrapf(err, "error removing image %s: container %s using image could not be removed", img.ID(), ctr.ID())
 				}
 			}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 0011c771a..456ad365f 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/containerd/cgroups"
 	"github.com/containers/libpod/libpod/events"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -148,117 +149,88 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		return errors.Wrapf(ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
 	}
 
-	// Go through and lock all containers so we can operate on them all at once
-	dependencies := make(map[string][]string)
+	// Go through and lock all containers so we can operate on them all at
+	// once.
+	// First loop also checks that we are ready to go ahead and remove.
 	for _, ctr := range ctrs {
-		ctr.lock.Lock()
-		defer ctr.lock.Unlock()
+		ctrLock := ctr.lock
+		ctrLock.Lock()
+		defer ctrLock.Unlock()
+
+		// If we're force-removing, no need to check status.
+		if force {
+			continue
+		}
 
 		// Sync all containers
 		if err := ctr.syncContainer(); err != nil {
 			return err
 		}
 
-		// Check if the container is in a good state to be removed
-		if ctr.state.State == ContainerStatePaused {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains paused container %s, cannot remove", p.ID(), ctr.ID())
-		}
-
-		if ctr.state.State == ContainerStateUnknown {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s with invalid state", p.ID(), ctr.ID())
-		}
-
-		// If the container is running and force is not set we can't do anything
-		if ctr.state.State == ContainerStateRunning && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID())
-		}
-
-		// If the container has active exec sessions and force is not set we can't do anything
-		if len(ctr.state.ExecSessions) != 0 && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
-		}
-
-		deps, err := r.state.ContainerInUse(ctr)
-		if err != nil {
-			return err
+		// Ensure state appropriate for removal
+		if err := ctr.checkReadyForRemoval(); err != nil {
+			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
 		}
-		dependencies[ctr.ID()] = deps
 	}
 
-	// Check if containers have dependencies
-	// If they do, and the dependencies are not in the pod, error
-	for ctr, deps := range dependencies {
-		for _, dep := range deps {
-			if _, ok := dependencies[dep]; !ok {
-				return errors.Wrapf(ErrCtrExists, "container %s depends on container %s not in pod %s", ctr, dep, p.ID())
+	var removalErr error
+
+	// We're going to be removing containers.
+	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
+	// the pod and conmon CGroups with a PID limit to prevent them from
+	// spawning any further processes (particularly cleanup processes) which
+	// would prevent removing the CGroups.
+	if p.runtime.config.CgroupManager == CgroupfsCgroupsManager {
+		// Get the conmon CGroup
+		v1CGroups := GetV1CGroups(getExcludedCGroups())
+		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
+		conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
+		if err != nil && err != cgroups.ErrCgroupDeleted {
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup %s", p.ID(), conmonCgroupPath)
+			} else {
+				logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
 			}
 		}
-	}
 
-	// First loop through all containers and stop them
-	// Do not remove in this loop to ensure that we don't remove unless all
-	// containers are in a good state
-	if force {
-		for _, ctr := range ctrs {
-			// If force is set and the container is running, stop it now
-			if ctr.state.State == ContainerStateRunning {
-				if err := r.ociRuntime.stopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return errors.Wrapf(err, "error stopping container %s to remove pod %s", ctr.ID(), p.ID())
-				}
-
-				// Sync again to pick up stopped state
-				if err := ctr.syncContainer(); err != nil {
-					return err
-				}
-			}
-			// If the container has active exec sessions, stop them now
-			if len(ctr.state.ExecSessions) != 0 {
-				if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return err
+		// New resource limits
+		resLimits := new(spec.LinuxResources)
+		resLimits.Pids = new(spec.LinuxPids)
+		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+
+		// Don't try if we failed to retrieve the cgroup
+		if err == nil {
+			if err := conmonCgroup.Update(resLimits); err != nil {
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error updating pod %s conmon group", p.ID())
+				} else {
+					logrus.Errorf("Error updating pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
 				}
 			}
 		}
 	}
 
-	// Start removing containers
-	// We can remove containers even if they have dependencies now
-	// As we have guaranteed their dependencies are in the pod
+	// Second loop - all containers are good, so we should be clear to
+	// remove.
 	for _, ctr := range ctrs {
-		// Clean up network namespace, cgroups, mounts
-		if err := ctr.cleanup(ctx); err != nil {
-			return err
-		}
-
-		// Stop container's storage
-		if err := ctr.teardownStorage(); err != nil {
-			return err
-		}
-
-		// Delete the container from runtime (only if we are not
-		// ContainerStateConfigured)
-		if ctr.state.State != ContainerStateConfigured &&
-			ctr.state.State != ContainerStateExited {
-			if err := ctr.delete(ctx); err != nil {
-				return err
+		// Remove the container
+		if err := r.removeContainer(ctx, ctr, force, true, true); err != nil {
+			if removalErr != nil {
+				removalErr = err
+			} else {
+				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
 			}
 		}
-
-		// Free the container's lock
-		if err := ctr.lock.Free(); err != nil {
-			return err
-		}
 	}
 
-	// Remove containers from the state
+	// Remove all containers in the pod from the state.
 	if err := r.state.RemovePodContainers(p); err != nil {
+		// If this fails, there isn't much more we can do.
+		// The containers in the pod are unusable, but they still exist,
+		// so pod removal will fail.
 		return err
 	}
 
-	// Mark containers invalid
-	for _, ctr := range ctrs {
-		ctr.valid = false
-	}
-
 	// Remove pod cgroup, if present
 	if p.state.CgroupPath != "" {
 		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
@@ -266,10 +238,11 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		switch p.runtime.config.CgroupManager {
 		case SystemdCgroupsManager:
 			if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
-				// The pod is already almost gone.
-				// No point in hard-failing if we fail
-				// this bit of cleanup.
-				logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 		case CgroupfsCgroupsManager:
 			// Delete the cgroupfs cgroup
@@ -280,34 +253,60 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
 			conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID())
+				} else {
+					logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := conmonCgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					}
 				}
 			}
 			cgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(p.state.CgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := cgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					}
 				}
 			}
 		default:
-			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+			// This should be caught much earlier, but let's still
+			// keep going so we make sure to evict the pod before
+			// ending up with an inconsistent state.
+			if removalErr == nil {
+				removalErr = errors.Wrapf(ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.CgroupManager, p.ID())
+			} else {
+				logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.CgroupManager, p.ID())
+			}
 		}
 	}
 
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
+		if removalErr != nil {
+			logrus.Errorf("%v", removalErr)
+		}
 		return err
 	}
 
 	// Mark pod invalid
 	p.valid = false
 	p.newPodEvent(events.Remove)
-	return nil
+	return removalErr
 }
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 40040fc52..a326ed0e0 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -116,7 +116,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
 			// containers?
 			// I'm inclined to say no, in case someone accidentally
 			// wipes a container they're using...
-			if err := r.removeContainer(ctx, ctr, false, false); err != nil {
+			if err := r.removeContainer(ctx, ctr, false, false, false); err != nil {
 				return errors.Wrapf(err, "error removing container %s that depends on volume %s", ctr.ID(), v.Name())
 			}
 		}