Diffstat (limited to 'libpod/runtime_pod_linux.go')
-rw-r--r-- | libpod/runtime_pod_linux.go | 187
1 file changed, 93 insertions(+), 94 deletions(-)
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 0011c771a..456ad365f 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -11,6 +11,7 @@ import (
 	"github.com/containerd/cgroups"
 	"github.com/containers/libpod/libpod/events"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -148,117 +149,88 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		return errors.Wrapf(ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
 	}
 
-	// Go through and lock all containers so we can operate on them all at once
-	dependencies := make(map[string][]string)
+	// Go through and lock all containers so we can operate on them all at
+	// once.
+	// First loop also checks that we are ready to go ahead and remove.
 	for _, ctr := range ctrs {
-		ctr.lock.Lock()
-		defer ctr.lock.Unlock()
+		ctrLock := ctr.lock
+		ctrLock.Lock()
+		defer ctrLock.Unlock()
+
+		// If we're force-removing, no need to check status.
+		if force {
+			continue
+		}
 
 		// Sync all containers
 		if err := ctr.syncContainer(); err != nil {
 			return err
 		}
 
-		// Check if the container is in a good state to be removed
-		if ctr.state.State == ContainerStatePaused {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains paused container %s, cannot remove", p.ID(), ctr.ID())
-		}
-
-		if ctr.state.State == ContainerStateUnknown {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s with invalid state", p.ID(), ctr.ID())
-		}
-
-		// If the container is running and force is not set we can't do anything
-		if ctr.state.State == ContainerStateRunning && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID())
-		}
-
-		// If the container has active exec sessions and force is not set we can't do anything
-		if len(ctr.state.ExecSessions) != 0 && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
-		}
-
-		deps, err := r.state.ContainerInUse(ctr)
-		if err != nil {
-			return err
+		// Ensure state appropriate for removal
+		if err := ctr.checkReadyForRemoval(); err != nil {
+			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
 		}
-		dependencies[ctr.ID()] = deps
 	}
 
-	// Check if containers have dependencies
-	// If they do, and the dependencies are not in the pod, error
-	for ctr, deps := range dependencies {
-		for _, dep := range deps {
-			if _, ok := dependencies[dep]; !ok {
-				return errors.Wrapf(ErrCtrExists, "container %s depends on container %s not in pod %s", ctr, dep, p.ID())
+	var removalErr error
+
+	// We're going to be removing containers.
+	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
+	// the pod and conmon CGroups with a PID limit to prevent them from
+	// spawning any further processes (particularly cleanup processes) which
+	// would prevent removing the CGroups.
+	if p.runtime.config.CgroupManager == CgroupfsCgroupsManager {
+		// Get the conmon CGroup
+		v1CGroups := GetV1CGroups(getExcludedCGroups())
+		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
+		conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
+		if err != nil && err != cgroups.ErrCgroupDeleted {
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup %s", p.ID(), conmonCgroupPath)
+			} else {
+				logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
 			}
 		}
-	}
 
-	// First loop through all containers and stop them
-	// Do not remove in this loop to ensure that we don't remove unless all
-	// containers are in a good state
-	if force {
-		for _, ctr := range ctrs {
-			// If force is set and the container is running, stop it now
-			if ctr.state.State == ContainerStateRunning {
-				if err := r.ociRuntime.stopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return errors.Wrapf(err, "error stopping container %s to remove pod %s", ctr.ID(), p.ID())
-				}
-
-				// Sync again to pick up stopped state
-				if err := ctr.syncContainer(); err != nil {
-					return err
-				}
-			}
-			// If the container has active exec sessions, stop them now
-			if len(ctr.state.ExecSessions) != 0 {
-				if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return err
+		// New resource limits
+		resLimits := new(spec.LinuxResources)
+		resLimits.Pids = new(spec.LinuxPids)
+		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+
+		// Don't try if we failed to retrieve the cgroup
+		if err == nil {
+			if err := conmonCgroup.Update(resLimits); err != nil {
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error updating pod %s conmon group", p.ID())
+				} else {
+					logrus.Errorf("Error updating pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
 				}
 			}
 		}
 	}
 
-	// Start removing containers
-	// We can remove containers even if they have dependencies now
-	// As we have guaranteed their dependencies are in the pod
+	// Second loop - all containers are good, so we should be clear to
+	// remove.
 	for _, ctr := range ctrs {
-		// Clean up network namespace, cgroups, mounts
-		if err := ctr.cleanup(ctx); err != nil {
-			return err
-		}
-
-		// Stop container's storage
-		if err := ctr.teardownStorage(); err != nil {
-			return err
-		}
-
-		// Delete the container from runtime (only if we are not
-		// ContainerStateConfigured)
-		if ctr.state.State != ContainerStateConfigured &&
-			ctr.state.State != ContainerStateExited {
-			if err := ctr.delete(ctx); err != nil {
-				return err
+		// Remove the container
+		if err := r.removeContainer(ctx, ctr, force, true, true); err != nil {
+			if removalErr == nil {
+				removalErr = err
+			} else {
+				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
 			}
 		}
-
-		// Free the container's lock
-		if err := ctr.lock.Free(); err != nil {
-			return err
-		}
 	}
 
-	// Remove containers from the state
+	// Remove all containers in the pod from the state.
 	if err := r.state.RemovePodContainers(p); err != nil {
+		// If this fails, there isn't much more we can do.
+		// The containers in the pod are unusable, but they still exist,
+		// so pod removal will fail.
 		return err
 	}
 
-	// Mark containers invalid
-	for _, ctr := range ctrs {
-		ctr.valid = false
-	}
-
 	// Remove pod cgroup, if present
 	if p.state.CgroupPath != "" {
 		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
@@ -266,10 +238,11 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		switch p.runtime.config.CgroupManager {
 		case SystemdCgroupsManager:
 			if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
-				// The pod is already almost gone.
-				// No point in hard-failing if we fail
-				// this bit of cleanup.
-				logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 		case CgroupfsCgroupsManager:
 			// Delete the cgroupfs cgroup
@@ -280,34 +253,60 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
 			conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID())
+				} else {
+					logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := conmonCgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					}
 				}
 			}
 			cgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(p.state.CgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := cgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					}
 				}
 			}
 		default:
-			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+			// This should be caught much earlier, but let's still
+			// keep going so we make sure to evict the pod before
+			// ending up with an inconsistent state.
+			if removalErr == nil {
+				removalErr = errors.Wrapf(ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.CgroupManager, p.ID())
+			} else {
+				logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.CgroupManager, p.ID())
+			}
 		}
 	}
 
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
+		if removalErr != nil {
+			logrus.Errorf("%v", removalErr)
+		}
 		return err
 	}
 
 	// Mark pod invalid
 	p.valid = false
 	p.newPodEvent(events.Remove)
 
-	return nil
+	return removalErr
 }
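
The core trick in this change deserves spelling out: writing a pids limit of 1 into a cgroup stops every process inside it from forking, so no conmon cleanup process can be spawned between stopping the containers and deleting the cgroups. Below is a minimal sketch of the same technique using the containerd/cgroups v1 API seen in the diff; the freezeForks helper and the "/mypod/conmon" path are hypothetical, not libpod code:

package main

import (
	"log"

	"github.com/containerd/cgroups"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// freezeForks applies a pids limit of 1 to an existing cgroup so that no
// new processes can be created inside it (hypothetical helper).
func freezeForks(path string) error {
	control, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(path))
	if err != nil {
		if err == cgroups.ErrCgroupDeleted {
			return nil // cgroup already gone, nothing left to limit
		}
		return err
	}
	// Existing processes keep running; they just cannot fork anymore.
	return control.Update(&specs.LinuxResources{
		Pids: &specs.LinuxPids{Limit: 1},
	})
}

func main() {
	// Requires root and a cgroup v1 hierarchy, mirroring the diff's
	// CgroupfsCgroupsManager branch.
	if err := freezeForks("/mypod/conmon"); err != nil {
		log.Fatal(err)
	}
}

The diff uses the same Load/Update pair, but resolves the hierarchy through libpod's GetV1CGroups filter rather than cgroups.V1, and only attempts the Update when the Load succeeded.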
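The other pattern introduced by this change is best-effort teardown: remember the first error in removalErr, log any later ones, and keep going so the pod is evicted even when some cleanup step fails. A self-contained sketch of that pattern follows; removeAll and the step functions are hypothetical illustrations, not libpod API:

package main

import (
	"errors"
	"fmt"
	"log"
)

// removeAll runs every teardown step, returning the first failure and
// logging any subsequent ones, mirroring the removalErr handling above.
func removeAll(steps []func() error) error {
	var removalErr error
	for _, step := range steps {
		if err := step(); err != nil {
			if removalErr == nil {
				removalErr = err // first failure becomes the return value
			} else {
				log.Printf("additional cleanup error: %v", err)
			}
		}
	}
	return removalErr
}

func main() {
	err := removeAll([]func() error{
		func() error { return nil },
		func() error { return errors.New("conmon cgroup busy") },
		func() error { return errors.New("pod cgroup busy") },
	})
	fmt.Println(err) // prints "conmon cgroup busy"; the later error was logged
}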
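Deletion order matters too: a cgroup directory cannot be removed while it still has child cgroups, which is why the diff deletes the "conmon" child before the pod cgroup itself. A sketch of that ordering with the same v1 API; the paths are hypothetical:

package main

import (
	"log"
	"path/filepath"

	"github.com/containerd/cgroups"
)

func main() {
	podPath := "/mypod" // hypothetical pod cgroup
	// Delete the child cgroup first; removing the parent while the
	// child still exists would fail.
	for _, path := range []string{filepath.Join(podPath, "conmon"), podPath} {
		control, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(path))
		if err != nil {
			if err == cgroups.ErrCgroupDeleted {
				continue // already gone
			}
			log.Fatal(err)
		}
		if err := control.Delete(); err != nil {
			log.Fatalf("deleting %s: %v", path, err)
		}
	}
}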