From 5cbb3e7e9d14ff8e09e3a36c974d73e9c1a5b9a9 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Fri, 10 May 2019 12:42:14 -0400
Subject: Use standard remove functions for removing pod ctrs

Instead of rewriting the logic, reuse the standard logic we use for
removing containers, which is much better tested.

Signed-off-by: Matthew Heon
---
 libpod/runtime_pod_linux.go | 121 +++++++++-----------------------------------
 1 file changed, 24 insertions(+), 97 deletions(-)

(limited to 'libpod/runtime_pod_linux.go')

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 5867b1f87..baad278f3 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -148,124 +148,51 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		return errors.Wrapf(ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
 	}
 
-	// Go through and lock all containers so we can operate on them all at once
+	// Go through and lock all containers so we can operate on them all at
+	// once.
+	// First loop also checks that we are ready to go ahead and remove.
 	for _, ctr := range ctrs {
 		ctrLock := ctr.lock
 		ctrLock.Lock()
 		defer ctrLock.Unlock()
 
+		// If we're force-removing, no need to check status.
+		if force {
+			continue
+		}
+
 		// Sync all containers
 		if err := ctr.syncContainer(); err != nil {
 			return err
 		}
 
-		// Check if the container is in a good state to be removed
-		if ctr.state.State == ContainerStatePaused {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains paused container %s, cannot remove", p.ID(), ctr.ID())
-		}
-
-		if ctr.state.State == ContainerStateUnknown {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s with invalid state", p.ID(), ctr.ID())
-		}
-
-		// If the container is running and force is not set we can't do anything
-		if ctr.state.State == ContainerStateRunning && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID())
+		// Ensure state appropriate for removal
+		if err := ctr.checkReadyForRemoval(); err != nil {
+			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
 		}
-
-		// If the container has active exec sessions and force is not set we can't do anything
-		if len(ctr.state.ExecSessions) != 0 && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
-		}
-	}
-
-	// We maintain the invariant that container dependencies must all exist
-	// within the container's pod.
-	// No need to check dependencies as such - we're removing all containers
-	// in the pod at once, no dependency issues.
-
-	// First loop through all containers and stop them
-	// Do not remove in this loop to ensure that we don't remove unless all
-	// containers are in a good state
-	if force {
-		for _, ctr := range ctrs {
-			// If force is set and the container is running, stop it now
-			if ctr.state.State == ContainerStateRunning {
-				if err := r.ociRuntime.stopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return errors.Wrapf(err, "error stopping container %s to remove pod %s", ctr.ID(), p.ID())
-				}
-
-				// Sync again to pick up stopped state
-				if err := ctr.syncContainer(); err != nil {
-					return err
-				}
-			}
-			// If the container has active exec sessions, stop them now
-			if len(ctr.state.ExecSessions) != 0 {
-				if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return err
-				}
-			}
-		}
-	}
-
-	// Remove all containers in the pod from the state.
-	if err := r.state.RemovePodContainers(p); err != nil {
-		return err
 	}
 
 	var removalErr error
 
-	// Clean up after our removed containers.
-	// Errors here are nonfatal - the containers have already been evicted.
-	// We'll do our best to clean up after them, but we have to keep going
-	// and remove the pod as well.
-	// From here until we remove the pod from the state, no error returns.
+	// Second loop - all containers are good, so we should be clear to
+	// remove.
 	for _, ctr := range ctrs {
-		// The container no longer exists in the state, mark invalid.
-		ctr.valid = false
-
-		ctr.newContainerEvent(events.Remove)
-
-		// Clean up network namespace, cgroups, mounts
-		if err := ctr.cleanup(ctx); err != nil {
-			if removalErr == nil {
-				removalErr = err
-			} else {
-				logrus.Errorf("Unable to clean up container %s: %v", ctr.ID(), err)
-			}
-		}
-
-		// Stop container's storage
-		if err := ctr.teardownStorage(); err != nil {
-			if removalErr == nil {
+		// Remove the container
+		if err := r.removeContainer(ctx, ctr, force, true, true); err != nil {
+			if removalErr != nil {
 				removalErr = err
 			} else {
-				logrus.Errorf("Unable to tear down container %s storage: %v", ctr.ID(), err)
-			}
-		}
-
-		// Delete the container from runtime (only if we are not
-		// ContainerStateConfigured)
-		if ctr.state.State != ContainerStateConfigured &&
-			ctr.state.State != ContainerStateExited {
-			if err := ctr.delete(ctx); err != nil {
-				if removalErr == nil {
-					removalErr = err
-				} else {
-					logrus.Errorf("Unable to remove container %s from OCI runtime: %v", ctr.ID(), err)
-				}
+				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
 			}
 		}
+	}
 
-		// Free the container's lock
-		if err := ctr.lock.Free(); err != nil {
-			if removalErr == nil {
-				removalErr = errors.Wrapf(err, "error freeing container %s lock", ctr.ID())
-			} else {
-				logrus.Errorf("Unable to free container %s lock: %v", ctr.ID(), err)
-			}
-		}
+	// Remove all containers in the pod from the state.
+	if err := r.state.RemovePodContainers(p); err != nil {
+		// If this fails, there isn't much more we can do.
+		// The containers in the pod are unusable, but they still exist,
+		// so pod removal will fail.
+		return err
	}
 
 	// Remove pod cgroup, if present
--
cgit v1.2.3-54-g00ecf
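The change above makes pod removal two-phase: a first pass locks and validates every container via checkReadyForRemoval (skipped when force is set), and a second pass removes each container through the same Runtime.removeContainer helper used for single-container removal, keeping one removal error while logging the rest so teardown continues. The standalone Go sketch below mirrors only that shape; the ctr type and the removeAll/remove helpers are hypothetical stand-ins, not libpod code.

package main

import (
	"errors"
	"fmt"
)

// ctr is a hypothetical stand-in for libpod's Container.
type ctr struct {
	id    string
	ready bool
}

// removeAll sketches the two-phase flow: validate everything first, then
// remove everything, so a precondition failure is reported before any
// container has been torn down.
func removeAll(ctrs []ctr, force bool) error {
	// Phase 1: check readiness up front; force skips the check.
	for _, c := range ctrs {
		if force {
			continue
		}
		if !c.ready {
			return fmt.Errorf("container %s is not ready to be removed", c.id)
		}
	}

	// Phase 2: remove each container, keeping the first error as the
	// return value and printing any later ones so teardown keeps going.
	var removalErr error
	for _, c := range ctrs {
		if err := remove(c); err != nil {
			if removalErr == nil {
				removalErr = err
			} else {
				fmt.Printf("error removing container %s: %v\n", c.id, err)
			}
		}
	}
	return removalErr
}

// remove is a hypothetical stand-in for Runtime.removeContainer.
func remove(c ctr) error {
	if c.id == "bad" {
		return errors.New("simulated removal failure")
	}
	return nil
}

func main() {
	ctrs := []ctr{{id: "a", ready: true}, {id: "bad", ready: true}}
	fmt.Println(removeAll(ctrs, false))
}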
From c9c00ecd532fea4722b020f318daa1e34bc46fd0 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Sat, 11 May 2019 15:10:34 -0400
Subject: When removing a pod with CGroupfs, set pids limit to 0

When using CGroupfs, we see races during pod removal between removing
the CGroup and the cleanup process starting (in the CGroup, thus
preventing removal).
The simplest way to avoid this is to prevent the forking of the
cleanup process. Conveniently, we can do this via the CGroup that we
already created for Conmon - we just need to update the PID limit to
0, which completely inhibits new forks.

Signed-off-by: Matthew Heon
---
 libpod/runtime_pod_linux.go | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'libpod/runtime_pod_linux.go')

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index baad278f3..456ad365f 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/containerd/cgroups"
 	"github.com/containers/libpod/libpod/events"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -174,6 +175,41 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 
 	var removalErr error
 
+	// We're going to be removing containers.
+	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
+	// the pod and conmon CGroups with a PID limit to prevent them from
+	// spawning any further processes (particularly cleanup processes) which
+	// would prevent removing the CGroups.
+	if p.runtime.config.CgroupManager == CgroupfsCgroupsManager {
+		// Get the conmon CGroup
+		v1CGroups := GetV1CGroups(getExcludedCGroups())
+		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
+		conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
+		if err != nil && err != cgroups.ErrCgroupDeleted {
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup %s", p.ID(), conmonCgroupPath)
+			} else {
+				logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+			}
+		}
+
+		// New resource limits
+		resLimits := new(spec.LinuxResources)
+		resLimits.Pids = new(spec.LinuxPids)
+		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+
+		// Don't try if we failed to retrieve the cgroup
+		if err == nil {
+			if err := conmonCgroup.Update(resLimits); err != nil {
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error updating pod %s conmon group", p.ID())
+				} else {
+					logrus.Errorf("Error updating pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
+			}
+		}
+	}
+
 	// Second loop - all containers are good, so we should be clear to
 	// remove.
 	for _, ctr := range ctrs {
--
cgit v1.2.3-54-g00ecf
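For context on the mechanism in the second patch: the added block loads the pod's conmon cgroup with the containerd/cgroups v1 API (which libpod already imports) and updates its pids controller so that nothing inside the cgroup can fork again, which is what keeps a cleanup process from racing the cgroup removal. A minimal standalone sketch of that call sequence follows; the cgroup path in it is a made-up placeholder, while libpod builds the real one from the pod state's CgroupPath plus "conmon".

package main

import (
	"fmt"

	"github.com/containerd/cgroups"
	spec "github.com/opencontainers/runtime-spec/specs-go"
)

// clampForks loads an existing cgroups-v1 cgroup and sets a very low pids
// limit. Processes already in the cgroup keep running, but further
// fork()/clone() calls fail once the limit is reached, so no new cleanup
// process can appear while the cgroup is being torn down.
func clampForks(cgroupPath string) error {
	cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath))
	if err != nil {
		if err == cgroups.ErrCgroupDeleted {
			// Cgroup is already gone - nothing left in it to fork.
			return nil
		}
		return fmt.Errorf("loading cgroup %s: %v", cgroupPath, err)
	}

	resLimits := &spec.LinuxResources{
		Pids: &spec.LinuxPids{Limit: 1}, // same value the patch uses
	}
	return cg.Update(resLimits)
}

func main() {
	// Placeholder path; libpod derives the real one from the pod's state.
	if err := clampForks("/libpod_parent/<pod-id>/conmon"); err != nil {
		fmt.Println("error:", err)
	}
}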