From f5938be1f758abf71f3d91fcb82240220ecfad91 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Tue, 7 May 2019 21:22:06 -0400
Subject: Improve robustness of pod removal

Removing a pod must first remove all containers in the pod.
Libpod requires the state to remain consistent at all times, so
references to a deleted pod must all be cleansed first.

Pods can have many containers in them. We presently iterate
through all of them, and if an error occurs trying to clean up
and remove any single container, we abort the entire operation
(but cannot recover anything already removed - pod removal is not
an atomic operation).

Because of this, if a removal error occurs partway through, we
can end up with a pod in an inconsistent state that is no longer
usable. What's worse, if the error is in the infra container, and
it's persistent, we get zombie pods - completely unable to be
removed.

When we saw some of these same issues with containers not in
pods, we modified the removal code there to aggressively purge
containers from the database, then try to clean up afterwards.
Take the same approach here, and make cleanup errors nonfatal.
Once we've gone ahead and removed containers, we need to see
pod deletion through to the end - we'll log errors but keep
going.

Also, fix some other small things (most notably, we didn't emit
events for the removed containers).

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/runtime_pod_linux.go | 70 +++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 38 deletions(-)

(limited to 'libpod')

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 0011c771a..6f74acb9d 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -149,10 +149,10 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 	}
 
 	// Go through and lock all containers so we can operate on them all at once
-	dependencies := make(map[string][]string)
 	for _, ctr := range ctrs {
-		ctr.lock.Lock()
-		defer ctr.lock.Unlock()
+		ctrLock := ctr.lock
+		ctrLock.Lock()
+		defer ctrLock.Unlock()
 
 		// Sync all containers
 		if err := ctr.syncContainer(); err != nil {
@@ -177,23 +177,12 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		if len(ctr.state.ExecSessions) != 0 && !force {
 			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
 		}
-
-		deps, err := r.state.ContainerInUse(ctr)
-		if err != nil {
-			return err
-		}
-		dependencies[ctr.ID()] = deps
 	}
 
-	// Check if containers have dependencies
-	// If they do, and the dependencies are not in the pod, error
-	for ctr, deps := range dependencies {
-		for _, dep := range deps {
-			if _, ok := dependencies[dep]; !ok {
-				return errors.Wrapf(ErrCtrExists, "container %s depends on container %s not in pod %s", ctr, dep, p.ID())
-			}
-		}
-	}
+	// We maintain the invariant that container dependencies must all exist
+	// within the container's pod.
+	// No need to check dependencies as such - we're removing all containers
+	// in the pod at once, no dependency issues.
 
 	// First loop through all containers and stop them
 	// Do not remove in this loop to ensure that we don't remove unless all
@@ -220,18 +209,30 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		}
 	}
 
-	// Start removing containers
-	// We can remove containers even if they have dependencies now
-	// As we have guaranteed their dependencies are in the pod
+	// Remove all containers in the pod from the state.
+	if err := r.state.RemovePodContainers(p); err != nil {
+		return err
+	}
+
+	// Clean up after our removed containers.
+	// Errors here are nonfatal - the containers have already been evicted.
+	// We'll do our best to clean up after them, but we have to keep going
+	// and remove the pod as well.
+	// From here until we remove the pod from the state, no error returns.
 	for _, ctr := range ctrs {
+		// The container no longer exists in the state, mark invalid.
+		ctr.valid = false
+
+		ctr.newContainerEvent(events.Remove)
+
 		// Clean up network namespace, cgroups, mounts
 		if err := ctr.cleanup(ctx); err != nil {
-			return err
+			logrus.Errorf("Unable to clean up container %s: %v", ctr.ID(), err)
 		}
 
 		// Stop container's storage
 		if err := ctr.teardownStorage(); err != nil {
-			return err
+			logrus.Errorf("Unable to tear down container %s storage: %v", ctr.ID(), err)
 		}
 
 		// Delete the container from runtime (only if we are not
@@ -239,26 +240,16 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		if ctr.state.State != ContainerStateConfigured &&
 			ctr.state.State != ContainerStateExited {
 			if err := ctr.delete(ctx); err != nil {
-				return err
+				logrus.Errorf("Unable to remove container %s from OCI runtime: %v", ctr.ID(), err)
 			}
 		}
 
 		// Free the container's lock
 		if err := ctr.lock.Free(); err != nil {
-			return err
+			logrus.Errorf("Unable to free container %s lock: %v", ctr.ID(), err)
 		}
 	}
 
-	// Remove containers from the state
-	if err := r.state.RemovePodContainers(p); err != nil {
-		return err
-	}
-
-	// Mark containers invalid
-	for _, ctr := range ctrs {
-		ctr.valid = false
-	}
-
 	// Remove pod cgroup, if present
 	if p.state.CgroupPath != "" {
 		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
@@ -280,7 +271,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
 			conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
 			}
 			if err == nil {
 				if err := conmonCgroup.Delete(); err != nil {
@@ -289,7 +280,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 			}
 			cgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(p.state.CgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				return err
+				logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
 			}
 			if err == nil {
 				if err := cgroup.Delete(); err != nil {
@@ -297,7 +288,10 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 				}
 			}
 		default:
-			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+			// This should be caught much earlier, but let's still
+			// keep going so we make sure to evict the pod before
+			// ending up with an inconsistent state.
+			logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.CgroupManager, p.ID())
 		}
 	}
 
-- 
cgit v1.2.3-54-g00ecf


From e9c78b411379fc0cfa245bc6b4b25b3bd84077a1 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Tue, 7 May 2019 21:44:04 -0400
Subject: Preserve errors returned by removing pods

Ensure that, if an error occurs somewhere along the way when we
remove a pod, it's preserved until the end and returned, even as
we continue to remove the pod.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/runtime_pod_linux.go | 70 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 14 deletions(-)

(limited to 'libpod')

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 6f74acb9d..5867b1f87 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -214,6 +214,8 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		return err
 	}
 
+	var removalErr error
+
 	// Clean up after our removed containers.
 	// Errors here are nonfatal - the containers have already been evicted.
 	// We'll do our best to clean up after them, but we have to keep going
@@ -227,12 +229,20 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 
 		// Clean up network namespace, cgroups, mounts
 		if err := ctr.cleanup(ctx); err != nil {
-			logrus.Errorf("Unable to clean up container %s: %v", ctr.ID(), err)
+			if removalErr == nil {
+				removalErr = err
+			} else {
+				logrus.Errorf("Unable to clean up container %s: %v", ctr.ID(), err)
+			}
 		}
 
 		// Stop container's storage
 		if err := ctr.teardownStorage(); err != nil {
-			logrus.Errorf("Unable to tear down container %s storage: %v", ctr.ID(), err)
+			if removalErr == nil {
+				removalErr = err
+			} else {
+				logrus.Errorf("Unable to tear down container %s storage: %v", ctr.ID(), err)
+			}
 		}
 
 		// Delete the container from runtime (only if we are not
@@ -240,13 +250,21 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		if ctr.state.State != ContainerStateConfigured &&
 			ctr.state.State != ContainerStateExited {
 			if err := ctr.delete(ctx); err != nil {
-				logrus.Errorf("Unable to remove container %s from OCI runtime: %v", ctr.ID(), err)
+				if removalErr == nil {
+					removalErr = err
+				} else {
+					logrus.Errorf("Unable to remove container %s from OCI runtime: %v", ctr.ID(), err)
+				}
 			}
 		}
 
 		// Free the container's lock
 		if err := ctr.lock.Free(); err != nil {
-			logrus.Errorf("Unable to free container %s lock: %v", ctr.ID(), err)
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error freeing container %s lock", ctr.ID())
+			} else {
+				logrus.Errorf("Unable to free container %s lock: %v", ctr.ID(), err)
+			}
 		}
 	}
 
@@ -257,10 +275,11 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		switch p.runtime.config.CgroupManager {
 		case SystemdCgroupsManager:
 			if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
-				// The pod is already almost gone.
-				// No point in hard-failing if we fail
-				// this bit of cleanup.
-				logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 		case CgroupfsCgroupsManager:
 			// Delete the cgroupfs cgroup
@@ -271,37 +290,60 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
 			conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID())
+				} else {
+					logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := conmonCgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+					}
 				}
 			}
 			cgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(p.state.CgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
-				logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID())
+				} else {
+					logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+				}
 			}
 			if err == nil {
 				if err := cgroup.Delete(); err != nil {
-					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					if removalErr == nil {
+						removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
+					} else {
+						logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
+					}
 				}
 			}
 		default:
 			// This should be caught much earlier, but let's still
 			// keep going so we make sure to evict the pod before
 			// ending up with an inconsistent state.
-			logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.CgroupManager, p.ID())
+			if removalErr == nil {
+				removalErr = errors.Wrapf(ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.CgroupManager, p.ID())
+			} else {
+				logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.CgroupManager, p.ID())
+			}
 		}
 	}
 
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
+		if removalErr != nil {
+			logrus.Errorf("%v", removalErr)
+		}
 		return err
 	}
 
 	// Mark pod invalid
 	p.valid = false
 	p.newPodEvent(events.Remove)
-	return nil
+	return removalErr
 }
-- 
cgit v1.2.3-54-g00ecf