From 5cbb3e7e9d14ff8e09e3a36c974d73e9c1a5b9a9 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Fri, 10 May 2019 12:42:14 -0400
Subject: Use standard remove functions for removing pod ctrs

Instead of rewriting the logic, reuse the standard logic we use
for removing containers, which is much better tested.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/container_internal.go   |  22 ++++++++
 libpod/runtime_ctr.go          |  86 ++++++++++++++++-------------
 libpod/runtime_img.go          |   2 +-
 libpod/runtime_pod_linux.go    | 121 ++++++++---------------------------------
 libpod/runtime_volume_linux.go |   2 +-
 5 files changed, 97 insertions(+), 136 deletions(-)

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index e6ffaa6d7..fc33a1bbc 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1490,3 +1490,25 @@ func (c *Container) copyWithTarFromImage(src, dest string) error {
 	}
 	return a.CopyWithTar(source, dest)
 }
+
+// checkReadyForRemoval checks whether the given container is ready to be
+// removed.
+// These checks are only used if force-remove is not specified.
+// If it is, we'll remove the container anyways.
+// Returns nil if safe to remove, or an error describing why it's unsafe if not.
+func (c *Container) checkReadyForRemoval() error {
+	if c.state.State == ContainerStateUnknown {
+		return errors.Wrapf(ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
+	}
+
+	if c.state.State == ContainerStateRunning ||
+		c.state.State == ContainerStatePaused {
+		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
+	}
+
+	if len(c.state.ExecSessions) != 0 {
+		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
+	}
+
+	return nil
+}
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 48c254c0f..c7758055f 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -238,12 +238,15 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
 func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
-	return r.removeContainer(ctx, c, force, removeVolume)
+	return r.removeContainer(ctx, c, force, removeVolume, false)
 }
 
-// Internal function to remove a container
-// Locks the container, but does not lock the runtime
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, removeVolume bool) error {
+// Internal function to remove a container.
+// Locks the container, but does not lock the runtime.
+// removePod is used only when removing pods. It instructs Podman to ignore
+// infra container protections, and *not* remove from the database (as pod
+// remove will handle that).
+func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, removeVolume bool, removePod bool) error {
 	span, _ := opentracing.StartSpanFromContext(ctx, "removeContainer")
 	span.SetTag("type", "runtime")
 	defer span.Finish()
@@ -256,12 +259,14 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		}
 	}
 
-	// We need to lock the pod before we lock the container
-	// To avoid races around removing a container and the pod it is in
+	// We need to lock the pod before we lock the container.
+	// To avoid races around removing a container and the pod it is in.
+	// Don't need to do this in pod removal case - we're evicting the entire
+	// pod.
 	var pod *Pod
 	var err error
 	runtime := c.runtime
-	if c.config.Pod != "" {
+	if c.config.Pod != "" && !removePod {
 		pod, err = r.state.Pod(c.config.Pod)
 		if err != nil {
 			return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), pod.ID())
@@ -280,8 +285,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		}
 	}
 
-	c.lock.Lock()
-	defer c.lock.Unlock()
+	// For pod removal, the container is already locked by the caller
+	if !removePod {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+	}
 
 	if !r.valid {
 		return ErrRuntimeStopped
@@ -292,10 +300,15 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		return err
 	}
 
-	if c.state.State == ContainerStatePaused {
-		if !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "container %s is paused, cannot remove until unpaused", c.ID())
+	// If we're not force-removing, we need to check if we're in a good
+	// state to remove.
+	if !force {
+		if err := c.checkReadyForRemoval(); err != nil {
+			return err
 		}
+	}
+
+	if c.state.State == ContainerStatePaused {
 		if err := c.runtime.ociRuntime.killContainer(c, 9); err != nil {
 			return err
 		}
@@ -309,7 +322,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 	}
 
 	// Check that the container's in a good state to be removed
-	if c.state.State == ContainerStateRunning && force {
+	if c.state.State == ContainerStateRunning {
 		if err := r.ociRuntime.stopContainer(c, c.StopTimeout()); err != nil {
 			return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
 		}
@@ -318,42 +331,41 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 		if err := c.waitForExitFileAndSync(); err != nil {
 			return err
 		}
-	} else if !(c.state.State == ContainerStateConfigured ||
-		c.state.State == ContainerStateCreated ||
-		c.state.State == ContainerStateStopped ||
-		c.state.State == ContainerStateExited) {
-		return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
 	}
 
 	// Check that all of our exec sessions have finished
 	if len(c.state.ExecSessions) != 0 {
-		if force {
-			if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
-				return err
-			}
-		} else {
-			return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
+		if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
+			return err
 		}
 	}
 
-	// Check that no other containers depend on the container
-	deps, err := r.state.ContainerInUse(c)
-	if err != nil {
-		return err
-	}
-	if len(deps) != 0 {
-		depsStr := strings.Join(deps, ", ")
-		return errors.Wrapf(ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr)
+	// Check that no other containers depend on the container.
+	// Only used if not removing a pod - pods guarantee that all
+	// deps will be evicted at the same time.
+	if !removePod {
+		deps, err := r.state.ContainerInUse(c)
+		if err != nil {
+			return err
+		}
+		if len(deps) != 0 {
+			depsStr := strings.Join(deps, ", ")
+			return errors.Wrapf(ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr)
+		}
 	}
 
 	var cleanupErr error
 	// Remove the container from the state
 	if c.config.Pod != "" {
-		if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
-			if cleanupErr == nil {
-				cleanupErr = err
-			} else {
-				logrus.Errorf("removing container from pod: %v", err)
+		// If we're removing the pod, the container will be evicted
+		// from the state elsewhere
+		if !removePod {
+			if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
+				if cleanupErr == nil {
+					cleanupErr = err
+				} else {
+					logrus.Errorf("removing container from pod: %v", err)
+				}
 			}
 		}
 	} else {
diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go
index 5e9f65acc..7cc7de270 100644
--- a/libpod/runtime_img.go
+++ b/libpod/runtime_img.go
@@ -48,7 +48,7 @@ func (r *Runtime) RemoveImage(ctx context.Context, img *image.Image, force bool)
 	if len(imageCtrs) > 0 && len(img.Names()) <= 1 {
 		if force {
 			for _, ctr := range imageCtrs {
-				if err := r.removeContainer(ctx, ctr, true, false); err != nil {
+				if err := r.removeContainer(ctx, ctr, true, false, false); err != nil {
 					return "", errors.Wrapf(err, "error removing image %s: container %s using image could not be removed", img.ID(), ctr.ID())
 				}
 			}
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 5867b1f87..baad278f3 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -148,124 +148,51 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 		return errors.Wrapf(ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
 	}
 
-	// Go through and lock all containers so we can operate on them all at once
+	// Go through and lock all containers so we can operate on them all at
+	// once.
+	// First loop also checks that we are ready to go ahead and remove.
 	for _, ctr := range ctrs {
 		ctrLock := ctr.lock
 		ctrLock.Lock()
 		defer ctrLock.Unlock()
 
+		// If we're force-removing, no need to check status.
+		if force {
+			continue
+		}
+
 		// Sync all containers
 		if err := ctr.syncContainer(); err != nil {
 			return err
 		}
 
-		// Check if the container is in a good state to be removed
-		if ctr.state.State == ContainerStatePaused {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains paused container %s, cannot remove", p.ID(), ctr.ID())
-		}
-
-		if ctr.state.State == ContainerStateUnknown {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s with invalid state", p.ID(), ctr.ID())
-		}
-
-		// If the container is running and force is not set we can't do anything
-		if ctr.state.State == ContainerStateRunning && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID())
+		// Ensure state appropriate for removal
+		if err := ctr.checkReadyForRemoval(); err != nil {
+			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
 		}
-
-		// If the container has active exec sessions and force is not set we can't do anything
-		if len(ctr.state.ExecSessions) != 0 && !force {
-			return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
-		}
-	}
-
-	// We maintain the invariant that container dependencies must all exist
-	// within the container's pod.
-	// No need to check dependencies as such - we're removing all containers
-	// in the pod at once, no dependency issues.
-
-	// First loop through all containers and stop them
-	// Do not remove in this loop to ensure that we don't remove unless all
-	// containers are in a good state
-	if force {
-		for _, ctr := range ctrs {
-			// If force is set and the container is running, stop it now
-			if ctr.state.State == ContainerStateRunning {
-				if err := r.ociRuntime.stopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return errors.Wrapf(err, "error stopping container %s to remove pod %s", ctr.ID(), p.ID())
-				}
-
-				// Sync again to pick up stopped state
-				if err := ctr.syncContainer(); err != nil {
-					return err
-				}
-			}
-			// If the container has active exec sessions, stop them now
-			if len(ctr.state.ExecSessions) != 0 {
-				if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil {
-					return err
-				}
-			}
-		}
-	}
-
-	// Remove all containers in the pod from the state.
-	if err := r.state.RemovePodContainers(p); err != nil {
-		return err
 	}
 
 	var removalErr error
 
-	// Clean up after our removed containers.
-	// Errors here are nonfatal - the containers have already been evicted.
-	// We'll do our best to clean up after them, but we have to keep going
-	// and remove the pod as well.
-	// From here until we remove the pod from the state, no error returns.
+	// Second loop - all containers are good, so we should be clear to
+	// remove.
 	for _, ctr := range ctrs {
-		// The container no longer exists in the state, mark invalid.
-		ctr.valid = false
-
-		ctr.newContainerEvent(events.Remove)
-
-		// Clean up network namespace, cgroups, mounts
-		if err := ctr.cleanup(ctx); err != nil {
-			if removalErr == nil {
-				removalErr = err
-			} else {
-				logrus.Errorf("Unable to clean up container %s: %v", ctr.ID(), err)
-			}
-		}
-
-		// Stop container's storage
-		if err := ctr.teardownStorage(); err != nil {
-			if removalErr == nil {
+		// Remove the container
+		if err := r.removeContainer(ctx, ctr, force, true, true); err != nil {
+			if removalErr != nil {
 				removalErr = err
 			} else {
-				logrus.Errorf("Unable to tear down container %s storage: %v", ctr.ID(), err)
-			}
-		}
-
-		// Delete the container from runtime (only if we are not
-		// ContainerStateConfigured)
-		if ctr.state.State != ContainerStateConfigured &&
-			ctr.state.State != ContainerStateExited {
-			if err := ctr.delete(ctx); err != nil {
-				if removalErr == nil {
-					removalErr = err
-				} else {
-					logrus.Errorf("Unable to remove container %s from OCI runtime: %v", ctr.ID(), err)
-				}
+				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
 			}
 		}
+	}
 
-		// Free the container's lock
-		if err := ctr.lock.Free(); err != nil {
-			if removalErr == nil {
-				removalErr = errors.Wrapf(err, "error freeing container %s lock", ctr.ID())
-			} else {
-				logrus.Errorf("Unable to free container %s lock: %v", ctr.ID(), err)
-			}
-		}
+	// Remove all containers in the pod from the state.
+	if err := r.state.RemovePodContainers(p); err != nil {
+		// If this fails, there isn't much more we can do.
+		// The containers in the pod are unusable, but they still exist,
+		// so pod removal will fail.
+		return err
 	}
 
 	// Remove pod cgroup, if present
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 40040fc52..a326ed0e0 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -116,7 +116,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
 			// containers?
 			// I'm inclined to say no, in case someone accidentally
 			// wipes a container they're using...
-			if err := r.removeContainer(ctx, ctr, false, false); err != nil {
+			if err := r.removeContainer(ctx, ctr, false, false, false); err != nil {
 				return errors.Wrapf(err, "error removing container %s that depends on volume %s", ctr.ID(), v.Name())
 			}
 		}
-- 
cgit v1.2.3-54-g00ecf


From 952f8620b6d528a823dd5ef61a51cf491f6d54e6 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Fri, 10 May 2019 16:12:09 -0400
Subject: Add fix for an issue breaking our CI

Buildah no longer updates the create time of single-action images
(e.g. `FROM ...` with no other instructions. This isn't a bug (it
matches Docker's behavior), but it broke one of our tests.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 test/e2e/images_test.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/e2e/images_test.go b/test/e2e/images_test.go
index 5a299fe75..bec6e304b 100644
--- a/test/e2e/images_test.go
+++ b/test/e2e/images_test.go
@@ -169,12 +169,13 @@ var _ = Describe("Podman images", func() {
 			Skip("Does not work on remote client")
 		}
 		dockerfile := `FROM docker.io/library/alpine:latest
+RUN apk update && apk add man
 `
 		podmanTest.BuildImage(dockerfile, "foobar.com/before:latest", "false")
 		result := podmanTest.Podman([]string{"images", "-q", "-f", "before=foobar.com/before:latest"})
 		result.WaitWithDefaultTimeout()
 		Expect(result.ExitCode()).To(Equal(0))
-		Expect(len(result.OutputToStringArray())).To(Equal(1))
+		Expect(len(result.OutputToStringArray()) >= 1).To(BeTrue())
 	})
 
 	It("podman images filter after image", func() {
-- 
cgit v1.2.3-54-g00ecf


From c9c00ecd532fea4722b020f318daa1e34bc46fd0 Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Sat, 11 May 2019 15:10:34 -0400
Subject: When removing a pod with CGroupfs, set pids limit to 0

When using CGroupfs, we see races during pod removal between
removing the CGroup and the cleanup process starting (in the
CGroup, thus preventing removal).

The simplest way to avoid this is to prevent the forking of the
cleanup process. Conveniently, we can do this via the CGroup that
we already created for Conmon - we just need to update the PID
limit to 0, which completely inhibits new forks.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/runtime_pod_linux.go | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index baad278f3..456ad365f 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/containerd/cgroups"
 	"github.com/containers/libpod/libpod/events"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -174,6 +175,41 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 
 	var removalErr error
 
+	// We're going to be removing containers.
+	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
+	// the pod and conmon CGroups with a PID limit to prevent them from
+	// spawning any further processes (particularly cleanup processes) which
+	// would prevent removing the CGroups.
+	if p.runtime.config.CgroupManager == CgroupfsCgroupsManager {
+		// Get the conmon CGroup
+		v1CGroups := GetV1CGroups(getExcludedCGroups())
+		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
+		conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
+		if err != nil && err != cgroups.ErrCgroupDeleted {
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup %s", p.ID(), conmonCgroupPath)
+			} else {
+				logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+			}
+		}
+
+		// New resource limits
+		resLimits := new(spec.LinuxResources)
+		resLimits.Pids = new(spec.LinuxPids)
+		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+
+		// Don't try if we failed to retrieve the cgroup
+		if err == nil {
+			if err := conmonCgroup.Update(resLimits); err != nil {
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error updating pod %s conmon group", p.ID())
+				} else {
+					logrus.Errorf("Error updating pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
+			}
+		}
+	}
+
 	// Second loop - all containers are good, so we should be clear to
 	// remove.
 	for _, ctr := range ctrs {
-- 
cgit v1.2.3-54-g00ecf