When removing a pod with CGroupfs, set pids limit to 0

When using CGroupfs, we see races during pod removal between removing the CGroup and the cleanup process starting (in the CGroup, thus preventing removal). The simplest way to avoid this is to prevent the forking of the cleanup process. Conveniently, we can do this via the CGroup that we already created for Conmon - we just need to lower its PID limit to a very low value (the patch below sets it to 1), which inhibits new forks.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
author: Matthew Heon <matthew.heon@pm.me> 2019-05-11 15:10:34 -0400
committer: Matthew Heon <matthew.heon@pm.me> 2019-05-12 00:10:59 -0400
commit: c9c00ecd532fea4722b020f318daa1e34bc46fd0 (patch)
tree: f6c366676d6f59f6ad05e9f0c9b80335b9fb09fe /libpod/runtime_pod_linux.go
parent: 952f8620b6d528a823dd5ef61a51cf491f6d54e6 (diff)
download: podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.tar.gz
podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.tar.bz2
podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.zip
1 files changed, 36 insertions, 0 deletions
diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index baad278f3..456ad365f 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/containerd/cgroups"
 	"github.com/containers/libpod/libpod/events"
+	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -174,6 +175,41 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool)
 
 	var removalErr error
 
+	// We're going to be removing containers.
+	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
+	// the pod and conmon CGroups with a PID limit to prevent them from
+	// spawning any further processes (particularly cleanup processes) which
+	// would prevent removing the CGroups.
+	if p.runtime.config.CgroupManager == CgroupfsCgroupsManager {
+		// Get the conmon CGroup
+		v1CGroups := GetV1CGroups(getExcludedCGroups())
+		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
+		conmonCgroup, err := cgroups.Load(v1CGroups, cgroups.StaticPath(conmonCgroupPath))
+		if err != nil && err != cgroups.ErrCgroupDeleted {
+			if removalErr == nil {
+				removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup %s", p.ID(), conmonCgroupPath)
+			} else {
+				logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+			}
+		}
+
+		// New resource limits
+		resLimits := new(spec.LinuxResources)
+		resLimits.Pids = new(spec.LinuxPids)
+		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
+
+		// Don't try if we failed to retrieve the cgroup
+		if err == nil {
+			if err := conmonCgroup.Update(resLimits); err != nil {
+				if removalErr == nil {
+					removalErr = errors.Wrapf(err, "error updating pod %s conmon group", p.ID())
+				} else {
+					logrus.Errorf("Error updating pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
+				}
+			}
+		}
+	}
+
 	// Second loop - all containers are good, so we should be clear to
 	// remove.
 	for _, ctr := range ctrs {
author	Matthew Heon <matthew.heon@pm.me>	2019-05-11 15:10:34 -0400
committer	Matthew Heon <matthew.heon@pm.me>	2019-05-12 00:10:59 -0400
commit	c9c00ecd532fea4722b020f318daa1e34bc46fd0 (patch)
tree	f6c366676d6f59f6ad05e9f0c9b80335b9fb09fe /libpod/runtime_pod_linux.go
parent	952f8620b6d528a823dd5ef61a51cf491f6d54e6 (diff)
download	podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.tar.gz podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.tar.bz2 podman-c9c00ecd532fea4722b020f318daa1e34bc46fd0.zip