From a0719398931d59a422835b7e4cc7b6e28c18d031 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Mon, 10 Aug 2020 15:00:42 -0400
Subject: Ensure pod infra containers have an exit command

Most Libpod containers are made via `pkg/specgen/generate`, which includes
code to generate an appropriate exit command that will handle unmounting the
container's storage, cleaning up the container's network, etc.

There is one notable exception: pod infra containers, which are made entirely
within Libpod and do not touch pkg/specgen. As such, they get no cleanup
process, their network is never cleaned up, and bad things can happen.

There is good news, though - it's not that difficult to add this, and it's
done in this PR. Generally speaking, we don't allow passing options directly
to the infra container at create time, but we do (optionally) proxy a
pre-approved set of options into it when we create it. Add ExitCommand to
these options, and set it at time of pod creation using the same code we use
to generate exit commands for normal containers.

Fixes #7103

Signed-off-by: Matthew Heon
---
 pkg/bindings/test/pods_test.go     |  6 +++---
 pkg/specgen/generate/pod_create.go | 16 ++++++++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'pkg')

diff --git a/pkg/bindings/test/pods_test.go b/pkg/bindings/test/pods_test.go
index 27f648384..8498de020 100644
--- a/pkg/bindings/test/pods_test.go
+++ b/pkg/bindings/test/pods_test.go
@@ -244,7 +244,7 @@ var _ = Describe("Podman pods", func() {
 		Expect(response.State).To(Equal(define.PodStateExited))
 		for _, i := range response.Containers {
 			Expect(define.StringToContainerStatus(i.State)).
-				To(Equal(define.ContainerStateStopped))
+				To(Equal(define.ContainerStateExited))
 		}
 
 		// Stop an already stopped pod
@@ -309,7 +309,7 @@ var _ = Describe("Podman pods", func() {
 		Expect(response.State).To(Equal(define.PodStateExited))
 		for _, i := range response.Containers {
 			Expect(define.StringToContainerStatus(i.State)).
-				To(Equal(define.ContainerStateStopped))
+				To(Equal(define.ContainerStateExited))
 		}
 		_, err = pods.Stop(bt.conn, newpod2, nil)
 		Expect(err).To(BeNil())
@@ -318,7 +318,7 @@ var _ = Describe("Podman pods", func() {
 		Expect(response.State).To(Equal(define.PodStateExited))
 		for _, i := range response.Containers {
 			Expect(define.StringToContainerStatus(i.State)).
-				To(Equal(define.ContainerStateStopped))
+				To(Equal(define.ContainerStateExited))
 		}
 		_, err = pods.Prune(bt.conn)
 		Expect(err).To(BeNil())
diff --git a/pkg/specgen/generate/pod_create.go b/pkg/specgen/generate/pod_create.go
index 0cbfca2db..0bd39d5a4 100644
--- a/pkg/specgen/generate/pod_create.go
+++ b/pkg/specgen/generate/pod_create.go
@@ -13,14 +13,14 @@ func MakePod(p *specgen.PodSpecGenerator, rt *libpod.Runtime) (*libpod.Pod, erro
 	if err := p.Validate(); err != nil {
 		return nil, err
 	}
-	options, err := createPodOptions(p)
+	options, err := createPodOptions(p, rt)
 	if err != nil {
 		return nil, err
 	}
 	return rt.NewPod(context.Background(), options...)
 }
 
-func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, error) {
+func createPodOptions(p *specgen.PodSpecGenerator, rt *libpod.Runtime) ([]libpod.PodCreateOption, error) {
 	var (
 		options []libpod.PodCreateOption
 	)
@@ -31,6 +31,18 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
 			return nil, err
 		}
 		options = append(options, nsOptions...)
+
+		// Make our exit command
+		storageConfig := rt.StorageConfig()
+		runtimeConfig, err := rt.GetConfig()
+		if err != nil {
+			return nil, err
+		}
+		exitCommand, err := CreateExitCommandArgs(storageConfig, runtimeConfig, logrus.IsLevelEnabled(logrus.DebugLevel), false, false)
+		if err != nil {
+			return nil, errors.Wrapf(err, "error creating infra container exit command")
+		}
+		options = append(options, libpod.WithPodInfraExitCommand(exitCommand))
 	}
 	if len(p.CgroupParent) > 0 {
 		options = append(options, libpod.WithPodCgroupParent(p.CgroupParent))
--
cgit v1.2.3-54-g00ecf


From c4b2078508a45843034a86916b37a52d3e34d20d Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Mon, 17 Aug 2020 11:04:26 -0400
Subject: Clean up pods before returning from Pod Stop API call

This should help alleviate races where the pod is not fully cleaned up
before subsequent API calls happen.

Signed-off-by: Matthew Heon
---
 libpod/pod_api.go               | 134 ++++++++++++++++++++++++++++++----------
 pkg/api/handlers/libpod/pods.go |   9 ++-
 2 files changed, 111 insertions(+), 32 deletions(-)

(limited to 'pkg')

diff --git a/libpod/pod_api.go b/libpod/pod_api.go
index 4cb604683..ec4cc08f7 100644
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@@ -11,20 +11,20 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
-// Start starts all containers within a pod
-// It combines the effects of Init() and Start() on a container
+// Start starts all containers within a pod.
+// It combines the effects of Init() and Start() on a container.
 // If a container has already been initialized it will be started,
 // otherwise it will be initialized then started.
 // Containers that are already running or have been paused are ignored
 // All containers are started independently, in order dictated by their
 // dependencies.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were started
+// any containers were started.
 // If map is not nil, an error was encountered when starting one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
-// If both error and the map are nil, all containers were started successfully
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were started successfully.
 func (p *Pod) Start(ctx context.Context) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -72,20 +72,20 @@ func (p *Pod) Stop(ctx context.Context, cleanup bool) (map[string]error, error)
 }
 
 // StopWithTimeout stops all containers within a pod that are not already stopped
-// Each container will use its own stop timeout
+// Each container will use its own stop timeout.
 // Only running containers will be stopped. Paused, stopped, or created
 // containers will be ignored.
 // If cleanup is true, mounts and network namespaces will be cleaned up after
 // the container is stopped.
 // All containers are stopped independently. An error stopping one container
 // will not prevent other containers being stopped.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were stopped
+// any containers were stopped.
 // If map is not nil, an error was encountered when stopping one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
-// If both error and the map are nil, all containers were stopped without error
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were stopped without error.
 func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -138,10 +138,82 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 		ctr.lock.Unlock()
 	}
 
+	p.newPodEvent(events.Stop)
+
 	if len(ctrErrors) > 0 {
 		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error stopping some containers")
 	}
 
-	defer p.newPodEvent(events.Stop)
+	return nil, nil
+}
+
+// Cleanup cleans up all containers within a pod that have stopped.
+// All containers are cleaned up independently. An error with one container will
+// not prevent other containers being cleaned up.
+// An error and a map[string]error are returned.
+// If the error is not nil and the map is nil, an error was encountered before
+// any containers were cleaned up.
+// If map is not nil, an error was encountered when working on one or more
+// containers. The container ID is mapped to the error encountered. The error is
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were cleaned up without error
+func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return nil, define.ErrPodRemoved
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return nil, err
+	}
+
+	ctrErrors := make(map[string]error)
+
+	// Clean up all containers
+	for _, ctr := range allCtrs {
+		ctr.lock.Lock()
+
+		if err := ctr.syncContainer(); err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+
+		// Ignore containers that are running/paused
+		if !ctr.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
+			ctr.lock.Unlock()
+			continue
+		}
+
+		// Check for running exec sessions, ignore containers with them.
+		sessions, err := ctr.getActiveExecSessions()
+		if err != nil {
+			ctr.lock.Unlock()
+			ctrErrors[ctr.ID()] = err
+			continue
+		}
+		if len(sessions) > 0 {
+			ctr.lock.Unlock()
+			continue
+		}
+
+		// TODO: Should we handle restart policy here?
+
+		ctr.newContainerEvent(events.Cleanup)
+
+		if err := ctr.cleanup(ctx); err != nil {
+			ctrErrors[ctr.ID()] = err
+		}
+
+		ctr.lock.Unlock()
+	}
+
+	if len(ctrErrors) > 0 {
+		return ctrErrors, errors.Wrapf(define.ErrPodPartialFail, "error cleaning up some containers")
+	}
+
 	return nil, nil
 }
 
@@ -150,12 +222,12 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 // containers will be ignored.
 // All containers are paused independently. An error pausing one container
 // will not prevent other containers being paused.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were paused
+// any containers were paused.
 // If map is not nil, an error was encountered when pausing one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
+// set to ErrPodPartialFail.
 // If both error and the map are nil, all containers were paused without error
 func (p *Pod) Pause() (map[string]error, error) {
 	p.lock.Lock()
@@ -219,13 +291,13 @@ func (p *Pod) Pause() (map[string]error, error) {
 // containers will be ignored.
 // All containers are unpaused independently. An error unpausing one container
 // will not prevent other containers being unpaused.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were unpaused
+// any containers were unpaused.
 // If map is not nil, an error was encountered when unpausing one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
-// If both error and the map are nil, all containers were unpaused without error
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were unpaused without error.
 func (p *Pod) Unpause() (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -280,13 +352,13 @@ func (p *Pod) Unpause() (map[string]error, error) {
 // All containers are started independently, in order dictated by their
 // dependencies. An error restarting one container
 // will not prevent other containers being restarted.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were restarted
+// any containers were restarted.
 // If map is not nil, an error was encountered when restarting one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
-// If both error and the map are nil, all containers were restarted without error
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were restarted without error.
 func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -328,17 +400,17 @@ func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
 	return nil, nil
 }
 
-// Kill sends a signal to all running containers within a pod
+// Kill sends a signal to all running containers within a pod.
 // Signals will only be sent to running containers. Containers that are not
 // running will be ignored. All signals are sent independently, and sending will
 // continue even if some containers encounter errors.
-// An error and a map[string]error are returned
+// An error and a map[string]error are returned.
 // If the error is not nil and the map is nil, an error was encountered before
-// any containers were signalled
+// any containers were signalled.
 // If map is not nil, an error was encountered when signalling one or more
 // containers. The container ID is mapped to the error encountered. The error is
-// set to ErrCtrExists
-// If both error and the map are nil, all containers were signalled successfully
+// set to ErrPodPartialFail.
+// If both error and the map are nil, all containers were signalled successfully.
 func (p *Pod) Kill(signal uint) (map[string]error, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -393,8 +465,8 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
 	return nil, nil
 }
 
-// Status gets the status of all containers in the pod
-// Returns a map of Container ID to Container Status
+// Status gets the status of all containers in the pod.
+// Returns a map of Container ID to Container Status.
 func (p *Pod) Status() (map[string]define.ContainerStatus, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
@@ -430,7 +502,7 @@ func containerStatusFromContainers(allCtrs []*Container) (map[string]define.Cont
 	return status, nil
 }
 
-// Inspect returns a PodInspect struct to describe the pod
+// Inspect returns a PodInspect struct to describe the pod.
 func (p *Pod) Inspect() (*define.InspectPodData, error) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
diff --git a/pkg/api/handlers/libpod/pods.go b/pkg/api/handlers/libpod/pods.go
index 10e0d4ce9..6e704fe65 100644
--- a/pkg/api/handlers/libpod/pods.go
+++ b/pkg/api/handlers/libpod/pods.go
@@ -123,10 +123,17 @@ func PodStop(w http.ResponseWriter, r *http.Request) {
 	} else {
 		responses, stopError = pod.Stop(r.Context(), false)
 	}
-	if stopError != nil {
+	if stopError != nil && errors.Cause(stopError) != define.ErrPodPartialFail {
 		utils.Error(w, "Something went wrong", http.StatusInternalServerError, err)
 		return
 	}
+	// Try to clean up the pod - but only warn on failure, it's nonfatal.
+	if cleanupCtrs, cleanupErr := pod.Cleanup(r.Context()); cleanupErr != nil {
+		logrus.Errorf("Error cleaning up pod %s: %v", pod.ID(), cleanupErr)
+		for id, err := range cleanupCtrs {
+			logrus.Errorf("Error cleaning up pod %s container %s: %v", pod.ID(), id, err)
+		}
+	}
 	var errs []error //nolint
 	for _, err := range responses {
 		errs = append(errs, err)
--
cgit v1.2.3-54-g00ecf
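For reference, below is a minimal caller-side sketch of the flow the second
commit establishes: stop the pod, tolerate define.ErrPodPartialFail, then run
the new Pod.Cleanup and treat its failures as nonfatal, mirroring the PodStop
handler above. The helper name stopAndCleanupPod, the package name, and the
import paths are illustrative assumptions (module paths shifted across Podman
v2 releases); only Pod.Stop, Pod.Cleanup, define.ErrPodPartialFail,
errors.Cause, and the logging pattern come from the patches themselves.

package podutil // hypothetical package, not part of the patches

import (
	"context"

	"github.com/containers/libpod/v2/libpod"        // assumed import path
	"github.com/containers/libpod/v2/libpod/define" // assumed import path
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

// stopAndCleanupPod stops every container in a pod and then cleans up
// storage mounts and network namespaces for the stopped containers.
func stopAndCleanupPod(ctx context.Context, pod *libpod.Pod) error {
	// Stop returns a per-container error map plus an overall error; partial
	// failures are reported as define.ErrPodPartialFail.
	ctrErrs, stopErr := pod.Stop(ctx, false)
	if stopErr != nil && errors.Cause(stopErr) != define.ErrPodPartialFail {
		return stopErr
	}
	for id, err := range ctrErrs {
		logrus.Errorf("Error stopping container %s: %v", id, err)
	}

	// Cleanup failures are nonfatal here, as in the PodStop handler: log them
	// per container and continue.
	if cleanupCtrs, cleanupErr := pod.Cleanup(ctx); cleanupErr != nil {
		logrus.Errorf("Error cleaning up pod %s: %v", pod.ID(), cleanupErr)
		for id, err := range cleanupCtrs {
			logrus.Errorf("Error cleaning up pod %s container %s: %v", pod.ID(), id, err)
		}
	}
	return nil
}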