diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2022-09-15 21:01:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-15 21:01:23 +0200 |
commit | df73f606ef45e3ec3358968090681a946083cd6f (patch) | |
tree | f8f34d772e7e4eb1716a3b3d3103fee2b21dfb06 /libpod | |
parent | 50c538b3cc7a218fe009180bbe20b0c464add6f7 (diff) | |
parent | e19e0de5faf31137fa0197ea014d4615e641d33a (diff) | |
download | podman-df73f606ef45e3ec3358968090681a946083cd6f.tar.gz podman-df73f606ef45e3ec3358968090681a946083cd6f.tar.bz2 podman-df73f606ef45e3ec3358968090681a946083cd6f.zip |
Merge pull request #15757 from mheon/fix_15526
Introduce graph-based pod container removal
Diffstat (limited to 'libpod')
-rw-r--r-- | libpod/container_graph.go | 91 | ||||
-rw-r--r-- | libpod/pod_api.go | 2 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 52 | ||||
-rw-r--r-- | libpod/runtime_img.go | 2 | ||||
-rw-r--r-- | libpod/runtime_pod_linux.go | 92 | ||||
-rw-r--r-- | libpod/runtime_volume_linux.go | 2 |
6 files changed, 189 insertions, 52 deletions
diff --git a/libpod/container_graph.go b/libpod/container_graph.go index 96d61b756..d43579e4a 100644 --- a/libpod/container_graph.go +++ b/libpod/container_graph.go @@ -281,3 +281,94 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) } } + +// Visit a node on the container graph and remove it, or set an error if it +// failed to remove. Only intended for use in pod removal; do *not* use when +// removing individual containers. +// All containers are assumed to be *UNLOCKED* on running this function. +// Container locks will be acquired as necessary. +// Pod and infraID are optional. If a pod is given it must be *LOCKED*. +func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) { + // If we already visited this node, we're done. + if ctrsVisited[node.id] { + return + } + + // Someone who depends on us failed. + // Mark us as failed and recurse. + if setError { + ctrsVisited[node.id] = true + ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid) + + // Hit anyone who depends on us, set errors there as well. + for _, successor := range node.dependsOn { + removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes) + } + } + + // Does anyone still depend on us? + // Cannot remove if true. Once all our dependencies have been removed, + // we will be removed. + for _, dep := range node.dependedOn { + // The container that depends on us hasn't been removed yet. + // OK to continue on + if ok := ctrsVisited[dep.id]; !ok { + return + } + } + + // Going to try to remove the node, mark us as visited + ctrsVisited[node.id] = true + + ctrErrored := false + + // Verify that all that depend on us are gone. + // Graph traversal should guarantee this is true, but this isn't that + // expensive, and it's better to be safe. + for _, dep := range node.dependedOn { + if _, err := node.container.runtime.GetContainer(dep.id); err == nil { + ctrErrored = true + ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists) + } + } + + // Lock the container + node.container.lock.Lock() + + // Gate all subsequent bits behind a ctrErrored check - we don't want to + // proceed if a previous step failed. + if !ctrErrored { + if err := node.container.syncContainer(); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + if !ctrErrored { + for _, vol := range node.container.config.NamedVolumes { + ctrNamedVolumes[vol.Name] = vol + } + + if pod != nil && pod.state.InfraContainerID == node.id { + pod.state.InfraContainerID = "" + if err := pod.save(); err != nil { + ctrErrored = true + ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err) + } + } + } + + if !ctrErrored { + if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil { + ctrErrored = true + ctrErrors[node.id] = err + } + } + + node.container.lock.Unlock() + + // Recurse to anyone who we depend on and remove them + for _, successor := range node.dependsOn { + removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes) + } +} diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 1bd686ddc..924d43436 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error { icLock := initCon.lock icLock.Lock() var time *uint - if err := p.runtime.removeContainer(ctx, initCon, false, false, true, time); err != nil { + if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil { icLock.Unlock() return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err) } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 1f032dd6b..7b3cbadfa 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -581,7 +581,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai // be removed also if and only if the container is the sole user // Otherwise, RemoveContainer will return an error if the container is running func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error { - return r.removeContainer(ctx, c, force, removeVolume, false, timeout) + return r.removeContainer(ctx, c, force, removeVolume, false, false, timeout) } // Internal function to remove a container. @@ -589,7 +589,9 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, // removePod is used only when removing pods. It instructs Podman to ignore // infra container protections, and *not* remove from the database (as pod // remove will handle that). -func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod bool, timeout *uint) error { +// ignoreDeps is *DANGEROUS* and should not be used outside of a very specific +// context (alternate pod removal code, where graph traversal is not possible). +func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error { if !c.valid { if ok, _ := r.state.HasContainer(c.ID()); !ok { // Container probably already removed @@ -618,25 +620,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // pod. var pod *Pod runtime := c.runtime - if c.config.Pod != "" && !removePod { + if c.config.Pod != "" { pod, err = r.state.Pod(c.config.Pod) if err != nil { return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err) } - // Lock the pod while we're removing container - if pod.config.LockID == c.config.LockID { - return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) - } - pod.lock.Lock() - defer pod.lock.Unlock() - if err := pod.updatePod(); err != nil { - return err - } + if !removePod { + // Lock the pod while we're removing container + if pod.config.LockID == c.config.LockID { + return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) + } + pod.lock.Lock() + defer pod.lock.Unlock() + if err := pod.updatePod(); err != nil { + return err + } - infraID := pod.state.InfraContainerID - if c.ID() == infraID { - return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + infraID := pod.state.InfraContainerID + if c.ID() == infraID { + return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + } } } @@ -696,7 +700,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // Check that no other containers depend on the container. // Only used if not removing a pod - pods guarantee that all // deps will be evicted at the same time. - if !removePod { + if !ignoreDeps { deps, err := r.state.ContainerInUse(c) if err != nil { return err @@ -777,13 +781,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if c.config.Pod != "" { // If we're removing the pod, the container will be evicted // from the state elsewhere - if !removePod { - if err := r.state.RemoveContainerFromPod(pod, c); err != nil { - if cleanupErr == nil { - cleanupErr = err - } else { - logrus.Errorf("Removing container %s from database: %v", c.ID(), err) - } + if err := r.state.RemoveContainerFromPod(pod, c); err != nil { + if cleanupErr == nil { + cleanupErr = err + } else { + logrus.Errorf("Removing container %s from database: %v", c.ID(), err) } } } else { @@ -872,7 +874,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol if err == nil { logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id) // Assume force = true for the evict case - err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, timeout) + err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, timeout) if !tmpCtr.valid { // If the container is marked invalid, remove succeeded // in kicking it out of the state - no need to continue. @@ -1034,7 +1036,7 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool } report := reports.RmReport{Id: rmCtr.ID(), RawInput: rmCtr.ID()} - report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, timeout) + report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, false, timeout) return append(rmReports, &report), nil } diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go index 5510b2af6..dacbd752f 100644 --- a/libpod/runtime_img.go +++ b/libpod/runtime_img.go @@ -47,7 +47,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } else { - if err := r.removeContainer(ctx, ctr, true, false, false, timeout); err != nil { + if err := r.removeContainer(ctx, ctr, true, false, false, false, timeout); err != nil { return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index 3eeef69d8..24e9f3da7 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -17,6 +17,7 @@ import ( "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" + "github.com/hashicorp/go-multierror" "github.com/sirupsen/logrus" ) @@ -191,29 +192,9 @@ func (r *Runtime) SavePod(pod *Pod) error { return nil } -func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error { - if err := p.updatePod(); err != nil { - return err - } - - ctrs, err := r.state.PodContainers(p) - if err != nil { - return err - } - numCtrs := len(ctrs) - - // If the only running container in the pod is the pause container, remove the pod and container unconditionally. - pauseCtrID := p.state.InfraContainerID - if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { - removeCtrs = true - force = true - } - if !removeCtrs && numCtrs > 0 { - return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists) - } - - ctrNamedVolumes := make(map[string]*ContainerNamedVolume) - +// DO NOT USE THIS FUNCTION DIRECTLY. Use removePod(), below. It will call +// removeMalformedPod() if necessary. +func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Container, force bool, timeout *uint, ctrNamedVolumes map[string]*ContainerNamedVolume) error { var removalErr error for _, ctr := range ctrs { err := func() error { @@ -231,7 +212,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, ctrNamedVolumes[vol.Name] = vol } - return r.removeContainer(ctx, ctr, force, false, true, timeout) + return r.removeContainer(ctx, ctr, force, false, true, true, timeout) }() if removalErr == nil { @@ -261,6 +242,69 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, return err } + return nil +} + +func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error { + if err := p.updatePod(); err != nil { + return err + } + + ctrs, err := r.state.PodContainers(p) + if err != nil { + return err + } + numCtrs := len(ctrs) + + // If the only running container in the pod is the pause container, remove the pod and container unconditionally. + pauseCtrID := p.state.InfraContainerID + if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { + removeCtrs = true + force = true + } + if !removeCtrs && numCtrs > 0 { + return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists) + } + + var removalErr error + ctrNamedVolumes := make(map[string]*ContainerNamedVolume) + + // Build a graph of all containers in the pod. + graph, err := BuildContainerGraph(ctrs) + if err != nil { + // We have to allow the pod to be removed. + // But let's only do it if force is set. + if !force { + return fmt.Errorf("cannot create container graph for pod %s: %w", p.ID(), err) + } + + removalErr = fmt.Errorf("creating container graph for pod %s failed, fell back to loop removal: %w", p.ID(), err) + + if err := r.removeMalformedPod(ctx, p, ctrs, force, timeout, ctrNamedVolumes); err != nil { + logrus.Errorf("Error creating container graph for pod %s: %v. Falling back to loop removal.", p.ID(), err) + return err + } + } else { + ctrErrors := make(map[string]error) + ctrsVisited := make(map[string]bool) + + for _, node := range graph.notDependedOnNodes { + removeNode(ctx, node, p, force, timeout, false, ctrErrors, ctrsVisited, ctrNamedVolumes) + } + + // This is gross, but I don't want to change the signature on + // removePod - especially since any change here eventually has + // to map down to one error unless we want to make a breaking + // API change. + if len(ctrErrors) > 0 { + var allErrs error + for id, err := range ctrErrors { + allErrs = multierror.Append(allErrs, fmt.Errorf("removing container %s from pod %s: %w", id, p.ID(), err)) + } + return allErrs + } + } + for volName := range ctrNamedVolumes { volume, err := r.state.Volume(volName) if err != nil && !errors.Is(err, define.ErrNoSuchVolume) { diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go index c9a4a7dc1..08fdbf977 100644 --- a/libpod/runtime_volume_linux.go +++ b/libpod/runtime_volume_linux.go @@ -324,7 +324,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name()) - if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil { + if err := r.removeContainer(ctx, ctr, force, false, false, false, timeout); err != nil { return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err) } } |