diff options
author | Matthew Heon <mheon@redhat.com> | 2019-11-11 09:52:13 -0500 |
---|---|---|
committer | Matthew Heon <mheon@redhat.com> | 2019-11-19 15:38:03 -0500 |
commit | 25cc43c376c5ddfa70a6009526f8f03b5235c2c6 (patch) | |
tree | 9aec67da4cb3894dca1477148714ad6cdf995ee1 /libpod/runtime_ctr.go | |
parent | f3f219a67c3a9297b5e1f0505c583b9de35661c8 (diff) | |
download | podman-25cc43c376c5ddfa70a6009526f8f03b5235c2c6.tar.gz podman-25cc43c376c5ddfa70a6009526f8f03b5235c2c6.tar.bz2 podman-25cc43c376c5ddfa70a6009526f8f03b5235c2c6.zip |
Add ContainerStateRemoving
When Libpod removes a container, there is the possibility that
removal will not fully succeed. The most notable problems are
storage issues, where the container cannot be removed from
c/storage.
When this occurs, we were faced with a choice. We can keep the
container in the state, appearing in `podman ps` and available for
other API operations, but likely unable to do any of them as it's
been partially removed. Or we can remove it very early and clean
up after it's already gone. We have, until now, used the second
approach.
The problem that arises is intermittent problems removing
storage. We end up removing a container, failing to remove its
storage, and ending up with a container permanently stuck in
c/storage that we can't remove with the normal Podman CLI, can't
use the name of, and generally can't interact with. A notable
cause is when Podman is hit by a SIGKILL midway through removal,
which can consistently cause `podman rm` to fail to remove
storage.
We now add a new state for containers that are in the process of
being removed, ContainerStateRemoving. We set this at the
beginning of the removal process. It notifies Podman that the
container cannot be used anymore, but preserves it in the DB
until it is fully removed. This will allow Remove to be run on
these containers again, which should successfully remove storage
if it fails.
Fixes #3906
Signed-off-by: Matthew Heon <mheon@redhat.com>
Diffstat (limited to 'libpod/runtime_ctr.go')
-rw-r--r-- | libpod/runtime_ctr.go | 55 |
1 files changed, 34 insertions, 21 deletions
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 7069d3494..ae401013c 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -489,32 +489,19 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } - var cleanupErr error - // Remove the container from the state - if c.config.Pod != "" { - // If we're removing the pod, the container will be evicted - // from the state elsewhere - if !removePod { - if err := r.state.RemoveContainerFromPod(pod, c); err != nil { - cleanupErr = err - } - } - } else { - if err := r.state.RemoveContainer(c); err != nil { - cleanupErr = err - } + // Set ContainerStateRemoving and remove exec sessions + c.state.State = define.ContainerStateRemoving + c.state.ExecSessions = nil + + if err := c.save(); err != nil { + return errors.Wrapf(err, "unable to set container %s removing state in database", c.ID()) } - // Set container as invalid so it can no longer be used - c.valid = false + var cleanupErr error // Clean up network namespace, cgroups, mounts if err := c.cleanup(ctx); err != nil { - if cleanupErr == nil { - cleanupErr = errors.Wrapf(err, "error cleaning up container %s", c.ID()) - } else { - logrus.Errorf("cleanup network, cgroups, mounts: %v", err) - } + cleanupErr = errors.Wrapf(err, "error cleaning up container %s", c.ID()) } // Stop the container's storage @@ -540,6 +527,29 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } + // Remove the container from the state + if c.config.Pod != "" { + // If we're removing the pod, the container will be evicted + // from the state elsewhere + if !removePod { + if err := r.state.RemoveContainerFromPod(pod, c); err != nil { + if cleanupErr == nil { + cleanupErr = err + } else { + logrus.Errorf("Error removing container %s from database: %v", c.ID(), err) + } + } + } + } else { + if err := r.state.RemoveContainer(c); err != nil { + if cleanupErr == nil { + cleanupErr = err + } else { + logrus.Errorf("Error removing container %s from database: %v", c.ID(), err) + } + } + } + // Deallocate the container's lock if err := c.lock.Free(); err != nil { if cleanupErr == nil { @@ -549,6 +559,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } } + // Set container as invalid so it can no longer be used + c.valid = false + c.newContainerEvent(events.Remove) if !removeVolume { |