From e252b3b4f294745ca8ac6d1c1850de2e7f1365c7 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 14 Jan 2022 13:15:08 +0100 Subject: exec: retry rm -rf on ENOTEMPTY and EBUSY when running on NFS, a RemoveAll could cause EBUSY because of some unlinked files that are still kept open and "silly renamed" to .nfs$ID. This is only half of the fix, as conmon needs to be fixed too. Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2040379 Related: https://github.com/containers/conmon/pull/319 [NO NEW TESTS NEEDED] as it requires NFS as the underlying storage. Signed-off-by: Giuseppe Scrivano --- libpod/container_exec.go | 40 ++++++++++++++++++++++++++++++++++------ libpod/runtime_ctr.go | 41 ++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 25 deletions(-) (limited to 'libpod') diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 7d4e28d5d..d1c190905 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -14,6 +14,7 @@ import ( "github.com/containers/storage/pkg/stringid" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) // ExecConfig contains the configuration of an exec session @@ -774,13 +775,40 @@ func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resi return exitCode, nil } -// cleanup an exec session after its done -func (c *Container) cleanupExecBundle(sessionID string) error { - if err := os.RemoveAll(c.execBundlePath(sessionID)); err != nil && !os.IsNotExist(err) { - return err +// cleanupExecBundle cleanups an exec session after its done +// Please be careful when using this function since it might temporarily unlock +// the container when os.RemoveAll($bundlePath) fails with ENOTEMPTY or EBUSY +// errors. +func (c *Container) cleanupExecBundle(sessionID string) (Err error) { + path := c.execBundlePath(sessionID) + for attempts := 0; attempts < 50; attempts++ { + Err = os.RemoveAll(path) + if Err == nil || os.IsNotExist(Err) { + return nil + } + if pathErr, ok := Err.(*os.PathError); ok { + Err = pathErr.Err + if errors.Cause(Err) == unix.ENOTEMPTY || errors.Cause(Err) == unix.EBUSY { + // give other processes a chance to use the container + if !c.batched { + if err := c.save(); err != nil { + return err + } + c.lock.Unlock() + } + time.Sleep(time.Millisecond * 100) + if !c.batched { + c.lock.Lock() + if err := c.syncContainer(); err != nil { + return err + } + } + continue + } + } + return } - - return nil + return } // the path to a containers exec session bundle diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 6ee25c0ec..620b4eee0 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -653,6 +653,20 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } } + // Check that no other containers depend on the container. + // Only used if not removing a pod - pods guarantee that all + // deps will be evicted at the same time. + if !removePod { + deps, err := r.state.ContainerInUse(c) + if err != nil { + return err + } + if len(deps) != 0 { + depsStr := strings.Join(deps, ", ") + return errors.Wrapf(define.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) + } + } + // Check that the container's in a good state to be removed. if c.state.State == define.ContainerStateRunning { time := c.StopTimeout() @@ -675,25 +689,6 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } } - // Remove all active exec sessions - if err := c.removeAllExecSessions(); err != nil { - return err - } - - // Check that no other containers depend on the container. - // Only used if not removing a pod - pods guarantee that all - // deps will be evicted at the same time. - if !removePod { - deps, err := r.state.ContainerInUse(c) - if err != nil { - return err - } - if len(deps) != 0 { - depsStr := strings.Join(deps, ", ") - return errors.Wrapf(define.ErrCtrExists, "container %s has dependent containers which must be removed before it: %s", c.ID(), depsStr) - } - } - var cleanupErr error // Clean up network namespace, cgroups, mounts. @@ -713,6 +708,14 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo return errors.Wrapf(err, "unable to set container %s removing state in database", c.ID()) } + // Remove all active exec sessions + // removing the exec sessions might temporarily unlock the container's lock. Using it + // after setting the state to ContainerStateRemoving will prevent that the container is + // restarted + if err := c.removeAllExecSessions(); err != nil { + return err + } + // Stop the container's storage if err := c.teardownStorage(); err != nil { if cleanupErr == nil { -- cgit v1.2.3-54-g00ecf