diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2020-02-25 13:17:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-25 13:17:12 +0100 |
commit | 6babc9f20ca9e3633da1792a31d0391bde6201b6 (patch) | |
tree | 9627ab8a289952c5902ef45a09fab6cfbd143954 | |
parent | 8a30759b6b7f73c76c90e0931e6cf1ca3d94ee61 (diff) | |
parent | 4004f646cd7a99f86b2098587c256071a89a4dc7 (diff) | |
download | podman-6babc9f20ca9e3633da1792a31d0391bde6201b6.tar.gz podman-6babc9f20ca9e3633da1792a31d0391bde6201b6.tar.bz2 podman-6babc9f20ca9e3633da1792a31d0391bde6201b6.zip |
Merge pull request #5305 from mheon/check_for_common_deadlocks
Add basic deadlock detection for container start/remove
-rw-r--r-- | cmd/podman/rm.go | 5 | ||||
-rw-r--r-- | libpod/container_internal.go | 3 | ||||
-rw-r--r-- | libpod/define/errors.go | 5 | ||||
-rw-r--r-- | libpod/runtime_ctr.go | 3 | ||||
-rw-r--r-- | libpod/runtime_volume.go | 3 | ||||
-rw-r--r-- | libpod/runtime_volume_linux.go | 3 | ||||
-rw-r--r-- | pkg/adapter/containers.go | 13 |
7 files changed, 32 insertions, 3 deletions
diff --git a/cmd/podman/rm.go b/cmd/podman/rm.go index e69565e95..644b0ef76 100644 --- a/cmd/podman/rm.go +++ b/cmd/podman/rm.go @@ -4,8 +4,10 @@ import ( "fmt" "github.com/containers/libpod/cmd/podman/cliconfig" + "github.com/containers/libpod/libpod/define" "github.com/containers/libpod/pkg/adapter" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "github.com/spf13/cobra" ) @@ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error { if len(failures) > 0 { for _, err := range failures { + if errors.Cause(err) == define.ErrWillDeadlock { + logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve") + } exitCode = setExitCode(err) } } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 11f9721dc..ff43bfc8f 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) } + if vol.config.LockID == c.config.LockID { + return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID) + } vol.lock.Lock() defer vol.lock.Unlock() if vol.needsMount() { diff --git a/libpod/define/errors.go b/libpod/define/errors.go index 523062866..b79cf08dc 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -61,6 +61,11 @@ var ( // the user. ErrDetach = utils.ErrDetach + // ErrWillDeadlock indicates that the requested operation will cause a + // deadlock. This is usually caused by upgrade issues, and is resolved + // by renumbering the locks. + ErrWillDeadlock = errors.New("deadlock due to lock mismatch") + // ErrNoCgroups indicates that the container does not have its own // CGroup. ErrNoCgroups = errors.New("this container does not have a cgroup") diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 3ad09f27c..39284026c 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, } // Lock the pod while we're removing container + if pod.config.LockID == c.config.LockID { + return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID) + } pod.lock.Lock() defer pod.lock.Unlock() if err := pod.updatePod(); err != nil { diff --git a/libpod/runtime_volume.go b/libpod/runtime_volume.go index 835dccf9c..efc3c5bd9 100644 --- a/libpod/runtime_volume.go +++ b/libpod/runtime_volume.go @@ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error } } - v.lock.Lock() - defer v.lock.Unlock() - return r.removeVolume(ctx, v, force) } diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go index 037cf4cc2..e9cfda9d4 100644 --- a/libpod/runtime_volume_linux.go +++ b/libpod/runtime_volume_linux.go @@ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error return define.ErrVolumeRemoved } + v.lock.Lock() + defer v.lock.Unlock() + // Update volume status to pick up a potential removal from state if err := v.update(); err != nil { return err diff --git a/pkg/adapter/containers.go b/pkg/adapter/containers.go index 78057e3f9..08e19edb8 100644 --- a/pkg/adapter/containers.go +++ b/pkg/adapter/containers.go @@ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID()) } } + if errors.Cause(err) == define.ErrWillDeadlock { + logrus.Debugf("Deadlock error: %v", err) + return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID()) + } return define.ExitCode(err), err } @@ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP return exitCode, nil } + if errors.Cause(err) == define.ErrWillDeadlock { + logrus.Debugf("Deadlock error: %v", err) + return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID()) + } + if ctrRunning { return 0, err } @@ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP if lastError != nil { fmt.Fprintln(os.Stderr, lastError) } + if errors.Cause(err) == define.ErrWillDeadlock { + lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks") + continue + } lastError = errors.Wrapf(err, "unable to start container %q", container) continue } |