aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthew Heon <matthew.heon@pm.me> 2020-02-23 13:25:12 -0500
committer Matthew Heon <matthew.heon@pm.me> 2020-02-24 09:29:34 -0500
commit 4004f646cd7a99f86b2098587c256071a89a4dc7 (patch)
tree 2dd8da33094ab8cbac7012473bfccd916ad2ae4f
parent 18dcb84d641f4e8ae7fa31fa446d9461e0c915ab (diff)
downloadpodman-4004f646cd7a99f86b2098587c256071a89a4dc7.tar.gz
podman-4004f646cd7a99f86b2098587c256071a89a4dc7.tar.bz2
podman-4004f646cd7a99f86b2098587c256071a89a4dc7.zip
Add basic deadlock detection for container start/remove
We can easily tell whether we are about to deadlock by comparing lock IDs before actually taking the lock. Add checks for this in a few common places where deadlocks might occur. This does not yet cover pod operations, where detection is more difficult (and more costly) because more than two locks are involved. Also, add some error wrapping on the Podman side, so we can tell people to run `system renumber` when a deadlock is detected.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
-rw-r--r--cmd/podman/rm.go5
-rw-r--r--libpod/container_internal.go3
-rw-r--r--libpod/define/errors.go5
-rw-r--r--libpod/runtime_ctr.go3
-rw-r--r--libpod/runtime_volume.go3
-rw-r--r--libpod/runtime_volume_linux.go3
-rw-r--r--pkg/adapter/containers.go13
7 files changed, 32 insertions, 3 deletions
diff --git a/cmd/podman/rm.go b/cmd/podman/rm.go
index e69565e95..644b0ef76 100644
--- a/cmd/podman/rm.go
+++ b/cmd/podman/rm.go
@@ -4,8 +4,10 @@ import (
"fmt"
"github.com/containers/libpod/cmd/podman/cliconfig"
+ "github.com/containers/libpod/libpod/define"
"github.com/containers/libpod/pkg/adapter"
"github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
@@ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error {
if len(failures) > 0 {
for _, err := range failures {
+ if errors.Cause(err) == define.ErrWillDeadlock {
+ logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve")
+ }
exitCode = setExitCode(err)
}
}
diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 11f9721dc..ff43bfc8f 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
}
+ if vol.config.LockID == c.config.LockID {
+ return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
+ }
vol.lock.Lock()
defer vol.lock.Unlock()
if vol.needsMount() {
diff --git a/libpod/define/errors.go b/libpod/define/errors.go
index 523062866..b79cf08dc 100644
--- a/libpod/define/errors.go
+++ b/libpod/define/errors.go
@@ -61,6 +61,11 @@ var (
// the user.
ErrDetach = utils.ErrDetach
+ // ErrWillDeadlock indicates that the requested operation will cause a
+ // deadlock. This is usually caused by upgrade issues, and is resolved
+ // by renumbering the locks.
+ ErrWillDeadlock = errors.New("deadlock due to lock mismatch")
+
// ErrNoCgroups indicates that the container does not have its own
// CGroup.
ErrNoCgroups = errors.New("this container does not have a cgroup")
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 3ad09f27c..39284026c 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
}
// Lock the pod while we're removing container
+ if pod.config.LockID == c.config.LockID {
+ return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID)
+ }
pod.lock.Lock()
defer pod.lock.Unlock()
if err := pod.updatePod(); err != nil {
diff --git a/libpod/runtime_volume.go b/libpod/runtime_volume.go
index 835dccf9c..efc3c5bd9 100644
--- a/libpod/runtime_volume.go
+++ b/libpod/runtime_volume.go
@@ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error
}
}
- v.lock.Lock()
- defer v.lock.Unlock()
-
return r.removeVolume(ctx, v, force)
}
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 037cf4cc2..e9cfda9d4 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
return define.ErrVolumeRemoved
}
+ v.lock.Lock()
+ defer v.lock.Unlock()
+
// Update volume status to pick up a potential removal from state
if err := v.update(); err != nil {
return err
diff --git a/pkg/adapter/containers.go b/pkg/adapter/containers.go
index 78057e3f9..08e19edb8 100644
--- a/pkg/adapter/containers.go
+++ b/pkg/adapter/containers.go
@@ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode
logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID())
}
}
+ if errors.Cause(err) == define.ErrWillDeadlock {
+ logrus.Debugf("Deadlock error: %v", err)
+ return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
+ }
return define.ExitCode(err), err
}
@@ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
return exitCode, nil
}
+ if errors.Cause(err) == define.ErrWillDeadlock {
+ logrus.Debugf("Deadlock error: %v", err)
+ return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
+ }
+
if ctrRunning {
return 0, err
}
@@ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
if lastError != nil {
fmt.Fprintln(os.Stderr, lastError)
}
+ if errors.Cause(err) == define.ErrWillDeadlock {
+ lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks")
+ continue
+ }
lastError = errors.Wrapf(err, "unable to start container %q", container)
continue
}