From 0436cf29e57b3118c6fa22597240731913556a6f Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Mon, 2 Dec 2019 23:06:00 -0500 Subject: Ensure volumes reacquire locks on state refresh After a restart, pods and containers both run a refresh() function to prepare to run after a reboot. Until now, volumes have not had a similar function, because they had no per-boot setup to perform. Unfortunately, this was not noticed when in-memory locking was introduced to volumes. The refresh() routine is, among other things, responsible for ensuring that locks are reserved after a reboot, ensuring they cannot be taken by a freshly-created container, pod, or volume. If this reservation is not done, we can end up with two objects using the same lock, potentially needing to lock each other for some operations - classic recipe for deadlocks. Add a refresh() function to volumes to perform lock reservation and ensure it is called as part of overall refresh(). Fixes #4605 Fixes #4621 Signed-off-by: Matthew Heon --- libpod/runtime.go | 18 ++++++++++++++---- libpod/volume_internal.go | 12 ++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'libpod') diff --git a/libpod/runtime.go b/libpod/runtime.go index 42e6782e9..3873079ce 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -625,7 +625,8 @@ func (r *Runtime) refresh(alivePath string) error { } // Next refresh the state of all containers to recreate dirs and - // namespaces, and all the pods to recreate cgroups + // namespaces, and all the pods to recreate cgroups. + // Containers, pods, and volumes must also reacquire their locks. ctrs, err := r.state.AllContainers() if err != nil { return errors.Wrapf(err, "error retrieving all containers from state") @@ -634,10 +635,14 @@ func (r *Runtime) refresh(alivePath string) error { if err != nil { return errors.Wrapf(err, "error retrieving all pods from state") } - // No locks are taken during pod and container refresh. - // Furthermore, the pod and container refresh() functions are not + vols, err := r.state.AllVolumes() + if err != nil { + return errors.Wrapf(err, "error retrieving all volumes from state") + } + // No locks are taken during pod, volume, and container refresh. + // Furthermore, the pod/volume/container refresh() functions are not // allowed to take locks themselves. - // We cannot assume that any pod or container has a valid lock until + // We cannot assume that any pod/volume/container has a valid lock until // after this function has returned. // The runtime alive lock should suffice to provide mutual exclusion // until this has run. @@ -651,6 +656,11 @@ func (r *Runtime) refresh(alivePath string) error { logrus.Errorf("Error refreshing pod %s: %v", pod.ID(), err) } } + for _, vol := range vols { + if err := vol.refresh(); err != nil { + logrus.Errorf("Error refreshing volume %s: %v", vol.Name(), err) + } + } // Create a file indicating the runtime is alive and ready file, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644) diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go index 42b935e7c..e89b3484d 100644 --- a/libpod/volume_internal.go +++ b/libpod/volume_internal.go @@ -5,6 +5,7 @@ import ( "path/filepath" "github.com/containers/libpod/libpod/define" + "github.com/pkg/errors" ) // Creates a new volume @@ -46,3 +47,14 @@ func (v *Volume) update() error { func (v *Volume) save() error { return v.runtime.state.SaveVolume(v) } + +// Refresh volume state after a restart. +func (v *Volume) refresh() error { + lock, err := v.runtime.lockManager.AllocateAndRetrieveLock(v.config.LockID) + if err != nil { + return errors.Wrapf(err, "error acquiring lock %d for volume %s", v.config.LockID, v.Name()) + } + v.lock = lock + + return nil +} -- cgit v1.2.3-54-g00ecf