From 7fdd20ae5a1ced1faceab9cb0a6e553343911a0b Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Thu, 14 Feb 2019 17:25:58 -0500
Subject: Add initial version of renumber backend

Renumber reassigns container lock numbers after the number of
available locks has changed. For now, renumber only works with
containers.

Signed-off-by: Matthew Heon
---
 libpod/runtime_renumber.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 libpod/runtime_renumber.go

diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
new file mode 100644
index 000000000..bc291156e
--- /dev/null
+++ b/libpod/runtime_renumber.go
@@ -0,0 +1,60 @@
+package libpod
+
+import (
+	"path/filepath"
+
+	"github.com/containers/storage"
+	"github.com/pkg/errors"
+)
+
+// RenumberLocks reassigns lock numbers for all containers, pods, and volumes in
+// the state.
+// It renders the runtime it is called on, and all container/pod/volume structs
+// from that runtime, unusable, and requires that a new runtime be initialized
+// after it is called.
+func (r *Runtime) RenumberLocks() error {
+	r.lock.Lock()
+	locked := true
+	defer func() {
+		if locked {
+			r.lock.Unlock()
+		}
+	}()
+
+	runtimeAliveLock := filepath.Join(r.config.TmpDir, "alive.lck")
+	aliveLock, err := storage.GetLockfile(runtimeAliveLock)
+	if err != nil {
+		return errors.Wrapf(err, "error acquiring runtime init lock")
+	}
+	aliveLock.Lock()
+	// It's OK to defer until Shutdown() has run, so no need to check locked
+	defer aliveLock.Unlock()
+
+	// Start off by deallocating all locks
+	if err := r.lockManager.FreeAllLocks(); err != nil {
+		return err
+	}
+
+	allCtrs, err := r.state.AllContainers()
+	if err != nil {
+		return err
+	}
+	for _, ctr := range allCtrs {
+		lock, err := r.lockManager.AllocateLock()
+		if err != nil {
+			return errors.Wrapf(err, "error allocating lock for container %s", ctr.ID())
+		}
+
+		ctr.config.LockID = lock.ID()
+
+		// Write the new lock ID
+		if err := r.state.RewriteContainerConfig(ctr, ctr.config); err != nil {
+			return err
+		}
+	}
+
+	r.lock.Unlock()
+	locked = false
+
+	return r.Shutdown(false)
+}

From 9353a3e8ec3855f29c552dd1e0134dbaa3031efa Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Thu, 14 Feb 2019 17:55:54 -0500
Subject: Expand renumber to also renumber pod locks

Signed-off-by: Matthew Heon
---
 libpod/runtime_renumber.go | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index bc291156e..b27b5f1a8 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -52,6 +52,23 @@ func (r *Runtime) RenumberLocks() error {
 			return err
 		}
 	}
+	allPods, err := r.state.AllPods()
+	if err != nil {
+		return err
+	}
+	for _, pod := range allPods {
+		lock, err := r.lockManager.AllocateLock()
+		if err != nil {
+			return errors.Wrapf(err, "error allocating lock for pod %s", pod.ID())
+		}
+
+		pod.config.LockID = lock.ID()
+
+		// Write the new lock ID
+		if err := r.state.RewritePodConfig(pod, pod.config); err != nil {
+			return err
+		}
+	}
 
 	r.lock.Unlock()
 	locked = false
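Taken together, these first two patches establish the pattern every later revision of the backend keeps: drain the lock manager with FreeAllLocks, then walk each object type, pair every object with a freshly allocated lock, and persist the new lock ID through the state's rewrite methods. The following condensed sketch is not code from the series — it assumes it lives in package libpod and shows only the container loop (the pod loop is identical up to types) — but it illustrates the shape of that walk:

```go
// Illustrative sketch of the renumbering walk performed by RenumberLocks above.
// After FreeAllLocks, the manager allocates from an empty pool, so every
// container receives an ID valid under the new num_locks configuration.
func renumberContainerLocks(r *Runtime) error {
	allCtrs, err := r.state.AllContainers()
	if err != nil {
		return err
	}
	for _, ctr := range allCtrs {
		lock, err := r.lockManager.AllocateLock()
		if err != nil {
			return errors.Wrapf(err, "error allocating lock for container %s", ctr.ID())
		}
		ctr.config.LockID = lock.ID()
		// Persist the new ID so the container resolves to the correct
		// lock the next time it is loaded from the database.
		if err := r.state.RewriteContainerConfig(ctr, ctr.config); err != nil {
			return err
		}
	}
	return nil
}
```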
From ca8ae877c12cbfa368d572ef6700d9e4a23d5b11 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Fri, 15 Feb 2019 09:34:37 -0500
Subject: Remove locks from volumes

I was looking into why we have locks in volumes, and I'm fairly
convinced they're unnecessary.

We don't have a state whose accesses we need to guard with locks and
syncs. The only real purpose for the lock was to prevent concurrent
removal of the same volume.

Looking at the code, concurrent removal ought to be fine with a bit of
reordering - one or the other might fail, but we will successfully
evict the volume from the state.

Also, remove the 'prune' bool from RemoveVolume. None of our other API
functions accept it, and it only served to toggle off more verbose
error messages.

Signed-off-by: Matthew Heon
---
 libpod/boltdb_state.go          |  4 ----
 libpod/boltdb_state_internal.go |  7 -------
 libpod/runtime_ctr.go           |  2 +-
 libpod/runtime_renumber.go      |  6 ++++++
 libpod/runtime_volume.go        | 13 +++++--------
 libpod/runtime_volume_linux.go  | 31 +++++++++++++------------------
 libpod/volume.go                |  5 -----
 libpod/volume_internal.go       |  3 ---
 8 files changed, 25 insertions(+), 46 deletions(-)

diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index 6214333b9..25ef5cd0e 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -1369,10 +1369,6 @@ func (s *BoltState) RemoveVolume(volume *Volume) error {
 		return ErrDBClosed
 	}
 
-	if !volume.valid {
-		return ErrVolumeRemoved
-	}
-
 	volName := []byte(volume.Name())
 
 	db, err := s.getDBCon()
diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go
index ea150cfac..3d749849d 100644
--- a/libpod/boltdb_state_internal.go
+++ b/libpod/boltdb_state_internal.go
@@ -348,13 +348,6 @@ func (s *BoltState) getVolumeFromDB(name []byte, volume *Volume, volBkt *bolt.Bu
 		return errors.Wrapf(err, "error unmarshalling volume %s config from DB", string(name))
 	}
 
-	// Get the lock
-	lock, err := s.runtime.lockManager.RetrieveLock(volume.config.LockID)
-	if err != nil {
-		return errors.Wrapf(err, "error retrieving lockfile for volume %s", string(name))
-	}
-	volume.lock = lock
-
 	volume.runtime = s.runtime
 	volume.valid = true
 
diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 1055da75b..2ec8d0795 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -421,7 +421,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
 
 	for _, v := range volumes {
 		if volume, err := runtime.state.Volume(v); err == nil {
-			if err := runtime.removeVolume(ctx, volume, false, true); err != nil && err != ErrNoSuchVolume && err != ErrVolumeBeingUsed {
+			if err := runtime.removeVolume(ctx, volume, false); err != nil && err != ErrNoSuchVolume && err != ErrVolumeBeingUsed {
 				logrus.Errorf("cleanup volume (%s): %v", v, err)
 			}
 		}
diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index b27b5f1a8..d5279934e 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -12,6 +12,12 @@ import (
 // It renders the runtime it is called on, and all container/pod/volume structs
 // from that runtime, unusable, and requires that a new runtime be initialized
 // after it is called.
+// TODO: It would be desirable to make it impossible to call this until all
+// other libpod sessions are dead.
+// Possibly use a read-write file lock, with all non-renumber podmans owning the
+// lock as read, renumber attempting to take a write lock?
+// The alternative is some sort of session tracking, and I don't know how
+// reliable that can be.
 func (r *Runtime) RenumberLocks() error {
 	r.lock.Lock()
 	locked := true
diff --git a/libpod/runtime_volume.go b/libpod/runtime_volume.go
index beae50ac9..11f37ad4b 100644
--- a/libpod/runtime_volume.go
+++ b/libpod/runtime_volume.go
@@ -19,7 +19,7 @@ type VolumeCreateOption func(*Volume) error
 type VolumeFilter func(*Volume) bool
 
 // RemoveVolume removes a volume
-func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force, prune bool) error {
+func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
@@ -35,10 +35,7 @@
 		}
 	}
 
-	v.lock.Lock()
-	defer v.lock.Unlock()
-
-	return r.removeVolume(ctx, v, force, prune)
+	return r.removeVolume(ctx, v, force)
 }
 
 // RemoveVolumes removes a slice of volumes or all with a force bool
@@ -64,7 +61,7 @@
 	}
 
 	for _, vol := range vols {
-		if err := r.RemoveVolume(ctx, vol, force, false); err != nil {
+		if err := r.RemoveVolume(ctx, vol, force); err != nil {
 			return deletedVols, err
 		}
 		logrus.Debugf("removed volume %s", vol.Name())
@@ -168,8 +165,8 @@
 	}
 
 	for _, vol := range vols {
-		if err := r.RemoveVolume(ctx, vol, false, true); err != nil {
-			if err != ErrVolumeBeingUsed {
+		if err := r.RemoveVolume(ctx, vol, false); err != nil {
+			if errors.Cause(err) != ErrVolumeBeingUsed && errors.Cause(err) != ErrVolumeRemoved {
 				pruneErrors = append(pruneErrors, err)
 			}
 			continue
diff --git a/libpod/runtime_volume_linux.go b/libpod/runtime_volume_linux.go
index 0727cfedf..838c0167a 100644
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@@ -67,13 +67,6 @@ func (r *Runtime) newVolume(ctx context.Context, options ...VolumeCreateOption)
 	}
 	volume.config.MountPoint = fullVolPath
 
-	lock, err := r.lockManager.AllocateLock()
-	if err != nil {
-		return nil, errors.Wrapf(err, "error allocating lock for new volume")
-	}
-	volume.lock = lock
-	volume.config.LockID = volume.lock.ID()
-
 	volume.valid = true
 
 	// Add the volume to state
@@ -85,9 +78,12 @@ func (r *Runtime) newVolume(ctx context.Context, options ...VolumeCreateOption)
 }
 
 // removeVolume removes the specified volume from state as well as tearing down its mountpoint and storage
-func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force, prune bool) error {
+func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error {
 	if !v.valid {
-		return ErrNoSuchVolume
+		if ok, _ := r.state.HasVolume(v.Name()); !ok {
+			return nil
+		}
+		return ErrVolumeRemoved
 	}
 
 	deps, err := r.state.VolumeInUse(v)
@@ -95,9 +91,6 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force, prune bool
 		return err
 	}
 	if len(deps) != 0 {
-		if prune {
-			return ErrVolumeBeingUsed
-		}
 		depsStr := strings.Join(deps, ", ")
 		if !force {
 			return errors.Wrapf(ErrVolumeBeingUsed, "volume %s is being used by the following container(s): %s", v.Name(), depsStr)
@@ -112,18 +105,20 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force, prune bool
 		}
 	}
 
-	// Delete the mountpoint path of the volume, that is delete the volume from /var/lib/containers/storage/volumes
-	if err := v.teardownStorage(); err != nil {
-		return errors.Wrapf(err, "error cleaning up volume storage for %q", v.Name())
-	}
+	// Set volume as invalid so it can no longer be used
+	v.valid = false
 
 	// Remove the volume from the state
 	if err := r.state.RemoveVolume(v); err != nil {
 		return errors.Wrapf(err, "error removing volume %s", v.Name())
 	}
 
-	// Set volume as invalid so it can no longer be used
-	v.valid = false
+	// Delete the mountpoint path of the volume, that is delete the volume from /var/lib/containers/storage/volumes
+	if err := v.teardownStorage(); err != nil {
+		return errors.Wrapf(err, "error cleaning up volume storage for %q", v.Name())
+	}
+
+	logrus.Debugf("Removed volume %s", v.Name())
 
 	return nil
 }
diff --git a/libpod/volume.go b/libpod/volume.go
index 026a3bf49..74878b6a4 100644
--- a/libpod/volume.go
+++ b/libpod/volume.go
@@ -1,7 +1,5 @@
 package libpod
 
-import "github.com/containers/libpod/libpod/lock"
-
 // Volume is the type used to create named volumes
 // TODO: all volumes should be created using this and the Volume API
 type Volume struct {
@@ -9,7 +7,6 @@ type Volume struct {
 
 	valid   bool
 	runtime *Runtime
-	lock    lock.Locker
 }
 
 // VolumeConfig holds the volume's config information
@@ -17,8 +14,6 @@ type VolumeConfig struct {
 
 	// Name of the volume
 	Name string `json:"name"`
-	// ID of this volume's lock
-	LockID uint32 `json:"lockID"`
 
 	Labels     map[string]string `json:"labels"`
 	MountPoint string            `json:"mountPoint"`
diff --git a/libpod/volume_internal.go b/libpod/volume_internal.go
index 0de8a2350..35f0ca19d 100644
--- a/libpod/volume_internal.go
+++ b/libpod/volume_internal.go
@@ -18,8 +18,5 @@ func newVolume(runtime *Runtime) (*Volume, error) {
 
 // teardownStorage deletes the volume from volumePath
 func (v *Volume) teardownStorage() error {
-	if !v.valid {
-		return ErrNoSuchVolume
-	}
 	return os.RemoveAll(filepath.Join(v.runtime.config.VolumePath, v.Name()))
 }
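The "bit of reordering" the message refers to is the heart of this change: removeVolume now marks the struct invalid and evicts the volume from the state before touching storage, so a second concurrent remover fails fast rather than racing the teardown. A condensed sketch of that ordering, using the identifiers from the diff above (illustrative, not verbatim code):

```go
// Illustrative sketch of the reordered, lock-free volume removal.
func removeVolumeSketch(r *Runtime, v *Volume) error {
	if !v.valid {
		if ok, _ := r.state.HasVolume(v.Name()); !ok {
			// Already fully removed; treat a double-remove as success.
			return nil
		}
		return ErrVolumeRemoved
	}

	// 1. Invalidate the struct so no further operations use it.
	v.valid = false

	// 2. Evict the volume from the state. A concurrent remover loses the
	//    race here and returns an error, but the volume is still evicted.
	if err := r.state.RemoveVolume(v); err != nil {
		return err
	}

	// 3. Only then tear down the on-disk storage.
	return v.teardownStorage()
}
```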
From a72025d6fd111bfa2dc4e1d22871966fec614f88 Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Fri, 15 Feb 2019 09:59:11 -0500
Subject: Move RenumberLocks into runtime init

We can't do renumbering after init - we need to open a potentially
invalid locks file (too many/too few locks), and then potentially
delete the old locks and make new ones. We need to be in init to bypass
the checks that would otherwise make this impossible.

This leaves us with two choices: make RenumberLocks a separate
entrypoint from NewRuntime, duplicating a lot of configuration load
code (we need to know where the locks live, how many there are, etc.) -
or modify NewRuntime to allow renumbering during it. Previous
experience says the first is not really a viable option and produces
massive code bloat, so the second it is.

Signed-off-by: Matthew Heon
---
 libpod/options.go          | 20 ++++++++++++++++++++
 libpod/runtime.go          | 17 ++++++++++++++---
 libpod/runtime_renumber.go | 29 +++--------------------------
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/libpod/options.go b/libpod/options.go
index 06737776b..4a3dd582d 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -394,6 +394,26 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption {
 	}
 }
 
+// WithRenumber instructs libpod to perform a lock renumbering instead of a
+// normal init.
+// When this is specified, no valid runtime will be returned by NewRuntime.
+// Instead, libpod will reinitialize lock numbers on all pods and containers,
+// shut down the runtime, and return.
+// Renumber is intended to be used from a dedicated entrypoint, where it will
+// handle a changed maximum number of locks and return, with the program
+// exiting after that.
+func WithRenumber() RuntimeOption {
+	return func(rt *Runtime) error {
+		if rt.valid {
+			return ErrRuntimeFinalized
+		}
+
+		rt.doRenumber = true
+
+		return nil
+	}
+}
+
 // Container Creation Options
 
 // WithShmDir sets the directory that should be mounted on /dev/shm.
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 4f5d1e292..bc7c061c4 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -79,7 +79,8 @@ type RuntimeOption func(*Runtime) error
 
 // Runtime is the core libpod runtime
 type Runtime struct {
-	config *RuntimeConfig
+	config *RuntimeConfig
+
 	state          State
 	store          storage.Store
 	storageService *storageService
@@ -88,12 +89,15 @@ type Runtime struct {
 	netPlugin       ocicni.CNIPlugin
 	ociRuntimePath  OCIRuntimePath
 	conmonPath      string
-	valid           bool
-	lock            sync.RWMutex
 	imageRuntime    *image.Runtime
 	firewallBackend firewall.FirewallBackend
 	lockManager     lock.Manager
 	configuredFrom  *runtimeConfiguredFrom
+
+	doRenumber bool
+
+	valid bool
+	lock  sync.RWMutex
 }
 
 // OCIRuntimePath contains information about an OCI runtime.
@@ -753,6 +757,13 @@ func makeRuntime(runtime *Runtime) (err error) {
 			aliveLock.Unlock()
 		}
 	}()
+	// If we're renumbering locks, do it now.
+	// It breaks out of normal runtime init, and will not return a valid
+	// runtime.
+	if runtime.doRenumber {
+		return runtime.renumberLocks()
+	}
+
 	_, err = os.Stat(runtimeAliveFile)
 	if err != nil {
 		// If the file doesn't exist, we need to refresh the state
diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index d5279934e..04abc84d1 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -1,14 +1,11 @@
 package libpod
 
 import (
-	"path/filepath"
-
-	"github.com/containers/storage"
 	"github.com/pkg/errors"
 )
 
-// RenumberLocks reassigns lock numbers for all containers, pods, and volumes in
-// the state.
+// renumberLocks reassigns lock numbers for all containers and pods in the
+// state.
 // It renders the runtime it is called on, and all container/pod/volume structs
 // from that runtime, unusable, and requires that a new runtime be initialized
 // after it is called.
@@ -18,24 +15,7 @@ import (
 // lock as read, renumber attempting to take a write lock?
 // The alternative is some sort of session tracking, and I don't know how
 // reliable that can be.
-func (r *Runtime) RenumberLocks() error {
-	r.lock.Lock()
-	locked := true
-	defer func() {
-		if locked {
-			r.lock.Unlock()
-		}
-	}()
-
-	runtimeAliveLock := filepath.Join(r.config.TmpDir, "alive.lck")
-	aliveLock, err := storage.GetLockfile(runtimeAliveLock)
-	if err != nil {
-		return errors.Wrapf(err, "error acquiring runtime init lock")
-	}
-	aliveLock.Lock()
-	// It's OK to defer until Shutdown() has run, so no need to check locked
-	defer aliveLock.Unlock()
-
+func (r *Runtime) renumberLocks() error {
 	// Start off by deallocating all locks
 	if err := r.lockManager.FreeAllLocks(); err != nil {
 		return err
@@ -76,8 +56,5 @@ func (r *Runtime) RenumberLocks() error {
 		}
 	}
 
-	r.lock.Unlock()
-	locked = false
-
 	return r.Shutdown(false)
 }
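With renumbering folded into init, a dedicated entrypoint reduces to a single constructor call: the renumber happens inside NewRuntime and, at this point in the series, no usable runtime comes back. A minimal sketch of a caller against this revision (errors here is github.com/pkg/errors, as used throughout the series):

```go
// Sketch: as of this patch, NewRuntime with WithRenumber renumbers during
// init and shuts the runtime down itself, so only the error matters.
func renumber() error {
	if _, err := libpod.NewRuntime(libpod.WithRenumber()); err != nil {
		return errors.Wrapf(err, "error renumbering locks")
	}
	return nil
}
```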
From d2b77f8b3397b3ffbbade6e04e37b291105028aa Mon Sep 17 00:00:00 2001
From: Matthew Heon
Date: Mon, 18 Feb 2019 16:20:02 -0500
Subject: Do not make renumber shut down the runtime

The original intent behind the requirement was to ensure that, if two
SHM lock structs were open at the same time, we should not make such a
runtime available to the user, and should clean it up instead.

It turns out that we don't even need to open a second SHM lock struct -
if we get an error mapping the first one due to a lock count mismatch,
we can just delete it, and it cleans itself up when it errors. So
there's no reason not to return a valid runtime.

Signed-off-by: Matthew Heon
---
 cmd/podman/system_renumber.go         | 11 ++++++++---
 docs/podman-system-renumber.1.md      | 29 +++++++++++++++++++++++++++++
 libpod/lock/in_memory_locks.go        |  4 +++-
 libpod/lock/lock.go                   | 11 +++++++++++
 libpod/lock/shm_lock_manager_linux.go |  4 +++-
 libpod/options.go                     | 12 ++++--------
 libpod/runtime.go                     | 16 ++++++++++++++--
 libpod/runtime_renumber.go            |  5 +----
 8 files changed, 73 insertions(+), 19 deletions(-)
 create mode 100644 docs/podman-system-renumber.1.md

diff --git a/cmd/podman/system_renumber.go b/cmd/podman/system_renumber.go
index 7f9436d5d..c8ce628b1 100644
--- a/cmd/podman/system_renumber.go
+++ b/cmd/podman/system_renumber.go
@@ -12,7 +12,8 @@ var (
 	renumberDescription = `
         podman system renumber
 
-        Migrate lock numbers to handle a change in maximum number of locks
+        Migrate lock numbers to handle a change in maximum number of locks.
+        Mandatory after the number of locks in libpod.conf is changed.
 `
 
 	_renumberCommand = &cobra.Command{
@@ -34,11 +35,15 @@ func init() {
 
 func renumberCmd(c *cliconfig.SystemRenumberValues) error {
 	// We need to pass one extra option to NewRuntime.
-	// This will inform the OCI runtime to start
-	_, err := libpodruntime.GetRuntimeRenumber(&c.PodmanCommand)
+	// This will inform the runtime to start a renumber.
+	// That's controlled by the last argument to GetRuntime.
+	r, err := libpodruntime.GetRuntimeRenumber(&c.PodmanCommand)
 	if err != nil {
 		return errors.Wrapf(err, "error renumbering locks")
 	}
 
+	if err := r.Shutdown(false); err != nil {
+		return err
+	}
 	return nil
 }
diff --git a/docs/podman-system-renumber.1.md b/docs/podman-system-renumber.1.md
new file mode 100644
index 000000000..a88640d63
--- /dev/null
+++ b/docs/podman-system-renumber.1.md
@@ -0,0 +1,29 @@
+% podman-system-renumber(1) podman
+
+## NAME
+podman\-system\-renumber - Renumber container locks
+
+## SYNOPSIS
+**podman system renumber**
+
+## DESCRIPTION
+**podman system renumber** renumbers locks used by containers and pods.
+
+Each Podman container and pod is allocated a lock at creation time, up to a maximum number controlled by the **num_locks** parameter in **libpod.conf**.
+
+When all available locks are exhausted, no further containers and pods can be created until some existing containers and pods are removed. This can be avoided by increasing the number of locks available via modifying **libpod.conf** and subsequently running **podman system renumber** to prepare the new locks (and reallocate lock numbers to fit the new struct).
+
+**podman system renumber** must be called after any changes to **num_locks** - failure to do so will result in errors starting Podman as the number of locks available conflicts with the configured number of locks.
+
+**podman system renumber** can also be used to migrate 1.0 and earlier versions of Podman, which used a different locking scheme, to the new locking model. It is not strictly required to do this, but it is highly recommended to do so as deadlocks can occur otherwise.
+
+If possible, avoid calling **podman system renumber** while there are other Podman processes running.
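+
+## EXAMPLE
+
+A typical sequence might be: stop all running Podman containers, raise **num_locks** in **libpod.conf**, then run **podman system renumber** once before returning to normal use.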
+
+## SEE ALSO
+`podman(1)`, `libpod.conf(5)`
+
+## HISTORY
+February 2019, Originally compiled by Matt Heon (mheon at redhat dot com)
diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go
index 7eb22328f..7c9605917 100644
--- a/libpod/lock/in_memory_locks.go
+++ b/libpod/lock/in_memory_locks.go
@@ -90,7 +90,9 @@ func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
 	return m.locks[id], nil
 }
 
-// FreeAllLocks frees all locks
+// FreeAllLocks frees all locks.
+// This function is DANGEROUS. Please read the full comment in lock.go before
+// trying to use it.
 func (m *InMemoryManager) FreeAllLocks() error {
 	for _, lock := range m.locks {
 		lock.allocated = false
diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go
index b96393574..d6841646b 100644
--- a/libpod/lock/lock.go
+++ b/libpod/lock/lock.go
@@ -24,8 +24,19 @@ type Manager interface {
 	// The underlying lock MUST be the same as another lock with the
 	// same UUID.
 	RetrieveLock(id uint32) (Locker, error)
+	// PLEASE READ FULL DESCRIPTION BEFORE USING.
 	// FreeAllLocks frees all allocated locks, in preparation for lock
 	// reallocation.
+	// As this deallocates all presently-held locks, this can be very
+	// dangerous - if there are other processes running that might be
+	// attempting to allocate new locks and free existing locks, we may
+	// encounter races leading to an inconsistent state.
+	// (This is in addition to the fact that FreeAllLocks instantly makes
+	// the state inconsistent simply by using it, and requires a full
+	// lock renumbering to restore consistency!)
+	// In short, this should only be used as part of unit tests, or lock
+	// renumbering, where reasonable guarantees about other processes can be
+	// made.
 	FreeAllLocks() error
 }
diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go
index 187661c8b..8678958ee 100644
--- a/libpod/lock/shm_lock_manager_linux.go
+++ b/libpod/lock/shm_lock_manager_linux.go
@@ -71,7 +71,9 @@ func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
 	return lock, nil
 }
 
-// FreeAllLocks frees all locks in the manager
+// FreeAllLocks frees all locks in the manager.
+// This function is DANGEROUS. Please read the full comment in lock.go before
+// trying to use it.
 func (m *SHMLockManager) FreeAllLocks() error {
 	return m.locks.DeallocateAllSemaphores()
 }
diff --git a/libpod/options.go b/libpod/options.go
index 4a3dd582d..7c37fd65b 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -394,14 +394,10 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption {
 	}
 }
 
-// WithRenumber instructs libpod to perform a lock renumbering instead of a
-// normal init.
-// When this is specified, no valid runtime will be returned by NewRuntime.
-// Instead, libpod will reinitialize lock numbers on all pods and containers,
-// shut down the runtime, and return.
-// Renumber is intended to be used from a dedicated entrypoint, where it will
-// handle a changed maximum number of locks and return, with the program
-// exiting after that.
+// WithRenumber instructs libpod to perform a lock renumbering while
+// initializing. This will handle migrations from early versions of libpod with
+// file locks to newer versions with SHM locking, as well as changes in the
+// number of configured locks.
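+// Unlike the original renumber implementation, the runtime returned when a
+// renumber is requested remains valid and usable; the dedicated renumber
+// entrypoint is expected to shut it down normally once it returns.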
 func WithRenumber() RuntimeOption {
 	return func(rt *Runtime) error {
 		if rt.valid {
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 850df4fc9..94dbf37dd 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -95,8 +95,16 @@ type Runtime struct {
 	lockManager    lock.Manager
 	configuredFrom *runtimeConfiguredFrom
 
+	// doRenumber indicates that the runtime should perform a lock renumber
+	// during initialization.
+	// Once the runtime has been initialized and returned, this variable is
+	// unused.
 	doRenumber bool
 
+	// valid indicates whether the runtime is ready to use.
+	// valid is set to true when a runtime is returned from GetRuntime(),
+	// and remains true until the runtime is shut down (rendering its
+	// storage unusable). When valid is false, the runtime cannot be used.
 	valid bool
 	lock  sync.RWMutex
 }
@@ -784,7 +792,9 @@ func makeRuntime(runtime *Runtime) (err error) {
 		if err != nil {
 			return err
 		}
-	} else if err == syscall.ERANGE && runtime.doRenumber {
+	} else if errors.Cause(err) == syscall.ERANGE && runtime.doRenumber {
+		logrus.Debugf("Number of locks does not match - removing old locks")
+
 		// ERANGE indicates a lock numbering mismatch.
 		// Since we're renumbering, this is not fatal.
 		// Remove the earlier set of locks and recreate.
@@ -806,7 +816,9 @@ func makeRuntime(runtime *Runtime) (err error) {
 	// It breaks out of normal runtime init, and will not return a valid
 	// runtime.
 	if runtime.doRenumber {
-		return runtime.renumberLocks()
+		if err := runtime.renumberLocks(); err != nil {
+			return err
+		}
 	}
 
 	// If we need to refresh the state, do it now - things are guaranteed to
diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index 04abc84d1..125cf0825 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -6,9 +6,6 @@ import (
 // renumberLocks reassigns lock numbers for all containers and pods in the
 // state.
-// It renders the runtime it is called on, and all container/pod/volume structs
-// from that runtime, unusable, and requires that a new runtime be initialized
-// after it is called.
 // TODO: It would be desirable to make it impossible to call this until all
 // other libpod sessions are dead.
 // Possibly use a read-write file lock, with all non-renumber podmans owning the
 // lock as read, renumber attempting to take a write lock?
 // The alternative is some sort of session tracking, and I don't know how
 // reliable that can be.
@@ -56,5 +53,5 @@ func (r *Runtime) renumberLocks() error {
 		}
 	}
 
-	return r.Shutdown(false)
+	return nil
 }
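After this final patch, the flow from the caller's side is: construct a runtime with WithRenumber, let init detect and repair any lock-count mismatch, and shut the returned (now fully valid) runtime down normally. A minimal end-to-end sketch, assuming only the public APIs shown in the series (libpod.NewRuntime, WithRenumber, and Shutdown):

```go
package main

import (
	"fmt"
	"os"

	"github.com/containers/libpod/libpod"
)

func main() {
	// WithRenumber makes NewRuntime free and reallocate every container
	// and pod lock during init; after the last patch above it still
	// returns a valid runtime, which is shut down like any other.
	runtime, err := libpod.NewRuntime(libpod.WithRenumber())
	if err != nil {
		fmt.Fprintf(os.Stderr, "error renumbering locks: %v\n", err)
		os.Exit(1)
	}
	if err := runtime.Shutdown(false); err != nil {
		fmt.Fprintf(os.Stderr, "error shutting down runtime: %v\n", err)
		os.Exit(1)
	}
}
```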