From 7fdd20ae5a1ced1faceab9cb0a6e553343911a0b Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Thu, 14 Feb 2019 17:25:58 -0500
Subject: Add initial version of renumber backend

Renumber is a way of renumbering container locks after the number
of locks available has changed.

For now, renumber only works with containers.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/lock/in_memory_locks.go              |  9 +++++++++
 libpod/lock/lock.go                         |  3 +++
 libpod/lock/shm/shm_lock.c                  | 30 +++++++++++++++++++++++++++++
 libpod/lock/shm/shm_lock.go                 | 16 +++++++++++++++
 libpod/lock/shm/shm_lock.h                  |  1 +
 libpod/lock/shm/shm_lock_test.go            | 28 ++++++++++++++++++++-------
 libpod/lock/shm_lock_manager_linux.go       |  5 +++++
 libpod/lock/shm_lock_manager_unsupported.go |  5 +++++
 8 files changed, 90 insertions(+), 7 deletions(-)

(limited to 'libpod/lock')

diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go
index db8f20e95..7eb22328f 100644
--- a/libpod/lock/in_memory_locks.go
+++ b/libpod/lock/in_memory_locks.go
@@ -89,3 +89,12 @@ func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
 
 	return m.locks[id], nil
 }
+
+// FreeAllLocks frees all locks
+func (m *InMemoryManager) FreeAllLocks() error {
+	for _, lock := range m.locks {
+		lock.allocated = false
+	}
+
+	return nil
+}
diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go
index 1f94171fe..b96393574 100644
--- a/libpod/lock/lock.go
+++ b/libpod/lock/lock.go
@@ -24,6 +24,9 @@ type Manager interface {
 	// The underlying lock MUST be the same as another other lock with the
 	// same UUID.
 	RetrieveLock(id uint32) (Locker, error)
+	// FreeAllLocks frees all allocated locks, in preparation for lock
+	// reallocation.
+	FreeAllLocks() error
 }
 
 // Locker is similar to sync.Locker, but provides a method for freeing the lock
diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c
index 4af58d857..367055823 100644
--- a/libpod/lock/shm/shm_lock.c
+++ b/libpod/lock/shm/shm_lock.c
@@ -407,6 +407,36 @@ int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
   return 0;
 }
 
+// Deallocate all semaphores unconditionally.
+// Returns negative ERRNO values.
+int32_t deallocate_all_semaphores(shm_struct_t *shm) {
+  int ret_code;
+  uint i;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  // Lock the mutex controlling access to our shared memory
+  ret_code = take_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  // Iterate through all bitmaps and reset to unused
+  for (i = 0; i < shm->num_bitmaps; i++) {
+    shm->locks[i].bitmap = 0;
+  }
+
+  // Unlock the allocation control mutex
+  ret_code = release_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  return 0;
+}
+
 // Lock a given semaphore
 // Does not check if the semaphore is allocated - this ensures that, even for
 // removed containers, we can still successfully lock to check status (and
diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go
index 87d28e5c1..e70ea8743 100644
--- a/libpod/lock/shm/shm_lock.go
+++ b/libpod/lock/shm/shm_lock.go
@@ -155,6 +155,22 @@ func (locks *SHMLocks) DeallocateSemaphore(sem uint32) error {
 	return nil
 }
 
+// DeallocateAllSemaphores frees all semaphores so they can be reallocated to
+// other containers and pods.
+func (locks *SHMLocks) DeallocateAllSemaphores() error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	retCode := C.deallocate_all_semaphores(locks.lockStruct)
+	if retCode < 0 {
+		// Negative errno return from C
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
 // LockSemaphore locks the given semaphore.
 // If the semaphore is already locked, LockSemaphore will block until the lock
 // can be acquired.
diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h
index 8e7e23fb7..58e4297e2 100644
--- a/libpod/lock/shm/shm_lock.h
+++ b/libpod/lock/shm/shm_lock.h
@@ -40,6 +40,7 @@ shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code);
 int32_t close_lock_shm(shm_struct_t *shm);
 int64_t allocate_semaphore(shm_struct_t *shm);
 int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index);
+int32_t deallocate_all_semaphores(shm_struct_t *shm);
 int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index);
 int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index);
 
diff --git a/libpod/lock/shm/shm_lock_test.go b/libpod/lock/shm/shm_lock_test.go
index 594eb5d8e..830035881 100644
--- a/libpod/lock/shm/shm_lock_test.go
+++ b/libpod/lock/shm/shm_lock_test.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"os"
 	"runtime"
-	"syscall"
 	"testing"
 	"time"
 
@@ -53,12 +52,8 @@ func runLockTest(t *testing.T, testFunc func(*testing.T, *SHMLocks)) {
 	}
 	defer func() {
 		// Deallocate all locks
-		// Ignore ENOENT (lock is not allocated)
-		var i uint32
-		for i = 0; i < numLocks; i++ {
-			if err := locks.DeallocateSemaphore(i); err != nil && err != syscall.ENOENT {
-				t.Fatalf("Error deallocating semaphore %d: %v", i, err)
-			}
+		if err := locks.DeallocateAllSemaphores(); err != nil {
+			t.Fatalf("Error deallocating semaphores: %v", err)
 		}
 
 		if err := locks.Close(); err != nil {
@@ -212,6 +207,25 @@ func TestAllocateDeallocateCycle(t *testing.T) {
 	})
 }
 
+// Test that DeallocateAllSemaphores deallocates all semaphores
+func TestDeallocateAllSemaphoresDeallocatesAll(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		// Allocate a lock
+		locks1, err := locks.AllocateSemaphore()
+		assert.NoError(t, err)
+
+		// Free all locks
+		err = locks.DeallocateAllSemaphores()
+		assert.NoError(t, err)
+
+		// Allocate another lock
+		locks2, err := locks.AllocateSemaphore()
+		assert.NoError(t, err)
+
+		assert.Equal(t, locks1, locks2)
+	})
+}
+
 // Test that locks actually lock
 func TestLockSemaphoreActuallyLocks(t *testing.T) {
 	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go
index 94dfd7dd7..187661c8b 100644
--- a/libpod/lock/shm_lock_manager_linux.go
+++ b/libpod/lock/shm_lock_manager_linux.go
@@ -71,6 +71,11 @@ func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
 	return lock, nil
 }
 
+// FreeAllLocks frees all locks in the manager
+func (m *SHMLockManager) FreeAllLocks() error {
+	return m.locks.DeallocateAllSemaphores()
+}
+
 // SHMLock is an individual shared memory lock.
 type SHMLock struct {
 	lockID  uint32
diff --git a/libpod/lock/shm_lock_manager_unsupported.go b/libpod/lock/shm_lock_manager_unsupported.go
index cbdb2f7bc..1d6e3fcbd 100644
--- a/libpod/lock/shm_lock_manager_unsupported.go
+++ b/libpod/lock/shm_lock_manager_unsupported.go
@@ -27,3 +27,8 @@ func (m *SHMLockManager) AllocateLock() (Locker, error) {
 func (m *SHMLockManager) RetrieveLock(id string) (Locker, error) {
 	return nil, fmt.Errorf("not supported")
 }
+
+// FreeAllLocks is not supported on this platform
+func (m *SHMLockManager) FreeAllLocks() error {
+	return fmt.Errorf("not supported")
+}
-- 
cgit v1.2.3-54-g00ecf


From f9c548219b6543959dd240618f8a922fdbcabc6d Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Fri, 15 Feb 2019 10:33:59 -0500
Subject: Recreate SHM locks when renumbering on count mismatch

When we're renumbering locks, we're destroying all existing
allocations anyways, so destroying the old lock struct is not a
particularly big deal. Existing long-lived libpod instances will
continue to use the old locks, but that will be solved in a
follow-on.

Also, solve an issue with returning error values in the C code.
There were a few places where we return ERRNO where it was not
set, so make them return actual error codes.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 libpod/lock/shm/shm_lock.c |  6 ++++--
 libpod/runtime.go          | 26 ++++++++++++++++++++------
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'libpod/lock')

diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c
index 367055823..d11fce71a 100644
--- a/libpod/lock/shm/shm_lock.c
+++ b/libpod/lock/shm/shm_lock.c
@@ -203,6 +203,8 @@ shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
 // terminating NULL byte.
 // Returns a valid pointer on success or NULL on error.
 // If an error occurs, negative ERRNO values will be written to error_code.
+// ERANGE is returned for a mismatch between num_locks and the number of locks
+// available in the SHM lock struct.
 shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
   int shm_fd;
   shm_struct_t *shm;
@@ -255,11 +257,11 @@ shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
 
   // Need to check the SHM to see if it's actually our locks
   if (shm->magic != MAGIC) {
-    *error_code = -1 * errno;
+    *error_code = -1 * EBADF;
     goto CLEANUP;
   }
   if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
-    *error_code = -1 * errno;
+    *error_code = -1 * ERANGE;
     goto CLEANUP;
   }
 
diff --git a/libpod/runtime.go b/libpod/runtime.go
index bc7c061c4..850df4fc9 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -7,6 +7,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"sync"
+	"syscall"
 
 	"github.com/BurntSushi/toml"
 	is "github.com/containers/image/storage"
@@ -757,12 +758,6 @@ func makeRuntime(runtime *Runtime) (err error) {
 			aliveLock.Unlock()
 		}
 	}()
-	// If we're renumbering locks, do it now.
-	// It breaks out of normal runtime init, and will not return a valid
-	// runtime.
-	if runtime.doRenumber {
-		return runtime.renumberLocks()
-	}
 
 	_, err = os.Stat(runtimeAliveFile)
 	if err != nil {
@@ -785,6 +780,18 @@ func makeRuntime(runtime *Runtime) (err error) {
 	manager, err := lock.OpenSHMLockManager(lockPath, runtime.config.NumLocks)
 	if err != nil {
 		if os.IsNotExist(errors.Cause(err)) {
+			manager, err = lock.NewSHMLockManager(lockPath, runtime.config.NumLocks)
+			if err != nil {
+				return err
+			}
+		} else if err == syscall.ERANGE && runtime.doRenumber {
+			// ERANGE indicates a lock numbering mismatch.
+			// Since we're renumbering, this is not fatal.
+			// Remove the earlier set of locks and recreate.
+			if err := os.Remove(filepath.Join("/dev/shm", lockPath)); err != nil {
+				return errors.Wrapf(err, "error removing libpod locks file %s", lockPath)
+			}
+
 			manager, err = lock.NewSHMLockManager(lockPath, runtime.config.NumLocks)
 			if err != nil {
 				return err
@@ -795,6 +802,13 @@ func makeRuntime(runtime *Runtime) (err error) {
 	}
 	runtime.lockManager = manager
 
+	// If we're renumbering locks, do it now.
+	// It breaks out of normal runtime init, and will not return a valid
+	// runtime.
+	if runtime.doRenumber {
+		return runtime.renumberLocks()
+	}
+
 	// If we need to refresh the state, do it now - things are guaranteed to
 	// be set up by now.
 	if doRefresh {
-- 
cgit v1.2.3-54-g00ecf


From d2b77f8b3397b3ffbbade6e04e37b291105028aa Mon Sep 17 00:00:00 2001
From: Matthew Heon <matthew.heon@pm.me>
Date: Mon, 18 Feb 2019 16:20:02 -0500
Subject: Do not make renumber shut down the runtime

The original intent behind the requirement was to ensure that, if
two SHM lock structs were open at the same time, we should not
make such a runtime available to the user, and should clean it up
instead.

It turns out that we don't even need to open a second SHM lock
struct - if we get an error mapping the first one due to a lock
count mismatch, we can just delete it, and it cleans itself up
when it errors. So there's no reason not to return a valid
runtime.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
---
 cmd/podman/system_renumber.go         | 11 ++++++++---
 docs/podman-system-renumber.1.md      | 29 +++++++++++++++++++++++++++++
 libpod/lock/in_memory_locks.go        |  4 +++-
 libpod/lock/lock.go                   | 11 +++++++++++
 libpod/lock/shm_lock_manager_linux.go |  4 +++-
 libpod/options.go                     | 12 ++++--------
 libpod/runtime.go                     | 16 ++++++++++++++--
 libpod/runtime_renumber.go            |  5 +----
 8 files changed, 73 insertions(+), 19 deletions(-)
 create mode 100644 docs/podman-system-renumber.1.md

(limited to 'libpod/lock')

diff --git a/cmd/podman/system_renumber.go b/cmd/podman/system_renumber.go
index 7f9436d5d..c8ce628b1 100644
--- a/cmd/podman/system_renumber.go
+++ b/cmd/podman/system_renumber.go
@@ -12,7 +12,8 @@ var (
 	renumberDescription = `
         podman system renumber
 
-        Migrate lock numbers to handle a change in maximum number of locks
+        Migrate lock numbers to handle a change in maximum number of locks.
+        Mandatory after the number of locks in libpod.conf is changed.
 `
 
 	_renumberCommand = &cobra.Command{
@@ -34,11 +35,15 @@ func init() {
 
 func renumberCmd(c *cliconfig.SystemRenumberValues) error {
 	// We need to pass one extra option to NewRuntime.
-	// This will inform the OCI runtime to start
-	_, err := libpodruntime.GetRuntimeRenumber(&c.PodmanCommand)
+	// This will inform libpod to perform a lock renumber during init.
+	// That's controlled by the last argument to GetRuntime.
+	r, err := libpodruntime.GetRuntimeRenumber(&c.PodmanCommand)
 	if err != nil {
 		return errors.Wrapf(err, "error renumbering locks")
 	}
+	if err := r.Shutdown(false); err != nil {
+		return err
+	}
 
 	return nil
 }
diff --git a/docs/podman-system-renumber.1.md b/docs/podman-system-renumber.1.md
new file mode 100644
index 000000000..a88640d63
--- /dev/null
+++ b/docs/podman-system-renumber.1.md
@@ -0,0 +1,29 @@
+% podman-system-renumber(1) podman
+
+## NAME
+podman\-system\-renumber - Renumber container locks
+
+## SYNOPSIS
+**podman system renumber**
+
+## DESCRIPTION
+**podman system renumber** renumbers locks used by containers and pods.
+
+Each Podman container and pod is allocated a lock at creation time, up to a maximum number controlled by the **num_locks** parameter in **libpod.conf**.
+
+When all available locks are exhausted, no further containers and pods can be created until some existing containers and pods are removed. This can be avoided by increasing the number of locks available via modifying **libpod.conf** and subsequently running **podman system renumber** to prepare the new locks (and reallocate lock numbers to fit the new struct).
+
+**podman system renumber** must be called after any changes to **num_locks** - failure to do so will result in errors starting Podman as the number of locks available conflicts with the configured number of locks.
+
+**podman system renumber** can also be used to migrate 1.0 and earlier versions of Podman, which used a different locking scheme, to the new locking model. It is not strictly required to do this, but it is highly recommended to do so as deadlocks can occur otherwise.
+
+If possible, avoid calling **podman system renumber** while there are other Podman processes running.
+
+## EXAMPLE
+**podman system renumber**
+
+## SEE ALSO
+`podman(1)`, `libpod.conf(5)`
+
+## HISTORY
+February 2019, Originally compiled by Matt Heon (mheon at redhat dot com)
diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go
index 7eb22328f..7c9605917 100644
--- a/libpod/lock/in_memory_locks.go
+++ b/libpod/lock/in_memory_locks.go
@@ -90,7 +90,9 @@ func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
 	return m.locks[id], nil
 }
 
-// FreeAllLocks frees all locks
+// FreeAllLocks frees all locks.
+// This function is DANGEROUS. Please read the full comment in lock.go before
+// trying to use it.
 func (m *InMemoryManager) FreeAllLocks() error {
 	for _, lock := range m.locks {
 		lock.allocated = false
diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go
index b96393574..d6841646b 100644
--- a/libpod/lock/lock.go
+++ b/libpod/lock/lock.go
@@ -24,8 +24,19 @@ type Manager interface {
 	// The underlying lock MUST be the same as another other lock with the
 	// same UUID.
 	RetrieveLock(id uint32) (Locker, error)
+	// PLEASE READ FULL DESCRIPTION BEFORE USING.
 	// FreeAllLocks frees all allocated locks, in preparation for lock
 	// reallocation.
+	// As this deallocates all presently-held locks, this can be very
+	// dangerous - if there are other processes running that might be
+	// attempting to allocate new locks and free existing locks, we may
+	// encounter races leading to an inconsistent state.
+	// (This is in addition to the fact that FreeAllLocks instantly makes
+	// the state inconsistent simply by using it, and requires a full
+	// lock renumbering to restore consistency!).
+	// In short, this should only be used as part of unit tests, or lock
+	// renumbering, where reasonable guarantees about other processes can be
+	// made.
 	FreeAllLocks() error
 }
 
diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go
index 187661c8b..8678958ee 100644
--- a/libpod/lock/shm_lock_manager_linux.go
+++ b/libpod/lock/shm_lock_manager_linux.go
@@ -71,7 +71,9 @@ func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
 	return lock, nil
 }
 
-// FreeAllLocks frees all locks in the manager
+// FreeAllLocks frees all locks in the manager.
+// This function is DANGEROUS. Please read the full comment in lock.go before
+// trying to use it.
 func (m *SHMLockManager) FreeAllLocks() error {
 	return m.locks.DeallocateAllSemaphores()
 }
diff --git a/libpod/options.go b/libpod/options.go
index 4a3dd582d..7c37fd65b 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -394,14 +394,10 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption {
 	}
 }
 
-// WithRenumber instructs libpod to perform a lock renumbering instead of a
-// normal init.
-// When this is specified, no valid runtime will be returned by NewRuntime.
-// Instead, libpod will reinitialize lock numbers on all pods and containers,
-// shut down the runtime, and return.
-// Renumber is intended to be used from a dedicated entrypoint, where it will
-// handle a changed maximum number of locks and return, with the program
-// exiting after that.
+// WithRenumber instructs libpod to perform a lock renumbering while
+// initializing. This will handle migrations from early versions of libpod with
+// file locks to newer versions with SHM locking, as well as changes in the
+// number of configured locks.
 func WithRenumber() RuntimeOption {
 	return func(rt *Runtime) error {
 		if rt.valid {
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 850df4fc9..94dbf37dd 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -95,8 +95,16 @@ type Runtime struct {
 	lockManager     lock.Manager
 	configuredFrom  *runtimeConfiguredFrom
 
+	// doRenumber indicates that the runtime should perform a lock renumber
+	// during initialization.
+	// Once the runtime has been initialized and returned, this variable is
+	// unused.
 	doRenumber bool
 
+	// valid indicates whether the runtime is ready to use.
+	// valid is set to true when a runtime is returned from GetRuntime(),
+	// and remains true until the runtime is shut down (rendering its
+	// storage unusable). When valid is false, the runtime cannot be used.
 	valid bool
 	lock  sync.RWMutex
 }
@@ -784,7 +792,9 @@ func makeRuntime(runtime *Runtime) (err error) {
 			if err != nil {
 				return err
 			}
-		} else if err == syscall.ERANGE && runtime.doRenumber {
+		} else if errors.Cause(err) == syscall.ERANGE && runtime.doRenumber {
+			logrus.Debugf("Number of locks does not match - removing old locks")
+
 			// ERANGE indicates a lock numbering mismatch.
 			// Since we're renumbering, this is not fatal.
 			// Remove the earlier set of locks and recreate.
@@ -806,7 +816,9 @@ func makeRuntime(runtime *Runtime) (err error) {
 	// It breaks out of normal runtime init, and will not return a valid
 	// runtime.
 	if runtime.doRenumber {
-		return runtime.renumberLocks()
+		if err := runtime.renumberLocks(); err != nil {
+			return err
+		}
 	}
 
 	// If we need to refresh the state, do it now - things are guaranteed to
diff --git a/libpod/runtime_renumber.go b/libpod/runtime_renumber.go
index 04abc84d1..125cf0825 100644
--- a/libpod/runtime_renumber.go
+++ b/libpod/runtime_renumber.go
@@ -6,9 +6,6 @@ import (
 
 // renumberLocks reassigns lock numbers for all containers and pods in the
 // state.
-// It renders the runtime it is called on, and all container/pod/volume structs
-// from that runtime, unusable, and requires that a new runtime be initialized
-// after it is called.
 // TODO: It would be desirable to make it impossible to call this until all
 // other libpod sessions are dead.
 // Possibly use a read-write file lock, with all non-renumber podmans owning the
@@ -56,5 +53,5 @@ func (r *Runtime) renumberLocks() error {
 		}
 	}
 
-	return r.Shutdown(false)
+	return nil
 }
-- 
cgit v1.2.3-54-g00ecf