author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2019-01-04 10:41:05 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-04 10:41:05 -0800 |
commit | bf5f779331870d31863c486619daae3fcea458eb (patch) | |
tree | aafcacc17883a8df4734bed0aadbaca59a9882fe /libpod/lock | |
parent | 6868b5aa1444404113bc6a4582203fbbf89490c2 (diff) | |
parent | 56c5c89408f89fc3733692786d66eb44133b2c59 (diff) | |
download | podman-bf5f779331870d31863c486619daae3fcea458eb.tar.gz podman-bf5f779331870d31863c486619daae3fcea458eb.tar.bz2 podman-bf5f779331870d31863c486619daae3fcea458eb.zip |
Merge pull request #1235 from mheon/shm_locking
SHM locking for Libpod
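For orientation, below is a minimal sketch (not part of this patch) of how a caller might drive the new `lock.Manager`/`Locker` interfaces added under `libpod/lock`, using the test-only in-memory backend. Production code would use `NewSHMLockManager` instead; the import path follows the repository layout at this commit, and `doWork` is a placeholder invented for the example.

```go
// Sketch only: exercises the Manager/Locker interfaces via the in-memory
// backend from in_memory_locks.go. Error handling is trimmed to panics.
package main

import (
	"fmt"

	"github.com/containers/libpod/libpod/lock"
)

func doWork() {} // placeholder critical section

func main() {
	manager, err := lock.NewInMemoryManager(32)
	if err != nil {
		panic(err)
	}

	// Allocate a lock and record its ID so it can be retrieved later.
	l, err := manager.AllocateLock()
	if err != nil {
		panic(err)
	}
	id := l.ID()

	// Later (or, with the SHM backend, from another process) the same
	// lock is retrieved by ID.
	same, err := manager.RetrieveLock(id)
	if err != nil {
		panic(err)
	}

	same.Lock()
	doWork()
	same.Unlock()

	// Free returns the lock for reuse once its container is removed.
	if err := l.Free(); err != nil {
		panic(err)
	}
	fmt.Println("freed lock", id)
}
```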
Diffstat (limited to 'libpod/lock')
-rw-r--r-- | libpod/lock/in_memory_locks.go | 91
-rw-r--r-- | libpod/lock/lock.go | 58
-rw-r--r-- | libpod/lock/shm/shm_lock.c | 452
-rw-r--r-- | libpod/lock/shm/shm_lock.go | 216
-rw-r--r-- | libpod/lock/shm/shm_lock.h | 46
-rw-r--r-- | libpod/lock/shm/shm_lock_test.go | 278
-rw-r--r-- | libpod/lock/shm_lock_manager_linux.go | 94
-rw-r--r-- | libpod/lock/shm_lock_manager_unsupported.go | 29
8 files changed, 1264 insertions, 0 deletions
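The lower-level `shm` package added under `libpod/lock/shm` can also be exercised directly, in roughly the order `shm_lock_test.go` does. The sketch below is illustrative only: it is Linux-only, requires cgo, and the segment name `/libpod_example` is invented for the example (segment names must begin with `/` and contain no other `/`).

```go
// Sketch only: create an SHM lock segment, take and release one semaphore,
// then tear the segment down the way the tests do.
package main

import (
	"fmt"
	"os"

	"github.com/containers/libpod/libpod/lock/shm"
)

func main() {
	// The requested count is rounded up to a multiple of the bitmap size
	// (32 for a uint32 bitmap), so 50 becomes 64 here.
	locks, err := shm.CreateSHMLock("/libpod_example", 50)
	if err != nil {
		panic(err)
	}
	fmt.Println("locks available:", locks.GetMaxLocks())

	sem, err := locks.AllocateSemaphore()
	if err != nil {
		panic(err)
	}

	if err := locks.LockSemaphore(sem); err != nil {
		panic(err)
	}
	// ... critical section ...
	if err := locks.UnlockSemaphore(sem); err != nil {
		panic(err)
	}

	if err := locks.DeallocateSemaphore(sem); err != nil {
		panic(err)
	}

	// Close unmaps the segment (intended for testing); the backing file in
	// /dev/shm must be removed separately, as the test suite does.
	if err := locks.Close(); err != nil {
		panic(err)
	}
	os.RemoveAll("/dev/shm/libpod_example")
}
```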
diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go new file mode 100644 index 000000000..db8f20e95 --- /dev/null +++ b/libpod/lock/in_memory_locks.go @@ -0,0 +1,91 @@ +package lock + +import ( + "sync" + + "github.com/pkg/errors" +) + +// Mutex holds a single mutex and whether it has been allocated. +type Mutex struct { + id uint32 + lock sync.Mutex + allocated bool +} + +// ID retrieves the ID of the mutex +func (m *Mutex) ID() uint32 { + return m.id +} + +// Lock locks the mutex +func (m *Mutex) Lock() { + m.lock.Lock() +} + +// Unlock unlocks the mutex +func (m *Mutex) Unlock() { + m.lock.Unlock() +} + +// Free deallocates the mutex to allow its reuse +func (m *Mutex) Free() error { + m.allocated = false + + return nil +} + +// InMemoryManager is a lock manager that allocates and retrieves local-only +// locks - that is, they are not multiprocess. This lock manager is intended +// purely for unit and integration testing and should not be used in production +// deployments. +type InMemoryManager struct { + locks []*Mutex + numLocks uint32 + localLock sync.Mutex +} + +// NewInMemoryManager creates a new in-memory lock manager with the given number +// of locks. +func NewInMemoryManager(numLocks uint32) (Manager, error) { + if numLocks == 0 { + return nil, errors.Errorf("must provide a non-zero number of locks!") + } + + manager := new(InMemoryManager) + manager.numLocks = numLocks + manager.locks = make([]*Mutex, numLocks) + + var i uint32 + for i = 0; i < numLocks; i++ { + lock := new(Mutex) + lock.id = i + manager.locks[i] = lock + } + + return manager, nil +} + +// AllocateLock allocates a lock from the manager. +func (m *InMemoryManager) AllocateLock() (Locker, error) { + m.localLock.Lock() + defer m.localLock.Unlock() + + for _, lock := range m.locks { + if !lock.allocated { + lock.allocated = true + return lock, nil + } + } + + return nil, errors.Errorf("all locks have been allocated") +} + +// RetrieveLock retrieves a lock from the manager. +func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) { + if id >= m.numLocks { + return nil, errors.Errorf("given lock ID %d is too large - this manager only supports lock indexes up to %d", id, m.numLocks-1) + } + + return m.locks[id], nil +} diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go new file mode 100644 index 000000000..73c1fdcf7 --- /dev/null +++ b/libpod/lock/lock.go @@ -0,0 +1,58 @@ +package lock + +// Manager provides an interface for allocating multiprocess locks. +// Locks returned by Manager MUST be multiprocess - allocating a lock in +// process A and retrieving that lock's ID in process B must return handles for +// the same lock, and locking the lock in A should exclude B from the lock until +// it is unlocked in A. +// All locks must be identified by a UUID (retrieved with Locker's ID() method). +// All locks with a given UUID must refer to the same underlying lock, and it +// must be possible to retrieve the lock given its UUID. +// Each UUID should refer to a unique underlying lock. +// Calls to AllocateLock() must return a unique, unallocated UUID. +// AllocateLock() must fail once all available locks have been allocated. +// Locks are returned to use by calls to Free(), and can subsequently be +// reallocated. +type Manager interface { + // AllocateLock returns an unallocated lock. + // It is guaranteed that the same lock will not be returned again by + // AllocateLock until the returned lock has Free() called on it. 
+ // If all available locks are allocated, AllocateLock will return an + // error. + AllocateLock() (Locker, error) + // RetrieveLock retrieves a lock given its UUID. + // The underlying lock MUST be the same as another other lock with the + // same UUID. + RetrieveLock(id uint32) (Locker, error) +} + +// Locker is similar to sync.Locker, but provides a method for freeing the lock +// to allow its reuse. +// All Locker implementations must maintain mutex semantics - the lock only +// allows one caller in the critical section at a time. +// All locks with the same ID must refer to the same underlying lock, even +// if they are within multiple processes. +type Locker interface { + // ID retrieves the lock's ID. + // ID is guaranteed to uniquely identify the lock within the + // Manager - that is, calling RetrieveLock with this ID will return + // another instance of the same lock. + ID() uint32 + // Lock locks the lock. + // This call MUST block until it successfully acquires the lock or + // encounters a fatal error. + // All errors must be handled internally, as they are not returned. For + // the most part, panicking should be appropriate. + Lock() + // Unlock unlocks the lock. + // All errors must be handled internally, as they are not returned. For + // the most part, panicking should be appropriate. + // This includes unlocking locks which are already unlocked. + Unlock() + // Free deallocates the underlying lock, allowing its reuse by other + // pods and containers. + // The lock MUST still be usable after a Free() - some libpod instances + // may still retain Container structs with the old lock. This simply + // advises the manager that the lock may be reallocated. + Free() error +} diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c new file mode 100644 index 000000000..4af58d857 --- /dev/null +++ b/libpod/lock/shm/shm_lock.c @@ -0,0 +1,452 @@ +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "shm_lock.h" + +// Compute the size of the SHM struct +static size_t compute_shm_size(uint32_t num_bitmaps) { + return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t)); +} + +// Take the given mutex. +// Handles exceptional conditions, including a mutex locked by a process that +// died holding it. +// Returns 0 on success, or positive errno on failure. +static int take_mutex(pthread_mutex_t *mutex) { + int ret_code; + + do { + ret_code = pthread_mutex_lock(mutex); + } while(ret_code == EAGAIN); + + if (ret_code == EOWNERDEAD) { + // The previous owner of the mutex died while holding it + // Take it for ourselves + ret_code = pthread_mutex_consistent(mutex); + if (ret_code != 0) { + // Someone else may have gotten here first and marked the state consistent + // However, the mutex could also be invalid. + // Fail here instead of looping back to trying to lock the mutex. + return ret_code; + } + } else if (ret_code != 0) { + return ret_code; + } + + return 0; +} + +// Release the given mutex. +// Returns 0 on success, or positive errno on failure. +static int release_mutex(pthread_mutex_t *mutex) { + int ret_code; + + do { + ret_code = pthread_mutex_unlock(mutex); + } while(ret_code == EAGAIN); + + if (ret_code != 0) { + return ret_code; + } + + return 0; +} + +// Set up an SHM segment holding locks for libpod. +// num_locks must not be 0. +// Path is the path to the SHM segment. 
It must begin with a single / and +// container no other / characters, and be at most 255 characters including +// terminating NULL byte. +// Returns a valid pointer on success or NULL on error. +// If an error occurs, negative ERRNO values will be written to error_code. +shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) { + int shm_fd, i, j, ret_code; + uint32_t num_bitmaps; + size_t shm_size; + shm_struct_t *shm; + pthread_mutexattr_t attr; + + // If error_code doesn't point to anything, we can't reasonably return errors + // So fail immediately + if (error_code == NULL) { + return NULL; + } + + // We need a nonzero number of locks + if (num_locks == 0) { + *error_code = -1 * EINVAL; + return NULL; + } + + if (path == NULL) { + *error_code = -1 * EINVAL; + return NULL; + } + + // Calculate the number of bitmaps required + num_bitmaps = num_locks / BITMAP_SIZE; + if (num_locks % BITMAP_SIZE != 0) { + // The actual number given is not an even multiple of our bitmap size + // So round up + num_bitmaps += 1; + } + + // Calculate size of the shm segment + shm_size = compute_shm_size(num_bitmaps); + + // Create a new SHM segment for us + shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600); + if (shm_fd < 0) { + *error_code = -1 * errno; + return NULL; + } + + // Increase its size to what we need + ret_code = ftruncate(shm_fd, shm_size); + if (ret_code < 0) { + *error_code = -1 * errno; + goto CLEANUP_UNLINK; + } + + // Map the shared memory in + shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + if (shm == MAP_FAILED) { + *error_code = -1 * errno; + goto CLEANUP_UNLINK; + } + + // We have successfully mapped the memory, now initialize the region + shm->magic = MAGIC; + shm->unused = 0; + shm->num_locks = num_bitmaps * BITMAP_SIZE; + shm->num_bitmaps = num_bitmaps; + + // Create an initializer for our pthread mutexes + ret_code = pthread_mutexattr_init(&attr); + if (ret_code != 0) { + *error_code = -1 * ret_code; + goto CLEANUP_UNMAP; + } + + // Set mutexes to pshared - multiprocess-safe + ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + if (ret_code != 0) { + *error_code = -1 * ret_code; + goto CLEANUP_FREEATTR; + } + + // Set mutexes to robust - if a process dies while holding a mutex, we'll get + // a special error code on the next attempt to lock it. + // This should prevent panicing processes from leaving the state unusable. + ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST); + if (ret_code != 0) { + *error_code = -1 * ret_code; + goto CLEANUP_FREEATTR; + } + + // Initialize the mutex that protects the bitmaps using the mutex attributes + ret_code = pthread_mutex_init(&(shm->segment_lock), &attr); + if (ret_code != 0) { + *error_code = -1 * ret_code; + goto CLEANUP_FREEATTR; + } + + // Initialize all bitmaps to 0 initially + // And initialize all semaphores they use + for (i = 0; i < num_bitmaps; i++) { + shm->locks[i].bitmap = 0; + for (j = 0; j < BITMAP_SIZE; j++) { + // Initialize each mutex + ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr); + if (ret_code != 0) { + *error_code = -1 * ret_code; + goto CLEANUP_FREEATTR; + } + } + } + + // Close the file descriptor, we're done with it + // Ignore errors, it's ok if we leak a single FD and this should only run once + close(shm_fd); + + // Destroy the pthread initializer attribute. + // Again, ignore errors, this will only run once and we might leak a tiny bit + // of memory at worst. 
+ pthread_mutexattr_destroy(&attr); + + return shm; + + // Cleanup after an error + CLEANUP_FREEATTR: + pthread_mutexattr_destroy(&attr); + CLEANUP_UNMAP: + munmap(shm, shm_size); + CLEANUP_UNLINK: + close(shm_fd); + shm_unlink(path); + return NULL; +} + +// Open an existing SHM segment holding libpod locks. +// num_locks is the number of locks that will be configured in the SHM segment. +// num_locks cannot be 0. +// Path is the path to the SHM segment. It must begin with a single / and +// container no other / characters, and be at most 255 characters including +// terminating NULL byte. +// Returns a valid pointer on success or NULL on error. +// If an error occurs, negative ERRNO values will be written to error_code. +shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) { + int shm_fd; + shm_struct_t *shm; + size_t shm_size; + uint32_t num_bitmaps; + + if (error_code == NULL) { + return NULL; + } + + // We need a nonzero number of locks + if (num_locks == 0) { + *error_code = -1 * EINVAL; + return NULL; + } + + if (path == NULL) { + *error_code = -1 * EINVAL; + return NULL; + } + + // Calculate the number of bitmaps required + num_bitmaps = num_locks / BITMAP_SIZE; + if (num_locks % BITMAP_SIZE != 0) { + num_bitmaps += 1; + } + + // Calculate size of the shm segment + shm_size = compute_shm_size(num_bitmaps); + + shm_fd = shm_open(path, O_RDWR, 0600); + if (shm_fd < 0) { + *error_code = -1 * errno; + return NULL; + } + + // Map the shared memory in + shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + if (shm == MAP_FAILED) { + *error_code = -1 * errno; + } + + // Ignore errors, it's ok if we leak a single FD since this only runs once + close(shm_fd); + + // Check if we successfully mmap'd + if (shm == MAP_FAILED) { + return NULL; + } + + // Need to check the SHM to see if it's actually our locks + if (shm->magic != MAGIC) { + *error_code = -1 * errno; + goto CLEANUP; + } + if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) { + *error_code = -1 * errno; + goto CLEANUP; + } + + return shm; + + CLEANUP: + munmap(shm, shm_size); + return NULL; +} + +// Close an open SHM lock struct, unmapping the backing memory. +// The given shm_struct_t will be rendered unusable as a result. +// On success, 0 is returned. On failure, negative ERRNO values are returned. +int32_t close_lock_shm(shm_struct_t *shm) { + int ret_code; + size_t shm_size; + + // We can't unmap null... 
+ if (shm == NULL) { + return -1 * EINVAL; + } + + shm_size = compute_shm_size(shm->num_bitmaps); + + ret_code = munmap(shm, shm_size); + + if (ret_code != 0) { + return -1 * errno; + } + + return 0; +} + +// Allocate the first available semaphore +// Returns a positive integer guaranteed to be less than UINT32_MAX on success, +// or negative errno values on failure +// On sucess, the returned integer is the number of the semaphore allocated +int64_t allocate_semaphore(shm_struct_t *shm) { + int ret_code, i; + bitmap_t test_map; + int64_t sem_number, num_within_bitmap; + + if (shm == NULL) { + return -1 * EINVAL; + } + + // Lock the semaphore controlling access to our shared memory + ret_code = take_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // Loop through our bitmaps to search for one that is not full + for (i = 0; i < shm->num_bitmaps; i++) { + if (shm->locks[i].bitmap != 0xFFFFFFFF) { + test_map = 0x1; + num_within_bitmap = 0; + while (test_map != 0) { + if ((test_map & shm->locks[i].bitmap) == 0) { + // Compute the number of the semaphore we are allocating + sem_number = (BITMAP_SIZE * i) + num_within_bitmap; + // OR in the bitmap + shm->locks[i].bitmap = shm->locks[i].bitmap | test_map; + + // Clear the mutex + ret_code = release_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // Return the semaphore we've allocated + return sem_number; + } + test_map = test_map << 1; + num_within_bitmap++; + } + // We should never fall through this loop + // TODO maybe an assert() here to panic if we do? + } + } + + // Clear the mutex + ret_code = release_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // All bitmaps are full + // We have no available semaphores, report allocation failure + return -1 * ENOSPC; +} + +// Deallocate a given semaphore +// Returns 0 on success, negative ERRNO values on failure +int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) { + bitmap_t test_map; + int bitmap_index, index_in_bitmap, ret_code, i; + + if (shm == NULL) { + return -1 * EINVAL; + } + + // Check if the lock index is valid + if (sem_index >= shm->num_locks) { + return -1 * EINVAL; + } + + bitmap_index = sem_index / BITMAP_SIZE; + index_in_bitmap = sem_index % BITMAP_SIZE; + + // This should never happen if the sem_index test above succeeded, but better + // safe than sorry + if (bitmap_index >= shm->num_bitmaps) { + return -1 * EFAULT; + } + + test_map = 0x1 << index_in_bitmap; + + // Lock the mutex controlling access to our shared memory + ret_code = take_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // Check if the semaphore is allocated + if ((test_map & shm->locks[bitmap_index].bitmap) == 0) { + ret_code = release_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + return -1 * ENOENT; + } + + // The semaphore is allocated, clear it + // Invert the bitmask we used to test to clear the bit + test_map = ~test_map; + shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map; + + ret_code = release_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + return 0; +} + +// Lock a given semaphore +// Does not check if the semaphore is allocated - this ensures that, even for +// removed containers, we can still successfully lock to check status (and +// subsequently realize they have been removed). 
+// Returns 0 on success, -1 on failure +int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) { + int bitmap_index, index_in_bitmap, ret_code; + + if (shm == NULL) { + return -1 * EINVAL; + } + + if (sem_index >= shm->num_locks) { + return -1 * EINVAL; + } + + bitmap_index = sem_index / BITMAP_SIZE; + index_in_bitmap = sem_index % BITMAP_SIZE; + + return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); +} + +// Unlock a given semaphore +// Does not check if the semaphore is allocated - this ensures that, even for +// removed containers, we can still successfully lock to check status (and +// subsequently realize they have been removed). +// Returns 0 on success, -1 on failure +int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) { + int bitmap_index, index_in_bitmap, ret_code; + + if (shm == NULL) { + return -1 * EINVAL; + } + + if (sem_index >= shm->num_locks) { + return -1 * EINVAL; + } + + bitmap_index = sem_index / BITMAP_SIZE; + index_in_bitmap = sem_index % BITMAP_SIZE; + + return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); +} diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go new file mode 100644 index 000000000..3372a8c71 --- /dev/null +++ b/libpod/lock/shm/shm_lock.go @@ -0,0 +1,216 @@ +package shm + +// #cgo LDFLAGS: -lrt -lpthread +// #include <stdlib.h> +// #include "shm_lock.h" +// const uint32_t bitmap_size_c = BITMAP_SIZE; +import "C" + +import ( + "runtime" + "syscall" + "unsafe" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var ( + // BitmapSize is the size of the bitmap used when managing SHM locks. + // an SHM lock manager's max locks will be rounded up to a multiple of + // this number. + BitmapSize uint32 = uint32(C.bitmap_size_c) +) + +// SHMLocks is a struct enabling POSIX semaphore locking in a shared memory +// segment. +type SHMLocks struct { // nolint + lockStruct *C.shm_struct_t + maxLocks uint32 + valid bool +} + +// CreateSHMLock sets up a shared-memory segment holding a given number of POSIX +// semaphores, and returns a struct that can be used to operate on those locks. +// numLocks must not be 0, and may be rounded up to a multiple of the bitmap +// size used by the underlying implementation. +func CreateSHMLock(path string, numLocks uint32) (*SHMLocks, error) { + if numLocks == 0 { + return nil, errors.Wrapf(syscall.EINVAL, "number of locks must greater than 0 0") + } + + locks := new(SHMLocks) + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + var errCode C.int + lockStruct := C.setup_lock_shm(cPath, C.uint32_t(numLocks), &errCode) + if lockStruct == nil { + // We got a null pointer, so something errored + return nil, syscall.Errno(-1 * errCode) + } + + locks.lockStruct = lockStruct + locks.maxLocks = uint32(lockStruct.num_locks) + locks.valid = true + + logrus.Debugf("Initialized SHM lock manager at path %s", path) + + return locks, nil +} + +// OpenSHMLock opens an existing shared-memory segment holding a given number of +// POSIX semaphores. numLocks must match the number of locks the shared memory +// segment was created with. 
+func OpenSHMLock(path string, numLocks uint32) (*SHMLocks, error) { + if numLocks == 0 { + return nil, errors.Wrapf(syscall.EINVAL, "number of locks must greater than 0") + } + + locks := new(SHMLocks) + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + var errCode C.int + lockStruct := C.open_lock_shm(cPath, C.uint32_t(numLocks), &errCode) + if lockStruct == nil { + // We got a null pointer, so something errored + return nil, syscall.Errno(-1 * errCode) + } + + locks.lockStruct = lockStruct + locks.maxLocks = numLocks + locks.valid = true + + return locks, nil +} + +// GetMaxLocks returns the maximum number of locks in the SHM +func (locks *SHMLocks) GetMaxLocks() uint32 { + return locks.maxLocks +} + +// Close closes an existing shared-memory segment. +// The segment will be rendered unusable after closing. +// WARNING: If you Close() while there are still locks locked, these locks may +// fail to release, causing a program freeze. +// Close() is only intended to be used while testing the locks. +func (locks *SHMLocks) Close() error { + if !locks.valid { + return errors.Wrapf(syscall.EINVAL, "locks have already been closed") + } + + locks.valid = false + + retCode := C.close_lock_shm(locks.lockStruct) + if retCode < 0 { + // Negative errno returned + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// AllocateSemaphore allocates a semaphore from a shared-memory segment for use +// by a container or pod. +// Returns the index of the semaphore that was allocated. +// Allocations past the maximum number of locks given when the SHM segment was +// created will result in an error, and no semaphore will be allocated. +func (locks *SHMLocks) AllocateSemaphore() (uint32, error) { + if !locks.valid { + return 0, errors.Wrapf(syscall.EINVAL, "locks have already been closed") + } + + // This returns a U64, so we have the full u32 range available for + // semaphore indexes, and can still return error codes. + retCode := C.allocate_semaphore(locks.lockStruct) + if retCode < 0 { + // Negative errno returned + return 0, syscall.Errno(-1 * retCode) + } + + return uint32(retCode), nil +} + +// DeallocateSemaphore frees a semaphore in a shared-memory segment so it can be +// reallocated to another container or pod. +// The given semaphore must be already allocated, or an error will be returned. +func (locks *SHMLocks) DeallocateSemaphore(sem uint32) error { + if !locks.valid { + return errors.Wrapf(syscall.EINVAL, "locks have already been closed") + } + + if sem > locks.maxLocks { + return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks) + } + + retCode := C.deallocate_semaphore(locks.lockStruct, C.uint32_t(sem)) + if retCode < 0 { + // Negative errno returned + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// LockSemaphore locks the given semaphore. +// If the semaphore is already locked, LockSemaphore will block until the lock +// can be acquired. +// There is no requirement that the given semaphore be allocated. +// This ensures that attempts to lock a container after it has been deleted, +// but before the caller has queried the database to determine this, will +// succeed. 
+func (locks *SHMLocks) LockSemaphore(sem uint32) error { + if !locks.valid { + return errors.Wrapf(syscall.EINVAL, "locks have already been closed") + } + + if sem > locks.maxLocks { + return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks) + } + + // For pthread mutexes, we have to guarantee lock and unlock happen in + // the same thread. + runtime.LockOSThread() + + retCode := C.lock_semaphore(locks.lockStruct, C.uint32_t(sem)) + if retCode < 0 { + // Negative errno returned + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// UnlockSemaphore unlocks the given semaphore. +// Unlocking a semaphore that is already unlocked with return EBUSY. +// There is no requirement that the given semaphore be allocated. +// This ensures that attempts to lock a container after it has been deleted, +// but before the caller has queried the database to determine this, will +// succeed. +func (locks *SHMLocks) UnlockSemaphore(sem uint32) error { + if !locks.valid { + return errors.Wrapf(syscall.EINVAL, "locks have already been closed") + } + + if sem > locks.maxLocks { + return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks) + } + + retCode := C.unlock_semaphore(locks.lockStruct, C.uint32_t(sem)) + if retCode < 0 { + // Negative errno returned + return syscall.Errno(-1 * retCode) + } + + // For pthread mutexes, we have to guarantee lock and unlock happen in + // the same thread. + // OK if we take multiple locks - UnlockOSThread() won't actually unlock + // until the number of calls equals the number of calls to + // LockOSThread() + runtime.UnlockOSThread() + + return nil +} diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h new file mode 100644 index 000000000..8e7e23fb7 --- /dev/null +++ b/libpod/lock/shm/shm_lock.h @@ -0,0 +1,46 @@ +#ifndef shm_locks_h_ +#define shm_locks_h_ + +#include <pthread.h> +#include <stdint.h> + +// Magic number to ensure we open the right SHM segment +#define MAGIC 0x87D1 + +// Type for our bitmaps +typedef uint32_t bitmap_t; + +// bitmap size +#define BITMAP_SIZE (sizeof(bitmap_t) * 8) + +// Struct to hold a single bitmap and associated locks +typedef struct lock_group { + bitmap_t bitmap; + pthread_mutex_t locks[BITMAP_SIZE]; +} lock_group_t; + +// Struct to hold our SHM locks. +// Unused is required to be 0 in the current implementation. If we ever make +// changes to this structure in the future, this will be repurposed as a version +// field. 
+typedef struct shm_struct { + uint16_t magic; + uint16_t unused; + pthread_mutex_t segment_lock; + uint32_t num_bitmaps; + uint32_t num_locks; + lock_group_t locks[]; +} shm_struct_t; + +static size_t compute_shm_size(uint32_t num_bitmaps); +static int take_mutex(pthread_mutex_t *mutex); +static int release_mutex(pthread_mutex_t *mutex); +shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code); +shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code); +int32_t close_lock_shm(shm_struct_t *shm); +int64_t allocate_semaphore(shm_struct_t *shm); +int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index); +int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index); +int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index); + +#endif diff --git a/libpod/lock/shm/shm_lock_test.go b/libpod/lock/shm/shm_lock_test.go new file mode 100644 index 000000000..0f3a96cca --- /dev/null +++ b/libpod/lock/shm/shm_lock_test.go @@ -0,0 +1,278 @@ +package shm + +import ( + "fmt" + "os" + "runtime" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// All tests here are in the same process, which somewhat limits their utility +// The big intent of this package it multiprocess locking, which is really hard +// to test without actually having multiple processes... +// We can at least verify that the locks work within the local process. + +var ( + // 4 * BITMAP_SIZE to ensure we have to traverse bitmaps + numLocks = 4 * BitmapSize +) + +const lockPath = "/libpod_test" + +// We need a test main to ensure that the SHM is created before the tests run +func TestMain(m *testing.M) { + shmLock, err := CreateSHMLock(lockPath, numLocks) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating SHM for tests: %v\n", err) + os.Exit(-1) + } + + // Close the SHM - every subsequent test will reopen + if err := shmLock.Close(); err != nil { + fmt.Fprintf(os.Stderr, "Error closing SHM locks: %v\n", err) + os.Exit(-1) + } + + exitCode := m.Run() + + // We need to remove the SHM segment to clean up after ourselves + os.RemoveAll("/dev/shm/libpod_lock") + + os.Exit(exitCode) +} + +func runLockTest(t *testing.T, testFunc func(*testing.T, *SHMLocks)) { + locks, err := OpenSHMLock(lockPath, numLocks) + if err != nil { + t.Fatalf("Error opening locks: %v", err) + } + defer func() { + // Deallocate all locks + // Ignore ENOENT (lock is not allocated) + var i uint32 + for i = 0; i < numLocks; i++ { + if err := locks.DeallocateSemaphore(i); err != nil && err != syscall.ENOENT { + t.Fatalf("Error deallocating semaphore %d: %v", i, err) + } + } + + if err := locks.Close(); err != nil { + t.Fatalf("Error closing locks: %v", err) + } + }() + + success := t.Run("locks", func(t *testing.T) { + testFunc(t, locks) + }) + if !success { + t.Fail() + } +} + +// Test that creating an SHM with a bad size rounds up to a good size +func TestCreateNewSHMBadSizeRoundsUp(t *testing.T) { + // Odd number, not a power of 2, should never be a word size on a system + lock, err := CreateSHMLock("/test1", 7) + assert.NoError(t, err) + + assert.Equal(t, lock.GetMaxLocks(), BitmapSize) + + if err := lock.Close(); err != nil { + t.Fatalf("Error closing locks: %v", err) + } +} + +// Test that creating an SHM with 0 size fails +func TestCreateNewSHMZeroSize(t *testing.T) { + _, err := CreateSHMLock("/test2", 0) + assert.Error(t, err) +} + +// Test that deallocating an unallocated lock errors +func TestDeallocateUnallocatedLockErrors(t 
*testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + err := locks.DeallocateSemaphore(0) + assert.Error(t, err) + }) +} + +// Test that unlocking an unlocked lock fails +func TestUnlockingUnlockedLockFails(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + err := locks.UnlockSemaphore(0) + assert.Error(t, err) + }) +} + +// Test that locking and double-unlocking fails +func TestDoubleUnlockFails(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + err := locks.LockSemaphore(0) + assert.NoError(t, err) + + err = locks.UnlockSemaphore(0) + assert.NoError(t, err) + + err = locks.UnlockSemaphore(0) + assert.Error(t, err) + }) +} + +// Test allocating - lock - unlock - deallocate cycle, single lock +func TestLockLifecycleSingleLock(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + sem, err := locks.AllocateSemaphore() + require.NoError(t, err) + + err = locks.LockSemaphore(sem) + assert.NoError(t, err) + + err = locks.UnlockSemaphore(sem) + assert.NoError(t, err) + + err = locks.DeallocateSemaphore(sem) + assert.NoError(t, err) + }) +} + +// Test allocate two locks returns different locks +func TestAllocateTwoLocksGetsDifferentLocks(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + sem1, err := locks.AllocateSemaphore() + assert.NoError(t, err) + + sem2, err := locks.AllocateSemaphore() + assert.NoError(t, err) + + assert.NotEqual(t, sem1, sem2) + }) +} + +// Test allocate all locks successful and all are unique +func TestAllocateAllLocksSucceeds(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + sems := make(map[uint32]bool) + var i uint32 + for i = 0; i < numLocks; i++ { + sem, err := locks.AllocateSemaphore() + assert.NoError(t, err) + + // Ensure the allocate semaphore is unique + _, ok := sems[sem] + assert.False(t, ok) + + sems[sem] = true + } + }) +} + +// Test allocating more than the given max fails +func TestAllocateTooManyLocksFails(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + // Allocate all locks + var i uint32 + for i = 0; i < numLocks; i++ { + _, err := locks.AllocateSemaphore() + assert.NoError(t, err) + } + + // Try and allocate one more + _, err := locks.AllocateSemaphore() + assert.Error(t, err) + }) +} + +// Test allocating max locks, deallocating one, and then allocating again succeeds +func TestAllocateDeallocateCycle(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + // Allocate all locks + var i uint32 + for i = 0; i < numLocks; i++ { + _, err := locks.AllocateSemaphore() + assert.NoError(t, err) + } + + // Now loop through again, deallocating and reallocating. + // Each time we free 1 semaphore, allocate again, and make sure + // we get the same semaphore back. + var j uint32 + for j = 0; j < numLocks; j++ { + err := locks.DeallocateSemaphore(j) + assert.NoError(t, err) + + newSem, err := locks.AllocateSemaphore() + assert.NoError(t, err) + assert.Equal(t, j, newSem) + } + }) +} + +// Test that locks actually lock +func TestLockSemaphoreActuallyLocks(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + // This entire test is very ugly - lots of sleeps to try and get + // things to occur in the right order. + // It also doesn't even exercise the multiprocess nature of the + // locks. + + // Get the current time + startTime := time.Now() + + // Start a goroutine to take the lock and then release it after + // a second. 
+ go func() { + err := locks.LockSemaphore(0) + assert.NoError(t, err) + + time.Sleep(1 * time.Second) + + err = locks.UnlockSemaphore(0) + assert.NoError(t, err) + }() + + // Sleep for a quarter of a second to give the goroutine time + // to kick off and grab the lock + time.Sleep(250 * time.Millisecond) + + // Take the lock + err := locks.LockSemaphore(0) + assert.NoError(t, err) + + // Get the current time + endTime := time.Now() + + // Verify that at least 1 second has passed since start + duration := endTime.Sub(startTime) + assert.True(t, duration.Seconds() > 1.0) + }) +} + +// Test that locking and unlocking two semaphores succeeds +// Ensures that runtime.LockOSThread() is doing its job +func TestLockAndUnlockTwoSemaphore(t *testing.T) { + runLockTest(t, func(t *testing.T, locks *SHMLocks) { + err := locks.LockSemaphore(0) + assert.NoError(t, err) + + err = locks.LockSemaphore(1) + assert.NoError(t, err) + + err = locks.UnlockSemaphore(1) + assert.NoError(t, err) + + // Now yield scheduling + // To try and get us on another OS thread + runtime.Gosched() + + // And unlock the last semaphore + // If we are in a different OS thread, this should fail. + // However, runtime.UnlockOSThread() should guarantee we are not + err = locks.UnlockSemaphore(0) + assert.NoError(t, err) + }) +} diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go new file mode 100644 index 000000000..3e8f4f3d2 --- /dev/null +++ b/libpod/lock/shm_lock_manager_linux.go @@ -0,0 +1,94 @@ +// +build linux + +package lock + +import ( + "github.com/containers/libpod/libpod/lock/shm" +) + +// SHMLockManager manages shared memory locks. +type SHMLockManager struct { + locks *shm.SHMLocks +} + +// NewSHMLockManager makes a new SHMLockManager with the given number of locks. +// Due to the underlying implementation, the exact number of locks created may +// be greater than the number given here. +func NewSHMLockManager(path string, numLocks uint32) (Manager, error) { + locks, err := shm.CreateSHMLock(path, numLocks) + if err != nil { + return nil, err + } + + manager := new(SHMLockManager) + manager.locks = locks + + return manager, nil +} + +// OpenSHMLockManager opens an existing SHMLockManager with the given number of +// locks. +func OpenSHMLockManager(path string, numLocks uint32) (Manager, error) { + locks, err := shm.OpenSHMLock(path, numLocks) + if err != nil { + return nil, err + } + + manager := new(SHMLockManager) + manager.locks = locks + + return manager, nil +} + +// AllocateLock allocates a new lock from the manager. +func (m *SHMLockManager) AllocateLock() (Locker, error) { + semIndex, err := m.locks.AllocateSemaphore() + if err != nil { + return nil, err + } + + lock := new(SHMLock) + lock.lockID = semIndex + lock.manager = m + + return lock, nil +} + +// RetrieveLock retrieves a lock from the manager given its ID. +func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) { + lock := new(SHMLock) + lock.lockID = id + lock.manager = m + + return lock, nil +} + +// SHMLock is an individual shared memory lock. +type SHMLock struct { + lockID uint32 + manager *SHMLockManager +} + +// ID returns the ID of the lock. +func (l *SHMLock) ID() uint32 { + return l.lockID +} + +// Lock acquires the lock. +func (l *SHMLock) Lock() { + if err := l.manager.locks.LockSemaphore(l.lockID); err != nil { + panic(err.Error()) + } +} + +// Unlock releases the lock. 
+func (l *SHMLock) Unlock() { + if err := l.manager.locks.UnlockSemaphore(l.lockID); err != nil { + panic(err.Error()) + } +} + +// Free releases the lock, allowing it to be reused. +func (l *SHMLock) Free() error { + return l.manager.locks.DeallocateSemaphore(l.lockID) +} diff --git a/libpod/lock/shm_lock_manager_unsupported.go b/libpod/lock/shm_lock_manager_unsupported.go new file mode 100644 index 000000000..a1340fcd1 --- /dev/null +++ b/libpod/lock/shm_lock_manager_unsupported.go @@ -0,0 +1,29 @@ +// +build !linux + +package lock + +import "fmt" + +// SHMLockManager is a shared memory lock manager. +// It is not supported on non-Unix platforms. +type SHMLockManager struct{} + +// NewSHMLockManager is not supported on this platform +func NewSHMLockManager(numLocks uint32) (Manager, error) { + return nil, fmt.Errorf("not supported") +} + +// OpenSHMLockManager is not supported on this platform +func OpenSHMLockManager(numLocks uint32) (Manager, error) { + return nil, fmt.Errorf("not supported") +} + +// AllocateLock is not supported on this platform +func (m *SHMLockManager) AllocateLock() (Locker, error) { + return nil, fmt.Errorf("not supported") +} + +// RetrieveLock is not supported on this platform +func (m *SHMLockManager) RetrieveLock(id string) (Locker, error) { + return nil, fmt.Errorf("not supported") +} |
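As an aside on the C side, `allocate_semaphore()`, `deallocate_semaphore()`, and `lock_semaphore()` all map a semaphore index onto a `lock_group_t` bitmap and a bit (and mutex) within that group. The standalone Go snippet below, not part of the patch, re-expresses that arithmetic and the first-free-bit scan for illustration.

```go
// Sketch only: mirrors the index arithmetic and bitmap scan in shm_lock.c.
package main

import "fmt"

// bitmapSize mirrors BITMAP_SIZE in shm_lock.h: 8 * sizeof(uint32) = 32.
const bitmapSize = 32

// firstFreeBit re-expresses the scan in allocate_semaphore(): return the
// first clear bit in a bitmap, or -1 if every lock in the group is taken.
func firstFreeBit(bitmap uint32) int {
	for bit := 0; bit < bitmapSize; bit++ {
		if bitmap&(uint32(1)<<uint(bit)) == 0 {
			return bit
		}
	}
	return -1
}

func main() {
	// lock_semaphore() and deallocate_semaphore() split a semaphore index
	// into a lock_group_t index and a bit/mutex index within that group.
	semIndex := uint32(37)
	bitmapIndex := semIndex / bitmapSize   // which lock_group_t
	indexInBitmap := semIndex % bitmapSize // which bit / mutex within it
	fmt.Printf("semaphore %d -> bitmap %d, bit %d\n", semIndex, bitmapIndex, indexInBitmap)

	// With the three lowest bits already allocated (0x7), the next free slot
	// is bit 3; allocate_semaphore() would then OR 1<<3 into the bitmap.
	fmt.Println("first free bit in 0x7:", firstFreeBit(0x7))
}
```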