4 files changed, 992 insertions, 0 deletions
diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c
new file mode 100644
index 000000000..4af58d857
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.c
@@ -0,0 +1,452 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "shm_lock.h"
+
+// Compute the size of the SHM struct
+static size_t compute_shm_size(uint32_t num_bitmaps) {
+  return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t));
+}
+
+// Take the given mutex.
+// Handles exceptional conditions, including a mutex locked by a process that
+// died holding it.
+// Returns 0 on success, or positive errno on failure.
+static int take_mutex(pthread_mutex_t *mutex) {
+  int ret_code;
+
+  do {
+    ret_code = pthread_mutex_lock(mutex);
+  } while(ret_code == EAGAIN);
+
+  if (ret_code == EOWNERDEAD) {
+    // The previous owner of the mutex died while holding it
+    // Take it for ourselves
+    ret_code = pthread_mutex_consistent(mutex);
+    if (ret_code != 0) {
+      // Someone else may have gotten here first and marked the state consistent
+      // However, the mutex could also be invalid.
+      // Fail here instead of looping back to trying to lock the mutex.
+      return ret_code;
+    }
+  } else if (ret_code != 0) {
+    return ret_code;
+  }
+
+  return 0;
+}
+
+// Release the given mutex.
+// Returns 0 on success, or positive errno on failure.
+static int release_mutex(pthread_mutex_t *mutex) {
+  int ret_code;
+
+  do {
+    ret_code = pthread_mutex_unlock(mutex);
+  } while(ret_code == EAGAIN);
+
+  if (ret_code != 0) {
+    return ret_code;
+  }
+
+  return 0;
+}
+
+// Set up an SHM segment holding locks for libpod.
+// num_locks must not be 0.
+// Path is the path to the SHM segment. It must begin with a single / and
+// container no other / characters, and be at most 255 characters including
+// terminating NULL byte.
+// Returns a valid pointer on success or NULL on error.
+// If an error occurs, negative ERRNO values will be written to error_code.
+shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
+  int shm_fd, i, j, ret_code;
+  uint32_t num_bitmaps;
+  size_t shm_size;
+  shm_struct_t *shm;
+  pthread_mutexattr_t attr;
+
+  // If error_code doesn't point to anything, we can't reasonably return errors
+  // So fail immediately
+  if (error_code == NULL) {
+    return NULL;
+  }
+
+  // We need a nonzero number of locks
+  if (num_locks == 0) {
+    *error_code = -1 * EINVAL;
+    return NULL;
+  }
+
+  if (path == NULL) {
+    *error_code = -1 * EINVAL;
+    return NULL;
+  }
+
+  // Calculate the number of bitmaps required
+  num_bitmaps = num_locks / BITMAP_SIZE;
+  if (num_locks % BITMAP_SIZE != 0) {
+    // The actual number given is not an even multiple of our bitmap size
+    // So round up
+    num_bitmaps += 1;
+  }
+
+  // Calculate size of the shm segment
+  shm_size = compute_shm_size(num_bitmaps);
+
+  // Create a new SHM segment for us
+  shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
+  if (shm_fd < 0) {
+    *error_code = -1 * errno;
+    return NULL;
+  }
+
+  // Increase its size to what we need
+  ret_code = ftruncate(shm_fd, shm_size);
+  if (ret_code < 0) {
+    *error_code = -1 * errno;
+    goto CLEANUP_UNLINK;
+  }
+
+  // Map the shared memory in
+  shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+  if (shm == MAP_FAILED) {
+    *error_code = -1 * errno;
+    goto CLEANUP_UNLINK;
+  }
+
+  // We have successfully mapped the memory, now initialize the region
+  shm->magic = MAGIC;
+  shm->unused = 0;
+  shm->num_locks = num_bitmaps * BITMAP_SIZE;
+  shm->num_bitmaps = num_bitmaps;
+
+  // Create an initializer for our pthread mutexes
+  ret_code = pthread_mutexattr_init(&attr);
+  if (ret_code != 0) {
+    *error_code = -1 * ret_code;
+    goto CLEANUP_UNMAP;
+  }
+
+  // Set mutexes to pshared - multiprocess-safe
+  ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+  if (ret_code != 0) {
+    *error_code = -1 * ret_code;
+    goto CLEANUP_FREEATTR;
+  }
+
+  // Set mutexes to robust - if a process dies while holding a mutex, we'll get
+  // a special error code on the next attempt to lock it.
+  // This should prevent panicing processes from leaving the state unusable.
+  ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
+  if (ret_code != 0) {
+    *error_code = -1 * ret_code;
+    goto CLEANUP_FREEATTR;
+  }
+
+  // Initialize the mutex that protects the bitmaps using the mutex attributes
+  ret_code = pthread_mutex_init(&(shm->segment_lock), &attr);
+  if (ret_code != 0) {
+    *error_code = -1 * ret_code;
+    goto CLEANUP_FREEATTR;
+  }
+
+  // Initialize all bitmaps to 0 initially
+  // And initialize all semaphores they use
+  for (i = 0; i < num_bitmaps; i++) {
+    shm->locks[i].bitmap = 0;
+    for (j = 0; j < BITMAP_SIZE; j++) {
+      // Initialize each mutex
+      ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr);
+      if (ret_code != 0) {
+	*error_code = -1 * ret_code;
+	goto CLEANUP_FREEATTR;
+      }
+    }
+  }
+
+  // Close the file descriptor, we're done with it
+  // Ignore errors, it's ok if we leak a single FD and this should only run once
+  close(shm_fd);
+
+  // Destroy the pthread initializer attribute.
+  // Again, ignore errors, this will only run once and we might leak a tiny bit
+  // of memory at worst.
+  pthread_mutexattr_destroy(&attr);
+
+  return shm;
+
+  // Cleanup after an error
+ CLEANUP_FREEATTR:
+  pthread_mutexattr_destroy(&attr);
+ CLEANUP_UNMAP:
+  munmap(shm, shm_size);
+ CLEANUP_UNLINK:
+  close(shm_fd);
+  shm_unlink(path);
+  return NULL;
+}
+
+// Open an existing SHM segment holding libpod locks.
+// num_locks is the number of locks that will be configured in the SHM segment.
+// num_locks cannot be 0.
+// Path is the path to the SHM segment. It must begin with a single / and
+// container no other / characters, and be at most 255 characters including
+// terminating NULL byte.
+// Returns a valid pointer on success or NULL on error.
+// If an error occurs, negative ERRNO values will be written to error_code.
+shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
+  int shm_fd;
+  shm_struct_t *shm;
+  size_t shm_size;
+  uint32_t num_bitmaps;
+
+  if (error_code == NULL) {
+    return NULL;
+  }
+
+  // We need a nonzero number of locks
+  if (num_locks == 0) {
+    *error_code = -1 * EINVAL;
+    return NULL;
+  }
+
+  if (path == NULL) {
+    *error_code = -1 * EINVAL;
+    return NULL;
+  }
+
+  // Calculate the number of bitmaps required
+  num_bitmaps = num_locks / BITMAP_SIZE;
+  if (num_locks % BITMAP_SIZE != 0) {
+    num_bitmaps += 1;
+  }
+
+  // Calculate size of the shm segment
+  shm_size = compute_shm_size(num_bitmaps);
+
+  shm_fd = shm_open(path, O_RDWR, 0600);
+  if (shm_fd < 0) {
+    *error_code = -1 * errno;
+    return NULL;
+  }
+
+  // Map the shared memory in
+  shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+  if (shm == MAP_FAILED) {
+    *error_code = -1 * errno;
+  }
+
+  // Ignore errors, it's ok if we leak a single FD since this only runs once
+  close(shm_fd);
+
+  // Check if we successfully mmap'd
+  if (shm == MAP_FAILED) {
+    return NULL;
+  }
+
+  // Need to check the SHM to see if it's actually our locks
+  if (shm->magic != MAGIC) {
+    *error_code = -1 * errno;
+    goto CLEANUP;
+  }
+  if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
+    *error_code = -1 * errno;
+    goto CLEANUP;
+  }
+
+  return shm;
+
+ CLEANUP:
+  munmap(shm, shm_size);
+  return NULL;
+}
+
+// Close an open SHM lock struct, unmapping the backing memory.
+// The given shm_struct_t will be rendered unusable as a result.
+// On success, 0 is returned. On failure, negative ERRNO values are returned.
+int32_t close_lock_shm(shm_struct_t *shm) {
+  int ret_code;
+  size_t shm_size;
+
+  // We can't unmap null...
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  shm_size = compute_shm_size(shm->num_bitmaps);
+
+  ret_code = munmap(shm, shm_size);
+
+  if (ret_code != 0) {
+    return -1 * errno;
+  }
+
+  return 0;
+}
+
+// Allocate the first available semaphore
+// Returns a positive integer guaranteed to be less than UINT32_MAX on success,
+// or negative errno values on failure
+// On sucess, the returned integer is the number of the semaphore allocated
+int64_t allocate_semaphore(shm_struct_t *shm) {
+  int ret_code, i;
+  bitmap_t test_map;
+  int64_t sem_number, num_within_bitmap;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  // Lock the semaphore controlling access to our shared memory
+  ret_code = take_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  // Loop through our bitmaps to search for one that is not full
+  for (i = 0; i < shm->num_bitmaps; i++) {
+    if (shm->locks[i].bitmap != 0xFFFFFFFF) {
+      test_map = 0x1;
+      num_within_bitmap = 0;
+      while (test_map != 0) {
+	if ((test_map & shm->locks[i].bitmap) == 0) {
+	  // Compute the number of the semaphore we are allocating
+	  sem_number = (BITMAP_SIZE * i) + num_within_bitmap;
+	  // OR in the bitmap
+	  shm->locks[i].bitmap = shm->locks[i].bitmap | test_map;
+
+	  // Clear the mutex
+	  ret_code = release_mutex(&(shm->segment_lock));
+	  if (ret_code != 0) {
+	    return -1 * ret_code;
+	  }
+
+	  // Return the semaphore we've allocated
+	  return sem_number;
+	}
+	test_map = test_map << 1;
+	num_within_bitmap++;
+      }
+      // We should never fall through this loop
+      // TODO maybe an assert() here to panic if we do?
+    }
+  }
+
+  // Clear the mutex
+  ret_code = release_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  // All bitmaps are full
+  // We have no available semaphores, report allocation failure
+  return -1 * ENOSPC;
+}
+
+// Deallocate a given semaphore
+// Returns 0 on success, negative ERRNO values on failure
+int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+  bitmap_t test_map;
+  int bitmap_index, index_in_bitmap, ret_code, i;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  // Check if the lock index is valid
+  if (sem_index >= shm->num_locks) {
+    return -1 * EINVAL;
+  }
+
+  bitmap_index = sem_index / BITMAP_SIZE;
+  index_in_bitmap = sem_index % BITMAP_SIZE;
+
+  // This should never happen if the sem_index test above succeeded, but better
+  // safe than sorry
+  if (bitmap_index >= shm->num_bitmaps) {
+    return -1 * EFAULT;
+  }
+
+  test_map = 0x1 << index_in_bitmap;
+
+  // Lock the mutex controlling access to our shared memory
+  ret_code = take_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  // Check if the semaphore is allocated
+  if ((test_map & shm->locks[bitmap_index].bitmap) == 0) {
+    ret_code = release_mutex(&(shm->segment_lock));
+    if (ret_code != 0) {
+      return -1 * ret_code;
+    }
+
+    return -1 * ENOENT;
+  }
+
+  // The semaphore is allocated, clear it
+  // Invert the bitmask we used to test to clear the bit
+  test_map = ~test_map;
+  shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map;
+
+  ret_code = release_mutex(&(shm->segment_lock));
+  if (ret_code != 0) {
+    return -1 * ret_code;
+  }
+
+  return 0;
+}
+
+// Lock a given semaphore
+// Does not check if the semaphore is allocated - this ensures that, even for
+// removed containers, we can still successfully lock to check status (and
+// subsequently realize they have been removed).
+// Returns 0 on success, -1 on failure
+int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+  int bitmap_index, index_in_bitmap, ret_code;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  if (sem_index >= shm->num_locks) {
+    return -1 * EINVAL;
+  }
+
+  bitmap_index = sem_index / BITMAP_SIZE;
+  index_in_bitmap = sem_index % BITMAP_SIZE;
+
+  return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
+}
+
+// Unlock a given semaphore
+// Does not check if the semaphore is allocated - this ensures that, even for
+// removed containers, we can still successfully lock to check status (and
+// subsequently realize they have been removed).
+// Returns 0 on success, -1 on failure
+int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+  int bitmap_index, index_in_bitmap, ret_code;
+
+  if (shm == NULL) {
+    return -1 * EINVAL;
+  }
+
+  if (sem_index >= shm->num_locks) {
+    return -1 * EINVAL;
+  }
+
+  bitmap_index = sem_index / BITMAP_SIZE;
+  index_in_bitmap = sem_index % BITMAP_SIZE;
+
+  return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
+}
diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go
new file mode 100644
index 000000000..87d28e5c1
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.go
@@ -0,0 +1,216 @@
+package shm
+
+// #cgo LDFLAGS: -lrt -lpthread
+// #include <stdlib.h>
+// #include "shm_lock.h"
+// const uint32_t bitmap_size_c = BITMAP_SIZE;
+import "C"
+
+import (
+	"runtime"
+	"syscall"
+	"unsafe"
+
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	// BitmapSize is the size of the bitmap used when managing SHM locks.
+	// an SHM lock manager's max locks will be rounded up to a multiple of
+	// this number.
+	BitmapSize uint32 = uint32(C.bitmap_size_c)
+)
+
+// SHMLocks is a struct enabling POSIX semaphore locking in a shared memory
+// segment.
+type SHMLocks struct { // nolint
+	lockStruct *C.shm_struct_t
+	maxLocks   uint32
+	valid      bool
+}
+
+// CreateSHMLock sets up a shared-memory segment holding a given number of POSIX
+// semaphores, and returns a struct that can be used to operate on those locks.
+// numLocks must not be 0, and may be rounded up to a multiple of the bitmap
+// size used by the underlying implementation.
+func CreateSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
+	if numLocks == 0 {
+		return nil, errors.Wrapf(syscall.EINVAL, "number of locks must be greater than 0")
+	}
+
+	locks := new(SHMLocks)
+
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+
+	var errCode C.int
+	lockStruct := C.setup_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
+	if lockStruct == nil {
+		// We got a null pointer, so something errored
+		return nil, errors.Wrapf(syscall.Errno(-1*errCode), "failed to create %d locks in %s", numLocks, path)
+	}
+
+	locks.lockStruct = lockStruct
+	locks.maxLocks = uint32(lockStruct.num_locks)
+	locks.valid = true
+
+	logrus.Debugf("Initialized SHM lock manager at path %s", path)
+
+	return locks, nil
+}
+
+// OpenSHMLock opens an existing shared-memory segment holding a given number of
+// POSIX semaphores. numLocks must match the number of locks the shared memory
+// segment was created with.
+func OpenSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
+	if numLocks == 0 {
+		return nil, errors.Wrapf(syscall.EINVAL, "number of locks must be greater than 0")
+	}
+
+	locks := new(SHMLocks)
+
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+
+	var errCode C.int
+	lockStruct := C.open_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
+	if lockStruct == nil {
+		// We got a null pointer, so something errored
+		return nil, errors.Wrapf(syscall.Errno(-1*errCode), "failed to open %d locks in %s", numLocks, path)
+	}
+
+	locks.lockStruct = lockStruct
+	locks.maxLocks = numLocks
+	locks.valid = true
+
+	return locks, nil
+}
+
+// GetMaxLocks returns the maximum number of locks in the SHM
+func (locks *SHMLocks) GetMaxLocks() uint32 {
+	return locks.maxLocks
+}
+
+// Close closes an existing shared-memory segment.
+// The segment will be rendered unusable after closing.
+// WARNING: If you Close() while there are still locks locked, these locks may
+// fail to release, causing a program freeze.
+// Close() is only intended to be used while testing the locks.
+func (locks *SHMLocks) Close() error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	locks.valid = false
+
+	retCode := C.close_lock_shm(locks.lockStruct)
+	if retCode < 0 {
+		// Negative errno returned
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// AllocateSemaphore allocates a semaphore from a shared-memory segment for use
+// by a container or pod.
+// Returns the index of the semaphore that was allocated.
+// Allocations past the maximum number of locks given when the SHM segment was
+// created will result in an error, and no semaphore will be allocated.
+func (locks *SHMLocks) AllocateSemaphore() (uint32, error) {
+	if !locks.valid {
+		return 0, errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	// This returns a U64, so we have the full u32 range available for
+	// semaphore indexes, and can still return error codes.
+	retCode := C.allocate_semaphore(locks.lockStruct)
+	if retCode < 0 {
+		// Negative errno returned
+		return 0, syscall.Errno(-1 * retCode)
+	}
+
+	return uint32(retCode), nil
+}
+
+// DeallocateSemaphore frees a semaphore in a shared-memory segment so it can be
+// reallocated to another container or pod.
+// The given semaphore must be already allocated, or an error will be returned.
+func (locks *SHMLocks) DeallocateSemaphore(sem uint32) error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	if sem > locks.maxLocks {
+		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+	}
+
+	retCode := C.deallocate_semaphore(locks.lockStruct, C.uint32_t(sem))
+	if retCode < 0 {
+		// Negative errno returned
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// LockSemaphore locks the given semaphore.
+// If the semaphore is already locked, LockSemaphore will block until the lock
+// can be acquired.
+// There is no requirement that the given semaphore be allocated.
+// This ensures that attempts to lock a container after it has been deleted,
+// but before the caller has queried the database to determine this, will
+// succeed.
+func (locks *SHMLocks) LockSemaphore(sem uint32) error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	if sem > locks.maxLocks {
+		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+	}
+
+	// For pthread mutexes, we have to guarantee lock and unlock happen in
+	// the same thread.
+	runtime.LockOSThread()
+
+	retCode := C.lock_semaphore(locks.lockStruct, C.uint32_t(sem))
+	if retCode < 0 {
+		// Negative errno returned
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// UnlockSemaphore unlocks the given semaphore.
+// Unlocking a semaphore that is already unlocked with return EBUSY.
+// There is no requirement that the given semaphore be allocated.
+// This ensures that attempts to lock a container after it has been deleted,
+// but before the caller has queried the database to determine this, will
+// succeed.
+func (locks *SHMLocks) UnlockSemaphore(sem uint32) error {
+	if !locks.valid {
+		return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+	}
+
+	if sem > locks.maxLocks {
+		return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+	}
+
+	retCode := C.unlock_semaphore(locks.lockStruct, C.uint32_t(sem))
+	if retCode < 0 {
+		// Negative errno returned
+		return syscall.Errno(-1 * retCode)
+	}
+
+	// For pthread mutexes, we have to guarantee lock and unlock happen in
+	// the same thread.
+	// OK if we take multiple locks - UnlockOSThread() won't actually unlock
+	// until the number of calls equals the number of calls to
+	// LockOSThread()
+	runtime.UnlockOSThread()
+
+	return nil
+}
diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h
new file mode 100644
index 000000000..8e7e23fb7
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.h
@@ -0,0 +1,46 @@
+#ifndef shm_locks_h_
+#define shm_locks_h_
+
+#include <pthread.h>
+#include <stdint.h>
+
+// Magic number to ensure we open the right SHM segment
+#define MAGIC 0x87D1
+
+// Type for our bitmaps
+typedef uint32_t bitmap_t;
+
+// bitmap size
+#define BITMAP_SIZE (sizeof(bitmap_t) * 8)
+
+// Struct to hold a single bitmap and associated locks
+typedef struct lock_group {
+  bitmap_t        bitmap;
+  pthread_mutex_t locks[BITMAP_SIZE];
+} lock_group_t;
+
+// Struct to hold our SHM locks.
+// Unused is required to be 0 in the current implementation. If we ever make
+// changes to this structure in the future, this will be repurposed as a version
+// field.
+typedef struct shm_struct {
+  uint16_t        magic;
+  uint16_t        unused;
+  pthread_mutex_t segment_lock;
+  uint32_t        num_bitmaps;
+  uint32_t        num_locks;
+  lock_group_t    locks[];
+} shm_struct_t;
+
+static size_t compute_shm_size(uint32_t num_bitmaps);
+static int take_mutex(pthread_mutex_t *mutex);
+static int release_mutex(pthread_mutex_t *mutex);
+shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code);
+shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code);
+int32_t close_lock_shm(shm_struct_t *shm);
+int64_t allocate_semaphore(shm_struct_t *shm);
+int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index);
+int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index);
+int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index);
+
+#endif
diff --git a/libpod/lock/shm/shm_lock_test.go b/libpod/lock/shm/shm_lock_test.go
new file mode 100644
index 000000000..594eb5d8e
--- /dev/null
+++ b/libpod/lock/shm/shm_lock_test.go
@@ -0,0 +1,278 @@
+package shm
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"syscall"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// All tests here are in the same process, which somewhat limits their utility
+// The big intent of this package it multiprocess locking, which is really hard
+// to test without actually having multiple processes...
+// We can at least verify that the locks work within the local process.
+
+var (
+	// 4 * BITMAP_SIZE to ensure we have to traverse bitmaps
+	numLocks = 4 * BitmapSize
+)
+
+const lockPath = "/libpod_test"
+
+// We need a test main to ensure that the SHM is created before the tests run
+func TestMain(m *testing.M) {
+	shmLock, err := CreateSHMLock(lockPath, numLocks)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating SHM for tests: %v\n", err)
+		os.Exit(-1)
+	}
+
+	// Close the SHM - every subsequent test will reopen
+	if err := shmLock.Close(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error closing SHM locks: %v\n", err)
+		os.Exit(-1)
+	}
+
+	exitCode := m.Run()
+
+	// We need to remove the SHM segment to clean up after ourselves
+	os.RemoveAll("/dev/shm/libpod_lock")
+
+	os.Exit(exitCode)
+}
+
+func runLockTest(t *testing.T, testFunc func(*testing.T, *SHMLocks)) {
+	locks, err := OpenSHMLock(lockPath, numLocks)
+	if err != nil {
+		t.Fatalf("Error opening locks: %v", err)
+	}
+	defer func() {
+		// Deallocate all locks
+		// Ignore ENOENT (lock is not allocated)
+		var i uint32
+		for i = 0; i < numLocks; i++ {
+			if err := locks.DeallocateSemaphore(i); err != nil && err != syscall.ENOENT {
+				t.Fatalf("Error deallocating semaphore %d: %v", i, err)
+			}
+		}
+
+		if err := locks.Close(); err != nil {
+			t.Fatalf("Error closing locks: %v", err)
+		}
+	}()
+
+	success := t.Run("locks", func(t *testing.T) {
+		testFunc(t, locks)
+	})
+	if !success {
+		t.Fail()
+	}
+}
+
+// Test that creating an SHM with a bad size rounds up to a good size
+func TestCreateNewSHMBadSizeRoundsUp(t *testing.T) {
+	// Odd number, not a power of 2, should never be a word size on a system
+	lock, err := CreateSHMLock("/test1", 7)
+	assert.NoError(t, err)
+
+	assert.Equal(t, lock.GetMaxLocks(), BitmapSize)
+
+	if err := lock.Close(); err != nil {
+		t.Fatalf("Error closing locks: %v", err)
+	}
+}
+
+// Test that creating an SHM with 0 size fails
+func TestCreateNewSHMZeroSize(t *testing.T) {
+	_, err := CreateSHMLock("/test2", 0)
+	assert.Error(t, err)
+}
+
+// Test that deallocating an unallocated lock errors
+func TestDeallocateUnallocatedLockErrors(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		err := locks.DeallocateSemaphore(0)
+		assert.Error(t, err)
+	})
+}
+
+// Test that unlocking an unlocked lock fails
+func TestUnlockingUnlockedLockFails(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		err := locks.UnlockSemaphore(0)
+		assert.Error(t, err)
+	})
+}
+
+// Test that locking and double-unlocking fails
+func TestDoubleUnlockFails(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		err := locks.LockSemaphore(0)
+		assert.NoError(t, err)
+
+		err = locks.UnlockSemaphore(0)
+		assert.NoError(t, err)
+
+		err = locks.UnlockSemaphore(0)
+		assert.Error(t, err)
+	})
+}
+
+// Test allocating - lock - unlock - deallocate cycle, single lock
+func TestLockLifecycleSingleLock(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		sem, err := locks.AllocateSemaphore()
+		require.NoError(t, err)
+
+		err = locks.LockSemaphore(sem)
+		assert.NoError(t, err)
+
+		err = locks.UnlockSemaphore(sem)
+		assert.NoError(t, err)
+
+		err = locks.DeallocateSemaphore(sem)
+		assert.NoError(t, err)
+	})
+}
+
+// Test allocate two locks returns different locks
+func TestAllocateTwoLocksGetsDifferentLocks(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		sem1, err := locks.AllocateSemaphore()
+		assert.NoError(t, err)
+
+		sem2, err := locks.AllocateSemaphore()
+		assert.NoError(t, err)
+
+		assert.NotEqual(t, sem1, sem2)
+	})
+}
+
+// Test allocate all locks successful and all are unique
+func TestAllocateAllLocksSucceeds(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		sems := make(map[uint32]bool)
+		var i uint32
+		for i = 0; i < numLocks; i++ {
+			sem, err := locks.AllocateSemaphore()
+			assert.NoError(t, err)
+
+			// Ensure the allocate semaphore is unique
+			_, ok := sems[sem]
+			assert.False(t, ok)
+
+			sems[sem] = true
+		}
+	})
+}
+
+// Test allocating more than the given max fails
+func TestAllocateTooManyLocksFails(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		// Allocate all locks
+		var i uint32
+		for i = 0; i < numLocks; i++ {
+			_, err := locks.AllocateSemaphore()
+			assert.NoError(t, err)
+		}
+
+		// Try and allocate one more
+		_, err := locks.AllocateSemaphore()
+		assert.Error(t, err)
+	})
+}
+
+// Test allocating max locks, deallocating one, and then allocating again succeeds
+func TestAllocateDeallocateCycle(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		// Allocate all locks
+		var i uint32
+		for i = 0; i < numLocks; i++ {
+			_, err := locks.AllocateSemaphore()
+			assert.NoError(t, err)
+		}
+
+		// Now loop through again, deallocating and reallocating.
+		// Each time we free 1 semaphore, allocate again, and make sure
+		// we get the same semaphore back.
+		var j uint32
+		for j = 0; j < numLocks; j++ {
+			err := locks.DeallocateSemaphore(j)
+			assert.NoError(t, err)
+
+			newSem, err := locks.AllocateSemaphore()
+			assert.NoError(t, err)
+			assert.Equal(t, j, newSem)
+		}
+	})
+}
+
+// Test that locks actually lock
+func TestLockSemaphoreActuallyLocks(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		// This entire test is very ugly - lots of sleeps to try and get
+		// things to occur in the right order.
+		// It also doesn't even exercise the multiprocess nature of the
+		// locks.
+
+		// Get the current time
+		startTime := time.Now()
+
+		// Start a goroutine to take the lock and then release it after
+		// a second.
+		go func() {
+			err := locks.LockSemaphore(0)
+			assert.NoError(t, err)
+
+			time.Sleep(1 * time.Second)
+
+			err = locks.UnlockSemaphore(0)
+			assert.NoError(t, err)
+		}()
+
+		// Sleep for a quarter of a second to give the goroutine time
+		// to kick off and grab the lock
+		time.Sleep(250 * time.Millisecond)
+
+		// Take the lock
+		err := locks.LockSemaphore(0)
+		assert.NoError(t, err)
+
+		// Get the current time
+		endTime := time.Now()
+
+		// Verify that at least 1 second has passed since start
+		duration := endTime.Sub(startTime)
+		assert.True(t, duration.Seconds() > 1.0)
+	})
+}
+
+// Test that locking and unlocking two semaphores succeeds
+// Ensures that runtime.LockOSThread() is doing its job
+func TestLockAndUnlockTwoSemaphore(t *testing.T) {
+	runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+		err := locks.LockSemaphore(5)
+		assert.NoError(t, err)
+
+		err = locks.LockSemaphore(6)
+		assert.NoError(t, err)
+
+		err = locks.UnlockSemaphore(6)
+		assert.NoError(t, err)
+
+		// Now yield scheduling
+		// To try and get us on another OS thread
+		runtime.Gosched()
+
+		// And unlock the last semaphore
+		// If we are in a different OS thread, this should fail.
+		// However, runtime.UnlockOSThread() should guarantee we are not
+		err = locks.UnlockSemaphore(5)
+		assert.NoError(t, err)
+	})
+}