summaryrefslogtreecommitdiff
path: root/libpod/lock
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2019-01-04 10:41:05 -0800
committerGitHub <noreply@github.com>2019-01-04 10:41:05 -0800
commitbf5f779331870d31863c486619daae3fcea458eb (patch)
treeaafcacc17883a8df4734bed0aadbaca59a9882fe /libpod/lock
parent6868b5aa1444404113bc6a4582203fbbf89490c2 (diff)
parent56c5c89408f89fc3733692786d66eb44133b2c59 (diff)
downloadpodman-bf5f779331870d31863c486619daae3fcea458eb.tar.gz
podman-bf5f779331870d31863c486619daae3fcea458eb.tar.bz2
podman-bf5f779331870d31863c486619daae3fcea458eb.zip
Merge pull request #1235 from mheon/shm_locking
SHM locking for Libpod
Diffstat (limited to 'libpod/lock')
-rw-r--r--libpod/lock/in_memory_locks.go91
-rw-r--r--libpod/lock/lock.go58
-rw-r--r--libpod/lock/shm/shm_lock.c452
-rw-r--r--libpod/lock/shm/shm_lock.go216
-rw-r--r--libpod/lock/shm/shm_lock.h46
-rw-r--r--libpod/lock/shm/shm_lock_test.go278
-rw-r--r--libpod/lock/shm_lock_manager_linux.go94
-rw-r--r--libpod/lock/shm_lock_manager_unsupported.go29
8 files changed, 1264 insertions, 0 deletions
diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go
new file mode 100644
index 000000000..db8f20e95
--- /dev/null
+++ b/libpod/lock/in_memory_locks.go
@@ -0,0 +1,91 @@
+package lock
+
+import (
+ "sync"
+
+ "github.com/pkg/errors"
+)
+
+// Mutex holds a single mutex and whether it has been allocated.
+type Mutex struct {
+ id uint32
+ lock sync.Mutex
+ allocated bool
+}
+
+// ID retrieves the ID of the mutex
+func (m *Mutex) ID() uint32 {
+ return m.id
+}
+
+// Lock locks the mutex
+func (m *Mutex) Lock() {
+ m.lock.Lock()
+}
+
+// Unlock unlocks the mutex
+func (m *Mutex) Unlock() {
+ m.lock.Unlock()
+}
+
+// Free deallocates the mutex to allow its reuse
+func (m *Mutex) Free() error {
+ m.allocated = false
+
+ return nil
+}
+
+// InMemoryManager is a lock manager that allocates and retrieves local-only
+// locks - that is, they are not multiprocess. This lock manager is intended
+// purely for unit and integration testing and should not be used in production
+// deployments.
+type InMemoryManager struct {
+ locks []*Mutex
+ numLocks uint32
+ localLock sync.Mutex
+}
+
+// NewInMemoryManager creates a new in-memory lock manager with the given number
+// of locks.
+func NewInMemoryManager(numLocks uint32) (Manager, error) {
+ if numLocks == 0 {
+ return nil, errors.Errorf("must provide a non-zero number of locks!")
+ }
+
+ manager := new(InMemoryManager)
+ manager.numLocks = numLocks
+ manager.locks = make([]*Mutex, numLocks)
+
+ var i uint32
+ for i = 0; i < numLocks; i++ {
+ lock := new(Mutex)
+ lock.id = i
+ manager.locks[i] = lock
+ }
+
+ return manager, nil
+}
+
+// AllocateLock allocates a lock from the manager.
+func (m *InMemoryManager) AllocateLock() (Locker, error) {
+ m.localLock.Lock()
+ defer m.localLock.Unlock()
+
+ for _, lock := range m.locks {
+ if !lock.allocated {
+ lock.allocated = true
+ return lock, nil
+ }
+ }
+
+ return nil, errors.Errorf("all locks have been allocated")
+}
+
+// RetrieveLock retrieves a lock from the manager.
+func (m *InMemoryManager) RetrieveLock(id uint32) (Locker, error) {
+ if id >= m.numLocks {
+ return nil, errors.Errorf("given lock ID %d is too large - this manager only supports lock indexes up to %d", id, m.numLocks-1)
+ }
+
+ return m.locks[id], nil
+}
diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go
new file mode 100644
index 000000000..73c1fdcf7
--- /dev/null
+++ b/libpod/lock/lock.go
@@ -0,0 +1,58 @@
+package lock
+
+// Manager provides an interface for allocating multiprocess locks.
+// Locks returned by Manager MUST be multiprocess - allocating a lock in
+// process A and retrieving that lock's ID in process B must return handles for
+// the same lock, and locking the lock in A should exclude B from the lock until
+// it is unlocked in A.
+// All locks must be identified by a UUID (retrieved with Locker's ID() method).
+// All locks with a given UUID must refer to the same underlying lock, and it
+// must be possible to retrieve the lock given its UUID.
+// Each UUID should refer to a unique underlying lock.
+// Calls to AllocateLock() must return a unique, unallocated UUID.
+// AllocateLock() must fail once all available locks have been allocated.
+// Locks are returned to use by calls to Free(), and can subsequently be
+// reallocated.
+type Manager interface {
+ // AllocateLock returns an unallocated lock.
+ // It is guaranteed that the same lock will not be returned again by
+ // AllocateLock until the returned lock has Free() called on it.
+ // If all available locks are allocated, AllocateLock will return an
+ // error.
+ AllocateLock() (Locker, error)
+ // RetrieveLock retrieves a lock given its UUID.
+ // The underlying lock MUST be the same as another other lock with the
+ // same UUID.
+ RetrieveLock(id uint32) (Locker, error)
+}
+
+// Locker is similar to sync.Locker, but provides a method for freeing the lock
+// to allow its reuse.
+// All Locker implementations must maintain mutex semantics - the lock only
+// allows one caller in the critical section at a time.
+// All locks with the same ID must refer to the same underlying lock, even
+// if they are within multiple processes.
+type Locker interface {
+ // ID retrieves the lock's ID.
+ // ID is guaranteed to uniquely identify the lock within the
+ // Manager - that is, calling RetrieveLock with this ID will return
+ // another instance of the same lock.
+ ID() uint32
+ // Lock locks the lock.
+ // This call MUST block until it successfully acquires the lock or
+ // encounters a fatal error.
+ // All errors must be handled internally, as they are not returned. For
+ // the most part, panicking should be appropriate.
+ Lock()
+ // Unlock unlocks the lock.
+ // All errors must be handled internally, as they are not returned. For
+ // the most part, panicking should be appropriate.
+ // This includes unlocking locks which are already unlocked.
+ Unlock()
+ // Free deallocates the underlying lock, allowing its reuse by other
+ // pods and containers.
+ // The lock MUST still be usable after a Free() - some libpod instances
+ // may still retain Container structs with the old lock. This simply
+ // advises the manager that the lock may be reallocated.
+ Free() error
+}
diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c
new file mode 100644
index 000000000..4af58d857
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.c
@@ -0,0 +1,452 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "shm_lock.h"
+
+// Compute the size of the SHM struct
+static size_t compute_shm_size(uint32_t num_bitmaps) {
+ return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t));
+}
+
+// Take the given mutex.
+// Handles exceptional conditions, including a mutex locked by a process that
+// died holding it.
+// Returns 0 on success, or positive errno on failure.
+static int take_mutex(pthread_mutex_t *mutex) {
+ int ret_code;
+
+ do {
+ ret_code = pthread_mutex_lock(mutex);
+ } while(ret_code == EAGAIN);
+
+ if (ret_code == EOWNERDEAD) {
+ // The previous owner of the mutex died while holding it
+ // Take it for ourselves
+ ret_code = pthread_mutex_consistent(mutex);
+ if (ret_code != 0) {
+ // Someone else may have gotten here first and marked the state consistent
+ // However, the mutex could also be invalid.
+ // Fail here instead of looping back to trying to lock the mutex.
+ return ret_code;
+ }
+ } else if (ret_code != 0) {
+ return ret_code;
+ }
+
+ return 0;
+}
+
+// Release the given mutex.
+// Returns 0 on success, or positive errno on failure.
+static int release_mutex(pthread_mutex_t *mutex) {
+ int ret_code;
+
+ do {
+ ret_code = pthread_mutex_unlock(mutex);
+ } while(ret_code == EAGAIN);
+
+ if (ret_code != 0) {
+ return ret_code;
+ }
+
+ return 0;
+}
+
+// Set up an SHM segment holding locks for libpod.
+// num_locks must not be 0.
+// Path is the path to the SHM segment. It must begin with a single / and
+// container no other / characters, and be at most 255 characters including
+// terminating NULL byte.
+// Returns a valid pointer on success or NULL on error.
+// If an error occurs, negative ERRNO values will be written to error_code.
+shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
+ int shm_fd, i, j, ret_code;
+ uint32_t num_bitmaps;
+ size_t shm_size;
+ shm_struct_t *shm;
+ pthread_mutexattr_t attr;
+
+ // If error_code doesn't point to anything, we can't reasonably return errors
+ // So fail immediately
+ if (error_code == NULL) {
+ return NULL;
+ }
+
+ // We need a nonzero number of locks
+ if (num_locks == 0) {
+ *error_code = -1 * EINVAL;
+ return NULL;
+ }
+
+ if (path == NULL) {
+ *error_code = -1 * EINVAL;
+ return NULL;
+ }
+
+ // Calculate the number of bitmaps required
+ num_bitmaps = num_locks / BITMAP_SIZE;
+ if (num_locks % BITMAP_SIZE != 0) {
+ // The actual number given is not an even multiple of our bitmap size
+ // So round up
+ num_bitmaps += 1;
+ }
+
+ // Calculate size of the shm segment
+ shm_size = compute_shm_size(num_bitmaps);
+
+ // Create a new SHM segment for us
+ shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
+ if (shm_fd < 0) {
+ *error_code = -1 * errno;
+ return NULL;
+ }
+
+ // Increase its size to what we need
+ ret_code = ftruncate(shm_fd, shm_size);
+ if (ret_code < 0) {
+ *error_code = -1 * errno;
+ goto CLEANUP_UNLINK;
+ }
+
+ // Map the shared memory in
+ shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+ if (shm == MAP_FAILED) {
+ *error_code = -1 * errno;
+ goto CLEANUP_UNLINK;
+ }
+
+ // We have successfully mapped the memory, now initialize the region
+ shm->magic = MAGIC;
+ shm->unused = 0;
+ shm->num_locks = num_bitmaps * BITMAP_SIZE;
+ shm->num_bitmaps = num_bitmaps;
+
+ // Create an initializer for our pthread mutexes
+ ret_code = pthread_mutexattr_init(&attr);
+ if (ret_code != 0) {
+ *error_code = -1 * ret_code;
+ goto CLEANUP_UNMAP;
+ }
+
+ // Set mutexes to pshared - multiprocess-safe
+ ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ if (ret_code != 0) {
+ *error_code = -1 * ret_code;
+ goto CLEANUP_FREEATTR;
+ }
+
+ // Set mutexes to robust - if a process dies while holding a mutex, we'll get
+ // a special error code on the next attempt to lock it.
+ // This should prevent panicing processes from leaving the state unusable.
+ ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
+ if (ret_code != 0) {
+ *error_code = -1 * ret_code;
+ goto CLEANUP_FREEATTR;
+ }
+
+ // Initialize the mutex that protects the bitmaps using the mutex attributes
+ ret_code = pthread_mutex_init(&(shm->segment_lock), &attr);
+ if (ret_code != 0) {
+ *error_code = -1 * ret_code;
+ goto CLEANUP_FREEATTR;
+ }
+
+ // Initialize all bitmaps to 0 initially
+ // And initialize all semaphores they use
+ for (i = 0; i < num_bitmaps; i++) {
+ shm->locks[i].bitmap = 0;
+ for (j = 0; j < BITMAP_SIZE; j++) {
+ // Initialize each mutex
+ ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr);
+ if (ret_code != 0) {
+ *error_code = -1 * ret_code;
+ goto CLEANUP_FREEATTR;
+ }
+ }
+ }
+
+ // Close the file descriptor, we're done with it
+ // Ignore errors, it's ok if we leak a single FD and this should only run once
+ close(shm_fd);
+
+ // Destroy the pthread initializer attribute.
+ // Again, ignore errors, this will only run once and we might leak a tiny bit
+ // of memory at worst.
+ pthread_mutexattr_destroy(&attr);
+
+ return shm;
+
+ // Cleanup after an error
+ CLEANUP_FREEATTR:
+ pthread_mutexattr_destroy(&attr);
+ CLEANUP_UNMAP:
+ munmap(shm, shm_size);
+ CLEANUP_UNLINK:
+ close(shm_fd);
+ shm_unlink(path);
+ return NULL;
+}
+
+// Open an existing SHM segment holding libpod locks.
+// num_locks is the number of locks that will be configured in the SHM segment.
+// num_locks cannot be 0.
+// Path is the path to the SHM segment. It must begin with a single / and
+// container no other / characters, and be at most 255 characters including
+// terminating NULL byte.
+// Returns a valid pointer on success or NULL on error.
+// If an error occurs, negative ERRNO values will be written to error_code.
+shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
+ int shm_fd;
+ shm_struct_t *shm;
+ size_t shm_size;
+ uint32_t num_bitmaps;
+
+ if (error_code == NULL) {
+ return NULL;
+ }
+
+ // We need a nonzero number of locks
+ if (num_locks == 0) {
+ *error_code = -1 * EINVAL;
+ return NULL;
+ }
+
+ if (path == NULL) {
+ *error_code = -1 * EINVAL;
+ return NULL;
+ }
+
+ // Calculate the number of bitmaps required
+ num_bitmaps = num_locks / BITMAP_SIZE;
+ if (num_locks % BITMAP_SIZE != 0) {
+ num_bitmaps += 1;
+ }
+
+ // Calculate size of the shm segment
+ shm_size = compute_shm_size(num_bitmaps);
+
+ shm_fd = shm_open(path, O_RDWR, 0600);
+ if (shm_fd < 0) {
+ *error_code = -1 * errno;
+ return NULL;
+ }
+
+ // Map the shared memory in
+ shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+ if (shm == MAP_FAILED) {
+ *error_code = -1 * errno;
+ }
+
+ // Ignore errors, it's ok if we leak a single FD since this only runs once
+ close(shm_fd);
+
+ // Check if we successfully mmap'd
+ if (shm == MAP_FAILED) {
+ return NULL;
+ }
+
+ // Need to check the SHM to see if it's actually our locks
+ if (shm->magic != MAGIC) {
+ *error_code = -1 * errno;
+ goto CLEANUP;
+ }
+ if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
+ *error_code = -1 * errno;
+ goto CLEANUP;
+ }
+
+ return shm;
+
+ CLEANUP:
+ munmap(shm, shm_size);
+ return NULL;
+}
+
+// Close an open SHM lock struct, unmapping the backing memory.
+// The given shm_struct_t will be rendered unusable as a result.
+// On success, 0 is returned. On failure, negative ERRNO values are returned.
+int32_t close_lock_shm(shm_struct_t *shm) {
+ int ret_code;
+ size_t shm_size;
+
+ // We can't unmap null...
+ if (shm == NULL) {
+ return -1 * EINVAL;
+ }
+
+ shm_size = compute_shm_size(shm->num_bitmaps);
+
+ ret_code = munmap(shm, shm_size);
+
+ if (ret_code != 0) {
+ return -1 * errno;
+ }
+
+ return 0;
+}
+
+// Allocate the first available semaphore
+// Returns a positive integer guaranteed to be less than UINT32_MAX on success,
+// or negative errno values on failure
+// On sucess, the returned integer is the number of the semaphore allocated
+int64_t allocate_semaphore(shm_struct_t *shm) {
+ int ret_code, i;
+ bitmap_t test_map;
+ int64_t sem_number, num_within_bitmap;
+
+ if (shm == NULL) {
+ return -1 * EINVAL;
+ }
+
+ // Lock the semaphore controlling access to our shared memory
+ ret_code = take_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ // Loop through our bitmaps to search for one that is not full
+ for (i = 0; i < shm->num_bitmaps; i++) {
+ if (shm->locks[i].bitmap != 0xFFFFFFFF) {
+ test_map = 0x1;
+ num_within_bitmap = 0;
+ while (test_map != 0) {
+ if ((test_map & shm->locks[i].bitmap) == 0) {
+ // Compute the number of the semaphore we are allocating
+ sem_number = (BITMAP_SIZE * i) + num_within_bitmap;
+ // OR in the bitmap
+ shm->locks[i].bitmap = shm->locks[i].bitmap | test_map;
+
+ // Clear the mutex
+ ret_code = release_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ // Return the semaphore we've allocated
+ return sem_number;
+ }
+ test_map = test_map << 1;
+ num_within_bitmap++;
+ }
+ // We should never fall through this loop
+ // TODO maybe an assert() here to panic if we do?
+ }
+ }
+
+ // Clear the mutex
+ ret_code = release_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ // All bitmaps are full
+ // We have no available semaphores, report allocation failure
+ return -1 * ENOSPC;
+}
+
+// Deallocate a given semaphore
+// Returns 0 on success, negative ERRNO values on failure
+int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+ bitmap_t test_map;
+ int bitmap_index, index_in_bitmap, ret_code, i;
+
+ if (shm == NULL) {
+ return -1 * EINVAL;
+ }
+
+ // Check if the lock index is valid
+ if (sem_index >= shm->num_locks) {
+ return -1 * EINVAL;
+ }
+
+ bitmap_index = sem_index / BITMAP_SIZE;
+ index_in_bitmap = sem_index % BITMAP_SIZE;
+
+ // This should never happen if the sem_index test above succeeded, but better
+ // safe than sorry
+ if (bitmap_index >= shm->num_bitmaps) {
+ return -1 * EFAULT;
+ }
+
+ test_map = 0x1 << index_in_bitmap;
+
+ // Lock the mutex controlling access to our shared memory
+ ret_code = take_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ // Check if the semaphore is allocated
+ if ((test_map & shm->locks[bitmap_index].bitmap) == 0) {
+ ret_code = release_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ return -1 * ENOENT;
+ }
+
+ // The semaphore is allocated, clear it
+ // Invert the bitmask we used to test to clear the bit
+ test_map = ~test_map;
+ shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map;
+
+ ret_code = release_mutex(&(shm->segment_lock));
+ if (ret_code != 0) {
+ return -1 * ret_code;
+ }
+
+ return 0;
+}
+
+// Lock a given semaphore
+// Does not check if the semaphore is allocated - this ensures that, even for
+// removed containers, we can still successfully lock to check status (and
+// subsequently realize they have been removed).
+// Returns 0 on success, -1 on failure
+int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+ int bitmap_index, index_in_bitmap, ret_code;
+
+ if (shm == NULL) {
+ return -1 * EINVAL;
+ }
+
+ if (sem_index >= shm->num_locks) {
+ return -1 * EINVAL;
+ }
+
+ bitmap_index = sem_index / BITMAP_SIZE;
+ index_in_bitmap = sem_index % BITMAP_SIZE;
+
+ return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
+}
+
+// Unlock a given semaphore
+// Does not check if the semaphore is allocated - this ensures that, even for
+// removed containers, we can still successfully lock to check status (and
+// subsequently realize they have been removed).
+// Returns 0 on success, -1 on failure
+int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
+ int bitmap_index, index_in_bitmap, ret_code;
+
+ if (shm == NULL) {
+ return -1 * EINVAL;
+ }
+
+ if (sem_index >= shm->num_locks) {
+ return -1 * EINVAL;
+ }
+
+ bitmap_index = sem_index / BITMAP_SIZE;
+ index_in_bitmap = sem_index % BITMAP_SIZE;
+
+ return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
+}
diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go
new file mode 100644
index 000000000..3372a8c71
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.go
@@ -0,0 +1,216 @@
+package shm
+
+// #cgo LDFLAGS: -lrt -lpthread
+// #include <stdlib.h>
+// #include "shm_lock.h"
+// const uint32_t bitmap_size_c = BITMAP_SIZE;
+import "C"
+
+import (
+ "runtime"
+ "syscall"
+ "unsafe"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+var (
+ // BitmapSize is the size of the bitmap used when managing SHM locks.
+ // an SHM lock manager's max locks will be rounded up to a multiple of
+ // this number.
+ BitmapSize uint32 = uint32(C.bitmap_size_c)
+)
+
+// SHMLocks is a struct enabling POSIX semaphore locking in a shared memory
+// segment.
+type SHMLocks struct { // nolint
+ lockStruct *C.shm_struct_t
+ maxLocks uint32
+ valid bool
+}
+
+// CreateSHMLock sets up a shared-memory segment holding a given number of POSIX
+// semaphores, and returns a struct that can be used to operate on those locks.
+// numLocks must not be 0, and may be rounded up to a multiple of the bitmap
+// size used by the underlying implementation.
+func CreateSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
+ if numLocks == 0 {
+ return nil, errors.Wrapf(syscall.EINVAL, "number of locks must greater than 0 0")
+ }
+
+ locks := new(SHMLocks)
+
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var errCode C.int
+ lockStruct := C.setup_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
+ if lockStruct == nil {
+ // We got a null pointer, so something errored
+ return nil, syscall.Errno(-1 * errCode)
+ }
+
+ locks.lockStruct = lockStruct
+ locks.maxLocks = uint32(lockStruct.num_locks)
+ locks.valid = true
+
+ logrus.Debugf("Initialized SHM lock manager at path %s", path)
+
+ return locks, nil
+}
+
+// OpenSHMLock opens an existing shared-memory segment holding a given number of
+// POSIX semaphores. numLocks must match the number of locks the shared memory
+// segment was created with.
+func OpenSHMLock(path string, numLocks uint32) (*SHMLocks, error) {
+ if numLocks == 0 {
+ return nil, errors.Wrapf(syscall.EINVAL, "number of locks must greater than 0")
+ }
+
+ locks := new(SHMLocks)
+
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var errCode C.int
+ lockStruct := C.open_lock_shm(cPath, C.uint32_t(numLocks), &errCode)
+ if lockStruct == nil {
+ // We got a null pointer, so something errored
+ return nil, syscall.Errno(-1 * errCode)
+ }
+
+ locks.lockStruct = lockStruct
+ locks.maxLocks = numLocks
+ locks.valid = true
+
+ return locks, nil
+}
+
+// GetMaxLocks returns the maximum number of locks in the SHM
+func (locks *SHMLocks) GetMaxLocks() uint32 {
+ return locks.maxLocks
+}
+
+// Close closes an existing shared-memory segment.
+// The segment will be rendered unusable after closing.
+// WARNING: If you Close() while there are still locks locked, these locks may
+// fail to release, causing a program freeze.
+// Close() is only intended to be used while testing the locks.
+func (locks *SHMLocks) Close() error {
+ if !locks.valid {
+ return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+ }
+
+ locks.valid = false
+
+ retCode := C.close_lock_shm(locks.lockStruct)
+ if retCode < 0 {
+ // Negative errno returned
+ return syscall.Errno(-1 * retCode)
+ }
+
+ return nil
+}
+
+// AllocateSemaphore allocates a semaphore from a shared-memory segment for use
+// by a container or pod.
+// Returns the index of the semaphore that was allocated.
+// Allocations past the maximum number of locks given when the SHM segment was
+// created will result in an error, and no semaphore will be allocated.
+func (locks *SHMLocks) AllocateSemaphore() (uint32, error) {
+ if !locks.valid {
+ return 0, errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+ }
+
+ // This returns a U64, so we have the full u32 range available for
+ // semaphore indexes, and can still return error codes.
+ retCode := C.allocate_semaphore(locks.lockStruct)
+ if retCode < 0 {
+ // Negative errno returned
+ return 0, syscall.Errno(-1 * retCode)
+ }
+
+ return uint32(retCode), nil
+}
+
+// DeallocateSemaphore frees a semaphore in a shared-memory segment so it can be
+// reallocated to another container or pod.
+// The given semaphore must be already allocated, or an error will be returned.
+func (locks *SHMLocks) DeallocateSemaphore(sem uint32) error {
+ if !locks.valid {
+ return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+ }
+
+ if sem > locks.maxLocks {
+ return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+ }
+
+ retCode := C.deallocate_semaphore(locks.lockStruct, C.uint32_t(sem))
+ if retCode < 0 {
+ // Negative errno returned
+ return syscall.Errno(-1 * retCode)
+ }
+
+ return nil
+}
+
+// LockSemaphore locks the given semaphore.
+// If the semaphore is already locked, LockSemaphore will block until the lock
+// can be acquired.
+// There is no requirement that the given semaphore be allocated.
+// This ensures that attempts to lock a container after it has been deleted,
+// but before the caller has queried the database to determine this, will
+// succeed.
+func (locks *SHMLocks) LockSemaphore(sem uint32) error {
+ if !locks.valid {
+ return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+ }
+
+ if sem > locks.maxLocks {
+ return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+ }
+
+ // For pthread mutexes, we have to guarantee lock and unlock happen in
+ // the same thread.
+ runtime.LockOSThread()
+
+ retCode := C.lock_semaphore(locks.lockStruct, C.uint32_t(sem))
+ if retCode < 0 {
+ // Negative errno returned
+ return syscall.Errno(-1 * retCode)
+ }
+
+ return nil
+}
+
+// UnlockSemaphore unlocks the given semaphore.
+// Unlocking a semaphore that is already unlocked with return EBUSY.
+// There is no requirement that the given semaphore be allocated.
+// This ensures that attempts to lock a container after it has been deleted,
+// but before the caller has queried the database to determine this, will
+// succeed.
+func (locks *SHMLocks) UnlockSemaphore(sem uint32) error {
+ if !locks.valid {
+ return errors.Wrapf(syscall.EINVAL, "locks have already been closed")
+ }
+
+ if sem > locks.maxLocks {
+ return errors.Wrapf(syscall.EINVAL, "given semaphore %d is higher than maximum locks count %d", sem, locks.maxLocks)
+ }
+
+ retCode := C.unlock_semaphore(locks.lockStruct, C.uint32_t(sem))
+ if retCode < 0 {
+ // Negative errno returned
+ return syscall.Errno(-1 * retCode)
+ }
+
+ // For pthread mutexes, we have to guarantee lock and unlock happen in
+ // the same thread.
+ // OK if we take multiple locks - UnlockOSThread() won't actually unlock
+ // until the number of calls equals the number of calls to
+ // LockOSThread()
+ runtime.UnlockOSThread()
+
+ return nil
+}
diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h
new file mode 100644
index 000000000..8e7e23fb7
--- /dev/null
+++ b/libpod/lock/shm/shm_lock.h
@@ -0,0 +1,46 @@
+#ifndef shm_locks_h_
+#define shm_locks_h_
+
+#include <pthread.h>
+#include <stdint.h>
+
+// Magic number to ensure we open the right SHM segment
+#define MAGIC 0x87D1
+
+// Type for our bitmaps
+typedef uint32_t bitmap_t;
+
+// bitmap size
+#define BITMAP_SIZE (sizeof(bitmap_t) * 8)
+
+// Struct to hold a single bitmap and associated locks
+typedef struct lock_group {
+ bitmap_t bitmap;
+ pthread_mutex_t locks[BITMAP_SIZE];
+} lock_group_t;
+
+// Struct to hold our SHM locks.
+// Unused is required to be 0 in the current implementation. If we ever make
+// changes to this structure in the future, this will be repurposed as a version
+// field.
+typedef struct shm_struct {
+ uint16_t magic;
+ uint16_t unused;
+ pthread_mutex_t segment_lock;
+ uint32_t num_bitmaps;
+ uint32_t num_locks;
+ lock_group_t locks[];
+} shm_struct_t;
+
+static size_t compute_shm_size(uint32_t num_bitmaps);
+static int take_mutex(pthread_mutex_t *mutex);
+static int release_mutex(pthread_mutex_t *mutex);
+shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code);
+shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code);
+int32_t close_lock_shm(shm_struct_t *shm);
+int64_t allocate_semaphore(shm_struct_t *shm);
+int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index);
+int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index);
+int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index);
+
+#endif
diff --git a/libpod/lock/shm/shm_lock_test.go b/libpod/lock/shm/shm_lock_test.go
new file mode 100644
index 000000000..0f3a96cca
--- /dev/null
+++ b/libpod/lock/shm/shm_lock_test.go
@@ -0,0 +1,278 @@
+package shm
+
+import (
+ "fmt"
+ "os"
+ "runtime"
+ "syscall"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// All tests here are in the same process, which somewhat limits their utility
+// The big intent of this package it multiprocess locking, which is really hard
+// to test without actually having multiple processes...
+// We can at least verify that the locks work within the local process.
+
+var (
+ // 4 * BITMAP_SIZE to ensure we have to traverse bitmaps
+ numLocks = 4 * BitmapSize
+)
+
+const lockPath = "/libpod_test"
+
+// We need a test main to ensure that the SHM is created before the tests run
+func TestMain(m *testing.M) {
+ shmLock, err := CreateSHMLock(lockPath, numLocks)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error creating SHM for tests: %v\n", err)
+ os.Exit(-1)
+ }
+
+ // Close the SHM - every subsequent test will reopen
+ if err := shmLock.Close(); err != nil {
+ fmt.Fprintf(os.Stderr, "Error closing SHM locks: %v\n", err)
+ os.Exit(-1)
+ }
+
+ exitCode := m.Run()
+
+ // We need to remove the SHM segment to clean up after ourselves
+ os.RemoveAll("/dev/shm/libpod_lock")
+
+ os.Exit(exitCode)
+}
+
+func runLockTest(t *testing.T, testFunc func(*testing.T, *SHMLocks)) {
+ locks, err := OpenSHMLock(lockPath, numLocks)
+ if err != nil {
+ t.Fatalf("Error opening locks: %v", err)
+ }
+ defer func() {
+ // Deallocate all locks
+ // Ignore ENOENT (lock is not allocated)
+ var i uint32
+ for i = 0; i < numLocks; i++ {
+ if err := locks.DeallocateSemaphore(i); err != nil && err != syscall.ENOENT {
+ t.Fatalf("Error deallocating semaphore %d: %v", i, err)
+ }
+ }
+
+ if err := locks.Close(); err != nil {
+ t.Fatalf("Error closing locks: %v", err)
+ }
+ }()
+
+ success := t.Run("locks", func(t *testing.T) {
+ testFunc(t, locks)
+ })
+ if !success {
+ t.Fail()
+ }
+}
+
+// Test that creating an SHM with a bad size rounds up to a good size
+func TestCreateNewSHMBadSizeRoundsUp(t *testing.T) {
+ // Odd number, not a power of 2, should never be a word size on a system
+ lock, err := CreateSHMLock("/test1", 7)
+ assert.NoError(t, err)
+
+ assert.Equal(t, lock.GetMaxLocks(), BitmapSize)
+
+ if err := lock.Close(); err != nil {
+ t.Fatalf("Error closing locks: %v", err)
+ }
+}
+
+// Test that creating an SHM with 0 size fails
+func TestCreateNewSHMZeroSize(t *testing.T) {
+ _, err := CreateSHMLock("/test2", 0)
+ assert.Error(t, err)
+}
+
+// Test that deallocating an unallocated lock errors
+func TestDeallocateUnallocatedLockErrors(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ err := locks.DeallocateSemaphore(0)
+ assert.Error(t, err)
+ })
+}
+
+// Test that unlocking an unlocked lock fails
+func TestUnlockingUnlockedLockFails(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ err := locks.UnlockSemaphore(0)
+ assert.Error(t, err)
+ })
+}
+
+// Test that locking and double-unlocking fails
+func TestDoubleUnlockFails(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ err := locks.LockSemaphore(0)
+ assert.NoError(t, err)
+
+ err = locks.UnlockSemaphore(0)
+ assert.NoError(t, err)
+
+ err = locks.UnlockSemaphore(0)
+ assert.Error(t, err)
+ })
+}
+
+// Test allocating - lock - unlock - deallocate cycle, single lock
+func TestLockLifecycleSingleLock(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ sem, err := locks.AllocateSemaphore()
+ require.NoError(t, err)
+
+ err = locks.LockSemaphore(sem)
+ assert.NoError(t, err)
+
+ err = locks.UnlockSemaphore(sem)
+ assert.NoError(t, err)
+
+ err = locks.DeallocateSemaphore(sem)
+ assert.NoError(t, err)
+ })
+}
+
+// Test allocate two locks returns different locks
+func TestAllocateTwoLocksGetsDifferentLocks(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ sem1, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+
+ sem2, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+
+ assert.NotEqual(t, sem1, sem2)
+ })
+}
+
+// Test allocate all locks successful and all are unique
+func TestAllocateAllLocksSucceeds(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ sems := make(map[uint32]bool)
+ var i uint32
+ for i = 0; i < numLocks; i++ {
+ sem, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+
+ // Ensure the allocate semaphore is unique
+ _, ok := sems[sem]
+ assert.False(t, ok)
+
+ sems[sem] = true
+ }
+ })
+}
+
+// Test allocating more than the given max fails
+func TestAllocateTooManyLocksFails(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ // Allocate all locks
+ var i uint32
+ for i = 0; i < numLocks; i++ {
+ _, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+ }
+
+ // Try and allocate one more
+ _, err := locks.AllocateSemaphore()
+ assert.Error(t, err)
+ })
+}
+
+// Test allocating max locks, deallocating one, and then allocating again succeeds
+func TestAllocateDeallocateCycle(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ // Allocate all locks
+ var i uint32
+ for i = 0; i < numLocks; i++ {
+ _, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+ }
+
+ // Now loop through again, deallocating and reallocating.
+ // Each time we free 1 semaphore, allocate again, and make sure
+ // we get the same semaphore back.
+ var j uint32
+ for j = 0; j < numLocks; j++ {
+ err := locks.DeallocateSemaphore(j)
+ assert.NoError(t, err)
+
+ newSem, err := locks.AllocateSemaphore()
+ assert.NoError(t, err)
+ assert.Equal(t, j, newSem)
+ }
+ })
+}
+
+// Test that locks actually lock
+func TestLockSemaphoreActuallyLocks(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ // This entire test is very ugly - lots of sleeps to try and get
+ // things to occur in the right order.
+ // It also doesn't even exercise the multiprocess nature of the
+ // locks.
+
+ // Get the current time
+ startTime := time.Now()
+
+ // Start a goroutine to take the lock and then release it after
+ // a second.
+ go func() {
+ err := locks.LockSemaphore(0)
+ assert.NoError(t, err)
+
+ time.Sleep(1 * time.Second)
+
+ err = locks.UnlockSemaphore(0)
+ assert.NoError(t, err)
+ }()
+
+ // Sleep for a quarter of a second to give the goroutine time
+ // to kick off and grab the lock
+ time.Sleep(250 * time.Millisecond)
+
+ // Take the lock
+ err := locks.LockSemaphore(0)
+ assert.NoError(t, err)
+
+ // Get the current time
+ endTime := time.Now()
+
+ // Verify that at least 1 second has passed since start
+ duration := endTime.Sub(startTime)
+ assert.True(t, duration.Seconds() > 1.0)
+ })
+}
+
+// Test that locking and unlocking two semaphores succeeds
+// Ensures that runtime.LockOSThread() is doing its job
+func TestLockAndUnlockTwoSemaphore(t *testing.T) {
+ runLockTest(t, func(t *testing.T, locks *SHMLocks) {
+ err := locks.LockSemaphore(0)
+ assert.NoError(t, err)
+
+ err = locks.LockSemaphore(1)
+ assert.NoError(t, err)
+
+ err = locks.UnlockSemaphore(1)
+ assert.NoError(t, err)
+
+ // Now yield scheduling
+ // To try and get us on another OS thread
+ runtime.Gosched()
+
+ // And unlock the last semaphore
+ // If we are in a different OS thread, this should fail.
+ // However, runtime.UnlockOSThread() should guarantee we are not
+ err = locks.UnlockSemaphore(0)
+ assert.NoError(t, err)
+ })
+}
diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go
new file mode 100644
index 000000000..3e8f4f3d2
--- /dev/null
+++ b/libpod/lock/shm_lock_manager_linux.go
@@ -0,0 +1,94 @@
+// +build linux
+
+package lock
+
+import (
+ "github.com/containers/libpod/libpod/lock/shm"
+)
+
+// SHMLockManager manages shared memory locks.
+type SHMLockManager struct {
+ locks *shm.SHMLocks
+}
+
+// NewSHMLockManager makes a new SHMLockManager with the given number of locks.
+// Due to the underlying implementation, the exact number of locks created may
+// be greater than the number given here.
+func NewSHMLockManager(path string, numLocks uint32) (Manager, error) {
+ locks, err := shm.CreateSHMLock(path, numLocks)
+ if err != nil {
+ return nil, err
+ }
+
+ manager := new(SHMLockManager)
+ manager.locks = locks
+
+ return manager, nil
+}
+
+// OpenSHMLockManager opens an existing SHMLockManager with the given number of
+// locks.
+func OpenSHMLockManager(path string, numLocks uint32) (Manager, error) {
+ locks, err := shm.OpenSHMLock(path, numLocks)
+ if err != nil {
+ return nil, err
+ }
+
+ manager := new(SHMLockManager)
+ manager.locks = locks
+
+ return manager, nil
+}
+
+// AllocateLock allocates a new lock from the manager.
+func (m *SHMLockManager) AllocateLock() (Locker, error) {
+ semIndex, err := m.locks.AllocateSemaphore()
+ if err != nil {
+ return nil, err
+ }
+
+ lock := new(SHMLock)
+ lock.lockID = semIndex
+ lock.manager = m
+
+ return lock, nil
+}
+
+// RetrieveLock retrieves a lock from the manager given its ID.
+func (m *SHMLockManager) RetrieveLock(id uint32) (Locker, error) {
+ lock := new(SHMLock)
+ lock.lockID = id
+ lock.manager = m
+
+ return lock, nil
+}
+
+// SHMLock is an individual shared memory lock.
+type SHMLock struct {
+ lockID uint32
+ manager *SHMLockManager
+}
+
+// ID returns the ID of the lock.
+func (l *SHMLock) ID() uint32 {
+ return l.lockID
+}
+
+// Lock acquires the lock.
+func (l *SHMLock) Lock() {
+ if err := l.manager.locks.LockSemaphore(l.lockID); err != nil {
+ panic(err.Error())
+ }
+}
+
+// Unlock releases the lock.
+func (l *SHMLock) Unlock() {
+ if err := l.manager.locks.UnlockSemaphore(l.lockID); err != nil {
+ panic(err.Error())
+ }
+}
+
+// Free releases the lock, allowing it to be reused.
+func (l *SHMLock) Free() error {
+ return l.manager.locks.DeallocateSemaphore(l.lockID)
+}
diff --git a/libpod/lock/shm_lock_manager_unsupported.go b/libpod/lock/shm_lock_manager_unsupported.go
new file mode 100644
index 000000000..a1340fcd1
--- /dev/null
+++ b/libpod/lock/shm_lock_manager_unsupported.go
@@ -0,0 +1,29 @@
+// +build !linux
+
+package lock
+
+import "fmt"
+
+// SHMLockManager is a shared memory lock manager.
+// It is not supported on non-Unix platforms.
+type SHMLockManager struct{}
+
+// NewSHMLockManager is not supported on this platform
+func NewSHMLockManager(numLocks uint32) (Manager, error) {
+ return nil, fmt.Errorf("not supported")
+}
+
+// OpenSHMLockManager is not supported on this platform
+func OpenSHMLockManager(numLocks uint32) (Manager, error) {
+ return nil, fmt.Errorf("not supported")
+}
+
+// AllocateLock is not supported on this platform
+func (m *SHMLockManager) AllocateLock() (Locker, error) {
+ return nil, fmt.Errorf("not supported")
+}
+
+// RetrieveLock is not supported on this platform
+func (m *SHMLockManager) RetrieveLock(id string) (Locker, error) {
+ return nil, fmt.Errorf("not supported")
+}