summaryrefslogtreecommitdiff
path: root/vendor/go.etcd.io/bbolt/freelist.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/go.etcd.io/bbolt/freelist.go')
-rw-r--r--vendor/go.etcd.io/bbolt/freelist.go413
1 files changed, 413 insertions, 0 deletions
diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go
new file mode 100644
index 000000000..d441b6925
--- /dev/null
+++ b/vendor/go.etcd.io/bbolt/freelist.go
@@ -0,0 +1,413 @@
+package bbolt
+
+import (
+ "fmt"
+ "reflect"
+ "sort"
+ "unsafe"
+)
+
+// txPending holds a list of pgids and corresponding allocation txns
+// that are pending to be freed.
+type txPending struct {
+ ids []pgid
+ alloctx []txid // txids allocating the ids
+ lastReleaseBegin txid // beginning txid of last matching releaseRange
+}
+
+// pidSet holds the set of starting pgids which have the same span size
+type pidSet map[pgid]struct{}
+
+// freelist represents a list of all pages that are available for allocation.
+// It also tracks pages that have been freed but are still in use by open transactions.
+type freelist struct {
+ freelistType FreelistType // freelist type
+ ids []pgid // all free and available free page ids.
+ allocs map[pgid]txid // mapping of txid that allocated a pgid.
+ pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
+ cache map[pgid]bool // fast lookup of all free and pending page ids.
+ freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
+ forwardMap map[pgid]uint64 // key is start pgid, value is its span size
+ backwardMap map[pgid]uint64 // key is end pgid, value is its span size
+ allocate func(txid txid, n int) pgid // the freelist allocate func
+ free_count func() int // the function which gives you free page number
+ mergeSpans func(ids pgids) // the mergeSpan func
+ getFreePageIDs func() []pgid // get free pgids func
+ readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
+}
+
+// newFreelist returns an empty, initialized freelist.
+func newFreelist(freelistType FreelistType) *freelist {
+ f := &freelist{
+ freelistType: freelistType,
+ allocs: make(map[pgid]txid),
+ pending: make(map[txid]*txPending),
+ cache: make(map[pgid]bool),
+ freemaps: make(map[uint64]pidSet),
+ forwardMap: make(map[pgid]uint64),
+ backwardMap: make(map[pgid]uint64),
+ }
+
+ if freelistType == FreelistMapType {
+ f.allocate = f.hashmapAllocate
+ f.free_count = f.hashmapFreeCount
+ f.mergeSpans = f.hashmapMergeSpans
+ f.getFreePageIDs = f.hashmapGetFreePageIDs
+ f.readIDs = f.hashmapReadIDs
+ } else {
+ f.allocate = f.arrayAllocate
+ f.free_count = f.arrayFreeCount
+ f.mergeSpans = f.arrayMergeSpans
+ f.getFreePageIDs = f.arrayGetFreePageIDs
+ f.readIDs = f.arrayReadIDs
+ }
+
+ return f
+}
+
+// size returns the size of the page after serialization.
+func (f *freelist) size() int {
+ n := f.count()
+ if n >= 0xFFFF {
+ // The first element will be used to store the count. See freelist.write.
+ n++
+ }
+ return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n)
+}
+
+// count returns count of pages on the freelist
+func (f *freelist) count() int {
+ return f.free_count() + f.pending_count()
+}
+
+// arrayFreeCount returns count of free pages(array version)
+func (f *freelist) arrayFreeCount() int {
+ return len(f.ids)
+}
+
+// pending_count returns count of pending pages
+func (f *freelist) pending_count() int {
+ var count int
+ for _, txp := range f.pending {
+ count += len(txp.ids)
+ }
+ return count
+}
+
+// copyallunsafe copies a list of all free ids and all pending ids in one sorted list.
+// f.count returns the minimum length required for dst.
+func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer
+ m := make(pgids, 0, f.pending_count())
+ for _, txp := range f.pending {
+ m = append(m, txp.ids...)
+ }
+ sort.Sort(m)
+ fpgids := f.getFreePageIDs()
+ sz := len(fpgids) + len(m)
+ dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
+ Data: uintptr(dstptr),
+ Len: sz,
+ Cap: sz,
+ }))
+ mergepgids(dst, fpgids, m)
+}
+
+func (f *freelist) copyall(dst []pgid) {
+ m := make(pgids, 0, f.pending_count())
+ for _, txp := range f.pending {
+ m = append(m, txp.ids...)
+ }
+ sort.Sort(m)
+ mergepgids(dst, f.getFreePageIDs(), m)
+}
+
+// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
+// If a contiguous block cannot be found then 0 is returned.
+func (f *freelist) arrayAllocate(txid txid, n int) pgid {
+ if len(f.ids) == 0 {
+ return 0
+ }
+
+ var initial, previd pgid
+ for i, id := range f.ids {
+ if id <= 1 {
+ panic(fmt.Sprintf("invalid page allocation: %d", id))
+ }
+
+ // Reset initial page if this is not contiguous.
+ if previd == 0 || id-previd != 1 {
+ initial = id
+ }
+
+ // If we found a contiguous block then remove it and return it.
+ if (id-initial)+1 == pgid(n) {
+ // If we're allocating off the beginning then take the fast path
+ // and just adjust the existing slice. This will use extra memory
+ // temporarily but the append() in free() will realloc the slice
+ // as is necessary.
+ if (i + 1) == n {
+ f.ids = f.ids[i+1:]
+ } else {
+ copy(f.ids[i-n+1:], f.ids[i+1:])
+ f.ids = f.ids[:len(f.ids)-n]
+ }
+
+ // Remove from the free cache.
+ for i := pgid(0); i < pgid(n); i++ {
+ delete(f.cache, initial+i)
+ }
+ f.allocs[initial] = txid
+ return initial
+ }
+
+ previd = id
+ }
+ return 0
+}
+
+// free releases a page and its overflow for a given transaction id.
+// If the page is already free then a panic will occur.
+func (f *freelist) free(txid txid, p *page) {
+ if p.id <= 1 {
+ panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
+ }
+
+ // Free page and all its overflow pages.
+ txp := f.pending[txid]
+ if txp == nil {
+ txp = &txPending{}
+ f.pending[txid] = txp
+ }
+ allocTxid, ok := f.allocs[p.id]
+ if ok {
+ delete(f.allocs, p.id)
+ } else if (p.flags & freelistPageFlag) != 0 {
+ // Freelist is always allocated by prior tx.
+ allocTxid = txid - 1
+ }
+
+ for id := p.id; id <= p.id+pgid(p.overflow); id++ {
+ // Verify that page is not already free.
+ if f.cache[id] {
+ panic(fmt.Sprintf("page %d already freed", id))
+ }
+ // Add to the freelist and cache.
+ txp.ids = append(txp.ids, id)
+ txp.alloctx = append(txp.alloctx, allocTxid)
+ f.cache[id] = true
+ }
+}
+
+// release moves all page ids for a transaction id (or older) to the freelist.
+func (f *freelist) release(txid txid) {
+ m := make(pgids, 0)
+ for tid, txp := range f.pending {
+ if tid <= txid {
+ // Move transaction's pending pages to the available freelist.
+ // Don't remove from the cache since the page is still free.
+ m = append(m, txp.ids...)
+ delete(f.pending, tid)
+ }
+ }
+ f.mergeSpans(m)
+}
+
+// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
+func (f *freelist) releaseRange(begin, end txid) {
+ if begin > end {
+ return
+ }
+ var m pgids
+ for tid, txp := range f.pending {
+ if tid < begin || tid > end {
+ continue
+ }
+ // Don't recompute freed pages if ranges haven't updated.
+ if txp.lastReleaseBegin == begin {
+ continue
+ }
+ for i := 0; i < len(txp.ids); i++ {
+ if atx := txp.alloctx[i]; atx < begin || atx > end {
+ continue
+ }
+ m = append(m, txp.ids[i])
+ txp.ids[i] = txp.ids[len(txp.ids)-1]
+ txp.ids = txp.ids[:len(txp.ids)-1]
+ txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
+ txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
+ i--
+ }
+ txp.lastReleaseBegin = begin
+ if len(txp.ids) == 0 {
+ delete(f.pending, tid)
+ }
+ }
+ f.mergeSpans(m)
+}
+
+// rollback removes the pages from a given pending tx.
+func (f *freelist) rollback(txid txid) {
+ // Remove page ids from cache.
+ txp := f.pending[txid]
+ if txp == nil {
+ return
+ }
+ var m pgids
+ for i, pgid := range txp.ids {
+ delete(f.cache, pgid)
+ tx := txp.alloctx[i]
+ if tx == 0 {
+ continue
+ }
+ if tx != txid {
+ // Pending free aborted; restore page back to alloc list.
+ f.allocs[pgid] = tx
+ } else {
+ // Freed page was allocated by this txn; OK to throw away.
+ m = append(m, pgid)
+ }
+ }
+ // Remove pages from pending list and mark as free if allocated by txid.
+ delete(f.pending, txid)
+ f.mergeSpans(m)
+}
+
+// freed returns whether a given page is in the free list.
+func (f *freelist) freed(pgid pgid) bool {
+ return f.cache[pgid]
+}
+
+// read initializes the freelist from a freelist page.
+func (f *freelist) read(p *page) {
+ if (p.flags & freelistPageFlag) == 0 {
+ panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
+ }
+ // If the page.count is at the max uint16 value (64k) then it's considered
+ // an overflow and the size of the freelist is stored as the first element.
+ var idx, count uintptr = 0, uintptr(p.count)
+ if count == 0xFFFF {
+ idx = 1
+ count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))))
+ }
+
+ // Copy the list of page ids from the freelist.
+ if count == 0 {
+ f.ids = nil
+ } else {
+ ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
+ Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)),
+ Len: int(count),
+ Cap: int(count),
+ }))
+
+ // copy the ids, so we don't modify on the freelist page directly
+ idsCopy := make([]pgid, count)
+ copy(idsCopy, ids)
+ // Make sure they're sorted.
+ sort.Sort(pgids(idsCopy))
+
+ f.readIDs(idsCopy)
+ }
+}
+
+// arrayReadIDs initializes the freelist from a given list of ids.
+func (f *freelist) arrayReadIDs(ids []pgid) {
+ f.ids = ids
+ f.reindex()
+}
+
+func (f *freelist) arrayGetFreePageIDs() []pgid {
+ return f.ids
+}
+
+// write writes the page ids onto a freelist page. All free and pending ids are
+// saved to disk since in the event of a program crash, all pending ids will
+// become free.
+func (f *freelist) write(p *page) error {
+ // Combine the old free pgids and pgids waiting on an open transaction.
+
+ // Update the header flag.
+ p.flags |= freelistPageFlag
+
+ // The page.count can only hold up to 64k elements so if we overflow that
+ // number then we handle it by putting the size in the first element.
+ lenids := f.count()
+ if lenids == 0 {
+ p.count = uint16(lenids)
+ } else if lenids < 0xFFFF {
+ p.count = uint16(lenids)
+ f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
+ } else {
+ p.count = 0xFFFF
+ *(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids)
+ f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0))))
+ }
+
+ return nil
+}
+
+// reload reads the freelist from a page and filters out pending items.
+func (f *freelist) reload(p *page) {
+ f.read(p)
+
+ // Build a cache of only pending pages.
+ pcache := make(map[pgid]bool)
+ for _, txp := range f.pending {
+ for _, pendingID := range txp.ids {
+ pcache[pendingID] = true
+ }
+ }
+
+ // Check each page in the freelist and build a new available freelist
+ // with any pages not in the pending lists.
+ var a []pgid
+ for _, id := range f.getFreePageIDs() {
+ if !pcache[id] {
+ a = append(a, id)
+ }
+ }
+
+ f.readIDs(a)
+}
+
+// noSyncReload reads the freelist from pgids and filters out pending items.
+func (f *freelist) noSyncReload(pgids []pgid) {
+ // Build a cache of only pending pages.
+ pcache := make(map[pgid]bool)
+ for _, txp := range f.pending {
+ for _, pendingID := range txp.ids {
+ pcache[pendingID] = true
+ }
+ }
+
+ // Check each page in the freelist and build a new available freelist
+ // with any pages not in the pending lists.
+ var a []pgid
+ for _, id := range pgids {
+ if !pcache[id] {
+ a = append(a, id)
+ }
+ }
+
+ f.readIDs(a)
+}
+
+// reindex rebuilds the free cache based on available and pending free lists.
+func (f *freelist) reindex() {
+ ids := f.getFreePageIDs()
+ f.cache = make(map[pgid]bool, len(ids))
+ for _, id := range ids {
+ f.cache[id] = true
+ }
+ for _, txp := range f.pending {
+ for _, pendingID := range txp.ids {
+ f.cache[pendingID] = true
+ }
+ }
+}
+
+// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
+func (f *freelist) arrayMergeSpans(ids pgids) {
+ sort.Sort(ids)
+ f.ids = pgids(f.ids).merge(ids)
+}