diff options
Diffstat (limited to 'vendor/go.etcd.io/bbolt/freelist.go')
-rw-r--r-- | vendor/go.etcd.io/bbolt/freelist.go | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go new file mode 100644 index 000000000..d441b6925 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/freelist.go @@ -0,0 +1,413 @@ +package bbolt + +import ( + "fmt" + "reflect" + "sort" + "unsafe" +) + +// txPending holds a list of pgids and corresponding allocation txns +// that are pending to be freed. +type txPending struct { + ids []pgid + alloctx []txid // txids allocating the ids + lastReleaseBegin txid // beginning txid of last matching releaseRange +} + +// pidSet holds the set of starting pgids which have the same span size +type pidSet map[pgid]struct{} + +// freelist represents a list of all pages that are available for allocation. +// It also tracks pages that have been freed but are still in use by open transactions. +type freelist struct { + freelistType FreelistType // freelist type + ids []pgid // all free and available free page ids. + allocs map[pgid]txid // mapping of txid that allocated a pgid. + pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. + cache map[pgid]bool // fast lookup of all free and pending page ids. + freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size + forwardMap map[pgid]uint64 // key is start pgid, value is its span size + backwardMap map[pgid]uint64 // key is end pgid, value is its span size + allocate func(txid txid, n int) pgid // the freelist allocate func + free_count func() int // the function which gives you free page number + mergeSpans func(ids pgids) // the mergeSpan func + getFreePageIDs func() []pgid // get free pgids func + readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist +} + +// newFreelist returns an empty, initialized freelist. +func newFreelist(freelistType FreelistType) *freelist { + f := &freelist{ + freelistType: freelistType, + allocs: make(map[pgid]txid), + pending: make(map[txid]*txPending), + cache: make(map[pgid]bool), + freemaps: make(map[uint64]pidSet), + forwardMap: make(map[pgid]uint64), + backwardMap: make(map[pgid]uint64), + } + + if freelistType == FreelistMapType { + f.allocate = f.hashmapAllocate + f.free_count = f.hashmapFreeCount + f.mergeSpans = f.hashmapMergeSpans + f.getFreePageIDs = f.hashmapGetFreePageIDs + f.readIDs = f.hashmapReadIDs + } else { + f.allocate = f.arrayAllocate + f.free_count = f.arrayFreeCount + f.mergeSpans = f.arrayMergeSpans + f.getFreePageIDs = f.arrayGetFreePageIDs + f.readIDs = f.arrayReadIDs + } + + return f +} + +// size returns the size of the page after serialization. +func (f *freelist) size() int { + n := f.count() + if n >= 0xFFFF { + // The first element will be used to store the count. See freelist.write. + n++ + } + return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n) +} + +// count returns count of pages on the freelist +func (f *freelist) count() int { + return f.free_count() + f.pending_count() +} + +// arrayFreeCount returns count of free pages(array version) +func (f *freelist) arrayFreeCount() int { + return len(f.ids) +} + +// pending_count returns count of pending pages +func (f *freelist) pending_count() int { + var count int + for _, txp := range f.pending { + count += len(txp.ids) + } + return count +} + +// copyallunsafe copies a list of all free ids and all pending ids in one sorted list. +// f.count returns the minimum length required for dst. +func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer + m := make(pgids, 0, f.pending_count()) + for _, txp := range f.pending { + m = append(m, txp.ids...) + } + sort.Sort(m) + fpgids := f.getFreePageIDs() + sz := len(fpgids) + len(m) + dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{ + Data: uintptr(dstptr), + Len: sz, + Cap: sz, + })) + mergepgids(dst, fpgids, m) +} + +func (f *freelist) copyall(dst []pgid) { + m := make(pgids, 0, f.pending_count()) + for _, txp := range f.pending { + m = append(m, txp.ids...) + } + sort.Sort(m) + mergepgids(dst, f.getFreePageIDs(), m) +} + +// arrayAllocate returns the starting page id of a contiguous list of pages of a given size. +// If a contiguous block cannot be found then 0 is returned. +func (f *freelist) arrayAllocate(txid txid, n int) pgid { + if len(f.ids) == 0 { + return 0 + } + + var initial, previd pgid + for i, id := range f.ids { + if id <= 1 { + panic(fmt.Sprintf("invalid page allocation: %d", id)) + } + + // Reset initial page if this is not contiguous. + if previd == 0 || id-previd != 1 { + initial = id + } + + // If we found a contiguous block then remove it and return it. + if (id-initial)+1 == pgid(n) { + // If we're allocating off the beginning then take the fast path + // and just adjust the existing slice. This will use extra memory + // temporarily but the append() in free() will realloc the slice + // as is necessary. + if (i + 1) == n { + f.ids = f.ids[i+1:] + } else { + copy(f.ids[i-n+1:], f.ids[i+1:]) + f.ids = f.ids[:len(f.ids)-n] + } + + // Remove from the free cache. + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, initial+i) + } + f.allocs[initial] = txid + return initial + } + + previd = id + } + return 0 +} + +// free releases a page and its overflow for a given transaction id. +// If the page is already free then a panic will occur. +func (f *freelist) free(txid txid, p *page) { + if p.id <= 1 { + panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) + } + + // Free page and all its overflow pages. + txp := f.pending[txid] + if txp == nil { + txp = &txPending{} + f.pending[txid] = txp + } + allocTxid, ok := f.allocs[p.id] + if ok { + delete(f.allocs, p.id) + } else if (p.flags & freelistPageFlag) != 0 { + // Freelist is always allocated by prior tx. + allocTxid = txid - 1 + } + + for id := p.id; id <= p.id+pgid(p.overflow); id++ { + // Verify that page is not already free. + if f.cache[id] { + panic(fmt.Sprintf("page %d already freed", id)) + } + // Add to the freelist and cache. + txp.ids = append(txp.ids, id) + txp.alloctx = append(txp.alloctx, allocTxid) + f.cache[id] = true + } +} + +// release moves all page ids for a transaction id (or older) to the freelist. +func (f *freelist) release(txid txid) { + m := make(pgids, 0) + for tid, txp := range f.pending { + if tid <= txid { + // Move transaction's pending pages to the available freelist. + // Don't remove from the cache since the page is still free. + m = append(m, txp.ids...) + delete(f.pending, tid) + } + } + f.mergeSpans(m) +} + +// releaseRange moves pending pages allocated within an extent [begin,end] to the free list. +func (f *freelist) releaseRange(begin, end txid) { + if begin > end { + return + } + var m pgids + for tid, txp := range f.pending { + if tid < begin || tid > end { + continue + } + // Don't recompute freed pages if ranges haven't updated. + if txp.lastReleaseBegin == begin { + continue + } + for i := 0; i < len(txp.ids); i++ { + if atx := txp.alloctx[i]; atx < begin || atx > end { + continue + } + m = append(m, txp.ids[i]) + txp.ids[i] = txp.ids[len(txp.ids)-1] + txp.ids = txp.ids[:len(txp.ids)-1] + txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1] + txp.alloctx = txp.alloctx[:len(txp.alloctx)-1] + i-- + } + txp.lastReleaseBegin = begin + if len(txp.ids) == 0 { + delete(f.pending, tid) + } + } + f.mergeSpans(m) +} + +// rollback removes the pages from a given pending tx. +func (f *freelist) rollback(txid txid) { + // Remove page ids from cache. + txp := f.pending[txid] + if txp == nil { + return + } + var m pgids + for i, pgid := range txp.ids { + delete(f.cache, pgid) + tx := txp.alloctx[i] + if tx == 0 { + continue + } + if tx != txid { + // Pending free aborted; restore page back to alloc list. + f.allocs[pgid] = tx + } else { + // Freed page was allocated by this txn; OK to throw away. + m = append(m, pgid) + } + } + // Remove pages from pending list and mark as free if allocated by txid. + delete(f.pending, txid) + f.mergeSpans(m) +} + +// freed returns whether a given page is in the free list. +func (f *freelist) freed(pgid pgid) bool { + return f.cache[pgid] +} + +// read initializes the freelist from a freelist page. +func (f *freelist) read(p *page) { + if (p.flags & freelistPageFlag) == 0 { + panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ())) + } + // If the page.count is at the max uint16 value (64k) then it's considered + // an overflow and the size of the freelist is stored as the first element. + var idx, count uintptr = 0, uintptr(p.count) + if count == 0xFFFF { + idx = 1 + count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))) + } + + // Copy the list of page ids from the freelist. + if count == 0 { + f.ids = nil + } else { + ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{ + Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)), + Len: int(count), + Cap: int(count), + })) + + // copy the ids, so we don't modify on the freelist page directly + idsCopy := make([]pgid, count) + copy(idsCopy, ids) + // Make sure they're sorted. + sort.Sort(pgids(idsCopy)) + + f.readIDs(idsCopy) + } +} + +// arrayReadIDs initializes the freelist from a given list of ids. +func (f *freelist) arrayReadIDs(ids []pgid) { + f.ids = ids + f.reindex() +} + +func (f *freelist) arrayGetFreePageIDs() []pgid { + return f.ids +} + +// write writes the page ids onto a freelist page. All free and pending ids are +// saved to disk since in the event of a program crash, all pending ids will +// become free. +func (f *freelist) write(p *page) error { + // Combine the old free pgids and pgids waiting on an open transaction. + + // Update the header flag. + p.flags |= freelistPageFlag + + // The page.count can only hold up to 64k elements so if we overflow that + // number then we handle it by putting the size in the first element. + lenids := f.count() + if lenids == 0 { + p.count = uint16(lenids) + } else if lenids < 0xFFFF { + p.count = uint16(lenids) + f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) + } else { + p.count = 0xFFFF + *(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids) + f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0)))) + } + + return nil +} + +// reload reads the freelist from a page and filters out pending items. +func (f *freelist) reload(p *page) { + f.read(p) + + // Build a cache of only pending pages. + pcache := make(map[pgid]bool) + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range f.getFreePageIDs() { + if !pcache[id] { + a = append(a, id) + } + } + + f.readIDs(a) +} + +// noSyncReload reads the freelist from pgids and filters out pending items. +func (f *freelist) noSyncReload(pgids []pgid) { + // Build a cache of only pending pages. + pcache := make(map[pgid]bool) + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range pgids { + if !pcache[id] { + a = append(a, id) + } + } + + f.readIDs(a) +} + +// reindex rebuilds the free cache based on available and pending free lists. +func (f *freelist) reindex() { + ids := f.getFreePageIDs() + f.cache = make(map[pgid]bool, len(ids)) + for _, id := range ids { + f.cache[id] = true + } + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + f.cache[pendingID] = true + } + } +} + +// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array +func (f *freelist) arrayMergeSpans(ids pgids) { + sort.Sort(ids) + f.ids = pgids(f.ids).merge(ids) +} |