1 files changed, 309 insertions, 0 deletions
diff --git a/vendor/github.com/ulikunitz/xz/lzma/hashtable.go b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go
new file mode 100644
index 000000000..d786a9745
--- /dev/null
+++ b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go
@@ -0,0 +1,309 @@
+// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzma
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/ulikunitz/xz/internal/hash"
+)
+
+/* For compression we need to find byte sequences that match the byte
+ * sequence at the dictionary head. A hash table is a simple method to
+ * provide this capability.
+ */
+
+// maxMatches limits the number of matches requested from the Matches
+// function. This controls the speed of the overall encoding.
+const maxMatches = 16
+
+// shortDists defines the number of short distances supported by the
+// implementation.
+const shortDists = 8
+
+// The minimum is somehow arbitrary but the maximum is limited by the
+// memory requirements of the hash table.
+const (
+	minTableExponent = 9
+	maxTableExponent = 20
+)
+
+// newRoller contains the function used to create an instance of the
+// hash.Roller.
+var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) }
+
+// hashTable stores the hash table including the rolling hash method.
+//
+// We implement chained hashing into a circular buffer. Each entry in
+// the circular buffer stores the delta distance to the next position with a
+// word that has the same hash value.
+type hashTable struct {
+	dict *encoderDict
+	// actual hash table
+	t []int64
+	// circular list data with the offset to the next word
+	data  []uint32
+	front int
+	// mask for computing the index for the hash table
+	mask uint64
+	// hash offset; initial value is -int64(wordLen)
+	hoff int64
+	// length of the hashed word
+	wordLen int
+	// hash roller for computing the hash values for the Write
+	// method
+	wr hash.Roller
+	// hash roller for computing arbitrary hashes
+	hr hash.Roller
+	// preallocated slices
+	p         [maxMatches]int64
+	distances [maxMatches + shortDists]int
+}
+
+// hashTableExponent derives the hash table exponent from the dictionary
+// capacity.
+func hashTableExponent(n uint32) int {
+	e := 30 - nlz32(n)
+	switch {
+	case e < minTableExponent:
+		e = minTableExponent
+	case e > maxTableExponent:
+		e = maxTableExponent
+	}
+	return e
+}
+
+// newHashTable creates a new hash table for words of length wordLen
+func newHashTable(capacity int, wordLen int) (t *hashTable, err error) {
+	if !(0 < capacity) {
+		return nil, errors.New(
+			"newHashTable: capacity must not be negative")
+	}
+	exp := hashTableExponent(uint32(capacity))
+	if !(1 <= wordLen && wordLen <= 4) {
+		return nil, errors.New("newHashTable: " +
+			"argument wordLen out of range")
+	}
+	n := 1 << uint(exp)
+	if n <= 0 {
+		panic("newHashTable: exponent is too large")
+	}
+	t = &hashTable{
+		t:       make([]int64, n),
+		data:    make([]uint32, capacity),
+		mask:    (uint64(1) << uint(exp)) - 1,
+		hoff:    -int64(wordLen),
+		wordLen: wordLen,
+		wr:      newRoller(wordLen),
+		hr:      newRoller(wordLen),
+	}
+	return t, nil
+}
+
+func (t *hashTable) SetDict(d *encoderDict) { t.dict = d }
+
+// buffered returns the number of bytes that are currently hashed.
+func (t *hashTable) buffered() int {
+	n := t.hoff + 1
+	switch {
+	case n <= 0:
+		return 0
+	case n >= int64(len(t.data)):
+		return len(t.data)
+	}
+	return int(n)
+}
+
+// addIndex adds n to an index ensuring that is stays inside the
+// circular buffer for the hash chain.
+func (t *hashTable) addIndex(i, n int) int {
+	i += n - len(t.data)
+	if i < 0 {
+		i += len(t.data)
+	}
+	return i
+}
+
+// putDelta puts the delta instance at the current front of the circular
+// chain buffer.
+func (t *hashTable) putDelta(delta uint32) {
+	t.data[t.front] = delta
+	t.front = t.addIndex(t.front, 1)
+}
+
+// putEntry puts a new entry into the hash table. If there is already a
+// value stored it is moved into the circular chain buffer.
+func (t *hashTable) putEntry(h uint64, pos int64) {
+	if pos < 0 {
+		return
+	}
+	i := h & t.mask
+	old := t.t[i] - 1
+	t.t[i] = pos + 1
+	var delta int64
+	if old >= 0 {
+		delta = pos - old
+		if delta > 1<<32-1 || delta > int64(t.buffered()) {
+			delta = 0
+		}
+	}
+	t.putDelta(uint32(delta))
+}
+
+// WriteByte converts a single byte into a hash and puts them into the hash
+// table.
+func (t *hashTable) WriteByte(b byte) error {
+	h := t.wr.RollByte(b)
+	t.hoff++
+	t.putEntry(h, t.hoff)
+	return nil
+}
+
+// Write converts the bytes provided into hash tables and stores the
+// abbreviated offsets into the hash table. The method will never return an
+// error.
+func (t *hashTable) Write(p []byte) (n int, err error) {
+	for _, b := range p {
+		// WriteByte doesn't generate an error.
+		t.WriteByte(b)
+	}
+	return len(p), nil
+}
+
+// getMatches the matches for a specific hash. The functions returns the
+// number of positions found.
+//
+// TODO: Make a getDistances because that we are actually interested in.
+func (t *hashTable) getMatches(h uint64, positions []int64) (n int) {
+	if t.hoff < 0 || len(positions) == 0 {
+		return 0
+	}
+	buffered := t.buffered()
+	tailPos := t.hoff + 1 - int64(buffered)
+	rear := t.front - buffered
+	if rear >= 0 {
+		rear -= len(t.data)
+	}
+	// get the slot for the hash
+	pos := t.t[h&t.mask] - 1
+	delta := pos - tailPos
+	for {
+		if delta < 0 {
+			return n
+		}
+		positions[n] = tailPos + delta
+		n++
+		if n >= len(positions) {
+			return n
+		}
+		i := rear + int(delta)
+		if i < 0 {
+			i += len(t.data)
+		}
+		u := t.data[i]
+		if u == 0 {
+			return n
+		}
+		delta -= int64(u)
+	}
+}
+
+// hash computes the rolling hash for the word stored in p. For correct
+// results its length must be equal to t.wordLen.
+func (t *hashTable) hash(p []byte) uint64 {
+	var h uint64
+	for _, b := range p {
+		h = t.hr.RollByte(b)
+	}
+	return h
+}
+
+// Matches fills the positions slice with potential matches. The
+// functions returns the number of positions filled into positions. The
+// byte slice p must have word length of the hash table.
+func (t *hashTable) Matches(p []byte, positions []int64) int {
+	if len(p) != t.wordLen {
+		panic(fmt.Errorf(
+			"byte slice must have length %d", t.wordLen))
+	}
+	h := t.hash(p)
+	return t.getMatches(h, positions)
+}
+
+// NextOp identifies the next operation using the hash table.
+//
+// TODO: Use all repetitions to find matches.
+func (t *hashTable) NextOp(rep [4]uint32) operation {
+	// get positions
+	data := t.dict.data[:maxMatchLen]
+	n, _ := t.dict.buf.Peek(data)
+	data = data[:n]
+	var p []int64
+	if n < t.wordLen {
+		p = t.p[:0]
+	} else {
+		p = t.p[:maxMatches]
+		n = t.Matches(data[:t.wordLen], p)
+		p = p[:n]
+	}
+
+	// convert positions in potential distances
+	head := t.dict.head
+	dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8)
+	for _, pos := range p {
+		dis := int(head - pos)
+		if dis > shortDists {
+			dists = append(dists, dis)
+		}
+	}
+
+	// check distances
+	var m match
+	dictLen := t.dict.DictLen()
+	for _, dist := range dists {
+		if dist > dictLen {
+			continue
+		}
+
+		// Here comes a trick. We are only interested in matches
+		// that are longer than the matches we have been found
+		// before. So before we test the whole byte sequence at
+		// the given distance, we test the first byte that would
+		// make the match longer. If it doesn't match the byte
+		// to match, we don't to care any longer.
+		i := t.dict.buf.rear - dist + m.n
+		if i < 0 {
+			i += len(t.dict.buf.data)
+		}
+		if t.dict.buf.data[i] != data[m.n] {
+			// We can't get a longer match. Jump to the next
+			// distance.
+			continue
+		}
+
+		n := t.dict.buf.matchLen(dist, data)
+		switch n {
+		case 0:
+			continue
+		case 1:
+			if uint32(dist-minDistance) != rep[0] {
+				continue
+			}
+		}
+		if n > m.n {
+			m = match{int64(dist), n}
+			if n == len(data) {
+				// No better match will be found.
+				break
+			}
+		}
+	}
+
+	if m.n == 0 {
+		return lit{data[0]}
+	}
+	return m
+}