aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r--vendor/github.com/klauspost/compress/flate/deflate.go6
-rw-r--r--vendor/github.com/klauspost/compress/flate/fast_encoder.go26
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go39
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_code.go53
-rw-r--r--vendor/github.com/klauspost/compress/flate/level2.go2
-rw-r--r--vendor/github.com/klauspost/compress/fse/compress.go10
-rw-r--r--vendor/github.com/klauspost/compress/fse/decompress.go4
-rw-r--r--vendor/github.com/klauspost/compress/huff0/compress.go4
-rw-r--r--vendor/github.com/klauspost/compress/snappy/snappy.go2
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockenc.go42
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_base.go40
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_best.go1
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_better.go589
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_dfast.go414
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_fast.go371
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder.go10
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder_options.go34
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_encoder.go12
-rw-r--r--vendor/github.com/klauspost/compress/zstd/snappy.go2
19 files changed, 1535 insertions, 126 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 25dbe3e15..3f428d8b5 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -645,15 +645,15 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
- d.w.logNewTablePenalty = 4
- d.window = make([]byte, maxStoreBlockSize)
+ d.w.logNewTablePenalty = 8
+ d.window = make([]byte, 32<<10)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
- d.w.logNewTablePenalty = 6
+ d.w.logNewTablePenalty = 8
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 4a73e1bdd..678f08105 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -6,6 +6,7 @@
package flate
import (
+ "encoding/binary"
"fmt"
"math/bits"
)
@@ -65,26 +66,15 @@ func load32(b []byte, i int) uint32 {
}
func load64(b []byte, i int) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+ return binary.LittleEndian.Uint64(b[i:])
}
func load3232(b []byte, i int32) uint32 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:4]
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+ return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+ return binary.LittleEndian.Uint64(b[i:])
}
func hash(u uint32) uint32 {
@@ -225,9 +215,9 @@ func (e *fastGen) Reset() {
func matchLen(a, b []byte) int {
b = b[:len(a)]
var checked int
- if len(a) > 4 {
+ if len(a) >= 4 {
// Try 4 bytes first
- if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
+ if diff := binary.LittleEndian.Uint32(a) ^ binary.LittleEndian.Uint32(b); diff != 0 {
return bits.TrailingZeros32(diff) >> 3
}
// Switch to 8 byte matching.
@@ -236,7 +226,7 @@ func matchLen(a, b []byte) int {
b = b[4:]
for len(a) >= 8 {
b = b[:len(a)]
- if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
+ if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
@@ -247,7 +237,7 @@ func matchLen(a, b []byte) int {
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
- return int(i) + checked
+ return i + checked
}
}
return len(a) + checked
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 208d66711..db54be139 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -5,6 +5,7 @@
package flate
import (
+ "encoding/binary"
"io"
)
@@ -206,7 +207,7 @@ func (w *huffmanBitWriter) write(b []byte) {
}
func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
- w.bits |= uint64(b) << (w.nbits & reg16SizeMask64)
+ w.bits |= uint64(b) << w.nbits
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
@@ -420,13 +421,11 @@ func (w *huffmanBitWriter) writeOutBits() {
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
+
+ // We over-write, but faster...
+ binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
+
if n >= bufferFlushSize {
if w.err != nil {
n = 0
@@ -435,6 +434,7 @@ func (w *huffmanBitWriter) writeOutBits() {
w.write(w.bytes[:n])
n = 0
}
+
w.nbytes = n
}
@@ -759,7 +759,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode&31]
- w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
+ w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
@@ -779,7 +779,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := offs[offsetCode&31]
- w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
+ w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
@@ -830,8 +830,8 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
- estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
- estBits += w.lastHeader + 15
+ estBits := histogramSize(input, w.literalFreq[:], !eof && !sync)
+ estBits += w.lastHeader + len(input)/32
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
@@ -845,9 +845,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
return
}
+ reuseSize := 0
if w.lastHeader > 0 {
- reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
- estBits += estExtra
+ reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256])
if estBits < reuseSize {
// We owe an EOB
@@ -859,6 +859,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
const numLiterals = endBlockMarker + 1
const numOffsets = 1
if w.lastHeader == 0 {
+ if !eof && !sync {
+ // Generate a slightly suboptimal tree that can be used for all.
+ fillHist(w.literalFreq[:numLiterals])
+ }
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
@@ -878,19 +882,14 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
- w.bits |= uint64(c.code) << ((w.nbits) & reg16SizeMask64)
+ w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
+ binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index 4c39a3018..0d3445a1c 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -122,6 +122,16 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
return total
}
+func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
+ var total int
+ for _, f := range b {
+ if f != 0 {
+ total += int(h.codes[f].len)
+ }
+ }
+ return total
+}
+
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
@@ -327,37 +337,40 @@ func atLeastOne(v float32) float32 {
return v
}
+// Unassigned values are assigned '1' in the histogram.
+func fillHist(b []uint16) {
+ for i, v := range b {
+ if v == 0 {
+ b[i] = 1
+ }
+ }
+}
+
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
-// Unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
-func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
+func histogramSize(b []byte, h []uint16, fill bool) (bits int) {
h = h[:256]
for _, t := range b {
h[t]++
}
- invTotal := 1.0 / float32(len(b))
- shannon := float32(0.0)
- var extra float32
+ total := len(b)
if fill {
- oneBits := atLeastOne(-mFastLog2(invTotal))
- for i, v := range h[:] {
- if v > 0 {
- n := float32(v)
- shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
- } else {
- h[i] = 1
- extra += oneBits
+ for _, v := range h {
+ if v == 0 {
+ total++
}
}
- } else {
- for _, v := range h[:] {
- if v > 0 {
- n := float32(v)
- shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
- }
+ }
+
+ invTotal := 1.0 / float32(total)
+ shannon := float32(0.0)
+ for _, v := range h {
+ if v > 0 {
+ n := float32(v)
+ shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
- return int(shannon + 0.99), int(extra + 0.99)
+ return int(shannon + 0.99)
}
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
index 5b986a194..234c4389a 100644
--- a/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -155,7 +155,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
- x := load6432(src, int32(i))
+ x := load6432(src, i)
nextHash := hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go
index b69237c9b..0d31f5ebc 100644
--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@@ -301,7 +301,7 @@ func (s *Scratch) writeCount() error {
out[outP+1] = byte(bitStream >> 8)
outP += (bitCount + 7) / 8
- if uint16(charnum) > s.symbolLen {
+ if charnum > s.symbolLen {
return errors.New("internal error: charnum > s.symbolLen")
}
s.Out = out[:outP]
@@ -331,7 +331,7 @@ type cTable struct {
func (s *Scratch) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
- if cap(s.ct.tableSymbol) < int(tableSize) {
+ if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@@ -565,8 +565,8 @@ func (s *Scratch) normalizeCount2() error {
distributed uint32
total = uint32(s.br.remain())
tableLog = s.actualTableLog
- lowThreshold = uint32(total >> tableLog)
- lowOne = uint32((total * 3) >> (tableLog + 1))
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@@ -591,7 +591,7 @@ func (s *Scratch) normalizeCount2() error {
if (total / toDistribute) > lowOne {
// risk of rounding to zero
- lowOne = uint32((total * 3) / (toDistribute * 2))
+ lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
index 413ec3b3c..926f5f153 100644
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -172,7 +172,7 @@ type decSymbol struct {
// allocDtable will allocate decoding tables if they are not big enough.
func (s *Scratch) allocDtable() {
tableSize := 1 << s.actualTableLog
- if cap(s.decTable) < int(tableSize) {
+ if cap(s.decTable) < tableSize {
s.decTable = make([]decSymbol, tableSize)
}
s.decTable = s.decTable[:tableSize]
@@ -340,7 +340,7 @@ type decoder struct {
func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
d.dt = dt
d.br = in
- d.state = uint16(in.getBits(tableLog))
+ d.state = in.getBits(tableLog)
}
// next returns the next symbol and sets the next state.
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index f9ed5f830..dea115b33 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -403,7 +403,7 @@ func (s *Scratch) buildCTable() error {
var startNode = int16(s.symbolLen)
nonNullRank := s.symbolLen - 1
- nodeNb := int16(startNode)
+ nodeNb := startNode
huffNode := s.nodes[1 : huffNodesLen+1]
// This overlays the slice above, but allows "-1" index lookups.
@@ -580,7 +580,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
// Get pos of last (smallest) symbol per rank
{
- currentNbBits := uint8(maxNbBits)
+ currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
if huffNode[pos].nbBits >= currentNbBits {
continue
diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go
index 74a36689e..ea58ced88 100644
--- a/vendor/github.com/klauspost/compress/snappy/snappy.go
+++ b/vendor/github.com/klauspost/compress/snappy/snappy.go
@@ -94,5 +94,5 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli)
// https://github.com/google/snappy/blob/master/framing_format.txt
func crc(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
- return uint32(c>>15|c<<17) + 0xa282ead8
+ return c>>15 | c<<17 + 0xa282ead8
}
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index c85c40255..9647c64e5 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -22,28 +22,44 @@ type blockEnc struct {
dictLitEnc *huff0.Scratch
wr bitWriter
- extraLits int
- last bool
-
+ extraLits int
output []byte
recentOffsets [3]uint32
prevRecentOffsets [3]uint32
+
+ last bool
+ lowMem bool
}
// init should be used once the block has been created.
// If called more than once, the effect is the same as calling reset.
func (b *blockEnc) init() {
- if cap(b.literals) < maxCompressedLiteralSize {
- b.literals = make([]byte, 0, maxCompressedLiteralSize)
- }
- const defSeqs = 200
- b.literals = b.literals[:0]
- if cap(b.sequences) < defSeqs {
- b.sequences = make([]seq, 0, defSeqs)
- }
- if cap(b.output) < maxCompressedBlockSize {
- b.output = make([]byte, 0, maxCompressedBlockSize)
+ if b.lowMem {
+ // 1K literals
+ if cap(b.literals) < 1<<10 {
+ b.literals = make([]byte, 0, 1<<10)
+ }
+ const defSeqs = 20
+ if cap(b.sequences) < defSeqs {
+ b.sequences = make([]seq, 0, defSeqs)
+ }
+ // 1K
+ if cap(b.output) < 1<<10 {
+ b.output = make([]byte, 0, 1<<10)
+ }
+ } else {
+ if cap(b.literals) < maxCompressedBlockSize {
+ b.literals = make([]byte, 0, maxCompressedBlockSize)
+ }
+ const defSeqs = 200
+ if cap(b.sequences) < defSeqs {
+ b.sequences = make([]seq, 0, defSeqs)
+ }
+ if cap(b.output) < maxCompressedBlockSize {
+ b.output = make([]byte, 0, maxCompressedBlockSize)
+ }
}
+
if b.coders.mlEnc == nil {
b.coders.mlEnc = &fseEncoder{}
b.coders.mlPrev = &fseEncoder{}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
index b1b7c6e6a..2d4d893eb 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -7,6 +7,10 @@ import (
"github.com/klauspost/compress/zstd/internal/xxhash"
)
+const (
+ dictShardBits = 6
+)
+
type fastBase struct {
// cur is the offset at the start of hist
cur int32
@@ -17,6 +21,7 @@ type fastBase struct {
tmp [8]byte
blk *blockEnc
lastDictID uint32
+ lowMem bool
}
// CRC returns the underlying CRC writer.
@@ -57,15 +62,10 @@ func (e *fastBase) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
- l := e.maxMatchOff * 2
- // Make it at least 1MB.
- if l < 1<<20 {
- l = 1 << 20
- }
- e.hist = make([]byte, 0, l)
+ e.ensureHist(len(src))
} else {
- if cap(e.hist) < int(e.maxMatchOff*2) {
- panic("unexpected buffer size")
+ if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
+ panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
}
// Move down
offset := int32(len(e.hist)) - e.maxMatchOff
@@ -79,6 +79,28 @@ func (e *fastBase) addBlock(src []byte) int32 {
return s
}
+// ensureHist will ensure that history can keep at least this many bytes.
+func (e *fastBase) ensureHist(n int) {
+ if cap(e.hist) >= n {
+ return
+ }
+ l := e.maxMatchOff
+ if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
+ l += maxCompressedBlockSize
+ } else {
+ l += e.maxMatchOff
+ }
+ // Make it at least 1MB.
+ if l < 1<<20 && !e.lowMem {
+ l = 1 << 20
+ }
+ // Make it at least the requested size.
+ if l < int32(n) {
+ l = int32(n)
+ }
+ e.hist = make([]byte, 0, l)
+}
+
// useBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastBase) UseBlock(enc *blockEnc) {
@@ -117,7 +139,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
// Reset the encoding table.
func (e *fastBase) resetBase(d *dict, singleBlock bool) {
if e.blk == nil {
- e.blk = &blockEnc{}
+ e.blk = &blockEnc{lowMem: e.lowMem}
e.blk.init()
} else {
e.blk.reset(nil)
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
index bb71d1eea..fe3625c5f 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -407,6 +407,7 @@ encodeLoop:
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
e.Encode(blk, src)
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index 94a5343d0..c2ce4a2ba 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -16,6 +16,12 @@ const (
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
+
+ betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
+ betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
+
+ betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table
+ betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)
type prevEntry struct {
@@ -31,10 +37,17 @@ type prevEntry struct {
// and that it is longer (lazy matching).
type betterFastEncoder struct {
fastBase
- table [betterShortTableSize]tableEntry
- longTable [betterLongTableSize]prevEntry
- dictTable []tableEntry
- dictLongTable []prevEntry
+ table [betterShortTableSize]tableEntry
+ longTable [betterLongTableSize]prevEntry
+}
+
+type betterFastEncoderDict struct {
+ betterFastEncoder
+ dictTable []tableEntry
+ dictLongTable []prevEntry
+ shortTableShardDirty [betterShortTableShardCnt]bool
+ longTableShardDirty [betterLongTableShardCnt]bool
+ allDirty bool
}
// Encode improves compression...
@@ -516,12 +529,512 @@ encodeLoop:
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
e.Encode(blk, src)
}
+// Encode improves compression...
+func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ e.allDirty = true
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.allDirty = true
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 9
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+ var matched int32
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ off := s + e.cur
+ e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s + repOff
+ s += lenght + repOff
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ const repOff2 = 1
+
+ // We deviate from the reference encoder and also check offset 2.
+ // Still slower and not much better, so disabled.
+ // repIndex = s - offset2 + repOff2
+ if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
+ // Consider history as well.
+ var seq seq
+ lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 2
+ seq.offset = 2
+ if debugSequences {
+ println("repeat sequence 2", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ index0 := s + repOff2
+ s += lenght + repOff2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ // Swap offsets
+ offset1, offset2 = offset2, offset1
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := candidateL.offset - e.cur
+ coffsetLP := candidateL.prev - e.cur
+
+ // Check if we have a long match.
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetL+8, src) + 8
+ t = coffsetL
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
+ if prevMatch > matched {
+ matched = prevMatch
+ t = coffsetLP
+ }
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ }
+ break
+ }
+
+ // Check if we have a long match on prev.
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetLP+8, src) + 8
+ t = coffsetLP
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ coffsetS := candidateS.offset - e.cur
+
+ // Check if we have a short match.
+ if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ matched = e.matchlen(s+4, coffsetS+4, src) + 4
+
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, betterLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = candidateL.offset - e.cur
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+ }
+
+ // Check prev long...
+ coffsetL = candidateL.prev - e.cur
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match (after short)")
+ }
+ break
+ }
+ }
+ t = coffsetS
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+
+ // A 4-byte match has been found. Update recent offsets.
+ // We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := matched
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
// ResetDict will reset and set a dictionary if not nil
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
+ if d != nil {
+ panic("betterFastEncoder: Reset with dict")
+ }
+}
+
+// ResetDict will reset and set a dictionary if not nil
+func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
if d == nil {
return
}
@@ -557,6 +1070,7 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
+ e.allDirty = true
}
// Init or copy dict table
@@ -585,11 +1099,72 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
+ e.allDirty = true
}
+
// Reset table to initial state
- copy(e.longTable[:], e.dictLongTable)
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.shortTableShardDirty {
+ if e.shortTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterShortTableShardCnt
+ const shardSize = betterShortTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.table[:], e.dictTable)
+ for i := range e.shortTableShardDirty {
+ e.shortTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.shortTableShardDirty {
+ if !e.shortTableShardDirty[i] {
+ continue
+ }
+ copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ e.shortTableShardDirty[i] = false
+ }
+ }
+ }
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.shortTableShardDirty {
+ if e.shortTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterLongTableShardCnt
+ const shardSize = betterLongTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.longTable[:], e.dictLongTable)
+ for i := range e.longTableShardDirty {
+ e.longTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.longTableShardDirty {
+ if !e.longTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
+ e.longTableShardDirty[i] = false
+ }
+ }
+ }
e.cur = e.maxMatchOff
- // Reset table to initial state
- copy(e.table[:], e.dictTable)
+ e.allDirty = false
+}
+
+func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
+ e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
+}
+
+func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
+ e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index 19eebf66e..8629d43d8 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -11,6 +11,9 @@ const (
dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+ dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
+ dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard
+
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
@@ -18,8 +21,14 @@ const (
type doubleFastEncoder struct {
fastEncoder
- longTable [dFastLongTableSize]tableEntry
- dictLongTable []tableEntry
+ longTable [dFastLongTableSize]tableEntry
+}
+
+type doubleFastEncoderDict struct {
+ fastEncoderDict
+ longTable [dFastLongTableSize]tableEntry
+ dictLongTable []tableEntry
+ longTableShardDirty [dLongTableShardCnt]bool
}
// Encode mimmics functionality in zstd_dfast.c
@@ -678,9 +687,379 @@ encodeLoop:
}
}
+// Encode will encode the content, with a dictionary if initialized for it.
+func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = tableEntry{}
+ }
+ e.markAllShardsDirty()
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.longTable[i].offset = v
+ }
+ e.markAllShardsDirty()
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 8
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, dFastShortTableBits)
+ nextHashL := hash8(cv, dFastLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.longTable[nextHashL] = entry
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = entry
+ e.markShardDirty(nextHashS)
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += lenght + repOff
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := s - (candidateL.offset - e.cur)
+ coffsetS := s - (candidateS.offset - e.cur)
+
+ // Check if we have a long match.
+ if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+ // Found a long match, likely at least 8 bytes.
+ // Reference encoder checks all 8 bytes, we only check 4,
+ // but the likelihood of both the first 4 bytes and the hash matching should be enough.
+ t = candidateL.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ // Check if we have a short match.
+ if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, dFastLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+ e.markLongShardDirty(nextHashL)
+ if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+ // Found a long match, likely at least 8 bytes.
+ // Reference encoder checks all 8 bytes, we only check 4,
+ // but the likelihood of both the first 4 bytes and the hash matching should be enough.
+ t = candidateL.offset - e.cur
+ s += checkAt
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+
+ t = candidateS.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+
+ // A 4-byte match has been found. Update recent offsets.
+ // We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ l := e.matchlen(s+4, t+4, src) + 4
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) and start+2 (short)
+ index0 := s - l + 1
+ // Index match end-2 (long) and end-1 (short)
+ index1 := s - 2
+
+ cv0 := load6432(src, index0)
+ cv1 := load6432(src, index1)
+ te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+ te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+ longHash1 := hash8(cv0, dFastLongTableBits)
+ longHash2 := hash8(cv0, dFastLongTableBits)
+ e.longTable[longHash1] = te0
+ e.longTable[longHash2] = te1
+ e.markLongShardDirty(longHash1)
+ e.markLongShardDirty(longHash2)
+ cv0 >>= 8
+ cv1 >>= 8
+ te0.offset++
+ te1.offset++
+ te0.val = uint32(cv0)
+ te1.val = uint32(cv1)
+ hashVal1 := hash5(cv0, dFastShortTableBits)
+ hashVal2 := hash5(cv1, dFastShortTableBits)
+ e.table[hashVal1] = te0
+ e.markShardDirty(hashVal1)
+ e.table[hashVal2] = te1
+ e.markShardDirty(hashVal2)
+
+ cv = load6432(src, s)
+
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, dFastShortTableBits)
+ nextHashL := hash8(cv, dFastLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.longTable[nextHashL] = entry
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = entry
+ e.markShardDirty(nextHashS)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+ // If we encoded more than 64K mark all dirty.
+ if len(src) > 64<<10 {
+ e.markAllShardsDirty()
+ }
+}
+
// ResetDict will reset and set a dictionary if not nil
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
e.fastEncoder.Reset(d, singleBlock)
+ if d != nil {
+ panic("doubleFastEncoder: Reset with dict not supported")
+ }
+}
+
+// ResetDict will reset and set a dictionary if not nil
+func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
+ allDirty := e.allDirty
+ e.fastEncoderDict.Reset(d, singleBlock)
if d == nil {
return
}
@@ -706,8 +1085,37 @@ func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
+ e.allDirty = true
}
// Reset table to initial state
e.cur = e.maxMatchOff
- copy(e.longTable[:], e.dictLongTable)
+
+ dirtyShardCnt := 0
+ if !allDirty {
+ for i := range e.longTableShardDirty {
+ if e.longTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+
+ if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
+ copy(e.longTable[:], e.dictLongTable)
+ for i := range e.longTableShardDirty {
+ e.longTableShardDirty[i] = false
+ }
+ return
+ }
+ for i := range e.longTableShardDirty {
+ if !e.longTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ e.longTableShardDirty[i] = false
+ }
+}
+
+func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
+ e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index 0045016d9..ba4a17e10 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -11,9 +11,11 @@ import (
)
const (
- tableBits = 15 // Bits used in the table
- tableSize = 1 << tableBits // Size of the table
- tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+ tableBits = 15 // Bits used in the table
+ tableSize = 1 << tableBits // Size of the table
+ tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
+ tableShardSize = tableSize / tableShardCnt // Size of an individual shard
+ tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
)
@@ -24,8 +26,14 @@ type tableEntry struct {
type fastEncoder struct {
fastBase
- table [tableSize]tableEntry
- dictTable []tableEntry
+ table [tableSize]tableEntry
+}
+
+type fastEncoderDict struct {
+ fastEncoder
+ dictTable []tableEntry
+ tableShardDirty [tableShardCnt]bool
+ allDirty bool
}
// Encode mimmics functionality in zstd_fast.c
@@ -617,9 +625,323 @@ encodeLoop:
}
}
+// Encode will encode the content, with a dictionary if initialized for it.
+func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ inputMargin = 8
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+ if e.allDirty || len(src) > 32<<10 {
+ e.fastEncoder.Encode(blk, src)
+ e.allDirty = true
+ return
+ }
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 2.
+ const stepSize = 2
+
+ // TEMPLATE
+ const hashLog = tableBits
+ // seems global, but would be nice to tweak.
+ const kSearchStrength = 7
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // t will contain the match offset when we find one.
+ // When existing the search loop, we have already checked 4 bytes.
+ var t int32
+
+ // We will not use repeat offsets across blocks.
+ // By not using them for the first 3 matches
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHash := hash6(cv, hashLog)
+ nextHash2 := hash6(cv>>8, hashLog)
+ candidate := e.table[nextHash]
+ candidate2 := e.table[nextHash2]
+ repIndex := s - offset1 + 2
+
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
+ e.markShardDirty(nextHash2)
+
+ if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+ // Consider history as well.
+ var seq seq
+ var length int32
+ // length = 4 + e.matchlen(s+6, repIndex+4, src)
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + 2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ sMin := s - e.maxMatchOff
+ if sMin < 0 {
+ sMin = 0
+ }
+ for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + 2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, length)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ coffset0 := s - (candidate.offset - e.cur)
+ coffset1 := s - (candidate2.offset - e.cur) + 1
+ if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
+ // found a regular match
+ t = candidate.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ break
+ }
+
+ if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
+ // found a regular match
+ t = candidate2.offset - e.cur
+ s++
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ break
+ }
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // A 4-byte match has been found. We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ //l := e.matchlen(s+4, t+4, src) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence.
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ // Don't use repeat offsets
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+
+ // Check offset 2
+ if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ //l := 4 + e.matchlen(s+4, o2+4, src)
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Store this, since we have it.
+ nextHash := hash6(cv, hashLog)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ break encodeLoop
+ }
+ // Prepare next loop.
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
// ResetDict will reset and set a dictionary if not nil
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
+ if d != nil {
+ panic("fastEncoder: Reset with dict")
+ }
+}
+
+// ResetDict will reset and set a dictionary if not nil
+func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
if d == nil {
return
}
@@ -653,9 +975,44 @@ func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
+ e.allDirty = true
}
e.cur = e.maxMatchOff
- // Reset table to initial state
- copy(e.table[:], e.dictTable)
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.tableShardDirty {
+ if e.tableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+
+ const shardCnt = tableShardCnt
+ const shardSize = tableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.table[:], e.dictTable)
+ for i := range e.tableShardDirty {
+ e.tableShardDirty[i] = false
+ }
+ e.allDirty = false
+ return
+ }
+ for i := range e.tableShardDirty {
+ if !e.tableShardDirty[i] {
+ continue
+ }
+
+ copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ e.tableShardDirty[i] = false
+ }
+ e.allDirty = false
+}
+
+func (e *fastEncoderDict) markAllShardsDirty() {
+ e.allDirty = true
+}
+
+func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
+ e.tableShardDirty[entryNum/tableShardSize] = true
}
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index f5759211d..6f0265099 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -106,7 +106,7 @@ func (e *Encoder) Reset(w io.Writer) {
s.encoder = e.o.encoder()
}
if s.writing == nil {
- s.writing = &blockEnc{}
+ s.writing = &blockEnc{lowMem: e.o.lowMem}
s.writing.init()
}
s.writing.initNewEncode()
@@ -176,6 +176,12 @@ func (e *Encoder) nextBlock(final bool) error {
}
if !s.headerWritten {
// If we have a single block encode, do a sync compression.
+ if final && len(s.filling) == 0 && !e.o.fullZero {
+ s.headerWritten = true
+ s.fullFrameWritten = true
+ s.eofWritten = true
+ return nil
+ }
if final && len(s.filling) > 0 {
s.current = e.EncodeAll(s.filling, s.current[:0])
var n2 int
@@ -471,7 +477,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
// If less than 1MB, allocate a buffer up front.
- if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 {
+ if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index a7312f42a..18a47eb03 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -24,12 +24,12 @@ type encoderOptions struct {
allLitEntropy bool
customWindow bool
customALEntropy bool
+ lowMem bool
dict *dict
}
func (o *encoderOptions) setDefault() {
*o = encoderOptions{
- // use less ram: true for now, but may change.
concurrent: runtime.GOMAXPROCS(0),
crc: true,
single: nil,
@@ -37,20 +37,31 @@ func (o *encoderOptions) setDefault() {
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
+ lowMem: false,
}
}
// encoder returns an encoder with the selected options.
func (o encoderOptions) encoder() encoder {
switch o.level {
+ case SpeedFastest:
+ if o.dict != nil {
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+
case SpeedDefault:
- return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
+ if o.dict != nil {
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ }
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
case SpeedBetterCompression:
- return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
+ if o.dict != nil {
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
case SpeedBestCompression:
- return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
- case SpeedFastest:
- return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
@@ -276,6 +287,17 @@ func WithSingleSegment(b bool) EOption {
}
}
+// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
+// slower encoding speed.
+// This will not change the window size which is the primary function for reducing
+// memory usage. See WithWindowSize.
+func WithLowerEncoderMem(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.lowMem = b
+ return nil
+ }
+}
+
// WithEncoderDict allows to register a dictionary that will be used for the encode.
// The encoder *may* choose to use no dictionary instead for certain payloads.
func WithEncoderDict(dict []byte) EOption {
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index aa9eba88b..b80709d5e 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -97,7 +97,7 @@ func (s *fseEncoder) prepare() (*fseEncoder, error) {
func (s *fseEncoder) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
- if cap(s.ct.tableSymbol) < int(tableSize) {
+ if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@@ -202,13 +202,13 @@ func (s *fseEncoder) buildCTable() error {
case 0:
case -1, 1:
symbolTT[i].deltaNbBits = tl
- symbolTT[i].deltaFindState = int16(total - 1)
+ symbolTT[i].deltaFindState = total - 1
total++
default:
maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
minStatePlus := uint32(v) << maxBitsOut
symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
- symbolTT[i].deltaFindState = int16(total - v)
+ symbolTT[i].deltaFindState = total - v
total += v
}
}
@@ -353,8 +353,8 @@ func (s *fseEncoder) normalizeCount2(length int) error {
distributed uint32
total = uint32(length)
tableLog = s.actualTableLog
- lowThreshold = uint32(total >> tableLog)
- lowOne = uint32((total * 3) >> (tableLog + 1))
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@@ -379,7 +379,7 @@ func (s *fseEncoder) normalizeCount2(length int) error {
if (total / toDistribute) > lowOne {
// risk of rounding to zero
- lowOne = uint32((total * 3) / (toDistribute * 2))
+ lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index 841fd95ac..c95fe5111 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -417,7 +417,7 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli)
// https://github.com/google/snappy/blob/master/framing_format.txt
func snappyCRC(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
- return uint32(c>>15|c<<17) + 0xa282ead8
+ return c>>15 | c<<17 + 0xa282ead8
}
// snappyDecodedLen returns the length of the decoded block and the number of bytes