diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress')
9 files changed, 726 insertions, 59 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index ec0c3fc53..bda4021ef 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -38,7 +38,7 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { b.nBits += bits } -// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. +// encSymbol will add up to 16 bits. value may not contain more set bits than indicated. // It will not check if there is space for them, so the caller must ensure that it has flushed recently. func (b *bitWriter) encSymbol(ct cTable, symbol byte) { enc := ct[symbol] @@ -46,6 +46,17 @@ func (b *bitWriter) encSymbol(ct cTable, symbol byte) { b.nBits += enc.nBits } +// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { + encA := ct[av] + encB := ct[bv] + sh := b.nBits & 63 + combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63)) + b.bitContainer |= combined << sh + b.nBits += encA.nBits + encB.nBits +} + // addBits16ZeroNC will add up to 16 bits. // It will not check if there is space for them, // so the caller must ensure that it has flushed recently. diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 51e00aaeb..0843cb014 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -80,9 +80,12 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) if s.Reuse == ReusePolicyPrefer && canReuse { keepTable := s.cTable + keepTL := s.actualTableLog s.cTable = s.prevTable + s.actualTableLog = s.prevTableLog s.Out, err = compressor(in) s.cTable = keepTable + s.actualTableLog = keepTL if err == nil && len(s.Out) < wantSize { s.OutData = s.Out return s.Out, true, nil @@ -92,7 +95,6 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) } // Calculate new table. - s.optimalTableLog() err = s.buildCTable() if err != nil { return nil, false, err @@ -109,9 +111,15 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) if oldSize <= hSize+newSize || hSize+12 >= wantSize { // Retain cTable even if we re-use. keepTable := s.cTable + keepTL := s.actualTableLog + s.cTable = s.prevTable + s.actualTableLog = s.prevTableLog s.Out, err = compressor(in) + + // Restore ctable. s.cTable = keepTable + s.actualTableLog = keepTL if err != nil { return nil, false, err } @@ -142,7 +150,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) return nil, false, ErrIncompressible } // Move current table into previous. - s.prevTable, s.cTable = s.cTable, s.prevTable[:0] + s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0] s.OutData = s.Out[len(s.OutTable):] return s.Out, false, nil } @@ -163,28 +171,23 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { for i := len(src) & 3; i > 0; i-- { bw.encSymbol(cTable, src[n+i-1]) } + n -= 4 if s.actualTableLog <= 8 { - n -= 4 for ; n >= 0; n -= 4 { tmp := src[n : n+4] // tmp should be len 4 bw.flush32() - bw.encSymbol(cTable, tmp[3]) - bw.encSymbol(cTable, tmp[2]) - bw.encSymbol(cTable, tmp[1]) - bw.encSymbol(cTable, tmp[0]) + bw.encTwoSymbols(cTable, tmp[3], tmp[2]) + bw.encTwoSymbols(cTable, tmp[1], tmp[0]) } } else { - n -= 4 for ; n >= 0; n -= 4 { tmp := src[n : n+4] // tmp should be len 4 bw.flush32() - bw.encSymbol(cTable, tmp[3]) - bw.encSymbol(cTable, tmp[2]) + bw.encTwoSymbols(cTable, tmp[3], tmp[2]) bw.flush32() - bw.encSymbol(cTable, tmp[1]) - bw.encSymbol(cTable, tmp[0]) + bw.encTwoSymbols(cTable, tmp[1], tmp[0]) } } err := bw.close() @@ -322,9 +325,26 @@ func (s *Scratch) canUseTable(c cTable) bool { return true } +func (s *Scratch) validateTable(c cTable) bool { + if len(c) < int(s.symbolLen) { + return false + } + for i, v := range s.count[:s.symbolLen] { + if v != 0 { + if c[i].nBits == 0 { + return false + } + if c[i].nBits > s.actualTableLog { + return false + } + } + } + return true +} + // minTableLog provides the minimum logSize to safely represent a distribution. func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBit32(uint32(s.br.remain()-1)) + 1 + minBitsSrc := highBit32(uint32(s.br.remain())) + 1 minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 if minBitsSrc < minBitsSymbols { return uint8(minBitsSrc) @@ -336,7 +356,7 @@ func (s *Scratch) minTableLog() uint8 { func (s *Scratch) optimalTableLog() { tableLog := s.TableLog minBits := s.minTableLog() - maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 2 + maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 if maxBitsSrc < tableLog { // Accuracy can be reduced tableLog = maxBitsSrc @@ -363,6 +383,7 @@ type cTableEntry struct { const huffNodesMask = huffNodesLen - 1 func (s *Scratch) buildCTable() error { + s.optimalTableLog() s.huffSort() if cap(s.cTable) < maxSymbolValue+1 { s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1) @@ -439,7 +460,7 @@ func (s *Scratch) buildCTable() error { return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax) } var nbPerRank [tableLogMax + 1]uint16 - var valPerRank [tableLogMax + 1]uint16 + var valPerRank [16]uint16 for _, v := range huffNode[:nonNullRank+1] { nbPerRank[v.nbBits]++ } @@ -455,16 +476,17 @@ func (s *Scratch) buildCTable() error { } // push nbBits per symbol, symbol order - // TODO: changed `s.symbolLen` -> `nonNullRank+1` (micro-opt) for _, v := range huffNode[:nonNullRank+1] { s.cTable[v.symbol].nBits = v.nbBits } // assign value within rank, symbol order - for n, val := range s.cTable[:s.symbolLen] { - v := valPerRank[val.nBits] - s.cTable[n].val = v - valPerRank[val.nBits] = v + 1 + t := s.cTable[:s.symbolLen] + for n, val := range t { + nbits := val.nBits & 15 + v := valPerRank[nbits] + t[n].val = v + valPerRank[nbits] = v + 1 } return nil @@ -488,10 +510,12 @@ func (s *Scratch) huffSort() { r := highBit32(v+1) & 31 rank[r].base++ } - for n := 30; n > 0; n-- { + // maxBitLength is log2(BlockSizeMax) + 1 + const maxBitLength = 18 + 1 + for n := maxBitLength; n > 0; n-- { rank[n-1].base += rank[n].base } - for n := range rank[:] { + for n := range rank[:maxBitLength] { rank[n].current = rank[n].base } for n, c := range s.count[:s.symbolLen] { @@ -510,7 +534,7 @@ func (s *Scratch) huffSort() { } func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { - maxNbBits := s.TableLog + maxNbBits := s.actualTableLog huffNode := s.nodes[1 : huffNodesLen+1] //huffNode = huffNode[: huffNodesLen] diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 6bc23bbf0..53249df05 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -83,7 +83,7 @@ type Scratch struct { MaxSymbolValue uint8 // TableLog will attempt to override the tablelog for the next block. - // Must be <= 11. + // Must be <= 11 and >= 5. TableLog uint8 // Reuse will specify the reuse policy @@ -105,6 +105,7 @@ type Scratch struct { maxCount int // count of the most probable symbol clearCount bool // clear count actualTableLog uint8 // Selected tablelog. + prevTableLog uint8 // Tablelog for previous table prevTable cTable // Table used for previous compression. cTable cTable // compression table dt dTable // decompression table @@ -127,8 +128,8 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) { if s.TableLog == 0 { s.TableLog = tableLogDefault } - if s.TableLog > tableLogMax { - return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, tableLogMax) + if s.TableLog > tableLogMax || s.TableLog < minTablelog { + return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax) } if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax { s.MaxDecodedSize = BlockSizeMax diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 52dc0aee3..bc977a302 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -36,7 +36,7 @@ so as always, testing is recommended. For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. The "Fastest" compression ratio is roughly equivalent to zstd level 1. -The "Default" compression ration is roughly equivalent to zstd level 3 (default). +The "Default" compression ratio is roughly equivalent to zstd level 3 (default). In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast. @@ -390,4 +390,4 @@ For sending files for reproducing errors use a service like [goobox](https://goo For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). -This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.
\ No newline at end of file +This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 99eccda11..507757d52 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -299,6 +299,20 @@ func (b *blockEnc) encodeRaw(a []byte) { } } +// encodeRaw can be used to set the output to a raw representation of supplied bytes. +func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(uint32(len(src))) + bh.setType(blockTypeRaw) + dst = bh.appendTo(dst) + dst = append(dst, src...) + if debug { + println("Adding RAW block, length", len(src)) + } + return dst +} + // encodeLits can be used if the block is only litLen. func (b *blockEnc) encodeLits(raw bool) error { var bh blockHeader @@ -324,18 +338,10 @@ func (b *blockEnc) encodeLits(raw bool) error { if len(b.literals) >= 1024 { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - if len(out) > len(b.literals)-len(b.literals)>>4 { - // Bail out of compression is too little. - err = huff0.ErrIncompressible - } } else if len(b.literals) > 32 { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) - if len(out) > len(b.literals)-len(b.literals)>>4 { - // Bail out of compression is too little. - err = huff0.ErrIncompressible - } } else { err = huff0.ErrIncompressible } @@ -437,7 +443,7 @@ func fuzzFseEncoder(data []byte) int { return 1 } -// encode will encode the block and put the output in b.output. +// encode will encode the block and append the output in b.output. func (b *blockEnc) encode(raw bool) error { if len(b.sequences) == 0 { return b.encodeLits(raw) @@ -451,6 +457,8 @@ func (b *blockEnc) encode(raw bool) error { var lh literalsHeader bh.setLast(b.last) bh.setType(blockTypeCompressed) + // Store offset of the block header. Needed when we know the size. + bhOffset := len(b.output) b.output = bh.appendTo(b.output) var ( @@ -468,6 +476,7 @@ func (b *blockEnc) encode(raw bool) error { } else { err = huff0.ErrIncompressible } + switch err { case huff0.ErrIncompressible: lh.setType(literalsBlockRaw) @@ -735,18 +744,18 @@ func (b *blockEnc) encode(raw bool) error { } b.output = wr.out - if len(b.output)-3 >= b.size { + if len(b.output)-3-bhOffset >= b.size { // Maybe even add a bigger margin. b.litEnc.Reuse = huff0.ReusePolicyNone return errIncompressible } // Size is output minus block header. - bh.setSize(uint32(len(b.output)) - 3) + bh.setSize(uint32(len(b.output)-bhOffset) - 3) if debug { println("Rewriting block header", bh) } - _ = bh.appendTo(b.output[:0]) + _ = bh.appendTo(b.output[bhOffset:bhOffset]) b.coders.setPrev(llEnc, mlEnc, ofEnc) return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 1de94eef0..35a3cda91 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -388,6 +388,35 @@ func (d *Decoder) Close() { d.current.err = ErrDecoderClosed } +// IOReadCloser returns the decoder as an io.ReadCloser for convenience. +// Any changes to the decoder will be reflected, so the returned ReadCloser +// can be reused along with the decoder. +// io.WriterTo is also supported by the returned ReadCloser. +func (d *Decoder) IOReadCloser() io.ReadCloser { + return closeWrapper{d: d} +} + +// closeWrapper wraps a function call as a closer. +type closeWrapper struct { + d *Decoder +} + +// WriteTo forwards WriteTo calls to the decoder. +func (c closeWrapper) WriteTo(w io.Writer) (n int64, err error) { + return c.d.WriteTo(w) +} + +// Read forwards read calls to the decoder. +func (c closeWrapper) Read(p []byte) (n int, err error) { + return c.d.Read(p) +} + +// Close closes the decoder. +func (c closeWrapper) Close() error { + c.d.Close() + return nil +} + type decodeOutput struct { d *blockDec b []byte diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 2f41bcd0d..ee3b09b02 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -411,3 +411,316 @@ encodeLoop: println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } } + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + if e.cur > (1<<30)+e.maxMatchOff { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.cur = e.maxMatchOff + } + + s := int32(0) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + stepSize := int32(e.o.targetLength) + if stepSize == 0 { + stepSize++ + } + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + for { + + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + + if len(blk.sequences) > 2 { + if load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + //length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:])) + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + repOff + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debug && s <= t { + panic("s <= t") + } + if debug && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, dFastLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debug && s <= t { + panic("s <= t") + } + if debug && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debug && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debug && s <= t { + panic("s <= t") + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlen(s+4, t+4, src) + 4 + l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + e.longTable[hash8(cv0, dFastLongTableBits)] = te0 + e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + e.table[hash5(cv0, dFastShortTableBits)] = te0 + e.table[hash5(cv1, dFastShortTableBits)] = te1 + + cv = load6432(src, s) + + if len(blk.sequences) <= 2 { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlen(s+4, o2+4, src) + l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } + +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 6f388de04..0bdddac5b 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -329,6 +329,246 @@ encodeLoop: } } +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debug { + if len(src) > maxBlockSize { + panic("src too big") + } + } + // Protect against e.cur wraparound. + if e.cur > (1<<30)+e.maxMatchOff { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = e.maxMatchOff + } + + s := int32(0) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 + + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + + for { + nextHash := hash6(cv, hashLog) + nextHash2 := hash6(cv>>8, hashLog) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + + if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + // lenght := 4 + e.matchlen(s+6, repIndex+4, src) + lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += lenght + 2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debug && s <= t { + panic("s <= t") + } + if debug && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debug && s <= t { + panic("s <= t") + } + if debug && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debug && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debug && s <= t { + panic("s <= t") + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlenNoHist(s+4, t+4, src) + 4 + l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlenNoHist(s+4, o2+4, src) + l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + + // Store this, since we have it. + nextHash := hash6(cv, hashLog) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + func (e *fastEncoder) addBlock(src []byte) int32 { // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { @@ -362,6 +602,11 @@ func (e *fastEncoder) UseBlock(enc *blockEnc) { e.blk = enc } +func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { if debug { if s < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index f413042f4..366dd66bd 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -29,6 +29,7 @@ type Encoder struct { type encoder interface { Encode(blk *blockEnc, src []byte) + EncodeNoHist(blk *blockEnc, src []byte) Block() *blockEnc CRC() *xxhash.Digest AppendCRC([]byte) []byte @@ -433,7 +434,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { }() enc.Reset() blk := enc.Block() - single := len(src) > 1<<20 + // Use single segments when above minimum window and below 1MB. + single := len(src) < 1<<20 && len(src) > MinWindowSize if e.o.single != nil { single = *e.o.single } @@ -454,25 +456,22 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { panic(err) } - for len(src) > 0 { - todo := src - if len(todo) > e.o.blockSize { - todo = todo[:e.o.blockSize] - } - src = src[len(todo):] + if len(src) <= e.o.blockSize && len(src) <= maxBlockSize { + // Slightly faster with no history and everything in one block. if e.o.crc { - _, _ = enc.CRC().Write(todo) + _, _ = enc.CRC().Write(src) } blk.reset(nil) - blk.pushOffsets() - enc.Encode(blk, todo) - if len(src) == 0 { - blk.last = true - } - err := errIncompressible + blk.last = true + enc.EncodeNoHist(blk, src) + // If we got the exact same number of literals as input, // assume the literals cannot be compressed. - if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { + err := errIncompressible + oldout := blk.output + if len(blk.literals) != len(src) || len(src) != e.o.blockSize { + // Output directly to dst + blk.output = dst err = blk.encode(e.o.noEntropy) } @@ -481,13 +480,49 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if debug { println("Storing incompressible block as raw") } - blk.encodeRaw(todo) - blk.popOffsets() + dst = blk.encodeRawTo(dst, src) case nil: + dst = blk.output default: panic(err) } - dst = append(dst, blk.output...) + blk.output = oldout + } else { + for len(src) > 0 { + todo := src + if len(todo) > e.o.blockSize { + todo = todo[:e.o.blockSize] + } + src = src[len(todo):] + if e.o.crc { + _, _ = enc.CRC().Write(todo) + } + blk.reset(nil) + blk.pushOffsets() + enc.Encode(blk, todo) + if len(src) == 0 { + blk.last = true + } + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { + err = blk.encode(e.o.noEntropy) + } + + switch err { + case errIncompressible: + if debug { + println("Storing incompressible block as raw") + } + dst = blk.encodeRawTo(dst, todo) + blk.popOffsets() + case nil: + dst = append(dst, blk.output...) + default: + panic(err) + } + } } if e.o.crc { dst = enc.AppendCRC(dst) |