diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate')
9 files changed, 93 insertions, 34 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index b0a470f92..3d2fdcd77 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -42,10 +42,10 @@ const ( baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 maxMatchOffset = 1 << 15 // The largest match offset - bTableBits = 18 // Bits used in the big tables - bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxMatchOffset * 10 // Size to preallocate for history. - bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize // Reset the buffer offset when reaching this. + bTableBits = 18 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) const ( @@ -210,16 +210,14 @@ func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { // Reset the encoding table. func (e *fastGen) Reset() { - if cap(e.hist) < int(maxMatchOffset*8) { - l := maxMatchOffset * 8 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 - } - e.hist = make([]byte, 0, l) + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) } - // We offset current position so everything will be out of reach - e.cur += maxMatchOffset + int32(len(e.hist)) e.hist = e.hist[:0] } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index 9feea87a3..56ee6dc8b 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -177,6 +177,11 @@ func (w *huffmanBitWriter) flush() { w.nbits = 0 return } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } n := w.nbytes for w.nbits != 0 { w.bytes[n] = byte(w.bits) @@ -594,8 +599,8 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b tokens.AddEOB() } - // We cannot reuse pure huffman table. - if w.lastHuffMan && w.lastHeader > 0 { + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { // We will not try to reuse. w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go index 20de8f11f..102fc74c7 100644 --- a/vendor/github.com/klauspost/compress/flate/level1.go +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -1,5 +1,7 @@ package flate +import "fmt" + // fastGen maintains the table for matches, // and the previous byte block for level 2. // This is the generic implementation. @@ -14,6 +16,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go index 7c824431e..dc6b1d314 100644 --- a/vendor/github.com/klauspost/compress/flate/level2.go +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -1,5 +1,7 @@ package flate +import "fmt" + // fastGen maintains the table for matches, // and the previous byte block for level 2. // This is the generic implementation. @@ -16,6 +18,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go index 4153d24c9..1a3ff9b6b 100644 --- a/vendor/github.com/klauspost/compress/flate/level3.go +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -1,5 +1,7 @@ package flate +import "fmt" + // fastEncL3 type fastEncL3 struct { fastGen @@ -13,6 +15,10 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go index c689ac771..f3ecc9c4d 100644 --- a/vendor/github.com/klauspost/compress/flate/level4.go +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -13,7 +13,9 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { if len(e.hist) == 0 { diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 14a235612..4e3916825 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -13,6 +13,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go index cad0c7df7..00a311977 100644 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -13,6 +13,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) + if debugDecode && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } // Protect against e.cur wraparound. for e.cur >= bufferReset { diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go index a47051197..53e899124 100644 --- a/vendor/github.com/klauspost/compress/flate/stateless.go +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -8,6 +8,8 @@ import ( const ( maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 slTableBits = 13 slTableSize = 1 << slTableBits @@ -25,11 +27,11 @@ func (s *statelessWriter) Close() error { } s.closed = true // Emit EOF block - return StatelessDeflate(s.dst, nil, true) + return StatelessDeflate(s.dst, nil, true, nil) } func (s *statelessWriter) Write(p []byte) (n int, err error) { - err = StatelessDeflate(s.dst, p, false) + err = StatelessDeflate(s.dst, p, false, nil) if err != nil { return 0, err } @@ -59,7 +61,10 @@ var bitWriterPool = sync.Pool{ // StatelessDeflate allows to compress directly to a Writer without retaining state. // When returning everything will be flushed. -func StatelessDeflate(out io.Writer, in []byte, eof bool) error { +// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { var dst tokens bw := bitWriterPool.Get().(*huffmanBitWriter) bw.reset(out) @@ -76,35 +81,53 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool) error { return bw.err } + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + for len(in) > 0 { todo := in - if len(todo) > maxStatelessBlock { - todo = todo[:maxStatelessBlock] + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] } in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } // Compress - statelessEnc(&dst, todo) + statelessEnc(&dst, todo, int16(len(dict))) isEof := eof && len(in) == 0 if dst.n == 0 { - bw.writeStoredHeader(len(todo), isEof) + bw.writeStoredHeader(len(uncompressed), isEof) if bw.err != nil { return bw.err } - bw.writeBytes(todo) - } else if int(dst.n) > len(todo)-len(todo)>>4 { + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { // If we removed less than 1/16th, huffman compress the block. - bw.writeBlockHuff(isEof, todo, false) + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) } else { - bw.writeBlockDynamic(&dst, isEof, todo, false) + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() } if bw.err != nil { return bw.err } - dst.Reset() } if !eof { - // Align. + // Align, only a stored block can do that. bw.writeStoredHeader(0, false) } bw.flush() @@ -130,7 +153,7 @@ func load6416(b []byte, i int16) uint64 { uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 } -func statelessEnc(dst *tokens, src []byte) { +func statelessEnc(dst *tokens, src []byte, startAt int16) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin @@ -144,15 +167,23 @@ func statelessEnc(dst *tokens, src []byte) { // This check isn't in the Snappy implementation, but there, the caller // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { + if len(src)-int(startAt) < minNonLiteralBlockSize { // We do not fill the token table. // This will be picked up by caller. - dst.n = uint16(len(src)) + dst.n = 0 return } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } - s := int16(1) - nextEmit := int16(0) + s := startAt + 1 + nextEmit := startAt // sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. |