Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r--  vendor/github.com/klauspost/compress/README.md                   |  28
-rw-r--r--  vendor/github.com/klauspost/compress/flate/fast_encoder.go       |   2
-rw-r--r--  vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go | 166
-rw-r--r--  vendor/github.com/klauspost/compress/flate/huffman_code.go       |  40
-rw-r--r--  vendor/github.com/klauspost/compress/flate/inflate.go            | 222
-rw-r--r--  vendor/github.com/klauspost/compress/flate/inflate_gen.go        | 657
-rw-r--r--  vendor/github.com/klauspost/compress/flate/level1.go             |  56
-rw-r--r--  vendor/github.com/klauspost/compress/flate/level3.go             |  45
-rw-r--r--  vendor/github.com/klauspost/compress/flate/token.go              |  17
-rw-r--r--  vendor/github.com/klauspost/compress/huff0/decompress.go         | 333
-rw-r--r--  vendor/github.com/klauspost/compress/huff0/huff0.go              |   2
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_fast.go            |   4
12 files changed, 918 insertions(+), 654 deletions(-)
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
index e8ff994f8..f7f74c153 100644
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -17,6 +17,19 @@ This package provides various compression algorithms.
# changelog
+* Feb 17, 2022 (v1.14.3)
+ * flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494) [#478](https://github.com/klauspost/compress/pull/478)
+ * flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483)
+ * s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486)
+
+* Jan 25, 2022 (v1.14.2)
+ * zstd: improve header decoder by @dsnet [#476](https://github.com/klauspost/compress/pull/476)
+ * zstd: Add bigger default blocks [#469](https://github.com/klauspost/compress/pull/469)
+ * zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470)
+ * zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472)
+ * flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473)
+ * zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475)
+
* Jan 11, 2022 (v1.14.1)
* s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462)
* flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458)
@@ -53,6 +66,9 @@ This package provides various compression algorithms.
* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
+<details>
+ <summary>See changes to v1.12.x</summary>
+
* May 25, 2021 (v1.12.3)
* deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374)
* deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375)
@@ -74,9 +90,10 @@ This package provides various compression algorithms.
* s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352)
* zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346)
* s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349)
+</details>
<details>
- <summary>See changes prior to v1.12.1</summary>
+ <summary>See changes to v1.11.x</summary>
* Mar 26, 2021 (v1.11.13)
* zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345)
@@ -135,7 +152,7 @@ This package provides various compression algorithms.
</details>
<details>
- <summary>See changes prior to v1.11.0</summary>
+ <summary>See changes to v1.10.x</summary>
* July 8, 2020 (v1.10.11)
* zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278)
@@ -297,11 +314,6 @@ This package provides various compression algorithms.
# deflate usage
-* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/).
-* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/).
-* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
-* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/)
-
The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
| old import | new import | Documentation
@@ -323,6 +335,8 @@ Memory usage is typically 1MB for a Writer. stdlib is in the same range.
If you expect to have a lot of concurrently allocated Writers consider using
the stateless compress described below.
+For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing).
+
# Stateless compression
This package offers stateless compression as a special option for gzip/deflate.
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 0b2e54972..d55ea2a77 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -179,7 +179,7 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
- if debugDecode {
+ if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index fd49efd75..25f6d1108 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -8,6 +8,7 @@ import (
"encoding/binary"
"fmt"
"io"
+ "math"
)
const (
@@ -24,6 +25,10 @@ const (
codegenCodeCount = 19
badCode = 255
+ // maxPredefinedTokens is the maximum number of tokens
+ // where we check if fixed size is smaller.
+ maxPredefinedTokens = 250
+
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
@@ -36,8 +41,11 @@ const (
bufferSize = bufferFlushSize + 8
)
+// Minimum length code that emits bits.
+const lengthExtraBitsMinCode = 8
+
// The number of extra bits needed by length code X - LENGTH_CODES_START.
-var lengthExtraBits = [32]int8{
+var lengthExtraBits = [32]uint8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
@@ -51,6 +59,9 @@ var lengthBase = [32]uint8{
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
+// Minimum offset code that emits bits.
+const offsetExtraBitsMinCode = 4
+
// offset code word extra bits.
var offsetExtraBits = [32]int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
@@ -78,10 +89,10 @@ func init() {
for i := range offsetCombined[:] {
// Don't use extended window values...
- if offsetBase[i] > 0x006000 {
+ if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 {
continue
}
- offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
+ offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8)
}
}
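
The new layout packs the extra-bit count into the low byte of offsetCombined and the offset base into the bits above it, so the token writer later recovers the base with a single shift and the bit count with a truncating conversion. A minimal standalone sketch of that packing, using a hypothetical bucket (base 8, 2 extra bits); the helper name is illustrative and not part of the package:

package main

import "fmt"

// packOffset mirrors the layout this patch switches to: extra-bit count in
// the low 8 bits, offset base in the bits above.
func packOffset(extraBits uint8, base uint32) uint32 {
	return uint32(extraBits) | base<<8
}

func main() {
	comb := packOffset(2, 8) // hypothetical bucket: base 8, 2 extra bits

	offset := uint32(10)          // an offset that falls inside this bucket
	extra := offset - (comb >> 8) // value of the extra bits: 10 - 8 = 2
	nbits := uint8(comb)          // number of extra bits to emit: 2

	fmt.Println(extra, nbits) // 2 2
}
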
@@ -97,7 +108,7 @@ type huffmanBitWriter struct {
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
- nbits uint16
+ nbits uint8
nbytes uint8
lastHuffMan bool
literalEncoding *huffmanEncoder
@@ -215,7 +226,7 @@ func (w *huffmanBitWriter) write(b []byte) {
_, w.err = w.writer.Write(b)
}
-func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
+func (w *huffmanBitWriter) writeBits(b int32, nb uint8) {
w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
@@ -571,7 +582,10 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
- var size = w.fixedSize(extraBits)
+ var size = math.MaxInt32
+ if tokens.n < maxPredefinedTokens {
+ size = w.fixedSize(extraBits)
+ }
// Dynamic Huffman?
var numCodegens int
@@ -672,19 +686,21 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
size = reuseSize
}
- if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
- // Check if we get a reasonable size decrease.
- if storable && ssize <= size {
- w.writeStoredHeader(len(input), eof)
- w.writeBytes(input)
+ if tokens.n < maxPredefinedTokens {
+ if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
+ // Check if we get a reasonable size decrease.
+ if storable && ssize <= size {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ w.writeFixedHeader(eof)
+ if !sync {
+ tokens.AddEOB()
+ }
+ w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
return
}
- w.writeFixedHeader(eof)
- if !sync {
- tokens.AddEOB()
- }
- w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
- return
}
// Check if we get a reasonable size decrease.
if storable && ssize <= size {
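
After this change, writeBlock and writeBlockDynamic price the predefined (fixed) Huffman tables only when the block carries fewer than maxPredefinedTokens tokens; for larger blocks the dynamic tables essentially always win, so the fixed-size computation is skipped (in writeBlock the fixed size is simply left at math.MaxInt32 so it never wins). A rough sketch of the resulting decision order, with made-up bit costs standing in for the real size calculations:

package main

import "fmt"

const maxPredefinedTokens = 250

// chooseBlock sketches the selection order after this patch: start from the
// dynamic-table cost, price the predefined (fixed) tables only when the block
// has few tokens, and fall back to a stored block whenever raw bytes are no
// larger than the best encoding. Sizes are in bits and purely illustrative.
func chooseBlock(numTokens, dynamicSize, fixedSize, storedSize int) string {
	best, kind := dynamicSize, "dynamic"
	if numTokens < maxPredefinedTokens && fixedSize < best {
		best, kind = fixedSize, "fixed"
	}
	if storedSize <= best {
		return "stored"
	}
	return kind
}

func main() {
	fmt.Println(chooseBlock(100, 900, 800, 1200)) // fixed
	fmt.Println(chooseBlock(500, 900, 800, 1200)) // dynamic: too many tokens to bother pricing fixed
	fmt.Println(chooseBlock(500, 900, 800, 700))  // stored
}
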
@@ -717,19 +733,21 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
// Store predefined, if we don't get a reasonable improvement.
- if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
- // Store bytes, if we don't get an improvement.
- if storable && ssize <= preSize {
- w.writeStoredHeader(len(input), eof)
- w.writeBytes(input)
+ if tokens.n < maxPredefinedTokens {
+ if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
+ // Store bytes, if we don't get an improvement.
+ if storable && ssize <= preSize {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ w.writeFixedHeader(eof)
+ if !sync {
+ tokens.AddEOB()
+ }
+ w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
return
}
- w.writeFixedHeader(eof)
- if !sync {
- tokens.AddEOB()
- }
- w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
- return
}
if storable && ssize <= size {
@@ -833,9 +851,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
for _, t := range tokens {
- if t < matchType {
+ if t < 256 {
//w.writeCode(lits[t.literal()])
- c := lits[t.literal()]
+ c := lits[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
@@ -858,12 +876,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
// Write the length
length := t.length()
- lengthCode := lengthCode(length)
+ lengthCode := lengthCode(length) & 31
if false {
- w.writeCode(lengths[lengthCode&31])
+ w.writeCode(lengths[lengthCode])
} else {
// inlined
- c := lengths[lengthCode&31]
+ c := lengths[lengthCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
@@ -883,10 +901,10 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
}
- extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
- if extraLengthBits > 0 {
+ if lengthCode >= lengthExtraBitsMinCode {
+ extraLengthBits := lengthExtraBits[lengthCode]
//w.writeBits(extraLength, extraLengthBits)
- extraLength := int32(length - lengthBase[lengthCode&31])
+ extraLength := int32(length - lengthBase[lengthCode])
bits |= uint64(extraLength) << (nbits & 63)
nbits += extraLengthBits
if nbits >= 48 {
@@ -907,10 +925,9 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
// Write the offset
offset := t.offset()
- offsetCode := offset >> 16
- offset &= matchOffsetOnlyMask
+ offsetCode := (offset >> 16) & 31
if false {
- w.writeCode(offs[offsetCode&31])
+ w.writeCode(offs[offsetCode])
} else {
// inlined
c := offs[offsetCode]
@@ -932,11 +949,12 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
}
}
}
- offsetComb := offsetCombined[offsetCode]
- if offsetComb > 1<<16 {
+
+ if offsetCode >= offsetExtraBitsMinCode {
+ offsetComb := offsetCombined[offsetCode]
//w.writeBits(extraOffset, extraOffsetBits)
- bits |= uint64(offset-(offsetComb&0xffff)) << (nbits & 63)
- nbits += uint16(offsetComb >> 16)
+ bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63)
+ nbits += uint8(offsetComb)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
@@ -1002,6 +1020,29 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
histogram(input, w.literalFreq[:numLiterals], fill)
+ ssize, storable := w.storedSize(input)
+ if storable && len(input) > 1024 {
+ // Quick check for incompressible content.
+ abs := float64(0)
+ avg := float64(len(input)) / 256
+ max := float64(len(input) * 2)
+ for _, v := range w.literalFreq[:256] {
+ diff := float64(v) - avg
+ abs += diff * diff
+ if abs > max {
+ break
+ }
+ }
+ if abs < max {
+ if debugDeflate {
+ fmt.Println("stored", abs, "<", max)
+ }
+ // No chance we can compress this...
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
+ }
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
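
The quick check added above treats a block as incompressible when its byte histogram is nearly flat: it sums the squared deviation of each literal count from the mean and, if that sum stays under len(input)*2, writes the block as stored bytes without building Huffman tables at all. A standalone sketch of the same heuristic, assuming a local helper (looksIncompressible is not a function in this package):

package main

import "fmt"

// looksIncompressible reproduces the spirit of the quick check in
// writeBlockHuff: with a flat byte histogram the summed squared deviation
// from the mean stays small, so Huffman coding is unlikely to help and the
// block can be stored as-is. The thresholds mirror the patch but are heuristic.
func looksIncompressible(input []byte) bool {
	if len(input) <= 1024 {
		return false
	}
	var freq [256]int
	for _, b := range input {
		freq[b]++
	}
	avg := float64(len(input)) / 256
	max := float64(len(input) * 2)
	abs := 0.0
	for _, v := range freq {
		diff := float64(v) - avg
		abs += diff * diff
		if abs > max {
			return false // skewed histogram: worth compressing
		}
	}
	return true
}

func main() {
	flat := make([]byte, 4096)
	for i := range flat {
		flat[i] = byte(i) // near-uniform histogram
	}
	skewed := make([]byte, 4096) // all zero bytes: maximally skewed
	fmt.Println(looksIncompressible(flat), looksIncompressible(skewed)) // true false
}
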
@@ -1019,8 +1060,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
estBits += estBits >> w.logNewTablePenalty
// Store bytes, if we don't get a reasonable improvement.
- ssize, storable := w.storedSize(input)
if storable && ssize <= estBits {
+ if debugDeflate {
+ fmt.Println("stored,", ssize, "<=", estBits)
+ }
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
@@ -1031,7 +1074,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
if estBits < reuseSize {
if debugDeflate {
- //fmt.Println("not reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
+ fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes")
}
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
@@ -1065,6 +1108,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+ if debugDeflate {
+ count -= int(nbytes)*8 + int(nbits)
+ }
// Unroll, write 3 codes/loop.
// Fastest number of unrolls.
for len(input) > 3 {
@@ -1074,13 +1120,16 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
bits >>= (n * 8) & 63
nbits -= n * 8
- nbytes += uint8(n)
+ nbytes += n
}
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
+ if debugDeflate {
+ count += int(nbytes) * 8
+ }
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
@@ -1096,13 +1145,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Remaining...
for _, t := range input {
- // Bitwriting inlined, ~30% speedup
- c := encoding[t]
- bits |= uint64(c.code) << (nbits & 63)
- nbits += c.len
- if debugDeflate {
- count += int(c.len)
- }
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
@@ -1114,17 +1156,33 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
nbytes = 0
return
}
+ if debugDeflate {
+ count += int(nbytes) * 8
+ }
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
+ // Bitwriting inlined, ~30% speedup
+ c := encoding[t]
+ bits |= uint64(c.code) << (nbits & 63)
+ nbits += c.len
+ if debugDeflate {
+ count += int(c.len)
+ }
}
// Restore...
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
if debugDeflate {
- fmt.Println("wrote", count/8, "bytes")
+ nb := count + int(nbytes)*8 + int(nbits)
+ fmt.Println("wrote", nb, "bits,", nb/8, "bytes.")
+ }
+ // Flush if needed to have space.
+ if w.nbits >= 48 {
+ w.writeOutBits()
}
+
if eof || sync {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index f35e00261..9ab497c27 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -17,7 +17,8 @@ const (
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
- code, len uint16
+ code uint16
+ len uint8
}
type huffmanEncoder struct {
@@ -56,7 +57,7 @@ type levelInfo struct {
}
// set sets the code and length of an hcode.
-func (h *hcode) set(code uint16, length uint16) {
+func (h *hcode) set(code uint16, length uint8) {
h.len = length
h.code = code
}
@@ -80,7 +81,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
- var size uint16
+ var size uint8
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
@@ -99,7 +100,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280
size = 8
}
- codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
+ codes[ch] = hcode{code: reverseBits(bits, size), len: size}
}
return h
}
@@ -187,14 +188,19 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
+ // Descending to only have 1 bounds check.
+ l2f := int32(list[2].freq)
+ l1f := int32(list[1].freq)
+ l0f := int32(list[0].freq) + int32(list[1].freq)
+
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
- lastFreq: int32(list[1].freq),
- nextCharFreq: int32(list[2].freq),
- nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
+ lastFreq: l1f,
+ nextCharFreq: l2f,
+ nextPairFreq: l0f,
}
leafCounts[level][level] = 2
if level == 1 {
@@ -205,8 +211,8 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
- level := maxBits
- for {
+ level := uint32(maxBits)
+ for level < 16 {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
@@ -238,7 +244,13 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
- copy(leafCounts[level][:level], leafCounts[level-1][:level])
+ if true {
+ save := leafCounts[level][level]
+ leafCounts[level] = leafCounts[level-1]
+ leafCounts[level][level] = save
+ } else {
+ copy(leafCounts[level][:level], leafCounts[level-1][:level])
+ }
levels[l.level-1].needed = 2
}
@@ -296,7 +308,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
sortByLiteral(chunk)
for _, node := range chunk {
- h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
+ h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)}
code++
}
list = list[0 : len(list)-int(bits)]
@@ -309,6 +321,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list := h.freqcache[:len(freq)+1]
+ codes := h.codes[:len(freq)]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
@@ -317,11 +330,10 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list[count] = literalNode{uint16(i), f}
count++
} else {
- list[count] = literalNode{}
- h.codes[i].len = 0
+ codes[i].len = 0
}
}
- list[len(freq)] = literalNode{}
+ list[count] = literalNode{}
list = list[:count]
if count <= 2 {
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index d5f62f6a2..414c0bea9 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -36,6 +36,13 @@ type lengthExtra struct {
var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
+var bitMask32 = [32]uint32{
+ 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+ 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+ 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+ 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 32 bits
+
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
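
bitMask32[n] precomputes (1<<n)-1, so the generated decode loops can mask off n extra bits with one table load instead of re-evaluating a shift expression in the hot path. A minimal sketch of the substitution:

package main

import "fmt"

var bitMask32 = [32]uint32{
	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
	0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF,
	0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
}

func main() {
	b := uint32(0xD6) // bit buffer
	n := uint(3)      // number of extra bits to read

	fmt.Println(b & bitMask32[n]) // table lookup used after this patch: 6
	fmt.Println(b & (1<<n - 1))   // the equivalent expression it replaces: 6
}
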
@@ -559,221 +566,6 @@ func (f *decompressor) readHuffman() error {
return nil
}
-// Decode a single Huffman block from f.
-// hl and hd are the Huffman states for the lit/length values
-// and the distance values, respectively. If hd == nil, using the
-// fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) huffmanBlockGeneric() {
- const (
- stateInit = iota // Zero value must be stateInit
- stateDict
- )
-
- switch f.stepState {
- case stateInit:
- goto readLiteral
- case stateDict:
- goto copyHistory
- }
-
-readLiteral:
- // Read literal and/or (length, distance) according to RFC section 3.2.3.
- {
- var v int
- {
- // Inlined v, err := f.huffSym(f.hl)
- // Since a huffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
- for {
- for nb < n {
- c, err := f.r.ReadByte()
- if err != nil {
- f.b = b
- f.nb = nb
- f.err = noEOF(err)
- return
- }
- f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
- }
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
- n = uint(chunk & huffmanCountMask)
- if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
- n = uint(chunk & huffmanCountMask)
- }
- if n <= nb {
- if n == 0 {
- f.b = b
- f.nb = nb
- if debugDecode {
- fmt.Println("huffsym: n==0")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
- v = int(chunk >> huffmanValueShift)
- break
- }
- }
- }
-
- var n uint // number of bits extra
- var length int
- var err error
- switch {
- case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric
- f.stepState = stateInit
- return
- }
- goto readLiteral
- case v == 256:
- f.finishBlock()
- return
- // otherwise, reference to older data
- case v < 265:
- length = v - (257 - 3)
- n = 0
- case v < 269:
- length = v*2 - (265*2 - 11)
- n = 1
- case v < 273:
- length = v*4 - (269*4 - 19)
- n = 2
- case v < 277:
- length = v*8 - (273*8 - 35)
- n = 3
- case v < 281:
- length = v*16 - (277*16 - 67)
- n = 4
- case v < 285:
- length = v*32 - (281*32 - 131)
- n = 5
- case v < maxNumLit:
- length = 258
- n = 0
- default:
- if debugDecode {
- fmt.Println(v, ">= maxNumLit")
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
- if n > 0 {
- for f.nb < n {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits n>0:", err)
- }
- f.err = err
- return
- }
- }
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
- }
-
- var dist uint32
- if f.hd == nil {
- for f.nb < 5 {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<5:", err)
- }
- f.err = err
- return
- }
- }
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
- } else {
- sym, err := f.huffSym(f.hd)
- if err != nil {
- if debugDecode {
- fmt.Println("huffsym:", err)
- }
- f.err = err
- return
- }
- dist = uint32(sym)
- }
-
- switch {
- case dist < 4:
- dist++
- case dist < maxNumDist:
- nb := uint(dist-2) >> 1
- // have 1 bit in bottom of dist, need nb more.
- extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
- if err = f.moreBits(); err != nil {
- if debugDecode {
- fmt.Println("morebits f.nb<nb:", err)
- }
- f.err = err
- return
- }
- }
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
- dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
- default:
- if debugDecode {
- fmt.Println("dist too big:", dist, maxNumDist)
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- // No check on length; encoding can be prescient.
- if dist > uint32(f.dict.histSize()) {
- if debugDecode {
- fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
- }
- f.err = CorruptInputError(f.roffset)
- return
- }
-
- f.copyLen, f.copyDist = length, int(dist)
- goto copyHistory
- }
-
-copyHistory:
- // Perform a backwards copy according to RFC section 3.2.3.
- {
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
- if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
- }
- f.copyLen -= cnt
-
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
- f.stepState = stateDict
- return
- }
- goto readLiteral
- }
-}
-
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
index cc6db2792..8d632cea0 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -21,6 +21,11 @@ func (f *decompressor) huffmanBytesBuffer() {
)
fr := f.r.(*bytes.Buffer)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb := f.nb, f.b
+
switch f.stepState {
case stateInit:
goto readLiteral
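
Each generated decoder now hoists f.b and f.nb into the locals fb and fnb once per call and stores them back on every exit path; the compiler keeps locals in registers but will not do the same for repeatedly accessed struct fields. A schematic of the pattern outside the decoder, using a simplified bit reader rather than the package's types:

package main

import "fmt"

type bitReader struct {
	b   uint32 // bit buffer
	nb  uint   // valid bits in the buffer
	src []byte
	pos int
}

// readBits shows the hoisting pattern used throughout inflate_gen.go: copy the
// hot fields into locals at entry, work on the locals, and write them back
// before every return so the struct stays consistent across calls.
func (r *bitReader) readBits(n uint) (uint32, bool) {
	fb, fnb := r.b, r.nb // locals the compiler can keep in registers
	for fnb < n {
		if r.pos >= len(r.src) {
			r.b, r.nb = fb, fnb // write back before the early return
			return 0, false
		}
		fb |= uint32(r.src[r.pos]) << fnb
		r.pos++
		fnb += 8
	}
	v := fb & (1<<n - 1)
	fb >>= n
	fnb -= n
	r.b, r.nb = fb, fnb // write back on the normal path too
	return v, true
}

func main() {
	r := &bitReader{src: []byte{0xAB, 0xCD}}
	v, ok := r.readBits(12)
	fmt.Printf("%#x %v\n", v, ok) // 0xdab true
}
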
@@ -39,41 +44,35 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
@@ -88,10 +87,12 @@ readLiteral:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
@@ -101,9 +102,10 @@ readLiteral:
val := decCodeToLen[(v - 257)]
length = int(val.length) + 3
n := uint(val.extra)
- for f.nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
@@ -111,25 +113,27 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
+ for fnb < 5 {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
@@ -137,12 +141,12 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
@@ -152,38 +156,35 @@ readLiteral:
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hd.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask]
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
dist = uint32(chunk >> huffmanValueShift)
break
}
@@ -197,9 +198,10 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
+ for fnb < nb {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
@@ -207,14 +209,16 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -224,6 +228,7 @@ readLiteral:
// No check on length; encoding can be prescient.
if dist > uint32(f.dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
@@ -248,10 +253,12 @@ copyHistory:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -265,6 +272,11 @@ func (f *decompressor) huffmanBytesReader() {
)
fr := f.r.(*bytes.Reader)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb := f.nb, f.b
+
switch f.stepState {
case stateInit:
goto readLiteral
@@ -283,41 +295,35 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
@@ -332,10 +338,12 @@ readLiteral:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
@@ -345,9 +353,10 @@ readLiteral:
val := decCodeToLen[(v - 257)]
length = int(val.length) + 3
n := uint(val.extra)
- for f.nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
@@ -355,25 +364,27 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
+ for fnb < 5 {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
@@ -381,12 +392,12 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
@@ -396,38 +407,35 @@ readLiteral:
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hd.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask]
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
dist = uint32(chunk >> huffmanValueShift)
break
}
@@ -441,9 +449,10 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
+ for fnb < nb {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
@@ -451,14 +460,16 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -468,6 +479,7 @@ readLiteral:
// No check on length; encoding can be prescient.
if dist > uint32(f.dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
@@ -492,10 +504,12 @@ copyHistory:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -509,6 +523,11 @@ func (f *decompressor) huffmanBufioReader() {
)
fr := f.r.(*bufio.Reader)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb := f.nb, f.b
+
switch f.stepState {
case stateInit:
goto readLiteral
@@ -527,41 +546,35 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
- // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
@@ -576,10 +589,12 @@ readLiteral:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
@@ -589,9 +604,10 @@ readLiteral:
val := decCodeToLen[(v - 257)]
length = int(val.length) + 3
n := uint(val.extra)
- for f.nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
@@ -599,25 +615,27 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
+ for fnb < 5 {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
@@ -625,12 +643,12 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
@@ -640,38 +658,35 @@ readLiteral:
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hd.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask]
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
dist = uint32(chunk >> huffmanValueShift)
break
}
@@ -685,9 +700,10 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
+ for fnb < nb {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
@@ -695,14 +711,16 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -712,6 +730,7 @@ readLiteral:
// No check on length; encoding can be prescient.
if dist > uint32(f.dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
@@ -736,10 +755,12 @@ copyHistory:
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
// Decode a single Huffman block from f.
@@ -753,6 +774,11 @@ func (f *decompressor) huffmanStringsReader() {
)
fr := f.r.(*strings.Reader)
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb := f.nb, f.b
+
switch f.stepState {
case stateInit:
goto readLiteral
@@ -771,41 +797,286 @@ readLiteral:
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
+ for {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ v = int(chunk >> huffmanValueShift)
+ break
+ }
+ }
+ }
+
+ var length int
+ switch {
+ case v < 256:
+ f.dict.writeByte(byte(v))
+ if f.dict.availWrite() == 0 {
+ f.toRead = f.dict.readFlush()
+ f.step = (*decompressor).huffmanStringsReader
+ f.stepState = stateInit
+ f.b, f.nb = fb, fnb
+ return
+ }
+ goto readLiteral
+ case v == 256:
+ f.b, f.nb = fb, fnb
+ f.finishBlock()
+ return
+ // otherwise, reference to older data
+ case v < 265:
+ length = v - (257 - 3)
+ case v < maxNumLit:
+ val := decCodeToLen[(v - 257)]
+ length = int(val.length) + 3
+ n := uint(val.extra)
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits n>0:", err)
+ }
+ f.err = err
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
+ default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
+ }
+ f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
+ return
+ }
+
+ var dist uint32
+ if f.hd == nil {
+ for fnb < 5 {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits f.nb<5:", err)
+ }
+ f.err = err
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
+ } else {
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hd.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ f.err = noEOF(err)
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= fnb {
+ if n == 0 {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
+ dist = uint32(chunk >> huffmanValueShift)
+ break
+ }
+ }
+ }
+
+ switch {
+ case dist < 4:
+ dist++
+ case dist < maxNumDist:
+ nb := uint(dist-2) >> 1
+ // have 1 bit in bottom of dist, need nb more.
+ extra := (dist & 1) << (nb & regSizeMaskUint32)
+ for fnb < nb {
+ c, err := fr.ReadByte()
+ if err != nil {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("morebits f.nb<nb:", err)
+ }
+ f.err = err
+ return
+ }
+ f.roffset++
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
+ }
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
+ dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
+ default:
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("dist too big:", dist, maxNumDist)
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ // No check on length; encoding can be prescient.
+ if dist > uint32(f.dict.histSize()) {
+ f.b, f.nb = fb, fnb
+ if debugDecode {
+ fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ }
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ f.copyLen, f.copyDist = length, int(dist)
+ goto copyHistory
+ }
+
+copyHistory:
+ // Perform a backwards copy according to RFC section 3.2.3.
+ {
+ cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ if cnt == 0 {
+ cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ }
+ f.copyLen -= cnt
+
+ if f.dict.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = f.dict.readFlush()
+ f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+ f.stepState = stateDict
+ f.b, f.nb = fb, fnb
+ return
+ }
+ goto readLiteral
+ }
+ // Not reached
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanGenericReader() {
+ const (
+ stateInit = iota // Zero value must be stateInit
+ stateDict
+ )
+ fr := f.r.(Reader)
+
+ // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+ // but is smart enough to keep local variables in registers, so use nb and b,
+ // inline call to moreBits and reassign b,nb back to f on return.
+ fnb, fb := f.nb, f.b
+
+ switch f.stepState {
+ case stateInit:
+ goto readLiteral
+ case stateDict:
+ goto copyHistory
+ }
+
+readLiteral:
+ // Read literal and/or (length, distance) according to RFC section 3.2.3.
+ {
+ var v int
+ {
+ // Inlined v, err := f.huffSym(f.hl)
+ // Since a huffmanDecoder can be empty or be composed of a degenerate tree
+ // with single element, huffSym must error on these two edge cases. In both
+ // cases, the chunks slice will be 0 for the invalid sequence, leading it
+ // satisfy the n == 0 check below.
+ n := uint(f.hl.maxRead)
+ for {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hl.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+ chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
v = int(chunk >> huffmanValueShift)
break
}
@@ -818,12 +1089,14 @@ readLiteral:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanStringsReader
+ f.step = (*decompressor).huffmanGenericReader
f.stepState = stateInit
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
case v == 256:
+ f.b, f.nb = fb, fnb
f.finishBlock()
return
// otherwise, reference to older data
@@ -833,9 +1106,10 @@ readLiteral:
val := decCodeToLen[(v - 257)]
length = int(val.length) + 3
n := uint(val.extra)
- for f.nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits n>0:", err)
}
@@ -843,25 +1117,27 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
- f.b >>= n & regSizeMaskUint32
- f.nb -= n
+ length += int(fb & bitMask32[n])
+ fb >>= n & regSizeMaskUint32
+ fnb -= n
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
+ f.b, f.nb = fb, fnb
return
}
var dist uint32
if f.hd == nil {
- for f.nb < 5 {
+ for fnb < 5 {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
@@ -869,12 +1145,12 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
- f.b >>= 5
- f.nb -= 5
+ dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3)))
+ fb >>= 5
+ fnb -= 5
} else {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
@@ -884,38 +1160,35 @@ readLiteral:
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
- nb, b := f.nb, f.b
for {
- for nb < n {
+ for fnb < n {
c, err := fr.ReadByte()
if err != nil {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
f.err = noEOF(err)
return
}
f.roffset++
- b |= uint32(c) << (nb & regSizeMaskUint32)
- nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- chunk := f.hd.chunks[b&(huffmanNumChunks-1)]
+ chunk := f.hd.chunks[fb&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
- chunk = f.hd.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hd.linkMask]
+ chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask]
n = uint(chunk & huffmanCountMask)
}
- if n <= nb {
+ if n <= fnb {
if n == 0 {
- f.b = b
- f.nb = nb
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
- f.b = b >> (n & regSizeMaskUint32)
- f.nb = nb - n
+ fb = fb >> (n & regSizeMaskUint32)
+ fnb = fnb - n
dist = uint32(chunk >> huffmanValueShift)
break
}
@@ -929,9 +1202,10 @@ readLiteral:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << (nb & regSizeMaskUint32)
- for f.nb < nb {
+ for fnb < nb {
c, err := fr.ReadByte()
if err != nil {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
@@ -939,14 +1213,16 @@ readLiteral:
return
}
f.roffset++
- f.b |= uint32(c) << f.nb
- f.nb += 8
+ fb |= uint32(c) << (fnb & regSizeMaskUint32)
+ fnb += 8
}
- extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
- f.b >>= nb & regSizeMaskUint32
- f.nb -= nb
+ extra |= fb & bitMask32[nb]
+ fb >>= nb & regSizeMaskUint32
+ fnb -= nb
dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
+ // slower: dist = bitMask32[nb+1] + 2 + extra
default:
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
@@ -956,6 +1232,7 @@ readLiteral:
// No check on length; encoding can be prescient.
if dist > uint32(f.dict.histSize()) {
+ f.b, f.nb = fb, fnb
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
@@ -978,12 +1255,14 @@ copyHistory:
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
- f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+ f.step = (*decompressor).huffmanGenericReader // We need to continue this work
f.stepState = stateDict
+ f.b, f.nb = fb, fnb
return
}
goto readLiteral
}
+ // Not reached
}
func (f *decompressor) huffmanBlockDecoder() func() {
@@ -996,7 +1275,9 @@ func (f *decompressor) huffmanBlockDecoder() func() {
return f.huffmanBufioReader
case *strings.Reader:
return f.huffmanStringsReader
+ case Reader:
+ return f.huffmanGenericReader
default:
- return f.huffmanBlockGeneric
+ return f.huffmanGenericReader
}
}
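
The hunks above replace every direct use of f.b / f.nb inside the hot decoding loops with local copies fb / fnb, writing them back to the struct before each return so the compiler can keep the bit buffer in registers. A minimal sketch of that pattern, using a hypothetical bitReader type rather than the package's decompressor:

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

// bitReader is a hypothetical cut-down bit buffer, used only to illustrate
// the "cache struct fields in locals" pattern from the diff above; it is
// not the flate decompressor's real type.
type bitReader struct {
	src io.ByteReader
	b   uint32 // bit buffer
	nb  uint   // valid bits in b
}

// readBits returns the next n bits. The loop works on the local copies
// fb/fnb and flushes them back to the struct before every return,
// mirroring how huffmanGenericReader handles f.b and f.nb.
func (r *bitReader) readBits(n uint) (uint32, error) {
	fb, fnb := r.b, r.nb
	for fnb < n {
		c, err := r.src.ReadByte()
		if err != nil {
			r.b, r.nb = fb, fnb // flush locals before any return
			return 0, err
		}
		fb |= uint32(c) << (fnb & 31)
		fnb += 8
	}
	v := fb & (1<<n - 1)
	r.b, r.nb = fb>>n, fnb-n
	return v, nil
}

func main() {
	r := &bitReader{src: strings.NewReader("\xab\xcd")}
	v, _ := r.readBits(12)
	fmt.Printf("%#x\n", v) // 0xdab
}
```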
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
index 1e5eea396..0022c8bb6 100644
--- a/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -1,6 +1,10 @@
package flate
-import "fmt"
+import (
+ "encoding/binary"
+ "fmt"
+ "math/bits"
+)
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
@@ -116,7 +120,32 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
- l := e.matchlenLong(s+4, t+4, src) + 4
+ var l = int32(4)
+ if false {
+ l = e.matchlenLong(s+4, t+4, src) + 4
+ } else {
+ // inlined:
+ a := src[s+4:]
+ b := src[t+4:]
+ for len(a) >= 8 {
+ if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
+ l += int32(bits.TrailingZeros64(diff) >> 3)
+ break
+ }
+ l += 8
+ a = a[8:]
+ b = b[8:]
+ }
+ if len(a) < 8 {
+ b = b[:len(a)]
+ for i := range a {
+ if a[i] != b[i] {
+ break
+ }
+ l++
+ }
+ }
+ }
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
@@ -129,7 +158,28 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
// Save the match found
- dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ if false {
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ } else {
+ // Inlined...
+ xoffset := uint32(s - t - baseMatchOffset)
+ xlength := l
+ oc := offsetCode(xoffset)
+ xoffset |= oc << 16
+ for xlength > 0 {
+ xl := xlength
+ if xl > 258 {
+ // We need to have at least baseMatchLength left over for next loop.
+ xl = 258 - baseMatchLength
+ }
+ xlength -= xl
+ xl -= baseMatchLength
+ dst.extraHist[lengthCodes1[uint8(xl)]]++
+ dst.offHist[oc]++
+ dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+ dst.n++
+ }
+ }
s += l
nextEmit = s
if nextS >= s {
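
The level-1 change above inlines matchlenLong: it XORs eight bytes at a time and uses bits.TrailingZeros64 to find the first differing byte. A self-contained sketch of the same technique (matchLen here is an illustrative name, not the package's function):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen returns the length of the common prefix of a and b, comparing
// 8 bytes per iteration as in the inlined loop above. The name and
// signature are illustrative.
func matchLen(a, b []byte) int {
	if len(b) < len(a) {
		a = a[:len(b)]
	}
	var n int
	for len(a) >= 8 {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			// The index of the first differing byte is the trailing zero bits / 8.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
		a, b = a[8:], b[8:]
	}
	for i := range a { // tail of fewer than 8 bytes
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("compression!"), []byte("compressed?"))) // 8
}
```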
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
index c22b4244a..465b5e0ba 100644
--- a/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -5,7 +5,7 @@ import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
- table [tableSize]tableEntryPrev
+ table [1 << 16]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
@@ -13,6 +13,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 8 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ tableBits = 16
+ tableSize = 1 << tableBits
)
if debugDeflate && e.cur < 0 {
@@ -73,7 +75,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
- nextHash := hash(cv)
+ nextHash := hash4u(cv, tableBits)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
@@ -156,7 +158,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(t+4) < len(src) && t > 0 {
cv := load3232(src, t)
- nextHash := hash(cv)
+ nextHash := hash4u(cv, tableBits)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
@@ -165,30 +167,31 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
goto emitRemainder
}
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-3 to s.
- x := load6432(src, s-3)
- prevHash := hash(uint32(x))
- e.table[prevHash] = tableEntryPrev{
- Prev: e.table[prevHash].Cur,
- Cur: tableEntry{offset: e.cur + s - 3},
+ // Store every 5th hash in-between.
+ for i := s - l + 2; i < s-5; i += 5 {
+ nextHash := hash4u(load3232(src, i), tableBits)
+ e.table[nextHash] = tableEntryPrev{
+ Prev: e.table[nextHash].Cur,
+ Cur: tableEntry{offset: e.cur + i}}
}
- x >>= 8
- prevHash = hash(uint32(x))
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-2 to s.
+ x := load6432(src, s-2)
+ prevHash := hash4u(uint32(x), tableBits)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
- prevHash = hash(uint32(x))
+ prevHash = hash4u(uint32(x), tableBits)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
- currHash := hash(uint32(x))
+ currHash := hash4u(uint32(x), tableBits)
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
@@ -200,15 +203,15 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)
- if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
+ if candidate.offset > minOffset {
+ if cv == load3232(src, candidate.offset-e.cur) {
+ // Found a match...
+ continue
+ }
candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- continue
- }
+ // Match at prev...
+ continue
}
}
cv = uint32(x >> 8)
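
Level 3 switches to a 16-bit table and hashes with hash4u(cv, tableBits), a multiplicative hash whose output width is passed in. A hedged sketch of that style of hash; the multiplier below is the usual Fibonacci-style constant and is assumed rather than copied from this diff:

```go
package main

import "fmt"

// hash4 maps a 4-byte value to h bits by multiplying with a large odd
// constant and keeping the top bits. This mirrors the hash4u(cv, tableBits)
// calls in the level-3 encoder; the multiplier here is illustrative.
func hash4(u uint32, h uint8) uint32 {
	const prime4bytes = 2654435761 // ~2^32 / golden ratio, a common choice
	return (u * prime4bytes) >> (32 - h)
}

func main() {
	const tableBits = 16
	v := hash4(0x64636261, tableBits) // bytes "abcd" as a little-endian uint32
	fmt.Printf("bucket %d of %d\n", v, 1<<tableBits)
}
```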
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
index 3a9618ee1..8005a5ca8 100644
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -13,11 +13,10 @@ import (
)
const (
- // From top
- // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
- // 8 bits: xlength = length - MIN_MATCH_LENGTH
- // 5 bits offsetcode
- // 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
+ // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
+ // bits 16-22 offsetcode - 5 bits
+ // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
+ // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
@@ -276,7 +275,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
xoffset |= oCode << 16
t.extraHist[lengthCodes1[uint8(xlength)]]++
- t.offHist[oCode]++
+ t.offHist[oCode&31]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.n++
}
@@ -300,7 +299,7 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
xlength -= xl
xl -= baseMatchLength
t.extraHist[lengthCodes1[uint8(xl)]]++
- t.offHist[oc]++
+ t.offHist[oc&31]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
@@ -356,8 +355,8 @@ func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint8 { return uint8(t >> lengthShift) }
-// The code is never more than 8 bits, but is returned as uint32 for convenience.
-func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
+// Convert length to code.
+func lengthCode(len uint8) uint8 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
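
The rewritten comment above documents how a token packs its fields. The sketch below builds and unpacks a match token according to that layout; matchType's value is an assumption, since that constant is not part of this hunk:

```go
package main

import "fmt"

// lengthShift is taken from the hunk above; matchType's value is an
// assumption (the constant itself is not shown in this diff).
const (
	lengthShift = 22
	matchType   = 1 << 30
)

type token uint32

// packMatch follows the layout comment: xoffset in bits 0-15, the offset
// code above it, xlength above lengthShift, and the type in the top bits.
func packMatch(xlength, offsetCode, xoffset uint32) token {
	return token(matchType | xlength<<lengthShift | offsetCode<<16 | xoffset)
}

func main() {
	t := packMatch(10, 7, 1234)
	fmt.Println(uint8(t>>lengthShift), uint32(t)&0xFFFF) // length code 10, offset 1234
}
```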
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 2a06bd1a7..2668b64d3 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"io"
+ "sync"
"github.com/klauspost/compress/fse"
)
@@ -216,6 +217,7 @@ func (s *Scratch) Decoder() *Decoder {
return &Decoder{
dt: s.dt,
actualTableLog: s.actualTableLog,
+ bufs: &s.decPool,
}
}
@@ -223,6 +225,15 @@ func (s *Scratch) Decoder() *Decoder {
type Decoder struct {
dt dTable
actualTableLog uint8
+ bufs *sync.Pool
+}
+
+func (d *Decoder) buffer() *[4][256]byte {
+ buf, ok := d.bufs.Get().(*[4][256]byte)
+ if ok {
+ return buf
+ }
+ return &[4][256]byte{}
}
// Decompress1X will decompress a 1X encoded stream.
@@ -249,7 +260,8 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
dt := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ bufs := d.buffer()
+ buf := &bufs[0]
var off uint8
for br.off >= 8 {
@@ -277,6 +289,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
+ d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
@@ -284,6 +297,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
}
if len(dst)+int(off) > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -310,6 +324,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
}
}
if len(dst) >= maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -319,6 +334,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
bitsLeft -= nBits
dst = append(dst, uint8(v.entry>>8))
}
+ d.bufs.Put(bufs)
return dst, br.close()
}
@@ -341,7 +357,8 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ bufs := d.buffer()
+ buf := &bufs[0]
var off uint8
switch d.actualTableLog {
@@ -369,6 +386,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
+ d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
@@ -398,6 +416,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
+ d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
@@ -426,6 +445,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -455,6 +475,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -484,6 +505,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -513,6 +535,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -542,6 +565,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -571,6 +595,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -578,10 +603,12 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
}
}
default:
+ d.bufs.Put(bufs)
return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
}
if len(dst)+int(off) > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -601,6 +628,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
}
if len(dst) >= maxDecodedSize {
br.close()
+ d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
@@ -609,6 +637,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
+ d.bufs.Put(bufs)
return dst, br.close()
}
@@ -628,7 +657,8 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ bufs := d.buffer()
+ buf := &bufs[0]
var off uint8
const shift = 56
@@ -655,6 +685,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -663,6 +694,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
}
if len(dst)+int(off) > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -679,6 +711,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
}
}
if len(dst) >= maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -688,6 +721,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
+ d.bufs.Put(bufs)
return dst, br.close()
}
@@ -735,12 +769,12 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ buf := d.buffer()
var off uint8
var decoded int
// Decode 2 values from each decoder/loop.
- const bufoff = 256 / 4
+ const bufoff = 256
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
@@ -758,8 +792,8 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+ buf[stream][off] = uint8(v.entry >> 8)
+ buf[stream2][off] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
@@ -767,8 +801,8 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ buf[stream][off+1] = uint8(v.entry >> 8)
+ buf[stream2][off+1] = uint8(v2.entry >> 8)
}
{
@@ -783,8 +817,8 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+ buf[stream][off] = uint8(v.entry >> 8)
+ buf[stream2][off] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
@@ -792,25 +826,26 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ buf[stream][off+1] = uint8(v.entry >> 8)
+ buf[stream2][off+1] = uint8(v2.entry >> 8)
}
off += 2
- if off == bufoff {
+ if off == 0 {
if bufoff > dstEvery {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
+ copy(out, buf[0][:])
+ copy(out[dstEvery:], buf[1][:])
+ copy(out[dstEvery*2:], buf[2][:])
+ copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
- decoded += 256
+ decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
}
@@ -818,12 +853,13 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ copy(out, buf[0][:off])
+ copy(out[dstEvery:], buf[1][:off])
+ copy(out[dstEvery*2:], buf[2][:off])
+ copy(out[dstEvery*3:], buf[3][:off])
decoded += int(off) * 4
out = out[off:]
}
@@ -853,6 +889,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
}
// end inline...
if offset >= len(out) {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
@@ -871,6 +908,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
return nil, err
}
}
+ d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
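
Decompress4X previously split one [256]byte into four 64-byte regions (bufoff = 256/4); it now gives each stream its own 256-byte row and relies on the uint8 offset wrapping to zero to detect a full row (`if off == 0`). A small illustration of that wrap-around flush check; the real loop advances off by 2 or 4 per iteration:

```go
package main

import "fmt"

func main() {
	// Each of the four streams writes into its own 256-byte row, and off is
	// a uint8, so "off == 0" after the increment means a full row was just
	// filled and can be flushed to the output.
	var buf [4][256]byte
	var off uint8
	flushed := 0
	for i := 0; i < 1024; i++ {
		for s := range buf {
			buf[s][off] = byte(i) // stand-in for one decoded symbol per stream
		}
		off++
		if off == 0 { // uint8 wrapped: 256 values written per stream
			flushed++
		}
	}
	fmt.Println("flushes:", flushed) // 4
}
```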
@@ -916,12 +954,12 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ buf := d.buffer()
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
- const bufoff = 256 / 4
+ const bufoff = 256
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
@@ -942,8 +980,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream] = uint8(v >> 8)
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -951,8 +989,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -960,8 +998,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -969,8 +1007,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- buf[off+bufoff*stream+3] = uint8(v >> 8)
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
{
@@ -987,8 +1025,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream] = uint8(v >> 8)
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -996,8 +1034,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -1005,8 +1043,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
@@ -1014,25 +1052,26 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- buf[off+bufoff*stream+3] = uint8(v >> 8)
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
off += 4
- if off == bufoff {
+ if off == 0 {
if bufoff > dstEvery {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
+ copy(out, buf[0][:])
+ copy(out[dstEvery:], buf[1][:])
+ copy(out[dstEvery*2:], buf[2][:])
+ copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
- decoded += 256
+ decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
}
@@ -1040,12 +1079,13 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ copy(out, buf[0][:off])
+ copy(out[dstEvery:], buf[1][:off])
+ copy(out[dstEvery*2:], buf[2][:off])
+ copy(out[dstEvery*3:], buf[3][:off])
decoded += int(off) * 4
out = out[off:]
}
@@ -1057,6 +1097,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
+ d.bufs.Put(buf)
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
@@ -1077,6 +1118,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
}
// end inline...
if offset >= len(out) {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
@@ -1091,9 +1133,11 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
+ d.bufs.Put(buf)
return nil, err
}
}
+ d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
@@ -1135,12 +1179,12 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ buf := d.buffer()
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
- const bufoff = 256 / 4
+ const bufoff = 256
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
@@ -1150,104 +1194,109 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
+
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
- v := single[uint8(br[stream].value>>shift)].entry
- v2 := single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream] = uint8(v >> 8)
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
+
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
+
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
{
const stream = 2
const stream2 = 3
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
+
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
+
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
+
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
- v := single[uint8(br[stream].value>>shift)].entry
- v2 := single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream] = uint8(v >> 8)
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-
- v = single[uint8(br[stream].value>>shift)].entry
- v2 = single[uint8(br[stream2].value>>shift)].entry
- br[stream].bitsRead += uint8(v)
- br[stream].value <<= v & 63
- br[stream2].bitsRead += uint8(v2)
- br[stream2].value <<= v2 & 63
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
off += 4
- if off == bufoff {
+ if off == 0 {
if bufoff > dstEvery {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
+ copy(out, buf[0][:])
+ copy(out[dstEvery:], buf[1][:])
+ copy(out[dstEvery*2:], buf[2][:])
+ copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
- decoded += 256
+ decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
}
@@ -1257,10 +1306,10 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ copy(out, buf[0][:off])
+ copy(out[dstEvery:], buf[1][:off])
+ copy(out[dstEvery*2:], buf[2][:off])
+ copy(out[dstEvery*3:], buf[3][:off])
decoded += int(off) * 4
out = out[off:]
}
@@ -1272,6 +1321,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
+ d.bufs.Put(buf)
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
@@ -1292,6 +1342,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
}
// end inline...
if offset >= len(out) {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
@@ -1306,9 +1357,11 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
+ d.bufs.Put(buf)
return nil, err
}
}
+ d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
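
All the decompress paths above now take their *[4][256]byte scratch buffer from a sync.Pool (the decPool added to Scratch) and return it on every exit, including error returns. A minimal sketch of that Get/Put discipline, with hypothetical names rather than the huff0 API:

```go
package main

import (
	"fmt"
	"sync"
)

// pool hands out *[4][256]byte scratch buffers, as the decPool added in
// huff0.go does. Pooling pointers avoids an allocation per decode call.
var pool sync.Pool

func buffer() *[4][256]byte {
	if b, ok := pool.Get().(*[4][256]byte); ok {
		return b
	}
	return &[4][256]byte{}
}

// decode is a stand-in for Decompress4X: every return path, including the
// error one, must hand the buffer back to the pool.
func decode(src []byte) ([]byte, error) {
	buf := buffer()
	if len(src) == 0 {
		pool.Put(buf)
		return nil, fmt.Errorf("empty input")
	}
	out := append([]byte(nil), buf[0][:len(src)]...) // pretend work
	pool.Put(buf)
	return out, nil
}

func main() {
	out, err := decode([]byte("abc"))
	fmt.Println(len(out), err) // 3 <nil>
}
```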
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index 3ee00ecb4..e8ad17ad0 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -8,6 +8,7 @@ import (
"fmt"
"math"
"math/bits"
+ "sync"
"github.com/klauspost/compress/fse"
)
@@ -116,6 +117,7 @@ type Scratch struct {
nodes []nodeElt
tmpOut [4][]byte
fse *fse.Scratch
+ decPool sync.Pool // *[4][256]byte buffers.
huffWeight [maxSymbolValue + 1]byte
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index 5f08a2830..f51ab529a 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -85,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
- const kSearchStrength = 7
+ const kSearchStrength = 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
@@ -334,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
- const kSearchStrength = 8
+ const kSearchStrength = 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
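
kSearchStrength controls how quickly the fast encoder widens its stride through data that is not producing matches; lowering it to 6 makes the skip grow faster. The sketch below only illustrates how the constant affects the number of probed positions, assuming the common stride formula nextS = s + (s-nextEmit)>>kSearchStrength + step; the exact expression in enc_fast.go may differ:

```go
package main

import "fmt"

// probes counts how many positions a fast-encoder style loop would test in
// a stretch of n bytes with no matches at all. Illustrative only; the real
// encoder resets nextEmit whenever a match is emitted.
func probes(n, kSearchStrength, stepMin int) int {
	s, nextEmit, count := 0, 0, 0
	for s < n {
		count++
		s += (s-nextEmit)>>kSearchStrength + stepMin
	}
	return count
}

func main() {
	for _, k := range []int{8, 7, 6} {
		fmt.Printf("kSearchStrength=%d -> %d probes over 64 KiB\n", k, probes(64<<10, k, 1))
	}
}
```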