summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r--vendor/github.com/klauspost/compress/fse/bitreader.go27
-rw-r--r--vendor/github.com/klauspost/compress/fse/bytereader.go13
-rw-r--r--vendor/github.com/klauspost/compress/huff0/README.md6
-rw-r--r--vendor/github.com/klauspost/compress/huff0/bitreader.go256
-rw-r--r--vendor/github.com/klauspost/compress/huff0/decompress.go776
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bitreader.go25
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockdec.go4
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bytereader.go25
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder.go15
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_decoder.go19
10 files changed, 1026 insertions, 140 deletions
diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go
index b9db204f5..f65eb3909 100644
--- a/vendor/github.com/klauspost/compress/fse/bitreader.go
+++ b/vendor/github.com/klauspost/compress/fse/bitreader.go
@@ -6,6 +6,7 @@
package fse
import (
+ "encoding/binary"
"errors"
"io"
)
@@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error {
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil
}
@@ -63,8 +68,9 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
- v := b.in[b.off-4 : b.off]
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -77,7 +83,8 @@ func (b *bitReader) fill() {
return
}
if b.off > 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -91,9 +98,17 @@ func (b *bitReader) fill() {
}
}
+// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
- return b.off == 0 && b.bitsRead >= 64
+ return b.bitsRead >= 64 && b.off == 0
}
// close the bitstream and returns an error if out-of-buffer reads occurred.
diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go
index f228a46cd..abade2d60 100644
--- a/vendor/github.com/klauspost/compress/fse/bytereader.go
+++ b/vendor/github.com/klauspost/compress/fse/bytereader.go
@@ -25,19 +25,10 @@ func (b *byteReader) advance(n uint) {
b.off += int(n)
}
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
- v3 := int32(b2[3])
- v2 := int32(b2[2])
- v1 := int32(b2[1])
- v0 := int32(b2[0])
- return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
-}
-
// Uint32 returns a little endian uint32 starting at current offset.
func (b byteReader) Uint32() uint32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
v3 := uint32(b2[3])
v2 := uint32(b2[2])
v1 := uint32(b2[1])
diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md
index 0a8448ce9..e12da4db2 100644
--- a/vendor/github.com/klauspost/compress/huff0/README.md
+++ b/vendor/github.com/klauspost/compress/huff0/README.md
@@ -12,8 +12,6 @@ but it can be used as a secondary step to compressors (like Snappy) that does no
* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
-THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE.
-
## News
* Mar 2018: First implementation released. Consider this beta software for now.
@@ -75,6 +73,8 @@ which can be given to the decompressor.
Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.
+
You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
your input was likely corrupted.
@@ -84,4 +84,4 @@ There are no integrity checks, so relying on errors from the decompressor does n
# Contributing
Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
-changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
index 7d0903c70..a4979e886 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -6,6 +6,7 @@
package huff0
import (
+ "encoding/binary"
"errors"
"io"
)
@@ -34,29 +35,16 @@ func (b *bitReader) init(in []byte) error {
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBit32(uint32(v)))
return nil
}
-// getBits will return n bits. n can be 0.
-func (b *bitReader) getBits(n uint8) uint16 {
- if n == 0 || b.bitsRead >= 64 {
- return 0
- }
- return b.getBitsFast(n)
-}
-
-// getBitsFast requires that at least one bit is requested every time.
-// There are no checks if the buffer is filled.
-func (b *bitReader) getBitsFast(n uint8) uint16 {
- const regMask = 64 - 1
- v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
- b.bitsRead += n
- return v
-}
-
// peekBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) peekBitsFast(n uint8) uint16 {
@@ -71,21 +59,36 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
+
+ // 2 bounds checks.
v := b.in[b.off-4 : b.off]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
+func (b *bitReader) advance(n uint8) {
+ b.bitsRead += n
+}
+
+// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off > 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -113,3 +116,214 @@ func (b *bitReader) close() error {
}
return nil
}
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderBytes struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderBytes) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderBytes) peekByteFast() uint8 {
+ got := uint8(b.value >> 56)
+ return got
+}
+
+func (b *bitReaderBytes) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderBytes) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read.
+func (b *bitReaderBytes) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderBytes) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderBytes) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderBytes) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReaderShifted reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderShifted struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderShifted) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
+ return uint16(b.value >> ((64 - n) & 63))
+}
+
+func (b *bitReaderShifted) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderShifted) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read.
+func (b *bitReaderShifted) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderShifted) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderShifted) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderShifted) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index fb42a398b..a03b2634a 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -25,6 +25,9 @@ type dEntryDouble struct {
len uint8
}
+// Uses special code for all tables that are < 8 bits.
+const use8BitTables = true
+
// ReadTable will read a table from the input.
// The size of the input may be larger than the table definition.
// Any content remaining after the table definition will be returned.
@@ -83,6 +86,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
}
v2 := v & 15
rankStats[v2]++
+ // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
weightTotal += (1 << v2) >> 1
}
if weightTotal == 0 {
@@ -142,12 +146,14 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
d := dEntrySingle{
entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
}
- single := s.dt.single[rankStats[w] : rankStats[w]+length]
+ rank := &rankStats[w]
+ single := s.dt.single[*rank : *rank+length]
for i := range single {
single[i] = d
}
- rankStats[w] += length
+ *rank += length
}
+
return s, in, nil
}
@@ -208,7 +214,10 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded")
}
- var br bitReader
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress1X8Bit(dst, src)
+ }
+ var br bitReaderShifted
err := br.init(src)
if err != nil {
return dst, err
@@ -216,17 +225,6 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
maxDecodedSize := cap(dst)
dst = dst[:0]
- decode := func() byte {
- val := br.peekBitsFast(d.actualTableLog) /* note : actualTableLog >= 1 */
- v := d.dt.single[val]
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
- hasDec := func(v dEntrySingle) byte {
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
-
// Avoid bounds check by always having full sized table.
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
@@ -238,11 +236,25 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
for br.off >= 8 {
br.fillFast()
- buf[off+0] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask])
- buf[off+1] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask])
+ v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ // Refill
br.fillFast()
- buf[off+2] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask])
- buf[off+3] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask])
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
@@ -259,13 +271,196 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
}
dst = append(dst, buf[:off]...)
- for !br.finished() {
+ // br < 8, so uint8 is fine
+ bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
+ for bitsLeft > 0 {
br.fill()
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
- dst = append(dst, decode())
+ v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= nBits
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress1X8BitExactly(dst, src)
+ }
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ shift := (8 - d.actualTableLog) & 7
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br < 4, so uint8 is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ const shift = 0
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br < 4, so uint8 is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
}
return dst, br.close()
}
@@ -274,15 +469,18 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
-func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
- if len(s.dt.single) == 0 {
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded")
}
if len(src) < 6+(4*1) {
return nil, errors.New("input too small")
}
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress4X8bit(dst, src)
+ }
- var br [4]bitReader
+ var br [4]bitReaderShifted
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
@@ -308,14 +506,7 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
- single := s.dt.single[:tlSize]
-
- decode := func(br *bitReader) byte {
- val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
- v := single[val&tlMask]
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
+ single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
@@ -324,69 +515,484 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
// Decode 2 values from each decoder/loop.
const bufoff = 256 / 4
-bigloop:
for {
- for i := range br {
- br := &br[i]
- if br.off < 4 {
- break bigloop
- }
- br.fillFast()
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
}
{
const stream = 0
- val := br[stream].peekBitsFast(s.actualTableLog)
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
- buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
{
- const stream = 1
- val := br[stream].peekBitsFast(s.actualTableLog)
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
- buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ }
+
+ off += 2
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := br.off*8 + uint(64-br.bitsRead)
+ for bitsLeft > 0 {
+ br.fill()
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ val := br.peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= uint(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress4X8bitExactly(dst, src)
+ }
+
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ shift := (8 - d.actualTableLog) & 7
+
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
}
{
const stream = 2
- val := br[stream].peekBitsFast(s.actualTableLog)
- v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
- val2 := br[stream].peekBitsFast(s.actualTableLog)
- v2 := single[val2&tlMask]
- buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ const shift = 0
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
}
{
- const stream = 3
- val := br[stream].peekBitsFast(s.actualTableLog)
- v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
- val2 := br[stream].peekBitsFast(s.actualTableLog)
- v2 := single[val2&tlMask]
- buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
}
- off += 2
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
if off == bufoff {
if bufoff > dstEvery {
@@ -422,12 +1028,38 @@ bigloop:
for i := range br {
offset := dstEvery * i
br := &br[i]
- for !br.finished() {
- br.fill()
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
- out[offset] = decode(br)
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
index 15d79d439..854458537 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -5,6 +5,7 @@
package zstd
import (
+ "encoding/binary"
"errors"
"io"
"math/bits"
@@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error {
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil
}
@@ -63,21 +68,31 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
- v := b.in[b.off-4 : b.off]
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
+// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 4a14242c7..c8ec6e331 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -83,6 +83,10 @@ type blockDec struct {
err error
decWG sync.WaitGroup
+ // Frame to use for singlethreaded decoding.
+ // Should not be used by the decoder itself since parent may be another frame.
+ localFrame *frameDec
+
// Block is RLE, this is the size.
RLESize uint32
tmp [4]byte
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
index f708df1c4..2c4fca17f 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -4,8 +4,6 @@
package zstd
-import "encoding/binary"
-
// byteReader provides a byte reader that reads
// little endian values from a byte stream.
// The input stream is manually advanced.
@@ -33,7 +31,8 @@ func (b *byteReader) overread() bool {
// Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
v3 := int32(b2[3])
v2 := int32(b2[2])
v1 := int32(b2[1])
@@ -57,7 +56,25 @@ func (b byteReader) Uint32() uint32 {
}
return v
}
- return binary.LittleEndian.Uint32(b.b[b.off : b.off+4])
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint32NC returns a little endian uint32 starting at current offset.
+// The caller must be sure if there are at least 4 bytes left.
+func (b byteReader) Uint32NC() uint32 {
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
}
// unread returns the unread portion of the input.
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 8e34479ff..75bf05bc9 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -23,9 +23,6 @@ type Decoder struct {
// Unreferenced decoders, ready for use.
decoders chan *blockDec
- // Unreferenced decoders, ready for use.
- frames chan *frameDec
-
// Streams ready to be decoded.
stream chan decodeStream
@@ -90,10 +87,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// Create decoders
d.decoders = make(chan *blockDec, d.o.concurrent)
- d.frames = make(chan *frameDec, d.o.concurrent)
for i := 0; i < d.o.concurrent; i++ {
- d.frames <- newFrameDec(d.o)
- d.decoders <- newBlockDec(d.o.lowMem)
+ dec := newBlockDec(d.o.lowMem)
+ dec.localFrame = newFrameDec(d.o)
+ d.decoders <- dec
}
if r == nil {
@@ -283,15 +280,15 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
// Grab a block decoder and frame decoder.
- block, frame := <-d.decoders, <-d.frames
+ block := <-d.decoders
+ frame := block.localFrame
defer func() {
if debug {
printf("re-adding decoder: %p", block)
}
- d.decoders <- block
frame.rawInput = nil
frame.bBuf = nil
- d.frames <- frame
+ d.decoders <- block
}()
frame.bBuf = input
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index 957cfeb79..e6d3d49b3 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -55,7 +55,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
if b.remain() < 4 {
return errors.New("input too small")
}
- bitStream := b.Uint32()
+ bitStream := b.Uint32NC()
nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
if nbBits > tablelogAbsoluteMax {
println("Invalid tablelog:", nbBits)
@@ -79,7 +79,8 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
n0 += 24
if r := b.remain(); r > 5 {
b.advance(2)
- bitStream = b.Uint32() >> bitCount
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
} else {
// end of bit stream
bitStream >>= 16
@@ -104,10 +105,11 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
charnum++
}
- if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 {
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3)
bitCount &= 7
- bitStream = b.Uint32() >> bitCount
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
} else {
bitStream >>= 2
}
@@ -148,17 +150,16 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
threshold >>= 1
}
- //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain())
- if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 {
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3)
bitCount &= 7
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> (bitCount & 31)
} else {
bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
b.off = len(b.b) - 4
- //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend)
+ bitStream = b.Uint32() >> (bitCount & 31)
}
- bitStream = b.Uint32() >> (bitCount & 31)
- //printf("bitstream is now: 0b%b", bitStream)
}
s.symbolLen = charnum
if s.symbolLen <= 1 {