summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/zstd
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd')
-rw-r--r--vendor/github.com/klauspost/compress/zstd/README.md14
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockdec.go34
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bytereader.go9
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder.go34
-rw-r--r--vendor/github.com/klauspost/compress/zstd/dict.go104
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_dfast.go4
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_fast.go15
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder.go21
-rw-r--r--vendor/github.com/klauspost/compress/zstd/framedec.go23
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_decoder.go2
-rw-r--r--vendor/github.com/klauspost/compress/zstd/history.go18
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec.go144
12 files changed, 340 insertions, 82 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index bc977a302..f2a80b5d0 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -309,6 +309,20 @@ The decoder can be used for *concurrent* decompression of multiple buffers.
It will only allow a certain number of concurrent operations to run.
To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
+### Dictionaries
+
+Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed.
+
+Dictionaries are added individually to Decoders.
+Dictionaries are generated by the `zstd --train` command and contains an initial state for the decoder.
+To add a dictionary use the `RegisterDict(data)` with the dictionary data before starting any decompression.
+
+The dictionary will be used automatically for the data that specifies them.
+
+A re-used Decoder will still contain the dictionaries registered.
+
+When registering a dictionary with the same ID it will override the existing.
+
### Allocation-less operation
The decoder has been designed to operate without allocations after a warmup.
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 19181caea..4a14242c7 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -461,26 +461,22 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if huff == nil {
huff = &huff0.Scratch{}
}
- huff.Out = b.literalBuf[:0]
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)
return err
}
// Use our out buffer.
- huff.Out = b.literalBuf[:0]
- huff.MaxDecodedSize = litRegenSize
if fourStreams {
- literals, err = huff.Decompress4X(literals, litRegenSize)
+ literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
- literals, err = huff.Decompress1X(literals)
+ literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
if err != nil {
println("decoding compressed literals:", err)
return err
}
// Make sure we don't leak our literals buffer
- huff.Out = nil
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
@@ -631,15 +627,12 @@ func (b *blockDec) decodeCompressed(hist *history) error {
var err error
// Use our out buffer.
huff = hist.huffTree
- huff.Out = b.literalBuf[:0]
- huff.MaxDecodedSize = litRegenSize
if fourStreams {
- literals, err = huff.Decompress4X(literals, litRegenSize)
+ literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
- literals, err = huff.Decompress1X(literals)
+ literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
// Make sure we don't leak our literals buffer
- huff.Out = nil
if err != nil {
println("decompressing literals:", err)
return err
@@ -649,12 +642,13 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
} else {
if hist.huffTree != nil && huff != nil {
- huffDecoderPool.Put(hist.huffTree)
+ if hist.dict == nil || hist.dict.litDec != hist.huffTree {
+ huffDecoderPool.Put(hist.huffTree)
+ }
hist.huffTree = nil
}
}
if huff != nil {
- huff.Out = nil
hist.huffTree = huff
}
if debug {
@@ -687,14 +681,20 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// If only recent offsets were not transferred, this would be an obvious win.
// Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
- if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
- println("initializing sequences:", err)
- return err
- }
hbytes := hist.b
if len(hbytes) > hist.windowSize {
hbytes = hbytes[len(hbytes)-hist.windowSize:]
+ // We do not need history any more.
+ if hist.dict != nil {
+ hist.dict.content = nil
+ }
}
+
+ if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
+ println("initializing sequences:", err)
+ return err
+ }
+
err = seqs.decode(nSeqs, br, hbytes)
if err != nil {
return err
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
index dc4378b64..f708df1c4 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -4,6 +4,8 @@
package zstd
+import "encoding/binary"
+
// byteReader provides a byte reader that reads
// little endian values from a byte stream.
// The input stream is manually advanced.
@@ -55,12 +57,7 @@ func (b byteReader) Uint32() uint32 {
}
return v
}
- b2 := b.b[b.off : b.off+4 : b.off+4]
- v3 := uint32(b2[3])
- v2 := uint32(b2[2])
- v1 := uint32(b2[1])
- v0 := uint32(b2[0])
- return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+ return binary.LittleEndian.Uint32(b.b[b.off : b.off+4])
}
// unread returns the unread portion of the input.
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 324347623..8e34479ff 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -32,8 +32,9 @@ type Decoder struct {
// Current read position used for Reader functionality.
current decoderState
- // Custom dictionaries
- dicts map[uint32]struct{}
+ // Custom dictionaries.
+ // Always uses copies.
+ dicts map[uint32]dict
// streamWg is the waitgroup for all streams
streamWg sync.WaitGroup
@@ -295,10 +296,18 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
frame.bBuf = input
for {
+ frame.history.reset()
err := frame.reset(&frame.bBuf)
if err == io.EOF {
return dst, nil
}
+ if frame.DictionaryID != nil {
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ return nil, ErrUnknownDictionary
+ }
+ frame.history.setDict(&dict)
+ }
if err != nil {
return dst, err
}
@@ -393,6 +402,19 @@ func (d *Decoder) Close() {
d.current.err = ErrDecoderClosed
}
+// RegisterDict will load a dictionary
+func (d *Decoder) RegisterDict(b []byte) error {
+ dc, err := loadDict(b)
+ if err != nil {
+ return err
+ }
+ if d.dicts == nil {
+ d.dicts = make(map[uint32]dict, 1)
+ }
+ d.dicts[dc.id] = *dc
+ return nil
+}
+
// IOReadCloser returns the decoder as an io.ReadCloser for convenience.
// Any changes to the decoder will be reflected, so the returned ReadCloser
// can be reused along with the decoder.
@@ -466,6 +488,14 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
if debug && err != nil {
println("Frame decoder returned", err)
}
+ if err == nil && frame.DictionaryID != nil {
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ err = ErrUnknownDictionary
+ } else {
+ frame.history.setDict(&dict)
+ }
+ }
if err != nil {
stream.output <- decodeOutput{
err: err,
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
new file mode 100644
index 000000000..8eb6f6ba3
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -0,0 +1,104 @@
+package zstd
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/klauspost/compress/huff0"
+)
+
+type dict struct {
+ id uint32
+
+ litDec *huff0.Scratch
+ llDec, ofDec, mlDec sequenceDec
+ offsets [3]int
+ content []byte
+}
+
+var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+
+// Load a dictionary as described in
+// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+func loadDict(b []byte) (*dict, error) {
+ // Check static field size.
+ if len(b) <= 8+(3*4) {
+ return nil, io.ErrUnexpectedEOF
+ }
+ d := dict{
+ llDec: sequenceDec{fse: &fseDecoder{}},
+ ofDec: sequenceDec{fse: &fseDecoder{}},
+ mlDec: sequenceDec{fse: &fseDecoder{}},
+ }
+ if !bytes.Equal(b[:4], dictMagic[:]) {
+ return nil, ErrMagicMismatch
+ }
+ d.id = binary.LittleEndian.Uint32(b[4:8])
+ if d.id == 0 {
+ return nil, errors.New("dictionaries cannot have ID 0")
+ }
+
+ // Read literal table
+ var err error
+ d.litDec, b, err = huff0.ReadTable(b[8:], nil)
+ if err != nil {
+ return nil, err
+ }
+
+ br := byteReader{
+ b: b,
+ off: 0,
+ }
+ readDec := func(i tableIndex, dec *fseDecoder) error {
+ if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
+ return err
+ }
+ if br.overread() {
+ return io.ErrUnexpectedEOF
+ }
+ err = dec.transform(symbolTableX[i])
+ if err != nil {
+ println("Transform table error:", err)
+ return err
+ }
+ if debug {
+ println("Read table ok", "symbolLen:", dec.symbolLen)
+ }
+ // Set decoders as predefined so they aren't reused.
+ dec.preDefined = true
+ return nil
+ }
+
+ if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
+ return nil, err
+ }
+ if br.remain() < 12 {
+ return nil, io.ErrUnexpectedEOF
+ }
+
+ d.offsets[0] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[1] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[2] = int(br.Uint32())
+ br.advance(4)
+ if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
+ return nil, errors.New("invalid offset in dictionary")
+ }
+ d.content = make([]byte, br.remain())
+ copy(d.content, br.unread())
+ if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
+ return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
+ }
+
+ return &d, nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index 5ebead9dc..50276bcde 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -671,4 +671,8 @@ encodeLoop:
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
+ // We do not store history, so we must offset e.cur to avoid false matches for next user.
+ if e.cur < bufferReset {
+ e.cur += int32(len(src))
+ }
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index d1d3658e6..4104b456c 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -383,6 +383,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
panic("src too big")
}
}
+
// Protect against e.cur wraparound.
if e.cur >= bufferReset {
for i := range e.table[:] {
@@ -516,6 +517,9 @@ encodeLoop:
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
+ }
break
}
@@ -548,6 +552,9 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0 ", t))
+ }
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
@@ -647,6 +654,10 @@ encodeLoop:
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
+ // We do not store history, so we must offset e.cur to avoid false matches for next user.
+ if e.cur < bufferReset {
+ e.cur += int32(len(src))
+ }
}
func (e *fastBase) addBlock(src []byte) int32 {
@@ -714,7 +725,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
}
// Reset the encoding table.
-func (e *fastBase) Reset() {
+func (e *fastBase) Reset(singleBlock bool) {
if e.blk == nil {
e.blk = &blockEnc{}
e.blk.init()
@@ -727,7 +738,7 @@ func (e *fastBase) Reset() {
} else {
e.crc.Reset()
}
- if cap(e.hist) < int(e.maxMatchOff*2) {
+ if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
l := e.maxMatchOff * 2
// Make it at least 1MB.
if l < 1<<20 {
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index af4f00b73..bf42bb1cf 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -35,7 +35,7 @@ type encoder interface {
AppendCRC([]byte) []byte
WindowSize(size int) int32
UseBlock(*blockEnc)
- Reset()
+ Reset(singleBlock bool)
}
type encoderState struct {
@@ -82,7 +82,10 @@ func (e *Encoder) initialize() {
}
e.encoders = make(chan encoder, e.o.concurrent)
for i := 0; i < e.o.concurrent; i++ {
- e.encoders <- e.o.encoder()
+ enc := e.o.encoder()
+ // If not single block, history will be allocated on first use.
+ enc.Reset(true)
+ e.encoders <- enc
}
}
@@ -112,7 +115,7 @@ func (e *Encoder) Reset(w io.Writer) {
s.filling = s.filling[:0]
s.current = s.current[:0]
s.previous = s.previous[:0]
- s.encoder.Reset()
+ s.encoder.Reset(false)
s.headerWritten = false
s.eofWritten = false
s.fullFrameWritten = false
@@ -445,11 +448,10 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
enc := <-e.encoders
defer func() {
// Release encoder reference to last block.
- enc.Reset()
+ // If a non-single block is needed the encoder will reset again.
+ enc.Reset(true)
e.encoders <- enc
}()
- enc.Reset()
- blk := enc.Block()
// Use single segments when above minimum window and below 1MB.
single := len(src) < 1<<20 && len(src) > MinWindowSize
if e.o.single != nil {
@@ -472,12 +474,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
panic(err)
}
- if len(src) <= e.o.blockSize && len(src) <= maxBlockSize {
+ // If we can do everything in one block, prefer that.
+ if len(src) <= maxCompressedBlockSize {
// Slightly faster with no history and everything in one block.
if e.o.crc {
_, _ = enc.CRC().Write(src)
}
- blk.reset(nil)
+ blk := enc.Block()
blk.last = true
enc.EncodeNoHist(blk, src)
@@ -504,6 +507,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
blk.output = oldout
} else {
+ enc.Reset(false)
+ blk := enc.Block()
for len(src) > 0 {
todo := src
if len(todo) > e.o.blockSize {
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 780880ebe..fc4a566d3 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -40,7 +40,7 @@ type frameDec struct {
FrameContentSize uint64
frameDone sync.WaitGroup
- DictionaryID uint32
+ DictionaryID *uint32
HasCheckSum bool
SingleSegment bool
@@ -142,7 +142,7 @@ func (d *frameDec) reset(br byteBuffer) error {
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
- d.DictionaryID = 0
+ d.DictionaryID = nil
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
@@ -154,19 +154,22 @@ func (d *frameDec) reset(br byteBuffer) error {
}
return io.ErrUnexpectedEOF
}
+ var id uint32
switch size {
case 1:
- d.DictionaryID = uint32(b[0])
+ id = uint32(b[0])
case 2:
- d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
+ id = uint32(b[0]) | (uint32(b[1]) << 8)
case 4:
- d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
if debug {
- println("Dict size", size, "ID:", d.DictionaryID)
+ println("Dict size", size, "ID:", id)
}
- if d.DictionaryID != 0 {
- return ErrUnknownDictionary
+ if id > 0 {
+ // ID 0 means "sorry, no dictionary anyway".
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
+ d.DictionaryID = &id
}
}
@@ -351,8 +354,6 @@ func (d *frameDec) initAsync() {
// When the frame has finished decoding the *bufio.Reader
// containing the remaining input will be sent on frameDec.frameDone.
func (d *frameDec) startDecoder(output chan decodeOutput) {
- // TODO: Init to dictionary
- d.history.reset()
written := int64(0)
defer func() {
@@ -445,8 +446,6 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
// runDecoder will create a sync decoder that will decode a block of data.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
- // TODO: Init to dictionary
- d.history.reset()
saved := d.history.b
// We use the history for output to avoid copying it.
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index e002be98b..957cfeb79 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -19,7 +19,7 @@ const (
* Increasing memory usage improves compression ratio
* Reduced memory usage can improve speed, due to cache effect
* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
- maxMemoryUsage = 11
+ maxMemoryUsage = tablelogAbsoluteMax + 2
maxTableLog = maxMemoryUsage - 2
maxTablesize = 1 << maxTableLog
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index e8c419bd5..f418f50fc 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -17,6 +17,7 @@ type history struct {
windowSize int
maxSize int
error bool
+ dict *dict
}
// reset will reset the history to initial state of a frame.
@@ -36,12 +37,27 @@ func (h *history) reset() {
}
h.decoders = sequenceDecs{}
if h.huffTree != nil {
- huffDecoderPool.Put(h.huffTree)
+ if h.dict == nil || h.dict.litDec != h.huffTree {
+ huffDecoderPool.Put(h.huffTree)
+ }
}
h.huffTree = nil
+ h.dict = nil
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
}
+func (h *history) setDict(dict *dict) {
+ if dict == nil {
+ return
+ }
+ h.dict = dict
+ h.decoders.litLengths = dict.llDec
+ h.decoders.offsets = dict.ofDec
+ h.decoders.matchLengths = dict.mlDec
+ h.recentOffsets = dict.offsets
+ h.huffTree = dict.litDec
+}
+
// append bytes to history.
// This function will make sure there is space for it,
// if the buffer has been allocated with enough extra space.
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index 39238e16a..7ff870400 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -62,6 +62,7 @@ type sequenceDecs struct {
matchLengths sequenceDec
prevOffset [3]int
hist []byte
+ dict []byte
literals []byte
out []byte
windowSize int
@@ -85,6 +86,10 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
s.windowSize = hist.windowSize
s.out = out
+ s.dict = nil
+ if hist.dict != nil {
+ s.dict = hist.dict.content
+ }
return nil
}
@@ -100,23 +105,78 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
printf("reading sequence %d, exceeded available data\n", seqs-i)
return io.ErrUnexpectedEOF
}
- var litLen, matchOff, matchLen int
+ var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
- litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
+ // inlined function:
+ // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
+
+ // Final will not read from stream.
+ var llB, mlB, moB uint8
+ ll, llB = llState.final()
+ ml, mlB = mlState.final()
+ mo, moB = ofState.final()
+
+ // extra bits are stored in reverse order.
+ br.fillFast()
+ mo += br.getBits(moB)
+ if s.maxBits > 32 {
+ br.fillFast()
+ }
+ ml += br.getBits(mlB)
+ ll += br.getBits(llB)
+
+ if moB > 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = mo
+ } else {
+ // mo = s.adjustOffset(mo, ll, moB)
+ // Inlined for rather big speedup
+ if ll == 0 {
+ // There is an exception though, when current sequence's literals_length = 0.
+ // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+ // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+ mo++
+ }
+
+ if mo == 0 {
+ mo = s.prevOffset[0]
+ } else {
+ var temp int
+ if mo == 3 {
+ temp = s.prevOffset[0] - 1
+ } else {
+ temp = s.prevOffset[mo]
+ }
+
+ if temp == 0 {
+ // 0 is not valid; input is corrupted; force offset to 1
+ println("temp was 0")
+ temp = 1
+ }
+
+ if mo != 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ }
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = temp
+ mo = temp
+ }
+ }
br.fillFast()
} else {
- litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
+ ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}
if debugSequences {
- println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen)
+ println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}
- if litLen > len(s.literals) {
- return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals))
+ if ll > len(s.literals) {
+ return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
}
- size := litLen + matchLen + len(s.out)
+ size := ll + ml + len(s.out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size", size)
}
@@ -127,52 +187,70 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
s.out = append(s.out, make([]byte, maxBlockSize)...)
s.out = s.out[:len(s.out)-maxBlockSize]
}
- if matchLen > maxMatchLen {
- return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen)
- }
- if matchOff > len(s.out)+len(hist)+litLen {
- return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen)
- }
- if matchOff > s.windowSize {
- return fmt.Errorf("match offset (%d) bigger than window size (%d)", matchOff, s.windowSize)
- }
- if matchOff == 0 && matchLen > 0 {
- return fmt.Errorf("zero matchoff and matchlen > 0")
+ if ml > maxMatchLen {
+ return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
- s.out = append(s.out, s.literals[:litLen]...)
- s.literals = s.literals[litLen:]
+ // Add literals
+ s.out = append(s.out, s.literals[:ll]...)
+ s.literals = s.literals[ll:]
out := s.out
+ if mo > len(s.out)+len(hist) || mo > s.windowSize {
+ if len(s.dict) == 0 {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+ }
+
+ // we may be in dictionary.
+ dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
+ if dictO < 0 || dictO >= len(s.dict) {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+ }
+ end := dictO + ml
+ if end > len(s.dict) {
+ out = append(out, s.dict[dictO:]...)
+ mo -= len(s.dict) - dictO
+ ml -= len(s.dict) - dictO
+ } else {
+ out = append(out, s.dict[dictO:end]...)
+ mo = 0
+ ml = 0
+ }
+ }
+
+ if mo == 0 && ml > 0 {
+ return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+ }
+
// Copy from history.
// TODO: Blocks without history could be made to ignore this completely.
- if v := matchOff - len(s.out); v > 0 {
+ if v := mo - len(s.out); v > 0 {
// v is the start position in history from end.
start := len(s.hist) - v
- if matchLen > v {
+ if ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, s.hist[start:]...)
- matchOff -= v
- matchLen -= v
+ mo -= v
+ ml -= v
} else {
- out = append(out, s.hist[start:start+matchLen]...)
- matchLen = 0
+ out = append(out, s.hist[start:start+ml]...)
+ ml = 0
}
}
// We must be in current buffer now
- if matchLen > 0 {
- start := len(s.out) - matchOff
- if matchLen <= len(s.out)-start {
+ if ml > 0 {
+ start := len(s.out) - mo
+ if ml <= len(s.out)-start {
// No overlap
- out = append(out, s.out[start:start+matchLen]...)
+ out = append(out, s.out[start:start+ml]...)
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
- out = out[:len(out)+matchLen]
- src := out[start : start+matchLen]
+ out = out[:len(out)+ml]
+ src := out[start : start+ml]
// Destination is the space we just added.
- dst := out[len(out)-matchLen:]
+ dst := out[len(out)-ml:]
dst = dst[:len(src)]
for i := range src {
dst[i] = src[i]