summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/zstd
diff options
context:
space:
mode:
authorDaniel J Walsh <dwalsh@redhat.com>2022-06-11 06:48:22 -0400
committerDaniel J Walsh <dwalsh@redhat.com>2022-06-13 17:54:17 -0400
commit5e9d20448c52fd134ac990e9f897cbc378760fce (patch)
tree4283d9c401ee2fc166bb9377edbe1e2c071de82b /vendor/github.com/klauspost/compress/zstd
parent9fac1b335f681400a029e9d8014f45fa5634ec40 (diff)
downloadpodman-5e9d20448c52fd134ac990e9f897cbc378760fce.tar.gz
podman-5e9d20448c52fd134ac990e9f897cbc378760fce.tar.bz2
podman-5e9d20448c52fd134ac990e9f897cbc378760fce.zip
Update vendor of containers/buildah
Changes since 2022-05-31: - add --omit-history option (buildah PR 4028) Signed-off-by: Ed Santiago <santiago@redhat.com> Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd')
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bitreader.go7
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bitwriter.go76
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockdec.go31
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bytebuf.go4
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bytereader.go6
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder.go93
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_better.go8
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_dfast.go10
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder.go2
-rw-r--r--vendor/github.com/klauspost/compress/zstd/framedec.go5
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_decoder.go40
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_encoder.go23
-rw-r--r--vendor/github.com/klauspost/compress/zstd/hash.go6
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec.go102
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go16
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s622
-rw-r--r--vendor/github.com/klauspost/compress/zstd/zip.go9
-rw-r--r--vendor/github.com/klauspost/compress/zstd/zstd.go11
18 files changed, 491 insertions, 580 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
index d7cd15ba2..97299d499 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -63,13 +63,6 @@ func (b *bitReader) get32BitsFast(n uint8) uint32 {
return v
}
-func (b *bitReader) get16BitsFast(n uint8) uint16 {
- const regMask = 64 - 1
- v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
- b.bitsRead += n
- return v
-}
-
// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReader) fillFast() {
diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
index b36618285..78b3c61be 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
@@ -5,8 +5,6 @@
package zstd
-import "fmt"
-
// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
@@ -73,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}
-// flush will flush all pending full bytes.
-// There will be at least 56 bits available for writing when this has been called.
-// Using flush32 is faster, but leaves less space for writing.
-func (b *bitWriter) flush() {
- v := b.nBits >> 3
- switch v {
- case 0:
- case 1:
- b.out = append(b.out,
- byte(b.bitContainer),
- )
- case 2:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- )
- case 3:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- )
- case 4:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- byte(b.bitContainer>>24),
- )
- case 5:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- byte(b.bitContainer>>24),
- byte(b.bitContainer>>32),
- )
- case 6:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- byte(b.bitContainer>>24),
- byte(b.bitContainer>>32),
- byte(b.bitContainer>>40),
- )
- case 7:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- byte(b.bitContainer>>24),
- byte(b.bitContainer>>32),
- byte(b.bitContainer>>40),
- byte(b.bitContainer>>48),
- )
- case 8:
- b.out = append(b.out,
- byte(b.bitContainer),
- byte(b.bitContainer>>8),
- byte(b.bitContainer>>16),
- byte(b.bitContainer>>24),
- byte(b.bitContainer>>32),
- byte(b.bitContainer>>40),
- byte(b.bitContainer>>48),
- byte(b.bitContainer>>56),
- )
- default:
- panic(fmt.Errorf("bits (%d) > 64", b.nBits))
- }
- b.bitContainer >>= v << 3
- b.nBits &= 7
-}
-
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index b2bca3301..7eed729be 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -49,11 +49,8 @@ const (
// Maximum possible block size (all Raw+Uncompressed).
maxBlockSize = (1 << 21) - 1
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
- maxCompressedLiteralSize = 1 << 18
- maxRLELiteralSize = 1 << 20
- maxMatchLen = 131074
- maxSequences = 0x7f00 + 0xffff
+ maxMatchLen = 131074
+ maxSequences = 0x7f00 + 0xffff
// We support slightly less than the reference decoder to be able to
// use ints on 32 bit archs.
@@ -105,7 +102,6 @@ type blockDec struct {
// Block is RLE, this is the size.
RLESize uint32
- tmp [4]byte
Type blockType
@@ -368,14 +364,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
}
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
- b.literalBuf = make([]byte, litRegenSize)
+ b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
} else {
- if litRegenSize > maxCompressedLiteralSize {
- // Exceptional
- b.literalBuf = make([]byte, litRegenSize)
- } else {
- b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
- }
+ b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
literals = b.literalBuf[:litRegenSize]
@@ -405,14 +396,14 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
- b.literalBuf = make([]byte, 0, litRegenSize)
+ b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
- b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+ b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
var err error
// Use our out buffer.
- huff.MaxDecodedSize = maxCompressedBlockSize
+ huff.MaxDecodedSize = litRegenSize
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
@@ -437,9 +428,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
- b.literalBuf = make([]byte, 0, litRegenSize)
+ b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
- b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
+ b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
huff := hist.huffTree
@@ -456,7 +447,7 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
return in, err
}
hist.huffTree = huff
- huff.MaxDecodedSize = maxCompressedBlockSize
+ huff.MaxDecodedSize = litRegenSize
// Use our out buffer.
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
@@ -471,6 +462,8 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
if len(literals) != litRegenSize {
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
+ // Re-cap to get extra size.
+ literals = b.literalBuf[:len(literals)]
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index b80191e4b..4493baa75 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
return r, nil
}
-func (b *byteBuf) remain() []byte {
- return *b
-}
-
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
index 2c4fca17f..0e59a242d 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -13,12 +13,6 @@ type byteReader struct {
off int
}
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
- b.b = in
- b.off = 0
-}
-
// advance the stream b n bytes.
func (b *byteReader) advance(n uint) {
b.off += int(n)
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 36119f385..286c8f9d7 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -637,60 +637,18 @@ func (d *Decoder) startSyncDecoder(r io.Reader) error {
// Create Decoder:
// ASYNC:
-// Spawn 4 go routines.
-// 0: Read frames and decode blocks.
-// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree.
-// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets.
-// 3: Wait for stream history, execute sequences, send stream history.
+// Spawn 3 go routines.
+// 0: Read frames and decode block literals.
+// 1: Decode sequences.
+// 2: Execute sequences, send to output.
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
defer d.streamWg.Done()
br := readerWrapper{r: r}
- var seqPrepare = make(chan *blockDec, d.o.concurrent)
var seqDecode = make(chan *blockDec, d.o.concurrent)
var seqExecute = make(chan *blockDec, d.o.concurrent)
- // Async 1: Prepare blocks...
- go func() {
- var hist history
- var hasErr bool
- for block := range seqPrepare {
- if hasErr {
- if block != nil {
- seqDecode <- block
- }
- continue
- }
- if block.async.newHist != nil {
- if debugDecoder {
- println("Async 1: new history")
- }
- hist.reset()
- if block.async.newHist.dict != nil {
- hist.setDict(block.async.newHist.dict)
- }
- }
- if block.err != nil || block.Type != blockTypeCompressed {
- hasErr = block.err != nil
- seqDecode <- block
- continue
- }
-
- remain, err := block.decodeLiterals(block.data, &hist)
- block.err = err
- hasErr = block.err != nil
- if err == nil {
- block.async.literals = hist.decoders.literals
- block.async.seqData = remain
- } else if debugDecoder {
- println("decodeLiterals error:", err)
- }
- seqDecode <- block
- }
- close(seqDecode)
- }()
-
- // Async 2: Decode sequences...
+ // Async 1: Decode sequences...
go func() {
var hist history
var hasErr bool
@@ -704,7 +662,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
- println("Async 2: new history, recent:", block.async.newHist.recentOffsets)
+ println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
@@ -758,7 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
- println("Async 3: new history")
+ println("Async 2: new history")
}
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
@@ -845,6 +803,33 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
decodeStream:
for {
+ var hist history
+ var hasErr bool
+
+ decodeBlock := func(block *blockDec) {
+ if hasErr {
+ if block != nil {
+ seqDecode <- block
+ }
+ return
+ }
+ if block.err != nil || block.Type != blockTypeCompressed {
+ hasErr = block.err != nil
+ seqDecode <- block
+ return
+ }
+
+ remain, err := block.decodeLiterals(block.data, &hist)
+ block.err = err
+ hasErr = block.err != nil
+ if err == nil {
+ block.async.literals = hist.decoders.literals
+ block.async.seqData = remain
+ } else if debugDecoder {
+ println("decodeLiterals error:", err)
+ }
+ seqDecode <- block
+ }
frame := d.frame
if debugDecoder {
println("New frame...")
@@ -871,7 +856,7 @@ decodeStream:
case <-ctx.Done():
case dec := <-d.decoders:
dec.sendErr(err)
- seqPrepare <- dec
+ decodeBlock(dec)
}
break decodeStream
}
@@ -891,6 +876,10 @@ decodeStream:
if debugDecoder {
println("Alloc History:", h.allocFrameBuffer)
}
+ hist.reset()
+ if h.dict != nil {
+ hist.setDict(h.dict)
+ }
dec.async.newHist = &h
dec.async.fcs = frame.FrameContentSize
historySent = true
@@ -917,7 +906,7 @@ decodeStream:
}
err = dec.err
last := dec.Last
- seqPrepare <- dec
+ decodeBlock(dec)
if err != nil {
break decodeStream
}
@@ -926,7 +915,7 @@ decodeStream:
}
}
}
- close(seqPrepare)
+ close(seqDecode)
wg.Wait()
d.frame.history.b = frameHistCache
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index 602c05ee0..c769f6941 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -156,8 +156,8 @@ encodeLoop:
panic("offset0 was 0")
}
- nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+ nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -518,8 +518,8 @@ encodeLoop:
}
// Store this, since we have it.
- nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+ nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -674,8 +674,8 @@ encodeLoop:
panic("offset0 was 0")
}
- nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+ nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -1047,8 +1047,8 @@ encodeLoop:
}
// Store this, since we have it.
- nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+ nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index d6b310424..7ff0c64fa 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -127,8 +127,8 @@ encodeLoop:
panic("offset0 was 0")
}
- nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+ nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -439,8 +439,8 @@ encodeLoop:
var t int32
for {
- nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+ nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -785,8 +785,8 @@ encodeLoop:
panic("offset0 was 0")
}
- nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+ nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -969,7 +969,7 @@ encodeLoop:
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
- longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
+ longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
e.longTable[longHash1] = te0
e.longTable[longHash2] = te1
e.markLongShardDirty(longHash1)
@@ -1002,8 +1002,8 @@ encodeLoop:
}
// Store this, since we have it.
- nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+ nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index dcc987a7c..e6b1d01cf 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -551,7 +551,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
// If we can do everything in one block, prefer that.
- if len(src) <= maxCompressedBlockSize {
+ if len(src) <= e.o.blockSize {
enc.Reset(e.o.dict, true)
// Slightly faster with no history and everything in one block.
if e.o.crc {
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 3ff109cce..fa0a633f3 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -253,10 +253,11 @@ func (d *frameDec) reset(br byteBuffer) error {
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
- if d.o.lowMem && d.history.windowSize < maxBlockSize {
+ if !d.o.lowMem || d.history.windowSize < maxBlockSize {
+ // Alloc 2x window size if not low-mem, or very small window size.
d.history.allocFrameBuffer = d.history.windowSize * 2
- // TODO: Maybe use FrameContent size
} else {
+ // Alloc with one additional block
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index fde4e6b60..23333b969 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -229,18 +229,10 @@ func (d decSymbol) newState() uint16 {
return uint16(d >> 16)
}
-func (d decSymbol) baseline() uint32 {
- return uint32(d >> 32)
-}
-
func (d decSymbol) baselineInt() int {
return int(d >> 32)
}
-func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
- *d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
-}
-
func (d *decSymbol) setNBits(nBits uint8) {
const mask = 0xffffffffffffff00
*d = (*d & mask) | decSymbol(nBits)
@@ -256,11 +248,6 @@ func (d *decSymbol) setNewState(state uint16) {
*d = (*d & mask) | decSymbol(state)<<16
}
-func (d *decSymbol) setBaseline(baseline uint32) {
- const mask = 0xffffffff
- *d = (*d & mask) | decSymbol(baseline)<<32
-}
-
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
const mask = 0xffff00ff
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
@@ -377,34 +364,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
s.state = dt[br.getBits(tableLog)]
}
-// next returns the current symbol and sets the next state.
-// At least tablelog bits must be available in the bit reader.
-func (s *fseState) next(br *bitReader) {
- lowBits := uint16(br.getBits(s.state.nbBits()))
- s.state = s.dt[s.state.newState()+lowBits]
-}
-
-// finished returns true if all bits have been read from the bitstream
-// and the next state would require reading bits from the input.
-func (s *fseState) finished(br *bitReader) bool {
- return br.finished() && s.state.nbBits() > 0
-}
-
-// final returns the current state symbol without decoding the next.
-func (s *fseState) final() (int, uint8) {
- return s.state.baselineInt(), s.state.addBits()
-}
-
// final returns the current state symbol without decoding the next.
func (s decSymbol) final() (int, uint8) {
return s.baselineInt(), s.addBits()
}
-
-// nextFast returns the next symbol and sets the next state.
-// This can only be used if no symbols are 0 bits.
-// At least tablelog bits must be available in the bit reader.
-func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
- lowBits := br.get16BitsFast(s.state.nbBits())
- s.state = s.dt[s.state.newState()+lowBits]
- return s.state.baseline(), s.state.addBits()
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index 5442061b1..ab26326a8 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -76,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
s.clearCount = maxCount != 0
}
-// prepare will prepare and allocate scratch tables used for both compression and decompression.
-func (s *fseEncoder) prepare() (*fseEncoder, error) {
- if s == nil {
- s = &fseEncoder{}
- }
- s.useRLE = false
- if s.clearCount && s.maxCount == 0 {
- for i := range s.count {
- s.count[i] = 0
- }
- s.clearCount = false
- }
- return s, nil
-}
-
// allocCtable will allocate tables needed for compression.
// If existing tables a re big enough, they are simply re-used.
func (s *fseEncoder) allocCtable() {
@@ -709,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
c.state = c.stateTable[lu]
}
-// encode the output symbol provided and write it to the bitstream.
-func (c *cState) encode(symbolTT symbolTransform) {
- nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
- dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
- c.bw.addBits16NC(c.state, uint8(nbBitsOut))
- c.state = c.stateTable[dstState]
-}
-
// flush will write the tablelog to the output and flush the remaining full bytes.
func (c *cState) flush(tableLog uint8) {
c.bw.flush32()
diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go
index cf33f29a1..5d73c21eb 100644
--- a/vendor/github.com/klauspost/compress/zstd/hash.go
+++ b/vendor/github.com/klauspost/compress/zstd/hash.go
@@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 {
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
-
-// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash3(u uint32, h uint8) uint32 {
- return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index e80139dd9..df0447203 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -188,6 +188,7 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}
}
}
+
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
@@ -203,12 +204,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decodeSync(hist []byte) error {
- if true {
- supported, err := s.decodeSyncSimple(hist)
- if supported {
- return err
- }
+ supported, err := s.decodeSyncSimple(hist)
+ if supported {
+ return err
}
+
br := s.br
seqs := s.nSeqs
startSize := len(s.out)
@@ -396,6 +396,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
+
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@@ -418,16 +419,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
return br.close()
}
-// update states, at least 27 bits must be available.
-func (s *sequenceDecs) update(br *bitReader) {
- // Max 8 bits
- s.litLengths.state.next(br)
- // Max 9 bits
- s.matchLengths.state.next(br)
- // Max 8 bits
- s.offsets.state.next(br)
-}
-
var bitMask [16]uint16
func init() {
@@ -436,87 +427,6 @@ func init() {
}
}
-// update states, at least 27 bits must be available.
-func (s *sequenceDecs) updateAlt(br *bitReader) {
- // Update all 3 states at once. Approx 20% faster.
- a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
-
- nBits := a.nbBits() + b.nbBits() + c.nbBits()
- if nBits == 0 {
- s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
- s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
- s.offsets.state.state = s.offsets.state.dt[c.newState()]
- return
- }
- bits := br.get32BitsFast(nBits)
- lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
- s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
-
- lowBits = uint16(bits >> (c.nbBits() & 31))
- lowBits &= bitMask[b.nbBits()&15]
- s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
-
- lowBits = uint16(bits) & bitMask[c.nbBits()&15]
- s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
-}
-
-// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
-func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
- // Final will not read from stream.
- ll, llB := llState.final()
- ml, mlB := mlState.final()
- mo, moB := ofState.final()
-
- // extra bits are stored in reverse order.
- br.fillFast()
- mo += br.getBits(moB)
- if s.maxBits > 32 {
- br.fillFast()
- }
- ml += br.getBits(mlB)
- ll += br.getBits(llB)
-
- if moB > 1 {
- s.prevOffset[2] = s.prevOffset[1]
- s.prevOffset[1] = s.prevOffset[0]
- s.prevOffset[0] = mo
- return
- }
- // mo = s.adjustOffset(mo, ll, moB)
- // Inlined for rather big speedup
- if ll == 0 {
- // There is an exception though, when current sequence's literals_length = 0.
- // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
- // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
- mo++
- }
-
- if mo == 0 {
- mo = s.prevOffset[0]
- return
- }
- var temp int
- if mo == 3 {
- temp = s.prevOffset[0] - 1
- } else {
- temp = s.prevOffset[mo]
- }
-
- if temp == 0 {
- // 0 is not valid; input is corrupted; force offset to 1
- println("temp was 0")
- temp = 1
- }
-
- if mo != 1 {
- s.prevOffset[2] = s.prevOffset[1]
- }
- s.prevOffset[1] = s.prevOffset[0]
- s.prevOffset[0] = temp
- mo = temp
- return
-}
-
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
index 4676b09cc..847b322ae 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -62,6 +62,10 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
useSafe = true
}
+ if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
+ useSafe = true
+ }
+
br := s.br
maxBlockSize := maxCompressedBlockSize
@@ -301,6 +305,10 @@ type executeAsmContext struct {
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
+// Same as above, but with safe memcopies
+//go:noescape
+func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
+
// executeSimple handles cases when dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
@@ -327,8 +335,12 @@ func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
literals: s.literals,
windowSize: s.windowSize,
}
-
- ok := sequenceDecs_executeSimple_amd64(&ctx)
+ var ok bool
+ if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
+ ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
+ } else {
+ ok = sequenceDecs_executeSimple_amd64(&ctx)
+ }
if !ok {
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 2585b2e98..212c6cac3 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -705,60 +705,55 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ CX, (R9)
// Fill bitreader for state updates
- MOVQ R13, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R13
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decode_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_bmi2_skip_update
+ LEAQ (SI)(DI*1), R14
+ ADDQ R8, R14
+ MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
- ADDQ R15, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, R8
+ // Update Literal Length State
+ BZHIQ SI, R15, CX
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decode_bmi2_skip_update:
// Adjust offset
@@ -965,60 +960,55 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ CX, (R9)
// Fill bitreader for state updates
- MOVQ R13, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R13
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decode_56_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_56_bmi2_skip_update
+ LEAQ (SI)(DI*1), R14
+ ADDQ R8, R14
+ MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
- ADDQ R15, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, R8
+ // Update Literal Length State
+ BZHIQ SI, R15, CX
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decode_56_bmi2_skip_update:
// Adjust offset
@@ -1171,6 +1161,228 @@ main_loop:
TESTQ R11, R11
JZ check_offset
XORQ R14, R14
+
+copy_1:
+ MOVUPS (SI)(R14*1), X0
+ MOVUPS X0, (BX)(R14*1)
+ ADDQ $0x10, R14
+ CMPQ R14, R11
+ JB copy_1
+ ADDQ R11, SI
+ ADDQ R11, BX
+ ADDQ R11, DI
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ LEAQ (DI)(R10*1), R11
+ CMPQ R12, R11
+ JG error_match_off_too_big
+ CMPQ R12, R8
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ R12, R11
+ SUBQ DI, R11
+ JLS copy_match
+ MOVQ R9, R14
+ SUBQ R11, R14
+ CMPQ R13, R11
+ JGE copy_all_from_history
+ XORQ R11, R11
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(R11*1), R12
+ MOVB R12, (BX)(R11*1)
+ ADDQ $0x01, R11
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(R11*1), R12
+ MOVW R12, (BX)(R11*1)
+ ADDQ $0x02, R11
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(R11*1), R12
+ MOVL R12, (BX)(R11*1)
+ ADDQ $0x04, R11
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(R11*1), R12
+ MOVQ R12, (BX)(R11*1)
+ ADDQ $0x08, R11
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(R11*1), X0
+ MOVUPS X0, (BX)(R11*1)
+ ADDQ $0x10, R11
+
+copy_4_test:
+ CMPQ R11, R13
+ JB copy_4
+ ADDQ R13, DI
+ ADDQ R13, BX
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, R11
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (BX)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, R11
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (BX)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, R11
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (BX)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, R11
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (BX)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (BX)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, R11
+ JB copy_5
+ ADDQ R11, BX
+ ADDQ R11, DI
+ SUBQ R11, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ BX, R11
+ SUBQ R12, R11
+
+ // ml <= mo
+ CMPQ R13, R12
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ ADDQ R13, DI
+ MOVQ BX, R12
+ ADDQ R13, BX
+
+copy_2:
+ MOVUPS (R11), X0
+ MOVUPS X0, (R12)
+ ADDQ $0x10, R11
+ ADDQ $0x10, R12
+ SUBQ $0x10, R13
+ JHI copy_2
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ ADDQ R13, DI
+
+copy_slow_3:
+ MOVB (R11), R12
+ MOVB R12, (BX)
+ INCQ R11
+ INCQ BX
+ DECQ R13
+ JNZ copy_slow_3
+
+handle_loop:
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+
+loop_finished:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+error_match_off_too_big:
+ // Return value
+ MOVB $0x00, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+empty_seqs:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+ RET
+
+// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
+// Requires: SSE
+TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9
+ MOVQ ctx+0(FP), R10
+ MOVQ 8(R10), CX
+ TESTQ CX, CX
+ JZ empty_seqs
+ MOVQ (R10), AX
+ MOVQ 24(R10), DX
+ MOVQ 32(R10), BX
+ MOVQ 80(R10), SI
+ MOVQ 104(R10), DI
+ MOVQ 120(R10), R8
+ MOVQ 56(R10), R9
+ MOVQ 64(R10), R10
+ ADDQ R10, R9
+
+ // seqsBase += 24 * seqIndex
+ LEAQ (DX)(DX*2), R11
+ SHLQ $0x03, R11
+ ADDQ R11, AX
+
+ // outBase += outPosition
+ ADDQ DI, BX
+
+main_loop:
+ MOVQ (AX), R11
+ MOVQ 16(AX), R12
+ MOVQ 8(AX), R13
+
+ // Copy literals
+ TESTQ R11, R11
+ JZ check_offset
+ XORQ R14, R14
TESTQ $0x00000001, R11
JZ copy_1_word
MOVB (SI)(R14*1), R15
@@ -1326,18 +1538,46 @@ copy_match:
JA copy_overlapping_match
// Copy non-overlapping match
- ADDQ R13, DI
- MOVQ BX, R12
- ADDQ R13, BX
+ ADDQ R13, DI
+ XORQ R12, R12
+ TESTQ $0x00000001, R13
+ JZ copy_2_word
+ MOVB (R11)(R12*1), R14
+ MOVB R14, (BX)(R12*1)
+ ADDQ $0x01, R12
+
+copy_2_word:
+ TESTQ $0x00000002, R13
+ JZ copy_2_dword
+ MOVW (R11)(R12*1), R14
+ MOVW R14, (BX)(R12*1)
+ ADDQ $0x02, R12
+
+copy_2_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_2_qword
+ MOVL (R11)(R12*1), R14
+ MOVL R14, (BX)(R12*1)
+ ADDQ $0x04, R12
+
+copy_2_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_2_test
+ MOVQ (R11)(R12*1), R14
+ MOVQ R14, (BX)(R12*1)
+ ADDQ $0x08, R12
+ JMP copy_2_test
copy_2:
- MOVUPS (R11), X0
- MOVUPS X0, (R12)
- ADDQ $0x10, R11
+ MOVUPS (R11)(R12*1), X0
+ MOVUPS X0, (BX)(R12*1)
ADDQ $0x10, R12
- SUBQ $0x10, R13
- JHI copy_2
- JMP handle_loop
+
+copy_2_test:
+ CMPQ R12, R13
+ JB copy_2
+ ADDQ R13, BX
+ JMP handle_loop
// Copy overlapping match
copy_overlapping_match:
@@ -1673,45 +1913,16 @@ sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
TESTQ AX, AX
JZ check_offset
XORQ R14, R14
- TESTQ $0x00000001, AX
- JZ copy_1_word
- MOVB (R11)(R14*1), R15
- MOVB R15, (R10)(R14*1)
- ADDQ $0x01, R14
-
-copy_1_word:
- TESTQ $0x00000002, AX
- JZ copy_1_dword
- MOVW (R11)(R14*1), R15
- MOVW R15, (R10)(R14*1)
- ADDQ $0x02, R14
-
-copy_1_dword:
- TESTQ $0x00000004, AX
- JZ copy_1_qword
- MOVL (R11)(R14*1), R15
- MOVL R15, (R10)(R14*1)
- ADDQ $0x04, R14
-
-copy_1_qword:
- TESTQ $0x00000008, AX
- JZ copy_1_test
- MOVQ (R11)(R14*1), R15
- MOVQ R15, (R10)(R14*1)
- ADDQ $0x08, R14
- JMP copy_1_test
copy_1:
MOVUPS (R11)(R14*1), X0
MOVUPS X0, (R10)(R14*1)
ADDQ $0x10, R14
-
-copy_1_test:
- CMPQ R14, AX
- JB copy_1
- ADDQ AX, R11
- ADDQ AX, R10
- ADDQ AX, R12
+ CMPQ R14, AX
+ JB copy_1
+ ADDQ AX, R11
+ ADDQ AX, R10
+ ADDQ AX, R12
// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
@@ -2044,60 +2255,55 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ CX, 24(SP)
// Fill bitreader for state updates
- MOVQ R12, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R12
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decodeSync_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_bmi2_skip_update
+ LEAQ (SI)(DI*1), R13
+ ADDQ R8, R13
+ MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
- ADDQ R14, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, R8
+ // Update Literal Length State
+ BZHIQ SI, R14, CX
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_bmi2_skip_update:
// Adjust offset
@@ -2180,45 +2386,16 @@ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
TESTQ CX, CX
JZ check_offset
XORQ R14, R14
- TESTQ $0x00000001, CX
- JZ copy_1_word
- MOVB (R10)(R14*1), R15
- MOVB R15, (R9)(R14*1)
- ADDQ $0x01, R14
-
-copy_1_word:
- TESTQ $0x00000002, CX
- JZ copy_1_dword
- MOVW (R10)(R14*1), R15
- MOVW R15, (R9)(R14*1)
- ADDQ $0x02, R14
-
-copy_1_dword:
- TESTQ $0x00000004, CX
- JZ copy_1_qword
- MOVL (R10)(R14*1), R15
- MOVL R15, (R9)(R14*1)
- ADDQ $0x04, R14
-
-copy_1_qword:
- TESTQ $0x00000008, CX
- JZ copy_1_test
- MOVQ (R10)(R14*1), R15
- MOVQ R15, (R9)(R14*1)
- ADDQ $0x08, R14
- JMP copy_1_test
copy_1:
MOVUPS (R10)(R14*1), X0
MOVUPS X0, (R9)(R14*1)
ADDQ $0x10, R14
-
-copy_1_test:
- CMPQ R14, CX
- JB copy_1
- ADDQ CX, R10
- ADDQ CX, R9
- ADDQ CX, R11
+ CMPQ R14, CX
+ JB copy_1
+ ADDQ CX, R10
+ ADDQ CX, R9
+ ADDQ CX, R11
// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
@@ -3108,60 +3285,55 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ CX, 24(SP)
// Fill bitreader for state updates
- MOVQ R12, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R12
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
+ LEAQ (SI)(DI*1), R13
+ ADDQ R8, R13
+ MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
- ADDQ R14, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, R8
+ // Update Literal Length State
+ BZHIQ SI, R14, CX
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_safe_bmi2_skip_update:
// Adjust offset
diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go
index b53f606a1..29c15c8c4 100644
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@@ -18,7 +18,14 @@ const ZipMethodWinZip = 93
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
const ZipMethodPKWare = 20
-var zipReaderPool sync.Pool
+// zipReaderPool is the default reader pool.
+var zipReaderPool = sync.Pool{New: func() interface{} {
+ z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
+ if err != nil {
+ panic(err)
+ }
+ return z
+}}
// newZipReader creates a pooled zip decompressor.
func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index c1c90b4a0..3eb3f1c82 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -110,17 +110,6 @@ func printf(format string, a ...interface{}) {
}
}
-// matchLenFast does matching, but will not match the last up to 7 bytes.
-func matchLenFast(a, b []byte) int {
- endI := len(a) & (math.MaxInt32 - 7)
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- return i + bits.TrailingZeros64(diff)>>3
- }
- }
- return endI
-}
-
// matchLen returns the maximum length.
// a must be the shortest of the two.
// The function also returns whether all bytes matched.