diff options
author | Daniel J Walsh <dwalsh@redhat.com> | 2022-06-11 06:48:22 -0400 |
---|---|---|
committer | Daniel J Walsh <dwalsh@redhat.com> | 2022-06-13 17:54:17 -0400 |
commit | 5e9d20448c52fd134ac990e9f897cbc378760fce (patch) | |
tree | 4283d9c401ee2fc166bb9377edbe1e2c071de82b /vendor/github.com/klauspost/compress/zstd | |
parent | 9fac1b335f681400a029e9d8014f45fa5634ec40 (diff) | |
download | podman-5e9d20448c52fd134ac990e9f897cbc378760fce.tar.gz podman-5e9d20448c52fd134ac990e9f897cbc378760fce.tar.bz2 podman-5e9d20448c52fd134ac990e9f897cbc378760fce.zip |
Update vendor of containers/buildah
Changes since 2022-05-31:
- add --omit-history option (buildah PR 4028)
Signed-off-by: Ed Santiago <santiago@redhat.com>
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd')
18 files changed, 491 insertions, 580 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index d7cd15ba2..97299d499 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -63,13 +63,6 @@ func (b *bitReader) get32BitsFast(n uint8) uint32 { return v } -func (b *bitReader) get16BitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - // fillFast() will make sure at least 32 bits are available. // There must be at least 4 bytes available. func (b *bitReader) fillFast() { diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index b36618285..78b3c61be 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -5,8 +5,6 @@ package zstd -import "fmt" - // bitWriter will write bits. // First bit will be LSB of the first byte of output. type bitWriter struct { @@ -73,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { b.nBits += bits } -// flush will flush all pending full bytes. -// There will be at least 56 bits available for writing when this has been called. -// Using flush32 is faster, but leaves less space for writing. -func (b *bitWriter) flush() { - v := b.nBits >> 3 - switch v { - case 0: - case 1: - b.out = append(b.out, - byte(b.bitContainer), - ) - case 2: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - ) - case 3: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - ) - case 4: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - ) - case 5: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - ) - case 6: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - ) - case 7: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - ) - case 8: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - byte(b.bitContainer>>56), - ) - default: - panic(fmt.Errorf("bits (%d) > 64", b.nBits)) - } - b.bitContainer >>= v << 3 - b.nBits &= 7 -} - // flush32 will flush out, so there are at least 32 bits available for writing. func (b *bitWriter) flush32() { if b.nBits < 32 { diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index b2bca3301..7eed729be 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -49,11 +49,8 @@ const ( // Maximum possible block size (all Raw+Uncompressed). maxBlockSize = (1 << 21) - 1 - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header - maxCompressedLiteralSize = 1 << 18 - maxRLELiteralSize = 1 << 20 - maxMatchLen = 131074 - maxSequences = 0x7f00 + 0xffff + maxMatchLen = 131074 + maxSequences = 0x7f00 + 0xffff // We support slightly less than the reference decoder to be able to // use ints on 32 bit archs. @@ -105,7 +102,6 @@ type blockDec struct { // Block is RLE, this is the size. RLESize uint32 - tmp [4]byte Type blockType @@ -368,14 +364,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err } if cap(b.literalBuf) < litRegenSize { if b.lowMem { - b.literalBuf = make([]byte, litRegenSize) + b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc) } else { - if litRegenSize > maxCompressedLiteralSize { - // Exceptional - b.literalBuf = make([]byte, litRegenSize) - } else { - b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize) - } + b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc) } } literals = b.literalBuf[:litRegenSize] @@ -405,14 +396,14 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err // Ensure we have space to store it. if cap(b.literalBuf) < litRegenSize { if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize) + b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) } else { - b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) } } var err error // Use our out buffer. - huff.MaxDecodedSize = maxCompressedBlockSize + huff.MaxDecodedSize = litRegenSize if fourStreams { literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { @@ -437,9 +428,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err // Ensure we have space to store it. if cap(b.literalBuf) < litRegenSize { if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize) + b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) } else { - b.literalBuf = make([]byte, 0, maxCompressedBlockSize) + b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) } } huff := hist.huffTree @@ -456,7 +447,7 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err return in, err } hist.huffTree = huff - huff.MaxDecodedSize = maxCompressedBlockSize + huff.MaxDecodedSize = litRegenSize // Use our out buffer. if fourStreams { literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) @@ -471,6 +462,8 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err if len(literals) != litRegenSize { return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } + // Re-cap to get extra size. + literals = b.literalBuf[:len(literals)] if debugDecoder { printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) } diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index b80191e4b..4493baa75 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { return r, nil } -func (b *byteBuf) remain() []byte { - return *b -} - func (b *byteBuf) readByte() (byte, error) { bb := *b if len(bb) < 1 { diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go index 2c4fca17f..0e59a242d 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -13,12 +13,6 @@ type byteReader struct { off int } -// init will initialize the reader and set the input. -func (b *byteReader) init(in []byte) { - b.b = in - b.off = 0 -} - // advance the stream b n bytes. func (b *byteReader) advance(n uint) { b.off += int(n) diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 36119f385..286c8f9d7 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -637,60 +637,18 @@ func (d *Decoder) startSyncDecoder(r io.Reader) error { // Create Decoder: // ASYNC: -// Spawn 4 go routines. -// 0: Read frames and decode blocks. -// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree. -// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets. -// 3: Wait for stream history, execute sequences, send stream history. +// Spawn 3 go routines. +// 0: Read frames and decode block literals. +// 1: Decode sequences. +// 2: Execute sequences, send to output. func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) { defer d.streamWg.Done() br := readerWrapper{r: r} - var seqPrepare = make(chan *blockDec, d.o.concurrent) var seqDecode = make(chan *blockDec, d.o.concurrent) var seqExecute = make(chan *blockDec, d.o.concurrent) - // Async 1: Prepare blocks... - go func() { - var hist history - var hasErr bool - for block := range seqPrepare { - if hasErr { - if block != nil { - seqDecode <- block - } - continue - } - if block.async.newHist != nil { - if debugDecoder { - println("Async 1: new history") - } - hist.reset() - if block.async.newHist.dict != nil { - hist.setDict(block.async.newHist.dict) - } - } - if block.err != nil || block.Type != blockTypeCompressed { - hasErr = block.err != nil - seqDecode <- block - continue - } - - remain, err := block.decodeLiterals(block.data, &hist) - block.err = err - hasErr = block.err != nil - if err == nil { - block.async.literals = hist.decoders.literals - block.async.seqData = remain - } else if debugDecoder { - println("decodeLiterals error:", err) - } - seqDecode <- block - } - close(seqDecode) - }() - - // Async 2: Decode sequences... + // Async 1: Decode sequences... go func() { var hist history var hasErr bool @@ -704,7 +662,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch } if block.async.newHist != nil { if debugDecoder { - println("Async 2: new history, recent:", block.async.newHist.recentOffsets) + println("Async 1: new history, recent:", block.async.newHist.recentOffsets) } hist.decoders = block.async.newHist.decoders hist.recentOffsets = block.async.newHist.recentOffsets @@ -758,7 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch } if block.async.newHist != nil { if debugDecoder { - println("Async 3: new history") + println("Async 2: new history") } hist.windowSize = block.async.newHist.windowSize hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer @@ -845,6 +803,33 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch decodeStream: for { + var hist history + var hasErr bool + + decodeBlock := func(block *blockDec) { + if hasErr { + if block != nil { + seqDecode <- block + } + return + } + if block.err != nil || block.Type != blockTypeCompressed { + hasErr = block.err != nil + seqDecode <- block + return + } + + remain, err := block.decodeLiterals(block.data, &hist) + block.err = err + hasErr = block.err != nil + if err == nil { + block.async.literals = hist.decoders.literals + block.async.seqData = remain + } else if debugDecoder { + println("decodeLiterals error:", err) + } + seqDecode <- block + } frame := d.frame if debugDecoder { println("New frame...") @@ -871,7 +856,7 @@ decodeStream: case <-ctx.Done(): case dec := <-d.decoders: dec.sendErr(err) - seqPrepare <- dec + decodeBlock(dec) } break decodeStream } @@ -891,6 +876,10 @@ decodeStream: if debugDecoder { println("Alloc History:", h.allocFrameBuffer) } + hist.reset() + if h.dict != nil { + hist.setDict(h.dict) + } dec.async.newHist = &h dec.async.fcs = frame.FrameContentSize historySent = true @@ -917,7 +906,7 @@ decodeStream: } err = dec.err last := dec.Last - seqPrepare <- dec + decodeBlock(dec) if err != nil { break decodeStream } @@ -926,7 +915,7 @@ decodeStream: } } } - close(seqPrepare) + close(seqDecode) wg.Wait() d.frame.history.b = frameHistCache } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 602c05ee0..c769f6941 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -156,8 +156,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -518,8 +518,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -674,8 +674,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -1047,8 +1047,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index d6b310424..7ff0c64fa 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -127,8 +127,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -439,8 +439,8 @@ encodeLoop: var t int32 for { - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -785,8 +785,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -969,7 +969,7 @@ encodeLoop: te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen) - longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen) + longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen) e.longTable[longHash1] = te0 e.longTable[longHash2] = te1 e.markLongShardDirty(longHash1) @@ -1002,8 +1002,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index dcc987a7c..e6b1d01cf 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -551,7 +551,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } // If we can do everything in one block, prefer that. - if len(src) <= maxCompressedBlockSize { + if len(src) <= e.o.blockSize { enc.Reset(e.o.dict, true) // Slightly faster with no history and everything in one block. if e.o.crc { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 3ff109cce..fa0a633f3 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -253,10 +253,11 @@ func (d *frameDec) reset(br byteBuffer) error { return ErrWindowSizeTooSmall } d.history.windowSize = int(d.WindowSize) - if d.o.lowMem && d.history.windowSize < maxBlockSize { + if !d.o.lowMem || d.history.windowSize < maxBlockSize { + // Alloc 2x window size if not low-mem, or very small window size. d.history.allocFrameBuffer = d.history.windowSize * 2 - // TODO: Maybe use FrameContent size } else { + // Alloc with one additional block d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize } diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index fde4e6b60..23333b969 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -229,18 +229,10 @@ func (d decSymbol) newState() uint16 { return uint16(d >> 16) } -func (d decSymbol) baseline() uint32 { - return uint32(d >> 32) -} - func (d decSymbol) baselineInt() int { return int(d >> 32) } -func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) { - *d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32) -} - func (d *decSymbol) setNBits(nBits uint8) { const mask = 0xffffffffffffff00 *d = (*d & mask) | decSymbol(nBits) @@ -256,11 +248,6 @@ func (d *decSymbol) setNewState(state uint16) { *d = (*d & mask) | decSymbol(state)<<16 } -func (d *decSymbol) setBaseline(baseline uint32) { - const mask = 0xffffffff - *d = (*d & mask) | decSymbol(baseline)<<32 -} - func (d *decSymbol) setExt(addBits uint8, baseline uint32) { const mask = 0xffff00ff *d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32) @@ -377,34 +364,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) { s.state = dt[br.getBits(tableLog)] } -// next returns the current symbol and sets the next state. -// At least tablelog bits must be available in the bit reader. -func (s *fseState) next(br *bitReader) { - lowBits := uint16(br.getBits(s.state.nbBits())) - s.state = s.dt[s.state.newState()+lowBits] -} - -// finished returns true if all bits have been read from the bitstream -// and the next state would require reading bits from the input. -func (s *fseState) finished(br *bitReader) bool { - return br.finished() && s.state.nbBits() > 0 -} - -// final returns the current state symbol without decoding the next. -func (s *fseState) final() (int, uint8) { - return s.state.baselineInt(), s.state.addBits() -} - // final returns the current state symbol without decoding the next. func (s decSymbol) final() (int, uint8) { return s.baselineInt(), s.addBits() } - -// nextFast returns the next symbol and sets the next state. -// This can only be used if no symbols are 0 bits. -// At least tablelog bits must be available in the bit reader. -func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { - lowBits := br.get16BitsFast(s.state.nbBits()) - s.state = s.dt[s.state.newState()+lowBits] - return s.state.baseline(), s.state.addBits() -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index 5442061b1..ab26326a8 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -76,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) { s.clearCount = maxCount != 0 } -// prepare will prepare and allocate scratch tables used for both compression and decompression. -func (s *fseEncoder) prepare() (*fseEncoder, error) { - if s == nil { - s = &fseEncoder{} - } - s.useRLE = false - if s.clearCount && s.maxCount == 0 { - for i := range s.count { - s.count[i] = 0 - } - s.clearCount = false - } - return s, nil -} - // allocCtable will allocate tables needed for compression. // If existing tables a re big enough, they are simply re-used. func (s *fseEncoder) allocCtable() { @@ -709,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { c.state = c.stateTable[lu] } -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encode(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState) - c.bw.addBits16NC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - // flush will write the tablelog to the output and flush the remaining full bytes. func (c *cState) flush(tableLog uint8) { c.bw.flush32() diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go index cf33f29a1..5d73c21eb 100644 --- a/vendor/github.com/klauspost/compress/zstd/hash.go +++ b/vendor/github.com/klauspost/compress/zstd/hash.go @@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 { return (uint32(u) * prime4bytes) >> (32 - length) } } - -// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash3(u uint32, h uint8) uint32 { - return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index e80139dd9..df0447203 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -188,6 +188,7 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { } } } + // Add final literals copy(out[t:], s.literals) if debugDecoder { @@ -203,12 +204,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { // decode sequences from the stream with the provided history. func (s *sequenceDecs) decodeSync(hist []byte) error { - if true { - supported, err := s.decodeSyncSimple(hist) - if supported { - return err - } + supported, err := s.decodeSyncSimple(hist) + if supported { + return err } + br := s.br seqs := s.nSeqs startSize := len(s.out) @@ -396,6 +396,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { ofState = ofTable[ofState.newState()&maxTableMask] } else { bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) llState = llTable[(llState.newState()+lowBits)&maxTableMask] @@ -418,16 +419,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return br.close() } -// update states, at least 27 bits must be available. -func (s *sequenceDecs) update(br *bitReader) { - // Max 8 bits - s.litLengths.state.next(br) - // Max 9 bits - s.matchLengths.state.next(br) - // Max 8 bits - s.offsets.state.next(br) -} - var bitMask [16]uint16 func init() { @@ -436,87 +427,6 @@ func init() { } } -// update states, at least 27 bits must be available. -func (s *sequenceDecs) updateAlt(br *bitReader) { - // Update all 3 states at once. Approx 20% faster. - a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - - nBits := a.nbBits() + b.nbBits() + c.nbBits() - if nBits == 0 { - s.litLengths.state.state = s.litLengths.state.dt[a.newState()] - s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()] - s.offsets.state.state = s.offsets.state.dt[c.newState()] - return - } - bits := br.get32BitsFast(nBits) - lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31)) - s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits] - - lowBits = uint16(bits >> (c.nbBits() & 31)) - lowBits &= bitMask[b.nbBits()&15] - s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits] - - lowBits = uint16(bits) & bitMask[c.nbBits()&15] - s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits] -} - -// nextFast will return new states when there are at least 4 unused bytes left on the stream when done. -func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { - // Final will not read from stream. - ll, llB := llState.final() - ml, mlB := mlState.final() - mo, moB := ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - return - } - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - return - } - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - return -} - func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { // Final will not read from stream. ll, llB := llState.final() diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 4676b09cc..847b322ae 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -62,6 +62,10 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { useSafe = true } + if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { + useSafe = true + } + br := s.br maxBlockSize := maxCompressedBlockSize @@ -301,6 +305,10 @@ type executeAsmContext struct { //go:noescape func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool +// Same as above, but with safe memcopies +//go:noescape +func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool + // executeSimple handles cases when dictionary is not used. func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { // Ensure we have enough output size... @@ -327,8 +335,12 @@ func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { literals: s.literals, windowSize: s.windowSize, } - - ok := sequenceDecs_executeSimple_amd64(&ctx) + var ok bool + if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { + ok = sequenceDecs_executeSimple_safe_amd64(&ctx) + } else { + ok = sequenceDecs_executeSimple_amd64(&ctx) + } if !ok { return fmt.Errorf("match offset (%d) bigger than current history (%d)", seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 2585b2e98..212c6cac3 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -705,60 +705,55 @@ sequenceDecs_decode_bmi2_fill_2_end: MOVQ CX, (R9) // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_bmi2_skip_update - - // Update Literal Length State - MOVBQZX SI, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, SI, SI + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_bmi2_skip_update + LEAQ (SI)(DI*1), R14 + ADDQ R8, R14 + MOVBQZX R14, R14 LEAQ (DX)(R14*1), CX MOVQ AX, R15 MOVQ CX, DX ROLQ CL, R15 BZHIQ R14, R15, R15 - ADDQ R15, SI - // Load ctx.llTable + // Update Offset State + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 // Update Match Length State - MOVBQZX DI, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, DI, DI - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - ADDQ R15, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, DI, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX MOVQ (CX)(DI*8), DI - // Update Offset State - MOVBQZX R8, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, R8, R8 - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - ADDQ R15, R8 + // Update Literal Length State + BZHIQ SI, R15, CX + MOVQ $0x00001010, R14 + BEXTRQ R14, SI, SI + ADDQ CX, SI - // Load ctx.ofTable + // Load ctx.llTable MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI sequenceDecs_decode_bmi2_skip_update: // Adjust offset @@ -965,60 +960,55 @@ sequenceDecs_decode_56_bmi2_fill_end: MOVQ CX, (R9) // Fill bitreader for state updates - MOVQ R13, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R13 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decode_56_bmi2_skip_update - - // Update Literal Length State - MOVBQZX SI, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, SI, SI + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_bmi2_skip_update + LEAQ (SI)(DI*1), R14 + ADDQ R8, R14 + MOVBQZX R14, R14 LEAQ (DX)(R14*1), CX MOVQ AX, R15 MOVQ CX, DX ROLQ CL, R15 BZHIQ R14, R15, R15 - ADDQ R15, SI - // Load ctx.llTable + // Update Offset State + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 // Update Match Length State - MOVBQZX DI, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, DI, DI - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - ADDQ R15, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, DI, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX MOVQ (CX)(DI*8), DI - // Update Offset State - MOVBQZX R8, R14 - MOVQ $0x00001010, CX - BEXTRQ CX, R8, R8 - LEAQ (DX)(R14*1), CX - MOVQ AX, R15 - MOVQ CX, DX - ROLQ CL, R15 - BZHIQ R14, R15, R15 - ADDQ R15, R8 + // Update Literal Length State + BZHIQ SI, R15, CX + MOVQ $0x00001010, R14 + BEXTRQ R14, SI, SI + ADDQ CX, SI - // Load ctx.ofTable + // Load ctx.llTable MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI sequenceDecs_decode_56_bmi2_skip_update: // Adjust offset @@ -1171,6 +1161,228 @@ main_loop: TESTQ R11, R11 JZ check_offset XORQ R14, R14 + +copy_1: + MOVUPS (SI)(R14*1), X0 + MOVUPS X0, (BX)(R14*1) + ADDQ $0x10, R14 + CMPQ R14, R11 + JB copy_1 + ADDQ R11, SI + ADDQ R11, BX + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JGE copy_all_from_history + XORQ R11, R11 + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(R11*1), R12 + MOVB R12, (BX)(R11*1) + ADDQ $0x01, R11 + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(R11*1), R12 + MOVW R12, (BX)(R11*1) + ADDQ $0x02, R11 + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(R11*1), R12 + MOVL R12, (BX)(R11*1) + ADDQ $0x04, R11 + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(R11*1), R12 + MOVQ R12, (BX)(R11*1) + ADDQ $0x08, R11 + JMP copy_4_test + +copy_4: + MOVUPS (R14)(R11*1), X0 + MOVUPS X0, (BX)(R11*1) + ADDQ $0x10, R11 + +copy_4_test: + CMPQ R11, R13 + JB copy_4 + ADDQ R13, DI + ADDQ R13, BX + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, R11 + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (BX)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, R11 + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (BX)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, R11 + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (BX)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, R11 + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (BX)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (BX)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, R11 + JB copy_5 + ADDQ R11, BX + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, DI + MOVQ BX, R12 + ADDQ R13, BX + +copy_2: + MOVUPS (R11), X0 + MOVUPS X0, (R12) + ADDQ $0x10, R11 + ADDQ $0x10, R12 + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, DI + +copy_slow_3: + MOVB (R11), R12 + MOVB R12, (BX) + INCQ R11 + INCQ BX + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + XORQ R14, R14 TESTQ $0x00000001, R11 JZ copy_1_word MOVB (SI)(R14*1), R15 @@ -1326,18 +1538,46 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match - ADDQ R13, DI - MOVQ BX, R12 - ADDQ R13, BX + ADDQ R13, DI + XORQ R12, R12 + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (R11)(R12*1), R14 + MOVB R14, (BX)(R12*1) + ADDQ $0x01, R12 + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (R11)(R12*1), R14 + MOVW R14, (BX)(R12*1) + ADDQ $0x02, R12 + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (R11)(R12*1), R14 + MOVL R14, (BX)(R12*1) + ADDQ $0x04, R12 + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (R11)(R12*1), R14 + MOVQ R14, (BX)(R12*1) + ADDQ $0x08, R12 + JMP copy_2_test copy_2: - MOVUPS (R11), X0 - MOVUPS X0, (R12) - ADDQ $0x10, R11 + MOVUPS (R11)(R12*1), X0 + MOVUPS X0, (BX)(R12*1) ADDQ $0x10, R12 - SUBQ $0x10, R13 - JHI copy_2 - JMP handle_loop + +copy_2_test: + CMPQ R12, R13 + JB copy_2 + ADDQ R13, BX + JMP handle_loop // Copy overlapping match copy_overlapping_match: @@ -1673,45 +1913,16 @@ sequenceDecs_decodeSync_amd64_match_len_ofs_ok: TESTQ AX, AX JZ check_offset XORQ R14, R14 - TESTQ $0x00000001, AX - JZ copy_1_word - MOVB (R11)(R14*1), R15 - MOVB R15, (R10)(R14*1) - ADDQ $0x01, R14 - -copy_1_word: - TESTQ $0x00000002, AX - JZ copy_1_dword - MOVW (R11)(R14*1), R15 - MOVW R15, (R10)(R14*1) - ADDQ $0x02, R14 - -copy_1_dword: - TESTQ $0x00000004, AX - JZ copy_1_qword - MOVL (R11)(R14*1), R15 - MOVL R15, (R10)(R14*1) - ADDQ $0x04, R14 - -copy_1_qword: - TESTQ $0x00000008, AX - JZ copy_1_test - MOVQ (R11)(R14*1), R15 - MOVQ R15, (R10)(R14*1) - ADDQ $0x08, R14 - JMP copy_1_test copy_1: MOVUPS (R11)(R14*1), X0 MOVUPS X0, (R10)(R14*1) ADDQ $0x10, R14 - -copy_1_test: - CMPQ R14, AX - JB copy_1 - ADDQ AX, R11 - ADDQ AX, R10 - ADDQ AX, R12 + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: @@ -2044,60 +2255,55 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: MOVQ CX, 24(SP) // Fill bitreader for state updates - MOVQ R12, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R12 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_bmi2_skip_update - - // Update Literal Length State - MOVBQZX SI, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, SI, SI + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_bmi2_skip_update + LEAQ (SI)(DI*1), R13 + ADDQ R8, R13 + MOVBQZX R13, R13 LEAQ (DX)(R13*1), CX MOVQ AX, R14 MOVQ CX, DX ROLQ CL, R14 BZHIQ R13, R14, R14 - ADDQ R14, SI - // Load ctx.llTable + // Update Offset State + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 // Update Match Length State - MOVBQZX DI, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, DI, DI - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - ADDQ R14, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, DI, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX MOVQ (CX)(DI*8), DI - // Update Offset State - MOVBQZX R8, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, R8, R8 - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - ADDQ R14, R8 + // Update Literal Length State + BZHIQ SI, R14, CX + MOVQ $0x00001010, R13 + BEXTRQ R13, SI, SI + ADDQ CX, SI - // Load ctx.ofTable + // Load ctx.llTable MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI sequenceDecs_decodeSync_bmi2_skip_update: // Adjust offset @@ -2180,45 +2386,16 @@ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: TESTQ CX, CX JZ check_offset XORQ R14, R14 - TESTQ $0x00000001, CX - JZ copy_1_word - MOVB (R10)(R14*1), R15 - MOVB R15, (R9)(R14*1) - ADDQ $0x01, R14 - -copy_1_word: - TESTQ $0x00000002, CX - JZ copy_1_dword - MOVW (R10)(R14*1), R15 - MOVW R15, (R9)(R14*1) - ADDQ $0x02, R14 - -copy_1_dword: - TESTQ $0x00000004, CX - JZ copy_1_qword - MOVL (R10)(R14*1), R15 - MOVL R15, (R9)(R14*1) - ADDQ $0x04, R14 - -copy_1_qword: - TESTQ $0x00000008, CX - JZ copy_1_test - MOVQ (R10)(R14*1), R15 - MOVQ R15, (R9)(R14*1) - ADDQ $0x08, R14 - JMP copy_1_test copy_1: MOVUPS (R10)(R14*1), X0 MOVUPS X0, (R9)(R14*1) ADDQ $0x10, R14 - -copy_1_test: - CMPQ R14, CX - JB copy_1 - ADDQ CX, R10 - ADDQ CX, R9 - ADDQ CX, R11 + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: @@ -3108,60 +3285,55 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: MOVQ CX, 24(SP) // Fill bitreader for state updates - MOVQ R12, (SP) - MOVQ $0x00000808, CX - BEXTRQ CX, R8, R12 - MOVQ ctx+16(FP), CX - CMPQ 96(CX), $0x00 - JZ sequenceDecs_decodeSync_safe_bmi2_skip_update - - // Update Literal Length State - MOVBQZX SI, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, SI, SI + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_bmi2_skip_update + LEAQ (SI)(DI*1), R13 + ADDQ R8, R13 + MOVBQZX R13, R13 LEAQ (DX)(R13*1), CX MOVQ AX, R14 MOVQ CX, DX ROLQ CL, R14 BZHIQ R13, R14, R14 - ADDQ R14, SI - // Load ctx.llTable + // Update Offset State + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable MOVQ ctx+16(FP), CX - MOVQ (CX), CX - MOVQ (CX)(SI*8), SI + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 // Update Match Length State - MOVBQZX DI, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, DI, DI - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - ADDQ R14, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, DI, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX MOVQ 24(CX), CX MOVQ (CX)(DI*8), DI - // Update Offset State - MOVBQZX R8, R13 - MOVQ $0x00001010, CX - BEXTRQ CX, R8, R8 - LEAQ (DX)(R13*1), CX - MOVQ AX, R14 - MOVQ CX, DX - ROLQ CL, R14 - BZHIQ R13, R14, R14 - ADDQ R14, R8 + // Update Literal Length State + BZHIQ SI, R14, CX + MOVQ $0x00001010, R13 + BEXTRQ R13, SI, SI + ADDQ CX, SI - // Load ctx.ofTable + // Load ctx.llTable MOVQ ctx+16(FP), CX - MOVQ 48(CX), CX - MOVQ (CX)(R8*8), R8 + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI sequenceDecs_decodeSync_safe_bmi2_skip_update: // Adjust offset diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go index b53f606a1..29c15c8c4 100644 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -18,7 +18,14 @@ const ZipMethodWinZip = 93 // See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT const ZipMethodPKWare = 20 -var zipReaderPool sync.Pool +// zipReaderPool is the default reader pool. +var zipReaderPool = sync.Pool{New: func() interface{} { + z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1)) + if err != nil { + panic(err) + } + return z +}} // newZipReader creates a pooled zip decompressor. func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index c1c90b4a0..3eb3f1c82 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -110,17 +110,6 @@ func printf(format string, a ...interface{}) { } } -// matchLenFast does matching, but will not match the last up to 7 bytes. -func matchLenFast(a, b []byte) int { - endI := len(a) & (math.MaxInt32 - 7) - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - return i + bits.TrailingZeros64(diff)>>3 - } - } - return endI -} - // matchLen returns the maximum length. // a must be the shortest of the two. // The function also returns whether all bytes matched. |