From 178cccbf9eaab103460694c37f16e6e40773866a Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Fri, 28 Jan 2022 06:24:01 -0500 Subject: Fix use of infra image to clarify default Signed-off-by: Daniel J Walsh --- .../github.com/klauspost/compress/flate/deflate.go | 62 ++++++++++++---- .../klauspost/compress/flate/huffman_bit_writer.go | 10 ++- .../klauspost/compress/flate/huffman_code.go | 4 +- .../github.com/klauspost/compress/flate/token.go | 19 +++-- .../github.com/klauspost/compress/zstd/blockdec.go | 24 ++----- .../klauspost/compress/zstd/decodeheader.go | 84 ++++++++++++++-------- .../klauspost/compress/zstd/encoder_options.go | 10 ++- .../compress/zstd/internal/xxhash/xxhash_amd64.s | 1 + .../compress/zstd/internal/xxhash/xxhash_arm64.s | 81 ++++++++++----------- .../compress/zstd/internal/xxhash/xxhash_asm.go | 3 +- .../compress/zstd/internal/xxhash/xxhash_other.go | 4 +- 11 files changed, 175 insertions(+), 127 deletions(-) (limited to 'vendor/github.com/klauspost') diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index b27f5a93b..bffa2f332 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -10,9 +10,6 @@ import ( "fmt" "io" "math" - "math/bits" - - comp "github.com/klauspost/compress" ) const ( @@ -76,8 +73,8 @@ var levels = []compressionLevel{ {0, 0, 0, 0, 0, 6}, // Levels 7-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". - {6, 10, 12, 16, skipNever, 7}, - {10, 24, 32, 64, skipNever, 8}, + {8, 12, 16, 24, skipNever, 7}, + {16, 30, 40, 64, skipNever, 8}, {32, 258, 258, 1024, skipNever, 9}, } @@ -110,6 +107,7 @@ type advancedState struct { type compressor struct { compressionLevel + h *huffmanEncoder w *huffmanBitWriter // compression algorithm @@ -271,7 +269,7 @@ func (d *compressor) fillWindow(b []byte) { // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. // pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (length, offset int, ok bool) { +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { minMatchLook := maxMatchLength if lookahead < minMatchLook { minMatchLook = lookahead @@ -297,14 +295,46 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (lengt } offset = 0 + cGain := 0 + if d.chain < 100 { + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return + } + + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 6 // Base is 4 bytes at with an additional cost. // Matches must be better than this. - cGain := minMatchLength*bpb - 12 for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) if n > length { - newGain := n*bpb - bits.Len32(uint32(pos-i)) + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) if newGain > cGain { length = n offset = pos - i @@ -389,10 +419,16 @@ func (d *compressor) deflateLazy() { if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return } - s.estBitsPerByte = 8 - if !d.sync { - s.estBitsPerByte = comp.ShannonEntropyBits(d.window[s.index:d.windowEnd]) - s.estBitsPerByte = int(1 + float64(s.estBitsPerByte)/float64(d.windowEnd-s.index)) + if d.windowEnd != s.index && d.chain > 100 { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + for _, v := range d.window[s.index:d.windowEnd] { + tmp[v]++ + } + d.h.generate(tmp[:], 15) } s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) @@ -446,7 +482,7 @@ func (d *compressor) deflateLazy() { } if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead, s.estBitsPerByte); ok { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { s.length = newLength s.offset = newOffset } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index fb1701eec..fd49efd75 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -52,18 +52,18 @@ var lengthBase = [32]uint8{ } // offset code word extra bits. -var offsetExtraBits = [64]int8{ +var offsetExtraBits = [32]int8{ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, /* extended window */ - 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, + 14, 14, } var offsetCombined = [32]uint32{} func init() { - var offsetBase = [64]uint32{ + var offsetBase = [32]uint32{ /* normal deflate */ 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, @@ -73,9 +73,7 @@ func init() { 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, /* extended window */ - 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, - 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, - 0x100000, 0x180000, 0x200000, 0x300000, + 0x008000, 0x00c000, } for i := range offsetCombined[:] { diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index 67b2b3872..f35e00261 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -129,9 +129,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { func (h *huffmanEncoder) bitLengthRaw(b []byte) int { var total int for _, f := range b { - if f != 0 { - total += int(h.codes[f].len) - } + total += int(h.codes[f].len) } return total } diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index eb862d7a9..3a9618ee1 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -129,11 +129,11 @@ var offsetCodes14 = [256]uint32{ type token uint32 type tokens struct { - nLits int extraHist [32]uint16 // codes 256->maxnumlit offHist [32]uint16 // offset codes litHist [256]uint16 // codes 0->255 - n uint16 // Must be able to contain maxStoreBlockSize + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize tokens [maxStoreBlockSize + 1]token } @@ -142,7 +142,7 @@ func (t *tokens) Reset() { return } t.n = 0 - t.nLits = 0 + t.nFilled = 0 for i := range t.litHist[:] { t.litHist[i] = 0 } @@ -161,12 +161,12 @@ func (t *tokens) Fill() { for i, v := range t.litHist[:] { if v == 0 { t.litHist[i] = 1 - t.nLits++ + t.nFilled++ } } for i, v := range t.extraHist[:literalCount-256] { if v == 0 { - t.nLits++ + t.nFilled++ t.extraHist[i] = 1 } } @@ -202,14 +202,12 @@ func emitLiteral(dst *tokens, lit []byte) { dst.litHist[v]++ } dst.n += uint16(len(lit)) - dst.nLits += len(lit) } func (t *tokens) AddLiteral(lit byte) { t.tokens[t.n] = token(lit) t.litHist[lit]++ t.n++ - t.nLits++ } // from https://stackoverflow.com/a/28730362 @@ -230,8 +228,9 @@ func (t *tokens) EstimatedBits() int { shannon := float32(0) bits := int(0) nMatches := 0 - if t.nLits > 0 { - invTotal := 1.0 / float32(t.nLits) + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) for _, v := range t.litHist[:] { if v > 0 { n := float32(v) @@ -275,7 +274,6 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { } oCode := offsetCode(xoffset) xoffset |= oCode << 16 - t.nLits++ t.extraHist[lengthCodes1[uint8(xlength)]]++ t.offHist[oCode]++ @@ -301,7 +299,6 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { } xlength -= xl xl -= baseMatchLength - t.nLits++ t.extraHist[lengthCodes1[uint8(xl)]]++ t.offHist[oc]++ t.tokens[t.n] = token(matchType | uint32(xl)< 0 { if len(in) < 1 { diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 69736e8d4..5022e71c8 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -5,6 +5,7 @@ package zstd import ( "bytes" + "encoding/binary" "errors" "io" ) @@ -15,18 +16,50 @@ const HeaderMaxSize = 14 + 3 // Header contains information about the first frame and block within that. type Header struct { - // Window Size the window of data to keep while decoding. - // Will only be set if HasFCS is false. - WindowSize uint64 + // SingleSegment specifies whether the data is to be decompressed into a + // single contiguous memory segment. + // It implies that WindowSize is invalid and that FrameContentSize is valid. + SingleSegment bool - // Frame content size. - // Expected size of the entire frame. - FrameContentSize uint64 + // WindowSize is the window of data to keep while decoding. + // Will only be set if SingleSegment is false. + WindowSize uint64 // Dictionary ID. // If 0, no dictionary. DictionaryID uint32 + // HasFCS specifies whether FrameContentSize has a valid value. + HasFCS bool + + // FrameContentSize is the expected uncompressed size of the entire frame. + FrameContentSize uint64 + + // Skippable will be true if the frame is meant to be skipped. + // This implies that FirstBlock.OK is false. + Skippable bool + + // SkippableID is the user-specific ID for the skippable frame. + // Valid values are between 0 to 15, inclusive. + SkippableID int + + // SkippableSize is the length of the user data to skip following + // the header. + SkippableSize uint32 + + // HeaderSize is the raw size of the frame header. + // + // For normal frames, it includes the size of the magic number and + // the size of the header (per section 3.1.1.1). + // It does not include the size for any data blocks (section 3.1.1.2) nor + // the size for the trailing content checksum. + // + // For skippable frames, this counts the size of the magic number + // along with the size of the size field of the payload. + // It does not include the size of the skippable payload itself. + // The total frame size is the HeaderSize plus the SkippableSize. + HeaderSize int + // First block information. FirstBlock struct { // OK will be set if first block could be decoded. @@ -51,17 +84,9 @@ type Header struct { CompressedSize int } - // Skippable will be true if the frame is meant to be skipped. - // No other information will be populated. - Skippable bool - // If set there is a checksum present for the block content. + // The checksum field at the end is always 4 bytes long. HasCheckSum bool - - // If this is true FrameContentSize will have a valid value - HasFCS bool - - SingleSegment bool } // Decode the header from the beginning of the stream. @@ -71,39 +96,46 @@ type Header struct { // If there isn't enough input, io.ErrUnexpectedEOF is returned. // The FirstBlock.OK will indicate if enough information was available to decode the first block header. func (h *Header) Decode(in []byte) error { + *h = Header{} if len(in) < 4 { return io.ErrUnexpectedEOF } + h.HeaderSize += 4 b, in := in[:4], in[4:] if !bytes.Equal(b, frameMagic) { if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { return ErrMagicMismatch } - *h = Header{Skippable: true} + if len(in) < 4 { + return io.ErrUnexpectedEOF + } + h.HeaderSize += 4 + h.Skippable = true + h.SkippableID = int(b[0] & 0xf) + h.SkippableSize = binary.LittleEndian.Uint32(in) return nil } + + // Read Window_Descriptor + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if len(in) < 1 { return io.ErrUnexpectedEOF } - - // Clear output - *h = Header{} fhd, in := in[0], in[1:] + h.HeaderSize++ h.SingleSegment = fhd&(1<<5) != 0 h.HasCheckSum = fhd&(1<<2) != 0 - if fhd&(1<<3) != 0 { return errors.New("reserved bit set on frame header") } - // Read Window_Descriptor - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if !h.SingleSegment { if len(in) < 1 { return io.ErrUnexpectedEOF } var wd byte wd, in = in[0], in[1:] + h.HeaderSize++ windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) @@ -120,9 +152,7 @@ func (h *Header) Decode(in []byte) error { return io.ErrUnexpectedEOF } b, in = in[:size], in[size:] - if b == nil { - return io.ErrUnexpectedEOF - } + h.HeaderSize += int(size) switch size { case 1: h.DictionaryID = uint32(b[0]) @@ -152,9 +182,7 @@ func (h *Header) Decode(in []byte) error { return io.ErrUnexpectedEOF } b, in = in[:fcsSize], in[fcsSize:] - if b == nil { - return io.ErrUnexpectedEOF - } + h.HeaderSize += int(fcsSize) switch fcsSize { case 1: h.FrameContentSize = uint64(b[0]) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 7d29e1d68..5f2e1d020 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -24,6 +24,7 @@ type encoderOptions struct { allLitEntropy bool customWindow bool customALEntropy bool + customBlockSize bool lowMem bool dict *dict } @@ -33,7 +34,7 @@ func (o *encoderOptions) setDefault() { concurrent: runtime.GOMAXPROCS(0), crc: true, single: nil, - blockSize: 1 << 16, + blockSize: maxCompressedBlockSize, windowSize: 8 << 20, level: SpeedDefault, allLitEntropy: true, @@ -106,6 +107,7 @@ func WithWindowSize(n int) EOption { o.customWindow = true if o.blockSize > o.windowSize { o.blockSize = o.windowSize + o.customBlockSize = true } return nil } @@ -188,10 +190,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel { return SpeedDefault case level >= 6 && level < 10: return SpeedBetterCompression - case level >= 10: + default: return SpeedBestCompression } - return SpeedDefault } // String provides a string representation of the compression level. @@ -222,6 +223,9 @@ func WithEncoderLevel(l EncoderLevel) EOption { switch o.level { case SpeedFastest: o.windowSize = 4 << 20 + if !o.customBlockSize { + o.blockSize = 1 << 16 + } case SpeedDefault: o.windowSize = 8 << 20 case SpeedBetterCompression: diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index be8db5bf7..cea178561 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -1,6 +1,7 @@ // +build !appengine // +build gc // +build !purego +// +build !noasm #include "textflag.h" diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 662609589..4d64a17d6 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -1,13 +1,13 @@ -// +build gc,!purego +// +build gc,!purego,!noasm #include "textflag.h" // Register allocation. #define digest R1 -#define h R2 // Return value. -#define p R3 // Input pointer. +#define h R2 // Return value. +#define p R3 // Input pointer. #define len R4 -#define nblocks R5 // len / 32. +#define nblocks R5 // len / 32. #define prime1 R7 #define prime2 R8 #define prime3 R9 @@ -22,50 +22,48 @@ #define x3 R22 #define x4 R23 -#define round(acc, x) \ - MADD prime2, acc, x, acc \ - ROR $64-31, acc \ - MUL prime1, acc \ +#define round(acc, x) \ + MADD prime2, acc, x, acc \ + ROR $64-31, acc \ + MUL prime1, acc \ // x = round(0, x). -#define round0(x) \ - MUL prime2, x \ - ROR $64-31, x \ - MUL prime1, x \ +#define round0(x) \ + MUL prime2, x \ + ROR $64-31, x \ + MUL prime1, x \ -#define mergeRound(x) \ - round0(x) \ - EOR x, h \ - MADD h, prime4, prime1, h \ +#define mergeRound(x) \ + round0(x) \ + EOR x, h \ + MADD h, prime4, prime1, h \ // Update v[1-4] with 32-byte blocks. Assumes len >= 32. -#define blocksLoop() \ - LSR $5, len, nblocks \ - PCALIGN $16 \ -loop: \ - LDP.P 32(p), (x1, x2) \ - round(v1, x1) \ - LDP -16(p), (x3, x4) \ - round(v2, x2) \ - SUB $1, nblocks \ - round(v3, x3) \ - round(v4, x4) \ - CBNZ nblocks, loop \ - +#define blocksLoop() \ + LSR $5, len, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 32(p), (x1, x2) \ + round(v1, x1) \ + LDP -16(p), (x3, x4) \ + round(v2, x2) \ + SUB $1, nblocks \ + round(v3, x3) \ + round(v4, x4) \ + CBNZ nblocks, loop \ // The primes are repeated here to ensure that they're stored // in a contiguous array, so we can load them with LDP. -DATA primes<> +0(SB)/8, $11400714785074694791 -DATA primes<> +8(SB)/8, $14029467366897019727 -DATA primes<>+16(SB)/8, $1609587929392839161 -DATA primes<>+24(SB)/8, $9650029242287828579 -DATA primes<>+32(SB)/8, $2870177450012600261 +DATA primes<> +0(SB)/8, $11400714785074694791 +DATA primes<> +8(SB)/8, $14029467366897019727 +DATA primes<>+16(SB)/8, $1609587929392839161 +DATA primes<>+24(SB)/8, $9650029242287828579 +DATA primes<>+32(SB)/8, $2870177450012600261 GLOBL primes<>(SB), NOPTR+RODATA, $40 - // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 - LDP b_base+0(FP), (p, len) + LDP b_base+0(FP), (p, len) LDP primes<> +0(SB), (prime1, prime2) LDP primes<>+16(SB), (prime3, prime4) @@ -156,24 +154,23 @@ try1: end: EOR h >> 33, h - MUL prime2, h + MUL prime2, h EOR h >> 29, h - MUL prime3, h + MUL prime3, h EOR h >> 32, h MOVD h, ret+24(FP) RET - // func writeBlocks(d *Digest, b []byte) int // // Assumes len(b) >= 32. TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40 - LDP primes<>(SB), (prime1, prime2) + LDP primes<>(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. MOVD d+0(FP), digest - LDP 0(digest), (v1, v2) + LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) LDP b_base+8(FP), (p, len) @@ -181,7 +178,7 @@ TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40 blocksLoop() // Store updated state. - STP (v1, v2), 0(digest) + STP (v1, v2), 0(digest) STP (v3, v4), 16(digest) BIC $31, len diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go index 9216e0a40..1a1fac9c2 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go @@ -1,8 +1,9 @@ -//go:build (amd64 || arm64) && !appengine && gc && !purego +//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm // +build amd64 arm64 // +build !appengine // +build gc // +build !purego +// +build !noasm package xxhash diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go index 2deb1ca75..209cb4a99 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -1,5 +1,5 @@ -//go:build (!amd64 && !arm64) || appengine || !gc || purego -// +build !amd64,!arm64 appengine !gc purego +//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm +// +build !amd64,!arm64 appengine !gc purego noasm package xxhash -- cgit v1.2.3-54-g00ecf