diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2020-03-17 17:25:21 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-17 17:25:21 +0100 |
commit | 8f1ce4bddd54fe8067cca688430a7099c5f42cd1 (patch) | |
tree | 1c6f2e629375a9ab1532d769440f183f50c33dcc /vendor/github.com/klauspost | |
parent | 9ef5d28759e6d37c6f5c1f07df4cf676077851dc (diff) | |
parent | 8081d9c74552612ad73fef27e5775fcfb371e812 (diff) | |
download | podman-8f1ce4bddd54fe8067cca688430a7099c5f42cd1.tar.gz podman-8f1ce4bddd54fe8067cca688430a7099c5f42cd1.tar.bz2 podman-8f1ce4bddd54fe8067cca688430a7099c5f42cd1.zip |
Merge pull request #5524 from rhatdan/vendor
Update containers/storage to v1.16.5
Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/decoder.go | 2 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/enc_better.go | 521 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/enc_dfast.go | 51 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/enc_fast.go | 140 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/encoder.go | 15 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/encoder_options.go | 26 | ||||
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/zstd.go | 30 | ||||
-rw-r--r-- | vendor/github.com/klauspost/pgzip/README.md | 17 | ||||
-rw-r--r-- | vendor/github.com/klauspost/pgzip/gzip.go | 76 |
9 files changed, 722 insertions, 156 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 73ac3c630..86553c2c3 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -66,7 +66,7 @@ var ( // A Decoder can be used in two modes: // // 1) As a stream, or -// 2) For stateless decoding using DecodeAll or DecodeBuffer. +// 2) For stateless decoding using DecodeAll. // // Only a single stream can be decoded concurrently, but the same decoder // can run multiple concurrent stateless decodes. It is even possible to diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go new file mode 100644 index 000000000..4375e08b4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -0,0 +1,521 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + betterLongTableBits = 19 // Bits used in the long match table + betterLongTableSize = 1 << betterLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + betterShortTableBits = 13 // Bits used in the short match table + betterShortTableSize = 1 << betterShortTableBits // Size of the table +) + +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + stepSize := int32(e.o.targetLength) + if stepSize == 0 { + stepSize++ + } + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.Encode(blk, src) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 0ffea7655..d640e6a9f 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -172,55 +172,6 @@ encodeLoop: cv = load6432(src, s) continue } - const repOff2 = 1 - // We deviate from the reference encoder and also check offset 2. - // Slower and not consistently better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) { - // Consider history as well. - var seq seq - lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src) - - seq.matchLen = uint32(lenght - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 - nextEmit = s - if s >= sLimit { - if debug { - println("repeat ended", s, lenght) - - } - break encodeLoop - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } } // Find the offsets of our two matches. coffsetL := s - (candidateL.offset - e.cur) @@ -372,7 +323,7 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashS := hash5(cv, dFastShortTableBits) nextHashL := hash8(cv, dFastLongTableBits) // We have at least 4 byte match. diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 28134b158..1387b8082 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,6 +6,7 @@ package zstd import ( "fmt" + "math" "math/bits" "github.com/klauspost/compress/zstd/internal/xxhash" @@ -23,7 +24,7 @@ type tableEntry struct { offset int32 } -type fastEncoder struct { +type fastBase struct { o encParams // cur is the offset at the start of hist cur int32 @@ -31,18 +32,22 @@ type fastEncoder struct { maxMatchOff int32 hist []byte crc *xxhash.Digest - table [tableSize]tableEntry tmp [8]byte blk *blockEnc } +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + // CRC returns the underlying CRC writer. -func (e *fastEncoder) CRC() *xxhash.Digest { +func (e *fastBase) CRC() *xxhash.Digest { return e.crc } // AppendCRC will append the CRC to the destination slice and return it. -func (e *fastEncoder) AppendCRC(dst []byte) []byte { +func (e *fastBase) AppendCRC(dst []byte) []byte { crc := e.crc.Sum(e.tmp[:0]) dst = append(dst, crc[7], crc[6], crc[5], crc[4]) return dst @@ -50,7 +55,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastEncoder) WindowSize(size int) int32 { +func (e *fastBase) WindowSize(size int) int32 { if size > 0 && size < int(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. @@ -63,7 +68,7 @@ func (e *fastEncoder) WindowSize(size int) int32 { } // Block returns the current block. -func (e *fastEncoder) Block() *blockEnc { +func (e *fastBase) Block() *blockEnc { return e.blk } @@ -169,9 +174,22 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+6, repIndex+4, src) + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -197,11 +215,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -257,7 +275,20 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -294,7 +325,20 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -412,10 +456,23 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // lenght := 4 + e.matchlen(s+6, repIndex+4, src) - lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -441,11 +498,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -498,7 +555,20 @@ encodeLoop: // Extend the 4-byte match as long as possible. //l := e.matchlenNoHist(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -536,7 +606,20 @@ encodeLoop: // We have at least 4 byte match. // No need to check backwards. We come straight from a match //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -571,7 +654,7 @@ encodeLoop: } } -func (e *fastEncoder) addBlock(src []byte) int32 { +func (e *fastBase) addBlock(src []byte) int32 { if debugAsserts && e.cur > bufferReset { panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) } @@ -602,17 +685,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 { // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. -func (e *fastEncoder) UseBlock(enc *blockEnc) { +func (e *fastBase) UseBlock(enc *blockEnc) { enc.reset(e.blk) e.blk = enc } -func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } -func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { if debugAsserts { if s < 0 { err := fmt.Sprintf("s (%d) < 0", s) @@ -626,18 +709,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) panic(err) } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } } // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. -func (e *fastEncoder) Reset() { +func (e *fastBase) Reset() { if e.blk == nil { e.blk = &blockEnc{} e.blk.init() diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4032fb9fc..67d45efb9 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -71,15 +71,14 @@ func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { } if w != nil { e.Reset(w) - } else { - e.init.Do(func() { - e.initialize() - }) } return &e, nil } func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } e.encoders = make(chan encoder, e.o.concurrent) for i := 0; i < e.o.concurrent; i++ { e.encoders <- e.o.encoder() @@ -89,9 +88,6 @@ func (e *Encoder) initialize() { // Reset will re-initialize the writer and new writes will encode to the supplied writer // as a new, independent stream. func (e *Encoder) Reset(w io.Writer) { - e.init.Do(func() { - e.initialize() - }) s := &e.state s.wg.Wait() s.wWg.Wait() @@ -422,10 +418,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(func() { - e.o.setDefault() - e.initialize() - }) + e.init.Do(e.initialize) enc := <-e.encoders defer func() { // Release encoder reference to last block. diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 40eb45733..0ff970dac 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -39,9 +39,11 @@ func (o *encoderOptions) setDefault() { func (o encoderOptions) encoder() encoder { switch o.level { case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + case SpeedBetterCompression: + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} case SpeedFastest: - return &fastEncoder{maxMatchOff: int32(o.windowSize)} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} } panic("unknown compression level") } @@ -67,7 +69,7 @@ func WithEncoderConcurrency(n int) EOption { } // WithWindowSize will set the maximum allowed back-reference distance. -// The value must be a power of two between WindowSizeMin and WindowSizeMax. +// The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. // The default value is determined by the compression level. @@ -130,18 +132,18 @@ const ( // This is roughly equivalent to the default Zstandard mode (level 3). SpeedDefault + // SpeedBetterCompression will yield better compression than the default. + // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. + // By using this, notice that CPU usage may go up in the future. + SpeedBetterCompression + // speedLast should be kept as the last actual compression option. // The is not for external usage, but is used to keep track of the valid options. speedLast - // SpeedBetterCompression will (in the future) yield better compression than the default, - // but at approximately 4x the CPU usage of the default. - // For now this is not implemented. - SpeedBetterCompression = SpeedDefault - // SpeedBestCompression will choose the best available compression option. // For now this is not implemented. - SpeedBestCompression = SpeedDefault + SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back @@ -163,8 +165,10 @@ func EncoderLevelFromZstd(level int) EncoderLevel { switch { case level < 3: return SpeedFastest - case level >= 3: + case level >= 3 && level < 6: return SpeedDefault + case level > 5: + return SpeedBetterCompression } return SpeedDefault } @@ -176,6 +180,8 @@ func (e EncoderLevel) String() string { return "fastest" case SpeedDefault: return "default" + case SpeedBetterCompression: + return "better" default: return "invalid" } diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 5e0b64ccc..0807719c8 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -87,6 +87,17 @@ func printf(format string, a ...interface{}) { } } +// matchLenFast does matching, but will not match the last up to 7 bytes. +func matchLenFast(a, b []byte) int { + endI := len(a) & (math.MaxInt32 - 7) + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + bits.TrailingZeros64(diff)>>3 + } + } + return endI +} + // matchLen returns the maximum length. // a must be the shortest of the two. // The function also returns whether all bytes matched. @@ -97,33 +108,18 @@ func matchLen(a, b []byte) int { return i + (bits.TrailingZeros64(diff) >> 3) } } + checked := (len(a) >> 3) << 3 a = a[checked:] b = b[checked:] - // TODO: We could do a 4 check. for i := range a { if a[i] != b[i] { - return int(i) + checked + return i + checked } } return len(a) + checked } -// matchLen returns a match length in src between index s and t -func matchLenIn(src []byte, s, t int32) int32 { - s1 := len(src) - b := src[t:] - a := src[s:s1] - b = b[:len(a)] - // Extend the match to be as long as possible. - for i := range a { - if a[i] != b[i] { - return int32(i) - } - } - return int32(len(a)) -} - func load3232(b []byte, i int32) uint32 { // Help the compiler eliminate bounds checks on the read so it can be done in a single read. b = b[i:] diff --git a/vendor/github.com/klauspost/pgzip/README.md b/vendor/github.com/klauspost/pgzip/README.md index 81000996c..171b978fd 100644 --- a/vendor/github.com/klauspost/pgzip/README.md +++ b/vendor/github.com/klauspost/pgzip/README.md @@ -39,7 +39,6 @@ You might need to get/update the dependencies: ``` go get -u github.com/klauspost/compress -go get -u github.com/klauspost/crc32 ``` Usage @@ -65,7 +64,7 @@ Changes in [github.com/klauspost/compress](https://github.com/klauspost/compress ## Compression The simplest way to use this is to simply do the same as you would when using [compress/gzip](http://golang.org/pkg/compress/gzip). -To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. Default values for this is SetConcurrency(250000, 16), meaning blocks are split at 250000 bytes and up to 16 blocks can be processing at once before the writer blocks. +To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. Default values for this is SetConcurrency(1MB, runtime.GOMAXPROCS(0)), meaning blocks are split at 1 MB and up to the number of CPU threads blocks can be processing at once before the writer blocks. Example: @@ -99,19 +98,19 @@ See my blog post in [Benchmarks of Golang Gzip](https://blog.klauspost.com/go-gz Compression cost is usually about 0.2% with default settings with a block size of 250k. -Example with GOMAXPROC set to 8 (quad core with 8 hyperthreads) +Example with GOMAXPROC set to 32 (16 core CPU) Content is [Matt Mahoneys 10GB corpus](http://mattmahoney.net/dc/10gb.html). Compression level 6. Compressor | MB/sec | speedup | size | size overhead (lower=better) ------------|----------|---------|------|--------- -[gzip](http://golang.org/pkg/compress/gzip) (golang) | 7.21MB/s | 1.0x | 4786608902 | 0% -[gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 10.98MB/s | 1.52x | 4781331645 | -0.11% -[pgzip](https://github.com/klauspost/pgzip) (klauspost) | 50.76MB/s|7.04x | 4784121440 | -0.052% -[bgzf](https://godoc.org/github.com/biogo/hts/bgzf) (biogo) | 38.65MB/s | 5.36x | 4924899484 | 2.889% -[pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 32.00MB/s | 4.44x | 4791226567 | 0.096% +[gzip](http://golang.org/pkg/compress/gzip) (golang) | 15.44MB/s (1 thread) | 1.0x | 4781329307 | 0% +[gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 135.04MB/s (1 thread) | 8.74x | 4894858258 | +2.37% +[pgzip](https://github.com/klauspost/pgzip) (klauspost) | 1573.23MB/s| 101.9x | 4902285651 | +2.53% +[bgzf](https://godoc.org/github.com/biogo/hts/bgzf) (biogo) | 361.40MB/s | 23.4x | 4869686090 | +1.85% +[pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 306.01MB/s | 19.8x | 4786890417 | +0.12% -pgzip also contains a [linear time compression](https://github.com/klauspost/compress#linear-time-compression) mode, that will allow compression at ~150MB per core per second, independent of the content. +pgzip also contains a [linear time compression](https://github.com/klauspost/compress#linear-time-compression-huffman-only) mode, that will allow compression at ~250MB per core per second, independent of the content. See the [complete sheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) for different content types and compression settings. diff --git a/vendor/github.com/klauspost/pgzip/gzip.go b/vendor/github.com/klauspost/pgzip/gzip.go index 85d14e9cb..bb2e33941 100644 --- a/vendor/github.com/klauspost/pgzip/gzip.go +++ b/vendor/github.com/klauspost/pgzip/gzip.go @@ -11,6 +11,7 @@ import ( "hash" "hash/crc32" "io" + "runtime" "sync" "time" @@ -18,9 +19,9 @@ import ( ) const ( - defaultBlockSize = 256 << 10 + defaultBlockSize = 1 << 20 tailSize = 16384 - defaultBlocks = 16 + defaultBlocks = 4 ) // These constants are copied from the flate package, so that code that imports @@ -68,8 +69,8 @@ type result struct { // With this you can control the approximate size of your blocks, // as well as how many you want to be processing in parallel. // -// Default values for this is SetConcurrency(250000, 16), -// meaning blocks are split at 250000 bytes and up to 16 blocks +// Default values for this is SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)), +// meaning blocks are split at 1 MB and up to the number of CPU threads // can be processing at once before the writer blocks. func (z *Writer) SetConcurrency(blockSize, blocks int) error { if blockSize <= tailSize { @@ -115,7 +116,7 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) { return nil, fmt.Errorf("gzip: invalid compression level: %d", level) } z := new(Writer) - z.SetConcurrency(defaultBlockSize, defaultBlocks) + z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)) z.init(w, level) return z, nil } @@ -174,7 +175,7 @@ func (z *Writer) Reset(w io.Writer) { if z.results != nil && !z.closed { close(z.results) } - z.SetConcurrency(defaultBlockSize, defaultBlocks) + z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)) z.init(w, z.level) } @@ -239,36 +240,36 @@ func (z *Writer) writeString(s string) (err error) { // compressCurrent will compress the data currently buffered // This should only be called from the main writer/flush/closer func (z *Writer) compressCurrent(flush bool) { + c := z.currentBuffer + if len(c) > z.blockSize { + // This can never happen through the public interface. + panic("len(z.currentBuffer) > z.blockSize (most likely due to concurrent Write race)") + } + r := result{} r.result = make(chan []byte, 1) r.notifyWritten = make(chan struct{}, 0) + // Reserve a result slot select { case z.results <- r: case <-z.pushedErr: return } - // If block given is more than twice the block size, split it. - c := z.currentBuffer - if len(c) > z.blockSize*2 { - c = c[:z.blockSize] - z.wg.Add(1) - go z.compressBlock(c, z.prevTail, r, false) - z.prevTail = c[len(c)-tailSize:] - z.currentBuffer = z.currentBuffer[z.blockSize:] - z.compressCurrent(flush) - // Last one flushes if needed - return - } - z.wg.Add(1) - go z.compressBlock(c, z.prevTail, r, z.closed) + tail := z.prevTail if len(c) > tailSize { - z.prevTail = c[len(c)-tailSize:] + buf := z.dstPool.Get().([]byte) // Put in .compressBlock + // Copy tail from current buffer before handing the buffer over to the + // compressBlock goroutine. + buf = append(buf[:0], c[len(c)-tailSize:]...) + z.prevTail = buf } else { z.prevTail = nil } - z.currentBuffer = z.dstPool.Get().([]byte) + go z.compressBlock(c, tail, r, z.closed) + + z.currentBuffer = z.dstPool.Get().([]byte) // Put in .compressBlock z.currentBuffer = z.currentBuffer[:0] // Wait if flushing @@ -358,29 +359,37 @@ func (z *Writer) Write(p []byte) (int, error) { // Start receiving data from compressors go func() { listen := z.results + var failed bool for { r, ok := <-listen // If closed, we are finished. if !ok { return } + if failed { + close(r.notifyWritten) + continue + } buf := <-r.result n, err := z.w.Write(buf) if err != nil { z.pushError(err) close(r.notifyWritten) - return + failed = true + continue } if n != len(buf) { z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf))) + failed = true close(r.notifyWritten) - return + continue } z.dstPool.Put(buf) close(r.notifyWritten) } }() - z.currentBuffer = make([]byte, 0, z.blockSize) + z.currentBuffer = z.dstPool.Get().([]byte) + z.currentBuffer = z.currentBuffer[:0] } q := p for len(q) > 0 { @@ -390,7 +399,10 @@ func (z *Writer) Write(p []byte) (int, error) { } z.digest.Write(q[:length]) z.currentBuffer = append(z.currentBuffer, q[:length]...) - if len(z.currentBuffer) >= z.blockSize { + if len(z.currentBuffer) > z.blockSize { + panic("z.currentBuffer too large (most likely due to concurrent Write race)") + } + if len(z.currentBuffer) == z.blockSize { z.compressCurrent(false) if err := z.checkError(); err != nil { return len(p) - len(q) - length, err @@ -410,12 +422,13 @@ func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) { close(r.result) z.wg.Done() }() - buf := z.dstPool.Get().([]byte) + buf := z.dstPool.Get().([]byte) // Corresponding Put in .Write's result writer dest := bytes.NewBuffer(buf[:0]) - compressor := z.dictFlatePool.Get().(*flate.Writer) + compressor := z.dictFlatePool.Get().(*flate.Writer) // Put below compressor.ResetDict(dest, prevTail) compressor.Write(p) + z.dstPool.Put(p) // Corresponding Get in .Write and .compressCurrent err := compressor.Flush() if err != nil { @@ -429,7 +442,12 @@ func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) { return } } - z.dictFlatePool.Put(compressor) + z.dictFlatePool.Put(compressor) // Get above + + if prevTail != nil { + z.dstPool.Put(prevTail) // Get in .compressCurrent + } + // Read back buffer buf = dest.Bytes() r.result <- buf |