diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate/deflate.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/flate/deflate.go | 133 |
1 files changed, 94 insertions, 39 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 5283ac5a5..b27f5a93b 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -6,9 +6,13 @@ package flate import ( + "encoding/binary" "fmt" "io" "math" + "math/bits" + + comp "github.com/klauspost/compress" ) const ( @@ -37,15 +41,17 @@ const ( maxMatchLength = 258 // The longest match for the compressor minOffsetSize = 1 // The shortest offset that makes any sense - // The maximum number of tokens we put into a single flat block, just too - // stop things from getting too large. - maxFlateBlockTokens = 1 << 14 + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. + maxFlateBlockTokens = 1 << 15 maxStoreBlockSize = 65535 hashBits = 17 // After 17 performance degrades hashSize = 1 << hashBits hashMask = (1 << hashBits) - 1 hashShift = (hashBits + minMatchLength - 1) / minMatchLength - maxHashOffset = 1 << 24 + maxHashOffset = 1 << 28 skipNever = math.MaxInt32 @@ -70,9 +76,9 @@ var levels = []compressionLevel{ {0, 0, 0, 0, 0, 6}, // Levels 7-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". - {8, 8, 24, 16, skipNever, 7}, - {10, 16, 24, 64, skipNever, 8}, - {32, 258, 258, 4096, skipNever, 9}, + {6, 10, 12, 16, skipNever, 7}, + {10, 24, 32, 64, skipNever, 8}, + {32, 258, 258, 1024, skipNever, 9}, } // advancedState contains state for the advanced levels, with bigger hash tables, etc. @@ -93,8 +99,9 @@ type advancedState struct { hashOffset int // input window: unprocessed data is window[index:windowEnd] - index int - hashMatch [maxMatchLength + minMatchLength]uint32 + index int + estBitsPerByte int + hashMatch [maxMatchLength + minMatchLength]uint32 hash uint32 ii uint16 // position of last match, intended to overflow to reset. @@ -170,7 +177,8 @@ func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { window = d.window[d.blockStart:index] } d.blockStart = index - d.w.writeBlock(tok, eof, window) + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) return d.w.err } return nil @@ -263,7 +271,7 @@ func (d *compressor) fillWindow(b []byte) { // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. // pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { +func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (length, offset int, ok bool) { minMatchLook := maxMatchLength if lookahead < minMatchLook { minMatchLook = lookahead @@ -279,36 +287,43 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // If we've got a match that's good enough, only look in 1/4 the chain. tries := d.chain - length = prevLength - if length >= d.good { - tries >>= 2 - } + length = minMatchLength - 1 wEnd := win[pos+length] wPos := win[pos:] minIndex := pos - windowSize + if minIndex < 0 { + minIndex = 0 + } + offset = 0 + // Base is 4 bytes at with an additional cost. + // Matches must be better than this. + cGain := minMatchLength*bpb - 12 for i := prevHead; tries > 0; tries-- { if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) - - if n > length && (n > minMatchLength || pos-i <= 4096) { - length = n - offset = pos - i - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break + if n > length { + newGain := n*bpb - bits.Len32(uint32(pos-i)) + if newGain > cGain { + length = n + offset = pos - i + cGain = newGain + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] } - wEnd = win[pos+n] } } - if i == minIndex { + if i <= minIndex { // hashPrev[i & windowMask] has already been overwritten, so stop now. break } i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex || i < 0 { + if i < minIndex { break } } @@ -327,8 +342,7 @@ func (d *compressor) writeStoredBlock(buf []byte) error { // of the supplied slice. // The caller must ensure that len(b) >= 4. func hash4(b []byte) uint32 { - b = b[:4] - return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits) + return hash4u(binary.LittleEndian.Uint32(b), hashBits) } // bulkHash4 will compute hashes using the same @@ -337,11 +351,12 @@ func bulkHash4(b []byte, dst []uint32) { if len(b) < 4 { return } - hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + hb := binary.LittleEndian.Uint32(b) + dst[0] = hash4u(hb, hashBits) end := len(b) - 4 + 1 for i := 1; i < end; i++ { - hb = (hb << 8) | uint32(b[i+3]) + hb = (hb >> 8) | uint32(b[i+3])<<24 dst[i] = hash4u(hb, hashBits) } } @@ -374,10 +389,15 @@ func (d *compressor) deflateLazy() { if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return } + s.estBitsPerByte = 8 + if !d.sync { + s.estBitsPerByte = comp.ShannonEntropyBits(d.window[s.index:d.windowEnd]) + s.estBitsPerByte = int(1 + float64(s.estBitsPerByte)/float64(d.windowEnd-s.index)) + } s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) if s.index < s.maxInsertIndex { - s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + s.hash = hash4(d.window[s.index:]) } for { @@ -410,7 +430,7 @@ func (d *compressor) deflateLazy() { } if s.index < s.maxInsertIndex { // Update the hash - s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + s.hash = hash4(d.window[s.index:]) ch := s.hashHead[s.hash&hashMask] s.chainHead = int(ch) s.hashPrev[s.index&windowMask] = ch @@ -426,12 +446,37 @@ func (d *compressor) deflateLazy() { } if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead, s.estBitsPerByte); ok { s.length = newLength s.offset = newOffset } } + if prevLength >= minMatchLength && s.length <= prevLength { + // Check for better match at end... + // + // checkOff must be >=2 since we otherwise risk checking s.index + // Offset of 2 seems to yield best results. + const checkOff = 2 + prevIndex := s.index - 1 + if prevIndex+prevLength+checkOff < s.maxInsertIndex { + end := lookahead + if lookahead > maxMatchLength { + end = maxMatchLength + } + end += prevIndex + idx := prevIndex + prevLength - (4 - checkOff) + h := hash4(d.window[idx:]) + ch2 := int(s.hashHead[h&hashMask]) - s.hashOffset - prevLength + (4 - checkOff) + if ch2 > minIndex { + length := matchLen(d.window[prevIndex:end], d.window[ch2:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + } + } + } // There was a match at the previous step, and the current match is // not better. Output the previous match. d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) @@ -479,6 +524,7 @@ func (d *compressor) deflateLazy() { } d.tokens.Reset() } + s.ii = 0 } else { // Reset, if we got a match this run. if s.length >= minMatchLength { @@ -498,13 +544,12 @@ func (d *compressor) deflateLazy() { // If we have a long run of no matches, skip additional bytes // Resets when s.ii overflows after 64KB. - if s.ii > 31 { - n := int(s.ii >> 5) + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) for j := 0; j < n; j++ { if s.index >= d.windowEnd-1 { break } - d.tokens.AddLiteral(d.window[s.index-1]) if d.tokens.n == maxFlateBlockTokens { if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { @@ -512,6 +557,14 @@ func (d *compressor) deflateLazy() { } d.tokens.Reset() } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } s.index++ } // Flush last byte @@ -611,7 +664,9 @@ func (d *compressor) write(b []byte) (n int, err error) { } n = len(b) for len(b) > 0 { - d.step(d) + if d.windowEnd == len(d.window) || d.sync { + d.step(d) + } b = b[d.fill(d, b):] if d.err != nil { return 0, d.err @@ -652,13 +707,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) { level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logNewTablePenalty = 8 + d.w.logNewTablePenalty = 7 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeFast case 7 <= level && level <= 9: - d.w.logNewTablePenalty = 10 + d.w.logNewTablePenalty = 8 d.state = &advancedState{} d.compressionLevel = levels[level] d.initDeflate() |