diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd/enc_best.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/enc_best.go | 147 |
1 files changed, 96 insertions, 51 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index b7d4b9004..41025d62b 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -6,21 +6,59 @@ package zstd import ( "fmt" - "math/bits" + + "github.com/klauspost/compress" ) const ( - bestLongTableBits = 20 // Bits used in the long match table + bestLongTableBits = 22 // Bits used in the long match table bestLongTableSize = 1 << bestLongTableBits // Size of the table + bestLongLen = 8 // Bytes used for table hash // Note: Increasing the short table bits or making the hash shorter // can actually lead to compression degradation since it will 'steal' more from the // long match table and match offsets are quite big. // This greatly depends on the type of input. - bestShortTableBits = 16 // Bits used in the short match table + bestShortTableBits = 18 // Bits used in the short match table bestShortTableSize = 1 << bestShortTableBits // Size of the table + bestShortLen = 4 // Bytes used for table hash + ) +type match struct { + offset int32 + s int32 + length int32 + rep int32 + est int32 +} + +const highScore = 25000 + +// estBits will estimate output bits from predefined tables. +func (m *match) estBits(bitsPerByte int32) { + mlc := mlCode(uint32(m.length - zstdMinMatch)) + var ofc uint8 + if m.rep < 0 { + ofc = ofCode(uint32(m.s-m.offset) + 3) + } else { + ofc = ofCode(uint32(m.rep)) + } + // Cost, excluding + ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] + + // Add cost of match encoding... + m.est = int32(ofTT.outBits + mlTT.outBits) + m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16) + // Subtract savings compared to literal encoding... + m.est -= (m.length * bitsPerByte) >> 10 + if m.est > 0 { + // Unlikely gain.. + m.length = 0 + m.est = highScore + } +} + // bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. // The long match table contains the previous entry with the same hash, // effectively making it a "chain" of length 2. @@ -109,6 +147,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { return } + // Use this to estimate literal cost. + // Scaled by 10 bits. + bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src)) + // Huffman can never go < 1 bit/byte + if bitsPerByte < 1024 { + bitsPerByte = 1024 + } + // Override src src = e.hist sLimit := int32(len(src)) - inputMargin @@ -145,51 +191,44 @@ encodeLoop: panic("offset0 was 0") } - type match struct { - offset int32 - s int32 - length int32 - rep int32 - } - matchAt := func(offset int32, s int32, first uint32, rep int32) match { - if s-offset >= e.maxMatchOff || load3232(src, offset) != first { - return match{offset: offset, s: s} - } - return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} - } - bestOf := func(a, b match) match { - aScore := b.s - a.s + a.length - bScore := a.s - b.s + b.length - if a.rep < 0 { - aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8 - } - if b.rep < 0 { - bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8 - } - if aScore >= bScore { + if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { return a } return b } const goodEnough = 100 - nextHashL := hash8(cv, bestLongTableBits) - nextHashS := hash4x64(cv, bestShortTableBits) + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] + matchAt := func(offset int32, s int32, first uint32, rep int32) match { + if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + return match{s: s, est: highScore} + } + m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + m.estBits(bitsPerByte) + return m + } + best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + if canRepeat && best.length < goodEnough { - best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1)) - best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2)) - best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3)) + cv := uint32(cv >> 8) + spp := s + 1 + best = bestOf(best, matchAt(spp-offset1, spp, cv, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv, 3)) if best.length > 0 { - best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1)) - best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2)) - best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3)) + cv >>= 16 + spp += 2 + best = bestOf(best, matchAt(spp-offset1, spp, cv, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv, 3)) } } // Load next and check... @@ -209,22 +248,28 @@ encodeLoop: } s++ - candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)] + candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)] cv = load6432(src, s) cv2 := load6432(src, s+1) - candidateL = e.longTable[hash8(cv, bestLongTableBits)] - candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)] + candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)] + candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] + // Short at s+1 best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + // Long at s+1, s+2 best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) - + if false { + // Short at s+3. + // Too often worse... + best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) + } // See if we can find a better match by checking where the current best ends. // Use that offset to see if we can find a better full match. if sAt := best.s + best.length; sAt < sLimit { - nextHashL := hash8(load6432(src, sAt), bestLongTableBits) + nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) candidateEnd := e.longTable[nextHashL] if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) @@ -284,8 +329,8 @@ encodeLoop: off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) - h0 := hash8(cv0, bestLongTableBits) - h1 := hash4x64(cv0, bestShortTableBits) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} off++ @@ -352,8 +397,8 @@ encodeLoop: // every entry for index0 < s-1 { cv0 := load6432(src, index0) - h0 := hash8(cv0, bestLongTableBits) - h1 := hash4x64(cv0, bestShortTableBits) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} @@ -374,8 +419,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash4x64(cv, bestShortTableBits) - nextHashL := hash8(cv, bestLongTableBits) + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -425,7 +470,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { e.Encode(blk, src) } -// ResetDict will reset and set a dictionary if not nil +// Reset will reset and set a dictionary if not nil func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { @@ -441,10 +486,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { const hashLog = bestShortTableBits cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hash4x64(cv, hashLog) // 0 -> 4 - nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5 - nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6 - nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7 + nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4 + nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5 + nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6 + nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7 e.dictTable[nextHash] = prevEntry{ prev: e.dictTable[nextHash].offset, offset: i, @@ -472,7 +517,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { } if len(d.content) >= 8 { cv := load6432(d.content, 0) - h := hash8(cv, bestLongTableBits) + h := hashLen(cv, bestLongTableBits, bestLongLen) e.dictLongTable[h] = prevEntry{ offset: e.maxMatchOff, prev: e.dictLongTable[h].offset, @@ -482,7 +527,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { off := 8 // First to read for i := e.maxMatchOff + 1; i < end; i++ { cv = cv>>8 | (uint64(d.content[off]) << 56) - h := hash8(cv, bestLongTableBits) + h := hashLen(cv, bestLongTableBits, bestLongLen) e.dictLongTable[h] = prevEntry{ offset: i, prev: e.dictLongTable[h].offset, |