diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd/enc_fast.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/enc_fast.go | 140 |
1 files changed, 111 insertions, 29 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 28134b158..1387b8082 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,6 +6,7 @@ package zstd import ( "fmt" + "math" "math/bits" "github.com/klauspost/compress/zstd/internal/xxhash" @@ -23,7 +24,7 @@ type tableEntry struct { offset int32 } -type fastEncoder struct { +type fastBase struct { o encParams // cur is the offset at the start of hist cur int32 @@ -31,18 +32,22 @@ type fastEncoder struct { maxMatchOff int32 hist []byte crc *xxhash.Digest - table [tableSize]tableEntry tmp [8]byte blk *blockEnc } +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + // CRC returns the underlying CRC writer. -func (e *fastEncoder) CRC() *xxhash.Digest { +func (e *fastBase) CRC() *xxhash.Digest { return e.crc } // AppendCRC will append the CRC to the destination slice and return it. -func (e *fastEncoder) AppendCRC(dst []byte) []byte { +func (e *fastBase) AppendCRC(dst []byte) []byte { crc := e.crc.Sum(e.tmp[:0]) dst = append(dst, crc[7], crc[6], crc[5], crc[4]) return dst @@ -50,7 +55,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastEncoder) WindowSize(size int) int32 { +func (e *fastBase) WindowSize(size int) int32 { if size > 0 && size < int(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. @@ -63,7 +68,7 @@ func (e *fastEncoder) WindowSize(size int) int32 { } // Block returns the current block. -func (e *fastEncoder) Block() *blockEnc { +func (e *fastBase) Block() *blockEnc { return e.blk } @@ -169,9 +174,22 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+6, repIndex+4, src) + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -197,11 +215,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -257,7 +275,20 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -294,7 +325,20 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -412,10 +456,23 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // lenght := 4 + e.matchlen(s+6, repIndex+4, src) - lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -441,11 +498,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -498,7 +555,20 @@ encodeLoop: // Extend the 4-byte match as long as possible. //l := e.matchlenNoHist(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -536,7 +606,20 @@ encodeLoop: // We have at least 4 byte match. // No need to check backwards. We come straight from a match //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -571,7 +654,7 @@ encodeLoop: } } -func (e *fastEncoder) addBlock(src []byte) int32 { +func (e *fastBase) addBlock(src []byte) int32 { if debugAsserts && e.cur > bufferReset { panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) } @@ -602,17 +685,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 { // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. -func (e *fastEncoder) UseBlock(enc *blockEnc) { +func (e *fastBase) UseBlock(enc *blockEnc) { enc.reset(e.blk) e.blk = enc } -func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } -func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { if debugAsserts { if s < 0 { err := fmt.Sprintf("s (%d) < 0", s) @@ -626,18 +709,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) panic(err) } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } } // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. -func (e *fastEncoder) Reset() { +func (e *fastBase) Reset() { if e.blk == nil { e.blk = &blockEnc{} e.blk.init() |