summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r--vendor/github.com/klauspost/compress/flate/crc32_amd64.go42
-rw-r--r--vendor/github.com/klauspost/compress/flate/crc32_amd64.s214
-rw-r--r--vendor/github.com/klauspost/compress/flate/crc32_noasm.go35
-rw-r--r--vendor/github.com/klauspost/compress/flate/deflate.go678
-rw-r--r--vendor/github.com/klauspost/compress/flate/fast_encoder.go257
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go502
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_code.go67
-rw-r--r--vendor/github.com/klauspost/compress/flate/inflate.go71
-rw-r--r--vendor/github.com/klauspost/compress/flate/level1.go174
-rw-r--r--vendor/github.com/klauspost/compress/flate/level2.go199
-rw-r--r--vendor/github.com/klauspost/compress/flate/level3.go225
-rw-r--r--vendor/github.com/klauspost/compress/flate/level4.go210
-rw-r--r--vendor/github.com/klauspost/compress/flate/level5.go276
-rw-r--r--vendor/github.com/klauspost/compress/flate/level6.go279
-rw-r--r--vendor/github.com/klauspost/compress/flate/reverse_bits.go48
-rw-r--r--vendor/github.com/klauspost/compress/flate/snappy.go900
-rw-r--r--vendor/github.com/klauspost/compress/flate/stateless.go252
-rw-r--r--vendor/github.com/klauspost/compress/flate/token.go285
-rw-r--r--vendor/github.com/klauspost/compress/huff0/compress.go17
-rw-r--r--vendor/github.com/klauspost/compress/huff0/decompress.go9
-rw-r--r--vendor/github.com/klauspost/compress/huff0/huff0.go6
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockdec.go11
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockenc.go56
-rw-r--r--vendor/github.com/klauspost/compress/zstd/bytebuf.go3
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder.go26
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_dfast.go6
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder.go22
-rw-r--r--vendor/github.com/klauspost/compress/zstd/encoder_options.go36
-rw-r--r--vendor/github.com/klauspost/compress/zstd/framedec.go21
-rw-r--r--vendor/github.com/klauspost/compress/zstd/frameenc.go5
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_encoder.go27
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_predefined.go159
-rw-r--r--vendor/github.com/klauspost/compress/zstd/snappy.go1
-rw-r--r--vendor/github.com/klauspost/compress/zstd/zstd.go1
-rw-r--r--vendor/github.com/klauspost/cpuid/.gitignore24
-rw-r--r--vendor/github.com/klauspost/cpuid/.travis.yml23
-rw-r--r--vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt35
-rw-r--r--vendor/github.com/klauspost/cpuid/LICENSE22
-rw-r--r--vendor/github.com/klauspost/cpuid/README.md147
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid.go1049
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid_386.s42
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid_amd64.s42
-rw-r--r--vendor/github.com/klauspost/cpuid/detect_intel.go17
-rw-r--r--vendor/github.com/klauspost/cpuid/detect_ref.go23
-rw-r--r--vendor/github.com/klauspost/cpuid/generate.go4
45 files changed, 2934 insertions, 3614 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.go b/vendor/github.com/klauspost/compress/flate/crc32_amd64.go
deleted file mode 100644
index 8298d309a..000000000
--- a/vendor/github.com/klauspost/compress/flate/crc32_amd64.go
+++ /dev/null
@@ -1,42 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-package flate
-
-import (
- "github.com/klauspost/cpuid"
-)
-
-// crc32sse returns a hash for the first 4 bytes of the slice
-// len(a) must be >= 4.
-//go:noescape
-func crc32sse(a []byte) uint32
-
-// crc32sseAll calculates hashes for each 4-byte set in a.
-// dst must be east len(a) - 4 in size.
-// The size is not checked by the assembly.
-//go:noescape
-func crc32sseAll(a []byte, dst []uint32)
-
-// matchLenSSE4 returns the number of matching bytes in a and b
-// up to length 'max'. Both slices must be at least 'max'
-// bytes in size.
-//
-// TODO: drop the "SSE4" name, since it doesn't use any SSE instructions.
-//
-//go:noescape
-func matchLenSSE4(a, b []byte, max int) int
-
-// histogram accumulates a histogram of b in h.
-// h must be at least 256 entries in length,
-// and must be cleared before calling this function.
-//go:noescape
-func histogram(b []byte, h []int32)
-
-// Detect SSE 4.2 feature.
-func init() {
- useSSE42 = cpuid.CPU.SSE42()
-}
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s
deleted file mode 100644
index a79943727..000000000
--- a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s
+++ /dev/null
@@ -1,214 +0,0 @@
-//+build !noasm
-//+build !appengine
-//+build !gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-// func crc32sse(a []byte) uint32
-TEXT ·crc32sse(SB), 4, $0
- MOVQ a+0(FP), R10
- XORQ BX, BX
-
- // CRC32 dword (R10), EBX
- BYTE $0xF2; BYTE $0x41; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0x1a
-
- MOVL BX, ret+24(FP)
- RET
-
-// func crc32sseAll(a []byte, dst []uint32)
-TEXT ·crc32sseAll(SB), 4, $0
- MOVQ a+0(FP), R8 // R8: src
- MOVQ a_len+8(FP), R10 // input length
- MOVQ dst+24(FP), R9 // R9: dst
- SUBQ $4, R10
- JS end
- JZ one_crc
- MOVQ R10, R13
- SHRQ $2, R10 // len/4
- ANDQ $3, R13 // len&3
- XORQ BX, BX
- ADDQ $1, R13
- TESTQ R10, R10
- JZ rem_loop
-
-crc_loop:
- MOVQ (R8), R11
- XORQ BX, BX
- XORQ DX, DX
- XORQ DI, DI
- MOVQ R11, R12
- SHRQ $8, R11
- MOVQ R12, AX
- MOVQ R11, CX
- SHRQ $16, R12
- SHRQ $16, R11
- MOVQ R12, SI
-
- // CRC32 EAX, EBX
- BYTE $0xF2; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0xd8
-
- // CRC32 ECX, EDX
- BYTE $0xF2; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0xd1
-
- // CRC32 ESI, EDI
- BYTE $0xF2; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0xfe
- MOVL BX, (R9)
- MOVL DX, 4(R9)
- MOVL DI, 8(R9)
-
- XORQ BX, BX
- MOVL R11, AX
-
- // CRC32 EAX, EBX
- BYTE $0xF2; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0xd8
- MOVL BX, 12(R9)
-
- ADDQ $16, R9
- ADDQ $4, R8
- XORQ BX, BX
- SUBQ $1, R10
- JNZ crc_loop
-
-rem_loop:
- MOVL (R8), AX
-
- // CRC32 EAX, EBX
- BYTE $0xF2; BYTE $0x0f
- BYTE $0x38; BYTE $0xf1; BYTE $0xd8
-
- MOVL BX, (R9)
- ADDQ $4, R9
- ADDQ $1, R8
- XORQ BX, BX
- SUBQ $1, R13
- JNZ rem_loop
-
-end:
- RET
-
-one_crc:
- MOVQ $1, R13
- XORQ BX, BX
- JMP rem_loop
-
-// func matchLenSSE4(a, b []byte, max int) int
-TEXT ·matchLenSSE4(SB), 4, $0
- MOVQ a_base+0(FP), SI
- MOVQ b_base+24(FP), DI
- MOVQ DI, DX
- MOVQ max+48(FP), CX
-
-cmp8:
- // As long as we are 8 or more bytes before the end of max, we can load and
- // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
- CMPQ CX, $8
- JLT cmp1
- MOVQ (SI), AX
- MOVQ (DI), BX
- CMPQ AX, BX
- JNE bsf
- ADDQ $8, SI
- ADDQ $8, DI
- SUBQ $8, CX
- JMP cmp8
-
-bsf:
- // If those 8 bytes were not equal, XOR the two 8 byte values, and return
- // the index of the first byte that differs. The BSF instruction finds the
- // least significant 1 bit, the amd64 architecture is little-endian, and
- // the shift by 3 converts a bit index to a byte index.
- XORQ AX, BX
- BSFQ BX, BX
- SHRQ $3, BX
- ADDQ BX, DI
-
- // Subtract off &b[0] to convert from &b[ret] to ret, and return.
- SUBQ DX, DI
- MOVQ DI, ret+56(FP)
- RET
-
-cmp1:
- // In the slices' tail, compare 1 byte at a time.
- CMPQ CX, $0
- JEQ matchLenEnd
- MOVB (SI), AX
- MOVB (DI), BX
- CMPB AX, BX
- JNE matchLenEnd
- ADDQ $1, SI
- ADDQ $1, DI
- SUBQ $1, CX
- JMP cmp1
-
-matchLenEnd:
- // Subtract off &b[0] to convert from &b[ret] to ret, and return.
- SUBQ DX, DI
- MOVQ DI, ret+56(FP)
- RET
-
-// func histogram(b []byte, h []int32)
-TEXT ·histogram(SB), 4, $0
- MOVQ b+0(FP), SI // SI: &b
- MOVQ b_len+8(FP), R9 // R9: len(b)
- MOVQ h+24(FP), DI // DI: Histogram
- MOVQ R9, R8
- SHRQ $3, R8
- JZ hist1
- XORQ R11, R11
-
-loop_hist8:
- MOVQ (SI), R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- MOVB R10, R11
- INCL (DI)(R11*4)
- SHRQ $8, R10
-
- INCL (DI)(R10*4)
-
- ADDQ $8, SI
- DECQ R8
- JNZ loop_hist8
-
-hist1:
- ANDQ $7, R9
- JZ end_hist
- XORQ R10, R10
-
-loop_hist1:
- MOVB (SI), R10
- INCL (DI)(R10*4)
- INCQ SI
- DECQ R9
- JNZ loop_hist1
-
-end_hist:
- RET
diff --git a/vendor/github.com/klauspost/compress/flate/crc32_noasm.go b/vendor/github.com/klauspost/compress/flate/crc32_noasm.go
deleted file mode 100644
index dcf43bd50..000000000
--- a/vendor/github.com/klauspost/compress/flate/crc32_noasm.go
+++ /dev/null
@@ -1,35 +0,0 @@
-//+build !amd64 noasm appengine gccgo
-
-// Copyright 2015, Klaus Post, see LICENSE for details.
-
-package flate
-
-func init() {
- useSSE42 = false
-}
-
-// crc32sse should never be called.
-func crc32sse(a []byte) uint32 {
- panic("no assembler")
-}
-
-// crc32sseAll should never be called.
-func crc32sseAll(a []byte, dst []uint32) {
- panic("no assembler")
-}
-
-// matchLenSSE4 should never be called.
-func matchLenSSE4(a, b []byte, max int) int {
- panic("no assembler")
- return 0
-}
-
-// histogram accumulates a histogram of b in h.
-//
-// len(h) must be >= 256, and h's elements must be all zeroes.
-func histogram(b []byte, h []int32) {
- h = h[:256]
- for _, t := range b {
- h[t]++
- }
-}
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 628795120..20c94f596 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -50,8 +50,6 @@ const (
skipNever = math.MaxInt32
)
-var useSSE42 bool
-
type compressionLevel struct {
good, lazy, nice, chain, fastSkipHashing, level int
}
@@ -97,9 +95,8 @@ type advancedState struct {
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
- index int
- bulkHasher func([]byte, []uint32)
- hashMatch [maxMatchLength + minMatchLength]uint32
+ index int
+ hashMatch [maxMatchLength + minMatchLength]uint32
}
type compressor struct {
@@ -120,7 +117,7 @@ type compressor struct {
// queued output tokens
tokens tokens
- snap fastEnc
+ fast fastEnc
state *advancedState
}
@@ -164,14 +161,14 @@ func (d *compressor) fillDeflate(b []byte) int {
return n
}
-func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
- d.w.writeBlock(tok.tokens[:tok.n], eof, window)
+ d.w.writeBlock(tok, eof, window)
return d.w.err
}
return nil
@@ -180,20 +177,20 @@ func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
// writeBlockSkip writes the current block and uses the number of tokens
// to determine if the block should be stored on no matches, or
// only huffman encoded.
-func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
if d.blockStart <= index {
window := d.window[d.blockStart:index]
// If we removed less than a 64th of all literals
// we huffman compress the block.
if int(tok.n) > len(window)-int(tok.n>>6) {
- d.w.writeBlockHuff(eof, window)
+ d.w.writeBlockHuff(eof, window, d.sync)
} else {
// Write a dynamic huffman block.
- d.w.writeBlockDynamic(tok.tokens[:tok.n], eof, window)
+ d.w.writeBlockDynamic(tok, eof, window, d.sync)
}
} else {
- d.w.writeBlock(tok.tokens[:tok.n], eof, nil)
+ d.w.writeBlock(tok, eof, nil)
}
d.blockStart = index
return d.w.err
@@ -208,8 +205,16 @@ func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only mode,
// use constant or Snappy compression.
- switch d.compressionLevel.level {
- case 0, 1, 2:
+ if d.level == 0 {
+ return
+ }
+ if d.fast != nil {
+ // encode the last data, but discard the result
+ if len(b) > maxMatchOffset {
+ b = b[len(b)-maxMatchOffset:]
+ }
+ d.fast.Encode(&d.tokens, b)
+ d.tokens.Reset()
return
}
s := d.state
@@ -236,7 +241,7 @@ func (d *compressor) fillWindow(b []byte) {
}
dst := s.hashMatch[:dstSize]
- s.bulkHasher(tocheck, dst)
+ bulkHash4(tocheck, dst)
var newH uint32
for i, val := range dst {
di := i + startindex
@@ -284,62 +289,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
- n := matchLen(win[i:], wPos, minMatchLook)
-
- if n > length && (n > minMatchLength || pos-i <= 4096) {
- length = n
- offset = pos - i
- ok = true
- if n >= nice {
- // The match is good enough that we don't try to find a better one.
- break
- }
- wEnd = win[pos+n]
- }
- }
- if i == minIndex {
- // hashPrev[i & windowMask] has already been overwritten, so stop now.
- break
- }
- i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
- if i < minIndex || i < 0 {
- break
- }
- }
- return
-}
-
-// Try to find a match starting at index whose length is greater than prevSize.
-// We only look at chainCount possibilities before giving up.
-// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
-func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
- minMatchLook := maxMatchLength
- if lookahead < minMatchLook {
- minMatchLook = lookahead
- }
-
- win := d.window[0 : pos+minMatchLook]
-
- // We quit when we get a match that's at least nice long
- nice := len(win) - pos
- if d.nice < nice {
- nice = d.nice
- }
-
- // If we've got a match that's good enough, only look in 1/4 the chain.
- tries := d.chain
- length = prevLength
- if length >= d.good {
- tries >>= 2
- }
-
- wEnd := win[pos+length]
- wPos := win[pos:]
- minIndex := pos - windowSize
-
- for i := prevHead; tries > 0; tries-- {
- if wEnd == win[i+length] {
- n := matchLenSSE4(win[i:], wPos, minMatchLook)
+ n := matchLen(win[i:i+minMatchLook], wPos)
if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
@@ -372,42 +322,27 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
return d.w.err
}
-const hashmul = 0x1e35a7bd
-
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
- return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+ b = b[:4]
+ return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
- if len(b) < minMatchLength {
+ if len(b) < 4 {
return
}
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
- dst[0] = (hb * hashmul) >> (32 - hashBits)
- end := len(b) - minMatchLength + 1
+ dst[0] = hash4u(hb, hashBits)
+ end := len(b) - 4 + 1
for i := 1; i < end; i++ {
hb = (hb << 8) | uint32(b[i+3])
- dst[i] = (hb * hashmul) >> (32 - hashBits)
- }
-}
-
-// matchLen returns the number of matching bytes in a and b
-// up to length 'max'. Both slices must be at least 'max'
-// bytes in size.
-func matchLen(a, b []byte, max int) int {
- a = a[:max]
- b = b[:len(a)]
- for i, av := range a {
- if b[i] != av {
- return i
- }
+ dst[i] = hash4u(hb, hashBits)
}
- return max
}
func (d *compressor) initDeflate() {
@@ -424,149 +359,6 @@ func (d *compressor) initDeflate() {
s.offset = 0
s.hash = 0
s.chainHead = -1
- s.bulkHasher = bulkHash4
- if useSSE42 {
- s.bulkHasher = crc32sseAll
- }
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazy
-func (d *compressor) deflate() {
- s := d.state
- // Sanity enables additional runtime tests.
- // It's intended to be used during development
- // to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
- return
- }
-
- s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
- if s.index < s.maxInsertIndex {
- s.hash = hash4(d.window[s.index : s.index+minMatchLength])
- }
-
- for {
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- lookahead := d.windowEnd - s.index
- if lookahead < minMatchLength+maxMatchLength {
- if !d.sync {
- return
- }
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- if lookahead == 0 {
- if d.tokens.n > 0 {
- if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- return
- }
- }
- if s.index < s.maxInsertIndex {
- // Update the hash
- s.hash = hash4(d.window[s.index : s.index+minMatchLength])
- ch := s.hashHead[s.hash&hashMask]
- s.chainHead = int(ch)
- s.hashPrev[s.index&windowMask] = ch
- s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
- }
- s.length = minMatchLength - 1
- s.offset = 0
- minIndex := s.index - windowSize
- if minIndex < 0 {
- minIndex = 0
- }
-
- if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
- if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
- s.length = newLength
- s.offset = newOffset
- }
- }
- if s.length >= minMatchLength {
- s.ii = 0
- // There was a match at the previous step, and the current match is
- // not better. Output the previous match.
- // "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
- d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
- d.tokens.n++
- // Insert in the hash table all strings up to the end of the match.
- // index and index-1 are already inserted. If there is not enough
- // lookahead, the last two strings are not inserted into the hash
- // table.
- if s.length <= d.fastSkipHashing {
- var newIndex int
- newIndex = s.index + s.length
- // Calculate missing hashes
- end := newIndex
- if end > s.maxInsertIndex {
- end = s.maxInsertIndex
- }
- end += minMatchLength - 1
- startindex := s.index + 1
- if startindex > s.maxInsertIndex {
- startindex = s.maxInsertIndex
- }
- tocheck := d.window[startindex:end]
- dstSize := len(tocheck) - minMatchLength + 1
- if dstSize > 0 {
- dst := s.hashMatch[:dstSize]
- bulkHash4(tocheck, dst)
- var newH uint32
- for i, val := range dst {
- di := i + startindex
- newH = val & hashMask
- // Get previous value with the same hash.
- // Our chain should point to the previous value.
- s.hashPrev[di&windowMask] = s.hashHead[newH]
- // Set the head of the hash chain to us.
- s.hashHead[newH] = uint32(di + s.hashOffset)
- }
- s.hash = newH
- }
- s.index = newIndex
- } else {
- // For matches this long, we don't bother inserting each individual
- // item into the table.
- s.index += s.length
- if s.index < s.maxInsertIndex {
- s.hash = hash4(d.window[s.index : s.index+minMatchLength])
- }
- }
- if d.tokens.n == maxFlateBlockTokens {
- // The block includes the current character
- if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- } else {
- s.ii++
- end := s.index + int(s.ii>>uint(d.fastSkipHashing)) + 1
- if end > d.windowEnd {
- end = d.windowEnd
- }
- for i := s.index; i < end; i++ {
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
- d.tokens.n++
- if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- }
- s.index = end
- }
- }
}
// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
@@ -603,15 +395,14 @@ func (d *compressor) deflateLazy() {
// Flush current output block if any.
if d.byteAvailable {
// There is still one pending token that needs to be flushed
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
+ d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
}
if d.tokens.n > 0 {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
- d.tokens.n = 0
+ d.tokens.Reset()
}
return
}
@@ -642,8 +433,7 @@ func (d *compressor) deflateLazy() {
if prevLength >= minMatchLength && s.length <= prevLength {
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
- d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
- d.tokens.n++
+ d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
@@ -684,10 +474,10 @@ func (d *compressor) deflateLazy() {
s.length = minMatchLength - 1
if d.tokens.n == maxFlateBlockTokens {
// The block includes the current character
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
- d.tokens.n = 0
+ d.tokens.Reset()
}
} else {
// Reset, if we got a match this run.
@@ -697,13 +487,12 @@ func (d *compressor) deflateLazy() {
// We have a byte waiting. Emit it.
if d.byteAvailable {
s.ii++
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
+ d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
- d.tokens.n = 0
+ d.tokens.Reset()
}
s.index++
@@ -716,343 +505,24 @@ func (d *compressor) deflateLazy() {
break
}
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
+ d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
- d.tokens.n = 0
+ d.tokens.Reset()
}
s.index++
}
// Flush last byte
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
+ d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
- d.tokens.n = 0
- }
- }
- } else {
- s.index++
- d.byteAvailable = true
- }
- }
- }
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazySSE
-func (d *compressor) deflateSSE() {
- s := d.state
- // Sanity enables additional runtime tests.
- // It's intended to be used during development
- // to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
- return
- }
-
- s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
- if s.index < s.maxInsertIndex {
- s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
- }
-
- for {
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- lookahead := d.windowEnd - s.index
- if lookahead < minMatchLength+maxMatchLength {
- if !d.sync {
- return
- }
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- if lookahead == 0 {
- if d.tokens.n > 0 {
- if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- return
- }
- }
- if s.index < s.maxInsertIndex {
- // Update the hash
- s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
- ch := s.hashHead[s.hash]
- s.chainHead = int(ch)
- s.hashPrev[s.index&windowMask] = ch
- s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
- }
- s.length = minMatchLength - 1
- s.offset = 0
- minIndex := s.index - windowSize
- if minIndex < 0 {
- minIndex = 0
- }
-
- if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
- if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
- s.length = newLength
- s.offset = newOffset
- }
- }
- if s.length >= minMatchLength {
- s.ii = 0
- // There was a match at the previous step, and the current match is
- // not better. Output the previous match.
- // "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
- d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
- d.tokens.n++
- // Insert in the hash table all strings up to the end of the match.
- // index and index-1 are already inserted. If there is not enough
- // lookahead, the last two strings are not inserted into the hash
- // table.
- if s.length <= d.fastSkipHashing {
- var newIndex int
- newIndex = s.index + s.length
- // Calculate missing hashes
- end := newIndex
- if end > s.maxInsertIndex {
- end = s.maxInsertIndex
- }
- end += minMatchLength - 1
- startindex := s.index + 1
- if startindex > s.maxInsertIndex {
- startindex = s.maxInsertIndex
- }
- tocheck := d.window[startindex:end]
- dstSize := len(tocheck) - minMatchLength + 1
- if dstSize > 0 {
- dst := s.hashMatch[:dstSize]
-
- crc32sseAll(tocheck, dst)
- var newH uint32
- for i, val := range dst {
- di := i + startindex
- newH = val & hashMask
- // Get previous value with the same hash.
- // Our chain should point to the previous value.
- s.hashPrev[di&windowMask] = s.hashHead[newH]
- // Set the head of the hash chain to us.
- s.hashHead[newH] = uint32(di + s.hashOffset)
- }
- s.hash = newH
- }
- s.index = newIndex
- } else {
- // For matches this long, we don't bother inserting each individual
- // item into the table.
- s.index += s.length
- if s.index < s.maxInsertIndex {
- s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
- }
- }
- if d.tokens.n == maxFlateBlockTokens {
- // The block includes the current character
- if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- } else {
- s.ii++
- end := s.index + int(s.ii>>5) + 1
- if end > d.windowEnd {
- end = d.windowEnd
- }
- for i := s.index; i < end; i++ {
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
- d.tokens.n++
- if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- }
- s.index = end
- }
- }
-}
-
-// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
-// meaning it always has lazy matching on.
-func (d *compressor) deflateLazySSE() {
- s := d.state
- // Sanity enables additional runtime tests.
- // It's intended to be used during development
- // to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
- return
- }
-
- s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
- if s.index < s.maxInsertIndex {
- s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
- }
-
- for {
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- lookahead := d.windowEnd - s.index
- if lookahead < minMatchLength+maxMatchLength {
- if !d.sync {
- return
- }
- if sanity && s.index > d.windowEnd {
- panic("index > windowEnd")
- }
- if lookahead == 0 {
- // Flush current output block if any.
- if d.byteAvailable {
- // There is still one pending token that needs to be flushed
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
- d.byteAvailable = false
- }
- if d.tokens.n > 0 {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- return
- }
- }
- if s.index < s.maxInsertIndex {
- // Update the hash
- s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
- ch := s.hashHead[s.hash]
- s.chainHead = int(ch)
- s.hashPrev[s.index&windowMask] = ch
- s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
- }
- prevLength := s.length
- prevOffset := s.offset
- s.length = minMatchLength - 1
- s.offset = 0
- minIndex := s.index - windowSize
- if minIndex < 0 {
- minIndex = 0
- }
-
- if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
- if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
- s.length = newLength
- s.offset = newOffset
- }
- }
- if prevLength >= minMatchLength && s.length <= prevLength {
- // There was a match at the previous step, and the current match is
- // not better. Output the previous match.
- d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
- d.tokens.n++
-
- // Insert in the hash table all strings up to the end of the match.
- // index and index-1 are already inserted. If there is not enough
- // lookahead, the last two strings are not inserted into the hash
- // table.
- var newIndex int
- newIndex = s.index + prevLength - 1
- // Calculate missing hashes
- end := newIndex
- if end > s.maxInsertIndex {
- end = s.maxInsertIndex
- }
- end += minMatchLength - 1
- startindex := s.index + 1
- if startindex > s.maxInsertIndex {
- startindex = s.maxInsertIndex
- }
- tocheck := d.window[startindex:end]
- dstSize := len(tocheck) - minMatchLength + 1
- if dstSize > 0 {
- dst := s.hashMatch[:dstSize]
- crc32sseAll(tocheck, dst)
- var newH uint32
- for i, val := range dst {
- di := i + startindex
- newH = val & hashMask
- // Get previous value with the same hash.
- // Our chain should point to the previous value.
- s.hashPrev[di&windowMask] = s.hashHead[newH]
- // Set the head of the hash chain to us.
- s.hashHead[newH] = uint32(di + s.hashOffset)
- }
- s.hash = newH
- }
-
- s.index = newIndex
- d.byteAvailable = false
- s.length = minMatchLength - 1
- if d.tokens.n == maxFlateBlockTokens {
- // The block includes the current character
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- } else {
- // Reset, if we got a match this run.
- if s.length >= minMatchLength {
- s.ii = 0
- }
- // We have a byte waiting. Emit it.
- if d.byteAvailable {
- s.ii++
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
- if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- s.index++
-
- // If we have a long run of no matches, skip additional bytes
- // Resets when s.ii overflows after 64KB.
- if s.ii > 31 {
- n := int(s.ii >> 6)
- for j := 0; j < n; j++ {
- if s.index >= d.windowEnd-1 {
- break
- }
-
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
- if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
- }
- s.index++
- }
- // Flush last byte
- d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
- d.tokens.n++
- d.byteAvailable = false
- // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
- if d.tokens.n == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
- return
- }
- d.tokens.n = 0
+ d.tokens.Reset()
}
}
} else {
@@ -1085,17 +555,17 @@ func (d *compressor) storeHuff() {
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
return
}
- d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+ d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
d.windowEnd = 0
}
-// storeHuff will compress and store the currently added data,
+// storeFast will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
// Any error that occurred will be in d.err
-func (d *compressor) storeSnappy() {
+func (d *compressor) storeFast() {
// We only compress if we have maxStoreBlockSize.
- if d.windowEnd < maxStoreBlockSize {
+ if d.windowEnd < len(d.window) {
if !d.sync {
return
}
@@ -1106,32 +576,30 @@ func (d *compressor) storeSnappy() {
}
if d.windowEnd <= 32 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
- d.tokens.n = 0
- d.windowEnd = 0
} else {
- d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+ d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
d.err = d.w.err
}
- d.tokens.n = 0
+ d.tokens.Reset()
d.windowEnd = 0
- d.snap.Reset()
+ d.fast.Reset()
return
}
}
- d.snap.Encode(&d.tokens, d.window[:d.windowEnd])
+ d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
// If we made zero matches, store the block as is.
- if int(d.tokens.n) == d.windowEnd {
+ if d.tokens.n == 0 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
// If we removed less than 1/16th, huffman compress the block.
} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
- d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+ d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
} else {
- d.w.writeBlockDynamic(d.tokens.tokens[:d.tokens.n], false, d.window[:d.windowEnd])
+ d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
}
- d.tokens.n = 0
+ d.tokens.Reset()
d.windowEnd = 0
}
@@ -1176,36 +644,26 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
+ d.w.logReusePenalty = uint(4)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
- case level >= 1 && level <= 4:
- d.snap = newFastEnc(level)
- d.window = make([]byte, maxStoreBlockSize)
- d.fill = (*compressor).fillBlock
- d.step = (*compressor).storeSnappy
case level == DefaultCompression:
level = 5
fallthrough
- case 5 <= level && level <= 9:
+ case level >= 1 && level <= 6:
+ d.w.logReusePenalty = uint(level + 1)
+ d.fast = newFastEnc(level)
+ d.window = make([]byte, maxStoreBlockSize)
+ d.fill = (*compressor).fillBlock
+ d.step = (*compressor).storeFast
+ case 7 <= level && level <= 9:
+ d.w.logReusePenalty = uint(level)
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
d.fill = (*compressor).fillDeflate
- if d.fastSkipHashing == skipNever {
- if useSSE42 {
- d.step = (*compressor).deflateLazySSE
- } else {
- d.step = (*compressor).deflateLazy
- }
- } else {
- if useSSE42 {
- d.step = (*compressor).deflateSSE
- } else {
- d.step = (*compressor).deflate
-
- }
- }
+ d.step = (*compressor).deflateLazy
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
@@ -1218,10 +676,10 @@ func (d *compressor) reset(w io.Writer) {
d.sync = false
d.err = nil
// We only need to reset a few things for Snappy.
- if d.snap != nil {
- d.snap.Reset()
+ if d.fast != nil {
+ d.fast.Reset()
d.windowEnd = 0
- d.tokens.n = 0
+ d.tokens.Reset()
return
}
switch d.compressionLevel.chain {
@@ -1240,7 +698,7 @@ func (d *compressor) reset(w io.Writer) {
s.hashOffset = 1
s.index, d.windowEnd = 0, 0
d.blockStart, d.byteAvailable = 0, false
- d.tokens.n = 0
+ d.tokens.Reset()
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
new file mode 100644
index 000000000..b0a470f92
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -0,0 +1,257 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Modified for deflate by Klaus Post (c) 2015.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "fmt"
+ "math/bits"
+)
+
+type fastEnc interface {
+ Encode(dst *tokens, src []byte)
+ Reset()
+}
+
+func newFastEnc(level int) fastEnc {
+ switch level {
+ case 1:
+ return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
+ case 2:
+ return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
+ case 3:
+ return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
+ case 4:
+ return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
+ case 5:
+ return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
+ case 6:
+ return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
+ default:
+ panic("invalid level specified")
+ }
+}
+
+const (
+ tableBits = 16 // Bits used in the table
+ tableSize = 1 << tableBits // Size of the table
+ tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
+ baseMatchOffset = 1 // The smallest match offset
+ baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
+ maxMatchOffset = 1 << 15 // The largest match offset
+
+ bTableBits = 18 // Bits used in the big tables
+ bTableSize = 1 << bTableBits // Size of the table
+ allocHistory = maxMatchOffset * 10 // Size to preallocate for history.
+ bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize // Reset the buffer offset when reaching this.
+)
+
+const (
+ prime3bytes = 506832829
+ prime4bytes = 2654435761
+ prime5bytes = 889523592379
+ prime6bytes = 227718039650203
+ prime7bytes = 58295818150454627
+ prime8bytes = 0xcf1bbcdcb7a56463
+)
+
+func load32(b []byte, i int) uint32 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:4]
+ return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load64(b []byte, i int) uint64 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:8]
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+ uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func load3232(b []byte, i int32) uint32 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:4]
+ return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load6432(b []byte, i int32) uint64 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:8]
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+ uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func hash(u uint32) uint32 {
+ return (u * 0x1e35a7bd) >> tableShift
+}
+
+type tableEntry struct {
+ val uint32
+ offset int32
+}
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastGen struct {
+ hist []byte
+ cur int32
+}
+
+func (e *fastGen) addBlock(src []byte) int32 {
+ // check if we have space already
+ if len(e.hist)+len(src) > cap(e.hist) {
+ if cap(e.hist) == 0 {
+ e.hist = make([]byte, 0, allocHistory)
+ } else {
+ if cap(e.hist) < maxMatchOffset*2 {
+ panic("unexpected buffer size")
+ }
+ // Move down
+ offset := int32(len(e.hist)) - maxMatchOffset
+ copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ e.cur += offset
+ e.hist = e.hist[:maxMatchOffset]
+ }
+ }
+ s := int32(len(e.hist))
+ e.hist = append(e.hist, src...)
+ return s
+}
+
+// hash4 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4u(u uint32, h uint8) uint32 {
+ return (u * prime4bytes) >> ((32 - h) & 31)
+}
+
+type tableEntryPrev struct {
+ Cur tableEntry
+ Prev tableEntry
+}
+
+// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4x64(u uint64, h uint8) uint32 {
+ return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
+}
+
+// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64.
+func hash7(u uint64, h uint8) uint32 {
+ return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
+}
+
+// hash8 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64.
+func hash8(u uint64, h uint8) uint32 {
+ return uint32((u * prime8bytes) >> ((64 - h) & 63))
+}
+
+// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64.
+func hash6(u uint64, h uint8) uint32 {
+ return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
+}
+
+// matchlen will return the match length between offsets and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
+ if debugDecode {
+ if t >= s {
+ panic(fmt.Sprint("t >=s:", t, s))
+ }
+ if int(s) >= len(src) {
+ panic(fmt.Sprint("s >= len(src):", s, len(src)))
+ }
+ if t < 0 {
+ panic(fmt.Sprint("t < 0:", t))
+ }
+ if s-t > maxMatchOffset {
+ panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+ }
+ }
+ s1 := int(s) + maxMatchLength - 4
+ if s1 > len(src) {
+ s1 = len(src)
+ }
+
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets and t in src.
+// It is assumed that s > t, that t >=0 and s < len(src).
+func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
+ if debugDecode {
+ if t >= s {
+ panic(fmt.Sprint("t >=s:", t, s))
+ }
+ if int(s) >= len(src) {
+ panic(fmt.Sprint("s >= len(src):", s, len(src)))
+ }
+ if t < 0 {
+ panic(fmt.Sprint("t < 0:", t))
+ }
+ if s-t > maxMatchOffset {
+ panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+ }
+ }
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastGen) Reset() {
+ if cap(e.hist) < int(maxMatchOffset*8) {
+ l := maxMatchOffset * 8
+ // Make it at least 1MB.
+ if l < 1<<20 {
+ l = 1 << 20
+ }
+ e.hist = make([]byte, 0, l)
+ }
+ // We offset current position so everything will be out of reach
+ e.cur += maxMatchOffset + int32(len(e.hist))
+ e.hist = e.hist[:0]
+}
+
+// matchLen returns the maximum length.
+// 'a' must be the shortest of the two.
+func matchLen(a, b []byte) int {
+ b = b[:len(a)]
+ var checked int
+ if len(a) > 4 {
+ // Try 4 bytes first
+ if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
+ return bits.TrailingZeros32(diff) >> 3
+ }
+ // Switch to 8 byte matching.
+ checked = 4
+ a = a[4:]
+ b = b[4:]
+ for len(a) >= 8 {
+ b = b[:len(a)]
+ if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
+ return checked + (bits.TrailingZeros64(diff) >> 3)
+ }
+ checked += 8
+ a = a[8:]
+ b = b[8:]
+ }
+ }
+ b = b[:len(a)]
+ for i := range a {
+ if a[i] != b[i] {
+ return int(i) + checked
+ }
+ }
+ return len(a) + checked
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index f46c65418..5ed476aa0 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -85,26 +85,48 @@ type huffmanBitWriter struct {
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
- nbits uint
- bytes [256]byte
- codegenFreq [codegenCodeCount]int32
+ nbits uint16
nbytes uint8
- literalFreq []int32
- offsetFreq []int32
- codegen []uint8
literalEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
+ lastHeader int
+ // Set between 0 (reused block can be up to 2x the size)
+ logReusePenalty uint
+ lastHuffMan bool
+ bytes [256]byte
+ literalFreq [lengthCodesStart + 32]uint16
+ offsetFreq [32]uint16
+ codegenFreq [codegenCodeCount]uint16
+
+ // codegen must have an extra space for the final symbol.
+ codegen [literalCount + offsetCodeCount + 1]uint8
}
+// Huffman reuse.
+//
+// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
+//
+// This is controlled by several variables:
+//
+// If lastHeader is non-zero the Huffman table can be reused.
+// This also indicates that a Huffman table has been generated that can output all
+// possible symbols.
+// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated
+// an EOB with the previous table must be written.
+//
+// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
+//
+// An incoming block estimates the output size of a new table using a 'fresh' by calculating the
+// optimal size and adding a penalty in 'logReusePenalty'.
+// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
+// is slower both for compression and decompression.
+
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
- literalFreq: make([]int32, lengthCodesStart+32),
- offsetFreq: make([]int32, 32),
- codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
- literalEncoding: newHuffmanEncoder(maxNumLit),
+ literalEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
@@ -114,6 +136,41 @@ func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
w.bytes = [256]byte{}
+ w.lastHeader = 0
+ w.lastHuffMan = false
+}
+
+func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
+ offsets, lits = true, true
+ a := t.offHist[:offsetCodeCount]
+ b := w.offsetFreq[:len(a)]
+ for i := range a {
+ if b[i] == 0 && a[i] != 0 {
+ offsets = false
+ break
+ }
+ }
+
+ a = t.extraHist[:literalCount-256]
+ b = w.literalFreq[256:literalCount]
+ b = b[:len(a)]
+ for i := range a {
+ if b[i] == 0 && a[i] != 0 {
+ lits = false
+ break
+ }
+ }
+ if lits {
+ a = t.litHist[:]
+ b = w.literalFreq[:len(a)]
+ for i := range a {
+ if b[i] == 0 && a[i] != 0 {
+ lits = false
+ break
+ }
+ }
+ }
+ return
}
func (w *huffmanBitWriter) flush() {
@@ -144,30 +201,11 @@ func (w *huffmanBitWriter) write(b []byte) {
_, w.err = w.writer.Write(b)
}
-func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
- w.bits |= uint64(b) << w.nbits
+func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
+ w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
- bits := w.bits
- w.bits >>= 48
- w.nbits -= 48
- n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
- n += 6
- if n >= bufferFlushSize {
- if w.err != nil {
- n = 0
- return
- }
- w.write(w.bytes[:n])
- n = 0
- }
- w.nbytes = n
+ w.writeOutBits()
}
}
@@ -213,7 +251,7 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
- codegen := w.codegen // cache
+ codegen := w.codegen[:] // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
@@ -292,30 +330,54 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
codegen[outIndex] = badCode
}
-// dynamicSize returns the size of dynamically encoded data in bits.
-func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
+func (w *huffmanBitWriter) codegens() int {
+ numCodegens := len(w.codegenFreq)
+ for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+ numCodegens--
+ }
+ return numCodegens
+}
+
+func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
- header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
+ return 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
- int(w.codegenFreq[18])*7
+ int(w.codegenFreq[18])*7, numCodegens
+}
+
+// dynamicSize returns the size of dynamically encoded data in bits.
+func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
+ header, numCodegens := w.headerSize()
size = header +
- litEnc.bitLength(w.literalFreq) +
- offEnc.bitLength(w.offsetFreq) +
+ litEnc.bitLength(w.literalFreq[:]) +
+ offEnc.bitLength(w.offsetFreq[:]) +
extraBits
-
return size, numCodegens
}
+// extraBitSize will return the number of bits that will be written
+// as "extra" bits on matches.
+func (w *huffmanBitWriter) extraBitSize() int {
+ total := 0
+ for i, n := range w.literalFreq[257:literalCount] {
+ total += int(n) * int(lengthExtraBits[i&31])
+ }
+ for i, n := range w.offsetFreq[:offsetCodeCount] {
+ total += int(n) * int(offsetExtraBits[i&31])
+ }
+ return total
+}
+
// fixedSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
- fixedLiteralEncoding.bitLength(w.literalFreq) +
- fixedOffsetEncoding.bitLength(w.offsetFreq) +
+ fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
+ fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
extraBits
}
@@ -333,30 +395,36 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
}
func (w *huffmanBitWriter) writeCode(c hcode) {
+ // The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << w.nbits
- w.nbits += uint(c.len)
+ w.nbits += c.len
if w.nbits >= 48 {
- bits := w.bits
- w.bits >>= 48
- w.nbits -= 48
- n := w.nbytes
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
- n += 6
- if n >= bufferFlushSize {
- if w.err != nil {
- n = 0
- return
- }
- w.write(w.bytes[:n])
+ w.writeOutBits()
+ }
+}
+
+// writeOutBits will write bits to the buffer.
+func (w *huffmanBitWriter) writeOutBits() {
+ bits := w.bits
+ w.bits >>= 48
+ w.nbits -= 48
+ n := w.nbytes
+ w.bytes[n] = byte(bits)
+ w.bytes[n+1] = byte(bits >> 8)
+ w.bytes[n+2] = byte(bits >> 16)
+ w.bytes[n+3] = byte(bits >> 24)
+ w.bytes[n+4] = byte(bits >> 32)
+ w.bytes[n+5] = byte(bits >> 40)
+ n += 6
+ if n >= bufferFlushSize {
+ if w.err != nil {
n = 0
+ return
}
- w.nbytes = n
+ w.write(w.bytes[:n])
+ n = 0
}
+ w.nbytes = n
}
// Write the header of a dynamic Huffman block to the output stream.
@@ -412,6 +480,11 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
}
+ if w.lastHeader > 0 {
+ // We owe an EOB
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
+ }
var flag int32
if isEof {
flag = 1
@@ -426,6 +499,12 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
if w.err != nil {
return
}
+ if w.lastHeader > 0 {
+ // We owe an EOB
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
+ }
+
// Indicate that we are a fixed Huffman block
var value int32 = 2
if isEof {
@@ -439,29 +518,23 @@ func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
-func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
+func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
if w.err != nil {
return
}
- tokens = append(tokens, endBlockMarker)
- numLiterals, numOffsets := w.indexTokens(tokens)
-
+ tokens.AddEOB()
+ if w.lastHeader > 0 {
+ // We owe an EOB
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
+ }
+ numLiterals, numOffsets := w.indexTokens(tokens, false)
+ w.generate(tokens)
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
- // We only bother calculating the costs of the extra bits required by
- // the length of offset fields (which will be the same for both fixed
- // and dynamic encoding), if we need to compare those two encodings
- // against stored encoding.
- for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
- // First eight length codes have extra size = 0.
- extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
- }
- for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
- // First four offset codes have extra size = 0.
- extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode&63])
- }
+ extraBits = w.extraBitSize()
}
// Figure out smallest code.
@@ -500,7 +573,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
}
// Write the tokens.
- w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
+ w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
@@ -508,72 +581,103 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
-func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
+func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
- tokens = append(tokens, endBlockMarker)
- numLiterals, numOffsets := w.indexTokens(tokens)
+ sync = sync || eof
+ if sync {
+ tokens.AddEOB()
+ }
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literalEncoding and the offsetEncoding.
- w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
- w.codegenEncoding.generate(w.codegenFreq[:], 7)
- size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
+ // We cannot reuse pure huffman table.
+ if w.lastHuffMan && w.lastHeader > 0 {
+ // We will not try to reuse.
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
+ w.lastHuffMan = false
+ }
+ if !sync {
+ tokens.Fill()
+ }
+ numLiterals, numOffsets := w.indexTokens(tokens, !sync)
- // Store bytes, if we don't get a reasonable improvement.
- if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
- w.writeStoredHeader(len(input), eof)
- w.writeBytes(input)
- return
+ var size int
+ // Check if we should reuse.
+ if w.lastHeader > 0 {
+ // Estimate size for using a new table
+ newSize := w.lastHeader + tokens.EstimatedBits()
+
+ // The estimated size is calculated as an optimal table.
+ // We add a penalty to make it more realistic and re-use a bit more.
+ newSize += newSize >> (w.logReusePenalty & 31)
+ extra := w.extraBitSize()
+ reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra)
+
+ // Check if a new table is better.
+ if newSize < reuseSize {
+ // Write the EOB we owe.
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ size = newSize
+ w.lastHeader = 0
+ } else {
+ size = reuseSize
+ }
+ // Check if we get a reasonable size decrease.
+ if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ w.lastHeader = 0
+ return
+ }
}
- // Write Huffman table.
- w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+ // We want a new block/table
+ if w.lastHeader == 0 {
+ w.generate(tokens)
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literalEncoding and the offsetEncoding.
+ w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
+ w.codegenEncoding.generate(w.codegenFreq[:], 7)
+ var numCodegens int
+ size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
+ // Store bytes, if we don't get a reasonable improvement.
+ if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ w.lastHeader = 0
+ return
+ }
+
+ // Write Huffman table.
+ w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+ w.lastHeader, _ = w.headerSize()
+ w.lastHuffMan = false
+ }
+ if sync {
+ w.lastHeader = 0
+ }
// Write the tokens.
- w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
+ w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
-func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
- for i := range w.literalFreq {
- w.literalFreq[i] = 0
- }
- for i := range w.offsetFreq {
- w.offsetFreq[i] = 0
- }
+func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
+ copy(w.literalFreq[:], t.litHist[:])
+ copy(w.literalFreq[256:], t.extraHist[:])
+ copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
- if len(tokens) == 0 {
+ if t.n == 0 {
return
}
-
- // Only last token should be endBlockMarker.
- if tokens[len(tokens)-1] == endBlockMarker {
- w.literalFreq[endBlockMarker]++
- tokens = tokens[:len(tokens)-1]
+ if filled {
+ return maxNumLit, maxNumDist
}
-
- // Create slices up to the next power of two to avoid bounds checks.
- lits := w.literalFreq[:256]
- offs := w.offsetFreq[:32]
- lengths := w.literalFreq[lengthCodesStart:]
- lengths = lengths[:32]
- for _, t := range tokens {
- if t < endBlockMarker {
- lits[t.literal()]++
- continue
- }
- length := t.length()
- offset := t.offset()
- lengths[lengthCode(length)&31]++
- offs[offsetCode(offset)&31]++
- }
-
// get the number of literals
numLiterals = len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
@@ -590,11 +694,14 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
w.offsetFreq[0] = 1
numOffsets = 1
}
- w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)
- w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
return
}
+func (w *huffmanBitWriter) generate(t *tokens) {
+ w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
+ w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
+}
+
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
@@ -626,8 +733,19 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
// Write the length
length := t.length()
lengthCode := lengthCode(length)
- w.writeCode(lengths[lengthCode&31])
- extraLengthBits := uint(lengthExtraBits[lengthCode&31])
+ if false {
+ w.writeCode(lengths[lengthCode&31])
+ } else {
+ // inlined
+ c := lengths[lengthCode&31]
+ w.bits |= uint64(c.code) << (w.nbits & 63)
+ w.nbits += c.len
+ if w.nbits >= 48 {
+ w.writeOutBits()
+ }
+ }
+
+ extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
extraLength := int32(length - lengthBase[lengthCode&31])
w.writeBits(extraLength, extraLengthBits)
@@ -635,8 +753,18 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
- w.writeCode(offs[offsetCode&31])
- extraOffsetBits := uint(offsetExtraBits[offsetCode&63])
+ if false {
+ w.writeCode(offs[offsetCode&31])
+ } else {
+ // inlined
+ c := offs[offsetCode&31]
+ w.bits |= uint64(c.code) << (w.nbits & 63)
+ w.nbits += c.len
+ if w.nbits >= 48 {
+ w.writeOutBits()
+ }
+ }
+ extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode&63])
w.writeBits(extraOffset, extraOffsetBits)
@@ -661,75 +789,93 @@ func init() {
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// results only gains very little from compression.
-func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
+func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
// Clear histogram
- for i := range w.literalFreq {
+ for i := range w.literalFreq[:] {
w.literalFreq[i] = 0
}
+ if !w.lastHuffMan {
+ for i := range w.offsetFreq[:] {
+ w.offsetFreq[i] = 0
+ }
+ }
// Add everything as literals
- histogram(input, w.literalFreq)
+ estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15
- w.literalFreq[endBlockMarker] = 1
+ // Store bytes, if we don't get a reasonable improvement.
+ ssize, storable := w.storedSize(input)
+ if storable && ssize < (estBits+estBits>>4) {
+ w.writeStoredHeader(len(input), eof)
+ w.writeBytes(input)
+ return
+ }
- const numLiterals = endBlockMarker + 1
- const numOffsets = 1
+ if w.lastHeader > 0 {
+ size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader)
+ estBits += estBits >> (w.logReusePenalty)
- w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)
+ if estBits < size {
+ // We owe an EOB
+ w.writeCode(w.literalEncoding.codes[endBlockMarker])
+ w.lastHeader = 0
+ }
+ }
- // Figure out smallest code.
- // Always use dynamic Huffman or Store
- var numCodegens int
+ const numLiterals = endBlockMarker + 1
+ const numOffsets = 1
+ if w.lastHeader == 0 {
+ w.literalFreq[endBlockMarker] = 1
+ w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literalEncoding and the offsetEncoding.
- w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
- w.codegenEncoding.generate(w.codegenFreq[:], 7)
- size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literalEncoding and the offsetEncoding.
+ w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
+ w.codegenEncoding.generate(w.codegenFreq[:], 7)
+ numCodegens := w.codegens()
- // Store bytes, if we don't get a reasonable improvement.
- if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
- w.writeStoredHeader(len(input), eof)
- w.writeBytes(input)
- return
+ // Huffman.
+ w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+ w.lastHuffMan = true
+ w.lastHeader, _ = w.headerSize()
}
- // Huffman.
- w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
encoding := w.literalEncoding.codes[:257]
- n := w.nbytes
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
- w.bits |= uint64(c.code) << w.nbits
- w.nbits += uint(c.len)
- if w.nbits < 48 {
- continue
- }
- // Store 6 bytes
- bits := w.bits
- w.bits >>= 48
- w.nbits -= 48
- w.bytes[n] = byte(bits)
- w.bytes[n+1] = byte(bits >> 8)
- w.bytes[n+2] = byte(bits >> 16)
- w.bytes[n+3] = byte(bits >> 24)
- w.bytes[n+4] = byte(bits >> 32)
- w.bytes[n+5] = byte(bits >> 40)
- n += 6
- if n < bufferFlushSize {
- continue
- }
- w.write(w.bytes[:n])
- if w.err != nil {
- return // Return early in the event of write failures
+ w.bits |= uint64(c.code) << ((w.nbits) & 63)
+ w.nbits += c.len
+ if w.nbits >= 48 {
+ bits := w.bits
+ w.bits >>= 48
+ w.nbits -= 48
+ n := w.nbytes
+ w.bytes[n] = byte(bits)
+ w.bytes[n+1] = byte(bits >> 8)
+ w.bytes[n+2] = byte(bits >> 16)
+ w.bytes[n+3] = byte(bits >> 24)
+ w.bytes[n+4] = byte(bits >> 32)
+ w.bytes[n+5] = byte(bits >> 40)
+ n += 6
+ if n >= bufferFlushSize {
+ if w.err != nil {
+ n = 0
+ return
+ }
+ w.write(w.bytes[:n])
+ n = 0
+ }
+ w.nbytes = n
}
- n = 0
}
- w.nbytes = n
- w.writeCode(encoding[endBlockMarker])
+ if eof || sync {
+ w.writeCode(encoding[endBlockMarker])
+ w.lastHeader = 0
+ w.lastHuffMan = false
+ }
}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index f65f79336..d0099599c 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -10,6 +10,12 @@ import (
"sort"
)
+const (
+ maxBitsLimit = 16
+ // number of valid literals
+ literalCount = 286
+)
+
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
@@ -25,7 +31,7 @@ type huffmanEncoder struct {
type literalNode struct {
literal uint16
- freq int32
+ freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
@@ -54,7 +60,11 @@ func (h *hcode) set(code uint16, length uint16) {
h.code = code
}
-func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
+func reverseBits(number uint16, bitLength byte) uint16 {
+ return bits.Reverse16(number << ((16 - bitLength) & 15))
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Make capacity to next power of two.
@@ -64,10 +74,10 @@ func newHuffmanEncoder(size int) *huffmanEncoder {
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
- h := newHuffmanEncoder(maxNumLit)
+ h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
- for ch = 0; ch < maxNumLit; ch++ {
+ for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint16
switch {
@@ -108,7 +118,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
-func (h *huffmanEncoder) bitLength(freq []int32) int {
+func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
@@ -118,8 +128,6 @@ func (h *huffmanEncoder) bitLength(freq []int32) int {
return total
}
-const maxBitsLimit = 16
-
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
@@ -163,9 +171,9 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
- lastFreq: list[1].freq,
- nextCharFreq: list[2].freq,
- nextPairFreq: list[0].freq + list[1].freq,
+ lastFreq: int32(list[1].freq),
+ nextCharFreq: int32(list[2].freq),
+ nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
}
leafCounts[level][level] = 2
if level == 1 {
@@ -197,7 +205,12 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
- l.nextCharFreq = list[n].freq
+ e := list[n]
+ if e.literal < math.MaxUint16 {
+ l.nextCharFreq = int32(e.freq)
+ } else {
+ l.nextCharFreq = math.MaxInt32
+ }
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
@@ -273,12 +286,12 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
-func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
+func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
- // Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
- // The largest of these is maxNumLit, so we allocate for that case.
- h.freqcache = make([]literalNode, maxNumLit+1)
+ // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+ // The largest of these is literalCount, so we allocate for that case.
+ h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
@@ -345,3 +358,27 @@ func (s byFreq) Less(i, j int) bool {
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+// histogramSize accumulates a histogram of b in h.
+// An estimated size in bits is returned.
+// Unassigned values are assigned '1' in the histogram.
+// len(h) must be >= 256, and h's elements must be all zeroes.
+func histogramSize(b []byte, h []uint16, fill bool) int {
+ h = h[:256]
+ for _, t := range b {
+ h[t]++
+ }
+ invTotal := 1.0 / float64(len(b))
+ shannon := 0.0
+ single := math.Ceil(-math.Log2(invTotal))
+ for i, v := range h[:] {
+ if v > 0 {
+ n := float64(v)
+ shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+ } else if fill {
+ shannon += single
+ h[i] = 1
+ }
+ }
+ return int(shannon + 0.99)
+}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 800d0ce9e..6dc5b5d06 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -9,6 +9,7 @@ package flate
import (
"bufio"
+ "fmt"
"io"
"math/bits"
"strconv"
@@ -24,6 +25,8 @@ const (
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
+
+ debugDecode = false
)
// Initialize the fixedHuffmanDecoder only once upon first use.
@@ -104,8 +107,8 @@ const (
type huffmanDecoder struct {
min int // the minimum code length
- chunks *[huffmanNumChunks]uint32 // chunks as described above
- links [][]uint32 // overflow links
+ chunks *[huffmanNumChunks]uint16 // chunks as described above
+ links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
}
@@ -121,7 +124,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
const sanity = false
if h.chunks == nil {
- h.chunks = &[huffmanNumChunks]uint32{}
+ h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
@@ -169,6 +172,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
+ if debugDecode {
+ fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
+ }
return false
}
@@ -185,7 +191,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
if cap(h.links) < huffmanNumChunks-link {
- h.links = make([][]uint32, huffmanNumChunks-link)
+ h.links = make([][]uint16, huffmanNumChunks-link)
} else {
h.links = h.links[:huffmanNumChunks-link]
}
@@ -196,9 +202,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
- h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+ h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
if cap(h.links[off]) < numLinks {
- h.links[off] = make([]uint32, numLinks)
+ h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
@@ -214,7 +220,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
}
code := nextcode[n]
nextcode[n]++
- chunk := uint32(i<<huffmanValueShift | n)
+ chunk := uint16(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
@@ -347,6 +353,9 @@ func (f *decompressor) nextBlock() {
f.huffmanBlock()
default:
// 3 is reserved.
+ if debugDecode {
+ fmt.Println("reserved data block encountered")
+ }
f.err = CorruptInputError(f.roffset)
}
}
@@ -425,11 +434,17 @@ func (f *decompressor) readHuffman() error {
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
+ if debugDecode {
+ fmt.Println("nlit > maxNumLit", nlit)
+ }
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
+ if debugDecode {
+ fmt.Println("ndist > maxNumDist", ndist)
+ }
return CorruptInputError(f.roffset)
}
f.b >>= 5
@@ -453,6 +468,9 @@ func (f *decompressor) readHuffman() error {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
+ if debugDecode {
+ fmt.Println("init codebits failed")
+ }
return CorruptInputError(f.roffset)
}
@@ -480,6 +498,9 @@ func (f *decompressor) readHuffman() error {
rep = 3
nb = 2
if i == 0 {
+ if debugDecode {
+ fmt.Println("i==0")
+ }
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
@@ -494,6 +515,9 @@ func (f *decompressor) readHuffman() error {
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
+ if debugDecode {
+ fmt.Println("morebits:", err)
+ }
return err
}
}
@@ -501,6 +525,9 @@ func (f *decompressor) readHuffman() error {
f.b >>= nb
f.nb -= nb
if i+rep > n {
+ if debugDecode {
+ fmt.Println("i+rep > n", i, rep, n)
+ }
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
@@ -510,6 +537,9 @@ func (f *decompressor) readHuffman() error {
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+ if debugDecode {
+ fmt.Println("init2 failed")
+ }
return CorruptInputError(f.roffset)
}
@@ -587,12 +617,18 @@ readLiteral:
length = 258
n = 0
default:
+ if debugDecode {
+ fmt.Println(v, ">= maxNumLit")
+ }
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
+ if debugDecode {
+ fmt.Println("morebits n>0:", err)
+ }
f.err = err
return
}
@@ -606,6 +642,9 @@ readLiteral:
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
+ if debugDecode {
+ fmt.Println("morebits f.nb<5:", err)
+ }
f.err = err
return
}
@@ -615,6 +654,9 @@ readLiteral:
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
+ if debugDecode {
+ fmt.Println("huffsym:", err)
+ }
f.err = err
return
}
@@ -629,6 +671,9 @@ readLiteral:
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
+ if debugDecode {
+ fmt.Println("morebits f.nb<nb:", err)
+ }
f.err = err
return
}
@@ -638,12 +683,18 @@ readLiteral:
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
+ if debugDecode {
+ fmt.Println("dist too big:", dist, maxNumDist)
+ }
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
+ if debugDecode {
+ fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+ }
f.err = CorruptInputError(f.roffset)
return
}
@@ -688,6 +739,9 @@ func (f *decompressor) dataBlock() {
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
+ if debugDecode {
+ fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
+ }
f.err = CorruptInputError(f.roffset)
return
}
@@ -789,6 +843,9 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
if n == 0 {
f.b = b
f.nb = nb
+ if debugDecode {
+ fmt.Println("huffsym: n==0")
+ }
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
new file mode 100644
index 000000000..20de8f11f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -0,0 +1,174 @@
+package flate
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL1 struct {
+ fastGen
+ table [tableSize]tableEntry
+}
+
+// EncodeL1 uses a similar algorithm to level 1
+func (e *fastEncL1) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load3232(src, s)
+
+ for {
+ const skipLog = 5
+ const doEvery = 2
+
+ nextS := s
+ var candidate tableEntry
+ for {
+ nextHash := hash(cv)
+ candidate = e.table[nextHash]
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+
+ now := load6432(src, nextS)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+ nextHash = hash(uint32(now))
+
+ offset := s - (candidate.offset - e.cur)
+ if offset < maxMatchOffset && cv == candidate.val {
+ e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+ break
+ }
+
+ // Do one right away...
+ cv = uint32(now)
+ s = nextS
+ nextS++
+ candidate = e.table[nextHash]
+ now >>= 8
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+
+ offset = s - (candidate.offset - e.cur)
+ if offset < maxMatchOffset && cv == candidate.val {
+ e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+ break
+ }
+ cv = uint32(now)
+ s = nextS
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+ for {
+ // Invariant: we have a 4-byte match at s, and no need to emit any
+ // literal bytes prior to s.
+
+ // Extend the 4-byte match as long as possible.
+ t := candidate.offset - e.cur
+ l := e.matchlenLong(s+4, t+4, src) + 4
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+
+ // Save the match found
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+ if s >= sLimit {
+ // Index first pair after match end.
+ if int(s+l+4) < len(src) {
+ cv := load3232(src, s)
+ e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
+ }
+ goto emitRemainder
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-2 and at s. If
+ // another emitCopy is not our next move, also calculate nextHash
+ // at s+1. At least on GOARCH=amd64, these three hash calculations
+ // are faster as one load64 call (with some shifts) instead of
+ // three load32 calls.
+ x := load6432(src, s-2)
+ o := e.cur + s - 2
+ prevHash := hash(uint32(x))
+ e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
+ x >>= 16
+ currHash := hash(uint32(x))
+ candidate = e.table[currHash]
+ e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
+
+ offset := s - (candidate.offset - e.cur)
+ if offset > maxMatchOffset || uint32(x) != candidate.val {
+ cv = uint32(x >> 8)
+ s++
+ break
+ }
+ }
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
new file mode 100644
index 000000000..7c824431e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -0,0 +1,199 @@
+package flate
+
+// fastGen maintains the table for matches,
+// and the previous byte block for level 2.
+// This is the generic implementation.
+type fastEncL2 struct {
+ fastGen
+ table [bTableSize]tableEntry
+}
+
+// EncodeL2 uses a similar algorithm to level 1, but is capable
+// of matching across blocks giving better compression at a small slowdown.
+func (e *fastEncL2) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load3232(src, s)
+ for {
+ // When should we start skipping if we haven't found matches in a long while.
+ const skipLog = 5
+ const doEvery = 2
+
+ nextS := s
+ var candidate tableEntry
+ for {
+ nextHash := hash4u(cv, bTableBits)
+ s = nextS
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ candidate = e.table[nextHash]
+ now := load6432(src, nextS)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+ nextHash = hash4u(uint32(now), bTableBits)
+
+ offset := s - (candidate.offset - e.cur)
+ if offset < maxMatchOffset && cv == candidate.val {
+ e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+ break
+ }
+
+ // Do one right away...
+ cv = uint32(now)
+ s = nextS
+ nextS++
+ candidate = e.table[nextHash]
+ now >>= 8
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+
+ offset = s - (candidate.offset - e.cur)
+ if offset < maxMatchOffset && cv == candidate.val {
+ break
+ }
+ cv = uint32(now)
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+
+ // Call emitCopy, and then see if another emitCopy could be our next
+ // move. Repeat until we find no match for the input immediately after
+ // what was consumed by the last emitCopy call.
+ //
+ // If we exit this loop normally then we need to call emitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can
+ // exit this loop via goto if we get close to exhausting the input.
+ for {
+ // Invariant: we have a 4-byte match at s, and no need to emit any
+ // literal bytes prior to s.
+
+ // Extend the 4-byte match as long as possible.
+ t := candidate.offset - e.cur
+ l := e.matchlenLong(s+4, t+4, src) + 4
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ // Index first pair after match end.
+ if int(s+l+4) < len(src) {
+ cv := load3232(src, s)
+ e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
+ }
+ goto emitRemainder
+ }
+
+ // Store every second hash in-between, but offset by 1.
+ for i := s - l + 2; i < s-5; i += 7 {
+ x := load6432(src, int32(i))
+ nextHash := hash4u(uint32(x), bTableBits)
+ e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
+ // Skip one
+ x >>= 16
+ nextHash = hash4u(uint32(x), bTableBits)
+ e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
+ // Skip one
+ x >>= 16
+ nextHash = hash4u(uint32(x), bTableBits)
+ e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-2 to s. If
+ // another emitCopy is not our next move, also calculate nextHash
+ // at s+1. At least on GOARCH=amd64, these three hash calculations
+ // are faster as one load64 call (with some shifts) instead of
+ // three load32 calls.
+ x := load6432(src, s-2)
+ o := e.cur + s - 2
+ prevHash := hash4u(uint32(x), bTableBits)
+ prevHash2 := hash4u(uint32(x>>8), bTableBits)
+ e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
+ e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
+ currHash := hash4u(uint32(x>>16), bTableBits)
+ candidate = e.table[currHash]
+ e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
+
+ offset := s - (candidate.offset - e.cur)
+ if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
+ cv = uint32(x >> 24)
+ s++
+ break
+ }
+ }
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
new file mode 100644
index 000000000..4153d24c9
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -0,0 +1,225 @@
+package flate
+
+// fastEncL3
+type fastEncL3 struct {
+ fastGen
+ table [tableSize]tableEntryPrev
+}
+
+// Encode uses a similar algorithm to level 2, will check up to two candidates.
+func (e *fastEncL3) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 8 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntryPrev{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i]
+ if v.Cur.offset <= minOff {
+ v.Cur.offset = 0
+ } else {
+ v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+ }
+ if v.Prev.offset <= minOff {
+ v.Prev.offset = 0
+ } else {
+ v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+ }
+ e.table[i] = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // Skip if too small.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load3232(src, s)
+ for {
+ const skipLog = 6
+ nextS := s
+ var candidate tableEntry
+ for {
+ nextHash := hash(cv)
+ s = nextS
+ nextS = s + 1 + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ candidates := e.table[nextHash]
+ now := load3232(src, nextS)
+ e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
+
+ // Check both candidates
+ candidate = candidates.Cur
+ offset := s - (candidate.offset - e.cur)
+ if cv == candidate.val {
+ if offset > maxMatchOffset {
+ cv = now
+ // Previous will also be invalid, we have nothing.
+ continue
+ }
+ o2 := s - (candidates.Prev.offset - e.cur)
+ if cv != candidates.Prev.val || o2 > maxMatchOffset {
+ break
+ }
+ // Both match and are valid, pick longest.
+ l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
+ if l2 > l1 {
+ candidate = candidates.Prev
+ }
+ break
+ } else {
+ // We only check if value mismatches.
+ // Offset will always be invalid in other cases.
+ candidate = candidates.Prev
+ if cv == candidate.val {
+ offset := s - (candidate.offset - e.cur)
+ if offset <= maxMatchOffset {
+ break
+ }
+ }
+ }
+ cv = now
+ }
+
+ // Call emitCopy, and then see if another emitCopy could be our next
+ // move. Repeat until we find no match for the input immediately after
+ // what was consumed by the last emitCopy call.
+ //
+ // If we exit this loop normally then we need to call emitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can
+ // exit this loop via goto if we get close to exhausting the input.
+ for {
+ // Invariant: we have a 4-byte match at s, and no need to emit any
+ // literal bytes prior to s.
+
+ // Extend the 4-byte match as long as possible.
+ //
+ t := candidate.offset - e.cur
+ l := e.matchlenLong(s+4, t+4, src) + 4
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ t += l
+ // Index first pair after match end.
+ if int(t+4) < len(src) && t > 0 {
+ cv := load3232(src, t)
+ nextHash := hash(cv)
+ e.table[nextHash] = tableEntryPrev{
+ Prev: e.table[nextHash].Cur,
+ Cur: tableEntry{offset: e.cur + t, val: cv},
+ }
+ }
+ goto emitRemainder
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-3 to s.
+ x := load6432(src, s-3)
+ prevHash := hash(uint32(x))
+ e.table[prevHash] = tableEntryPrev{
+ Prev: e.table[prevHash].Cur,
+ Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
+ }
+ x >>= 8
+ prevHash = hash(uint32(x))
+
+ e.table[prevHash] = tableEntryPrev{
+ Prev: e.table[prevHash].Cur,
+ Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
+ }
+ x >>= 8
+ prevHash = hash(uint32(x))
+
+ e.table[prevHash] = tableEntryPrev{
+ Prev: e.table[prevHash].Cur,
+ Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
+ }
+ x >>= 8
+ currHash := hash(uint32(x))
+ candidates := e.table[currHash]
+ cv = uint32(x)
+ e.table[currHash] = tableEntryPrev{
+ Prev: candidates.Cur,
+ Cur: tableEntry{offset: s + e.cur, val: cv},
+ }
+
+ // Check both candidates
+ candidate = candidates.Cur
+ if cv == candidate.val {
+ offset := s - (candidate.offset - e.cur)
+ if offset <= maxMatchOffset {
+ continue
+ }
+ } else {
+ // We only check if value mismatches.
+ // Offset will always be invalid in other cases.
+ candidate = candidates.Prev
+ if cv == candidate.val {
+ offset := s - (candidate.offset - e.cur)
+ if offset <= maxMatchOffset {
+ continue
+ }
+ }
+ }
+ cv = uint32(x >> 8)
+ s++
+ break
+ }
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
new file mode 100644
index 000000000..c689ac771
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -0,0 +1,210 @@
+package flate
+
+import "fmt"
+
+type fastEncL4 struct {
+ fastGen
+ table [tableSize]tableEntry
+ bTable [tableSize]tableEntry
+}
+
+func (e *fastEncL4) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.bTable[:] {
+ e.bTable[i] = tableEntry{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.bTable[:] {
+ v := e.bTable[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.bTable[i].offset = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load6432(src, s)
+ for {
+ const skipLog = 6
+ const doEvery = 1
+
+ nextS := s
+ var t int32
+ for {
+ nextHashS := hash4x64(cv, tableBits)
+ nextHashL := hash7(cv, tableBits)
+
+ s = nextS
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ // Fetch a short+long candidate
+ sCandidate := e.table[nextHashS]
+ lCandidate := e.bTable[nextHashL]
+ next := load6432(src, nextS)
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.table[nextHashS] = entry
+ e.bTable[nextHashL] = entry
+
+ t = lCandidate.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
+ // We got a long match. Use that.
+ break
+ }
+
+ t = sCandidate.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+ // Found a 4 match...
+ lCandidate = e.bTable[hash7(next, tableBits)]
+
+ // If the next long is a candidate, check if we should use that instead...
+ lOff := nextS - (lCandidate.offset - e.cur)
+ if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
+ l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
+ if l2 > l1 {
+ s = nextS
+ t = lCandidate.offset - e.cur
+ }
+ }
+ break
+ }
+ cv = next
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+
+ // Extend the 4-byte match as long as possible.
+ l := e.matchlenLong(s+4, t+4, src) + 4
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+ if false {
+ if t >= s {
+ panic("s-t")
+ }
+ if (s - t) > maxMatchOffset {
+ panic(fmt.Sprintln("mmo", t))
+ }
+ if l < baseMatchLength {
+ panic("bml")
+ }
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ // Index first pair after match end.
+ if int(s+8) < len(src) {
+ cv := load6432(src, s)
+ e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ }
+ goto emitRemainder
+ }
+
+ // Store every 3rd hash in-between
+ if true {
+ i := nextS
+ if i < s-1 {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+ t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+ e.bTable[hash7(cv, tableBits)] = t
+ e.bTable[hash7(cv>>8, tableBits)] = t2
+ e.table[hash4u(t2.val, tableBits)] = t2
+
+ i += 3
+ for ; i < s-1; i += 3 {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+ t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+ e.bTable[hash7(cv, tableBits)] = t
+ e.bTable[hash7(cv>>8, tableBits)] = t2
+ e.table[hash4u(t2.val, tableBits)] = t2
+ }
+ }
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-1 and at s.
+ x := load6432(src, s-1)
+ o := e.cur + s - 1
+ prevHashS := hash4x64(x, tableBits)
+ prevHashL := hash7(x, tableBits)
+ e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
+ e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
+ cv = x >> 8
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
new file mode 100644
index 000000000..14a235612
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -0,0 +1,276 @@
+package flate
+
+import "fmt"
+
+type fastEncL5 struct {
+ fastGen
+ table [tableSize]tableEntry
+ bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL5) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.bTable[:] {
+ e.bTable[i] = tableEntryPrev{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.bTable[:] {
+ v := e.bTable[i]
+ if v.Cur.offset <= minOff {
+ v.Cur.offset = 0
+ v.Prev.offset = 0
+ } else {
+ v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+ if v.Prev.offset <= minOff {
+ v.Prev.offset = 0
+ } else {
+ v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+ }
+ }
+ e.bTable[i] = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load6432(src, s)
+ for {
+ const skipLog = 6
+ const doEvery = 1
+
+ nextS := s
+ var l int32
+ var t int32
+ for {
+ nextHashS := hash4x64(cv, tableBits)
+ nextHashL := hash7(cv, tableBits)
+
+ s = nextS
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ // Fetch a short+long candidate
+ sCandidate := e.table[nextHashS]
+ lCandidate := e.bTable[nextHashL]
+ next := load6432(src, nextS)
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.table[nextHashS] = entry
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+ nextHashS = hash4x64(next, tableBits)
+ nextHashL = hash7(next, tableBits)
+
+ t = lCandidate.Cur.offset - e.cur
+ if s-t < maxMatchOffset {
+ if uint32(cv) == lCandidate.Cur.val {
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+
+ t2 := lCandidate.Prev.offset - e.cur
+ if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+ l = e.matchlen(s+4, t+4, src) + 4
+ ml1 := e.matchlen(s+4, t2+4, src) + 4
+ if ml1 > l {
+ t = t2
+ l = ml1
+ break
+ }
+ }
+ break
+ }
+ t = lCandidate.Prev.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+ break
+ }
+ }
+
+ t = sCandidate.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+ // Found a 4 match...
+ l = e.matchlen(s+4, t+4, src) + 4
+ lCandidate = e.bTable[nextHashL]
+ // Store the next match
+
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+
+ // If the next long is a candidate, use that...
+ t2 := lCandidate.Cur.offset - e.cur
+ if nextS-t2 < maxMatchOffset {
+ if lCandidate.Cur.val == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ break
+ }
+ }
+ // If the previous long is a candidate, use that...
+ t2 = lCandidate.Prev.offset - e.cur
+ if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ break
+ }
+ }
+ }
+ break
+ }
+ cv = next
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+
+ // Extend the 4-byte match as long as possible.
+ if l == 0 {
+ l = e.matchlenLong(s+4, t+4, src) + 4
+ } else if l == maxMatchLength {
+ l += e.matchlenLong(s+l, t+l, src)
+ }
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+ if false {
+ if t >= s {
+ panic(fmt.Sprintln("s-t", s, t))
+ }
+ if (s - t) > maxMatchOffset {
+ panic(fmt.Sprintln("mmo", s-t))
+ }
+ if l < baseMatchLength {
+ panic("bml")
+ }
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ goto emitRemainder
+ }
+
+ // Store every 3rd hash in-between.
+ if true {
+ const hashEvery = 3
+ i := s - l + 1
+ if i < s-1 {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+ e.table[hash4x64(cv, tableBits)] = t
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+
+ // Do an long at i+1
+ cv >>= 8
+ t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
+ eLong = &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+
+ // We only have enough bits for a short entry at i+2
+ cv >>= 8
+ t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
+ e.table[hash4x64(cv, tableBits)] = t
+
+ // Skip one - otherwise we risk hitting 's'
+ i += 4
+ for ; i < s-1; i += hashEvery {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+ t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+ e.table[hash4u(t2.val, tableBits)] = t2
+ }
+ }
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-1 and at s.
+ x := load6432(src, s-1)
+ o := e.cur + s - 1
+ prevHashS := hash4x64(x, tableBits)
+ prevHashL := hash7(x, tableBits)
+ e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
+ eLong := &e.bTable[prevHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
+ cv = x >> 8
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
new file mode 100644
index 000000000..cad0c7df7
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -0,0 +1,279 @@
+package flate
+
+import "fmt"
+
+type fastEncL6 struct {
+ fastGen
+ table [tableSize]tableEntry
+ bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL6) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.bTable[:] {
+ e.bTable[i] = tableEntryPrev{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.bTable[:] {
+ v := e.bTable[i]
+ if v.Cur.offset <= minOff {
+ v.Cur.offset = 0
+ v.Prev.offset = 0
+ } else {
+ v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+ if v.Prev.offset <= minOff {
+ v.Prev.offset = 0
+ } else {
+ v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+ }
+ }
+ e.bTable[i] = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load6432(src, s)
+ // Repeat MUST be > 1 and within range
+ repeat := int32(1)
+ for {
+ const skipLog = 7
+ const doEvery = 1
+
+ nextS := s
+ var l int32
+ var t int32
+ for {
+ nextHashS := hash4x64(cv, tableBits)
+ nextHashL := hash7(cv, tableBits)
+ s = nextS
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ // Fetch a short+long candidate
+ sCandidate := e.table[nextHashS]
+ lCandidate := e.bTable[nextHashL]
+ next := load6432(src, nextS)
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.table[nextHashS] = entry
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+ // Calculate hashes of 'next'
+ nextHashS = hash4x64(next, tableBits)
+ nextHashL = hash7(next, tableBits)
+
+ t = lCandidate.Cur.offset - e.cur
+ if s-t < maxMatchOffset {
+ if uint32(cv) == lCandidate.Cur.val {
+ // Long candidate matches at least 4 bytes.
+
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+
+ // Check the previous long candidate as well.
+ t2 := lCandidate.Prev.offset - e.cur
+ if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+ l = e.matchlen(s+4, t+4, src) + 4
+ ml1 := e.matchlen(s+4, t2+4, src) + 4
+ if ml1 > l {
+ t = t2
+ l = ml1
+ break
+ }
+ }
+ break
+ }
+ // Current value did not match, but check if previous long value does.
+ t = lCandidate.Prev.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+ break
+ }
+ }
+
+ t = sCandidate.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+ // Found a 4 match...
+ l = e.matchlen(s+4, t+4, src) + 4
+
+ // Look up next long candidate (at nextS)
+ lCandidate = e.bTable[nextHashL]
+
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+
+ // Check repeat at s + repOff
+ const repOff = 1
+ t2 := s - repeat + repOff
+ if load3232(src, t2) == uint32(cv>>(8*repOff)) {
+ ml := e.matchlen(s+4+repOff, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ l = ml
+ s += repOff
+ // Not worth checking more.
+ break
+ }
+ }
+
+ // If the next long is a candidate, use that...
+ t2 = lCandidate.Cur.offset - e.cur
+ if nextS-t2 < maxMatchOffset {
+ if lCandidate.Cur.val == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ // This is ok, but check previous as well.
+ }
+ }
+ // If the previous long is a candidate, use that...
+ t2 = lCandidate.Prev.offset - e.cur
+ if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ break
+ }
+ }
+ }
+ break
+ }
+ cv = next
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+
+ // Extend the 4-byte match as long as possible.
+ if l == 0 {
+ l = e.matchlenLong(s+4, t+4, src) + 4
+ } else if l == maxMatchLength {
+ l += e.matchlenLong(s+l, t+l, src)
+ }
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+ if false {
+ if t >= s {
+ panic(fmt.Sprintln("s-t", s, t))
+ }
+ if (s - t) > maxMatchOffset {
+ panic(fmt.Sprintln("mmo", s-t))
+ }
+ if l < baseMatchLength {
+ panic("bml")
+ }
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ repeat = s - t
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ // Index after match end.
+ for i := nextS + 1; i < int32(len(src))-8; i += 2 {
+ cv := load6432(src, i)
+ e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
+ }
+ goto emitRemainder
+ }
+
+ // Store every long hash in-between and every second short.
+ if true {
+ for i := nextS + 1; i < s-1; i += 2 {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+ t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
+ e.table[hash4x64(cv, tableBits)] = t
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+ eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
+ }
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-1 and at s.
+ cv = load6432(src, s)
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/reverse_bits.go b/vendor/github.com/klauspost/compress/flate/reverse_bits.go
deleted file mode 100644
index c1a02720d..000000000
--- a/vendor/github.com/klauspost/compress/flate/reverse_bits.go
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-var reverseByte = [256]byte{
- 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
- 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
- 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
- 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
- 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
- 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
- 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
- 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
- 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
- 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
- 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
- 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
- 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
- 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
- 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
- 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
- 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
- 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
- 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
- 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
- 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
- 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
- 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
- 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
- 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
- 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
- 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
- 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
- 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
- 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
- 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
- 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
-}
-
-func reverseUint16(v uint16) uint16 {
- return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8
-}
-
-func reverseBits(number uint16, bitLength byte) uint16 {
- return reverseUint16(number << uint8(16-bitLength))
-}
diff --git a/vendor/github.com/klauspost/compress/flate/snappy.go b/vendor/github.com/klauspost/compress/flate/snappy.go
deleted file mode 100644
index aebebd524..000000000
--- a/vendor/github.com/klauspost/compress/flate/snappy.go
+++ /dev/null
@@ -1,900 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Modified for deflate by Klaus Post (c) 2015.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-func emitLiteral(dst *tokens, lit []byte) {
- ol := int(dst.n)
- for i, v := range lit {
- dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
- }
- dst.n += uint16(len(lit))
-}
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-func emitCopy(dst *tokens, offset, length int) {
- dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize))
- dst.n++
-}
-
-type fastEnc interface {
- Encode(dst *tokens, src []byte)
- Reset()
-}
-
-func newFastEnc(level int) fastEnc {
- switch level {
- case 1:
- return &snappyL1{}
- case 2:
- return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
- case 3:
- return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
- case 4:
- return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}}
- default:
- panic("invalid level specified")
- }
-}
-
-const (
- tableBits = 14 // Bits used in the table
- tableSize = 1 << tableBits // Size of the table
- tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
- tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
- baseMatchOffset = 1 // The smallest match offset
- baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
- maxMatchOffset = 1 << 15 // The largest match offset
-)
-
-func load32(b []byte, i int) uint32 {
- b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load64(b []byte, i int) uint64 {
- b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
-func hash(u uint32) uint32 {
- return (u * 0x1e35a7bd) >> tableShift
-}
-
-// snappyL1 encapsulates level 1 compression
-type snappyL1 struct{}
-
-func (e *snappyL1) Reset() {}
-
-func (e *snappyL1) Encode(dst *tokens, src []byte) {
- const (
- inputMargin = 16 - 1
- minNonLiteralBlockSize = 1 + 1 + inputMargin
- )
-
- // This check isn't in the Snappy implementation, but there, the caller
- // instead of the callee handles this case.
- if len(src) < minNonLiteralBlockSize {
- // We do not fill the token table.
- // This will be picked up by caller.
- dst.n = uint16(len(src))
- return
- }
-
- // Initialize the hash table.
- //
- // The table element type is uint16, as s < sLimit and sLimit < len(src)
- // and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
- var table [tableSize]uint16
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- nextHash := hash(load32(src, s))
-
- for {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip := 32
-
- nextS := s
- candidate := 0
- for {
- s = nextS
- bytesBetweenHashLookups := skip >> 5
- nextS = s + bytesBetweenHashLookups
- skip += bytesBetweenHashLookups
- if nextS > sLimit {
- goto emitRemainder
- }
- candidate = int(table[nextHash&tableMask])
- table[nextHash&tableMask] = uint16(s)
- nextHash = hash(load32(src, nextS))
- if s-candidate <= maxMatchOffset && load32(src, s) == load32(src, candidate) {
- break
- }
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
- emitLiteral(dst, src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
- base := s
-
- // Extend the 4-byte match as long as possible.
- //
- // This is an inlined version of Snappy's:
- // s = extendMatch(src, candidate+4, s+4)
- s += 4
- s1 := base + maxMatchLength
- if s1 > len(src) {
- s1 = len(src)
- }
- a := src[s:s1]
- b := src[candidate+4:]
- b = b[:len(a)]
- l := len(a)
- for i := range a {
- if a[i] != b[i] {
- l = i
- break
- }
- }
- s += l
-
- // matchToken is flate's equivalent of Snappy's emitCopy.
- dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset))
- dst.n++
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-1 and at s. If
- // another emitCopy is not our next move, also calculate nextHash
- // at s+1. At least on GOARCH=amd64, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- x := load64(src, s-1)
- prevHash := hash(uint32(x >> 0))
- table[prevHash&tableMask] = uint16(s - 1)
- currHash := hash(uint32(x >> 8))
- candidate = int(table[currHash&tableMask])
- table[currHash&tableMask] = uint16(s)
- if s-candidate > maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
- nextHash = hash(uint32(x >> 16))
- s++
- break
- }
- }
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- emitLiteral(dst, src[nextEmit:])
- }
-}
-
-type tableEntry struct {
- val uint32
- offset int32
-}
-
-func load3232(b []byte, i int32) uint32 {
- b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load6432(b []byte, i int32) uint64 {
- b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
-// snappyGen maintains the table for matches,
-// and the previous byte block for level 2.
-// This is the generic implementation.
-type snappyGen struct {
- prev []byte
- cur int32
-}
-
-// snappyGen maintains the table for matches,
-// and the previous byte block for level 2.
-// This is the generic implementation.
-type snappyL2 struct {
- snappyGen
- table [tableSize]tableEntry
-}
-
-// EncodeL2 uses a similar algorithm to level 1, but is capable
-// of matching across blocks giving better compression at a small slowdown.
-func (e *snappyL2) Encode(dst *tokens, src []byte) {
- const (
- inputMargin = 8 - 1
- minNonLiteralBlockSize = 1 + 1 + inputMargin
- )
-
- // Protect against e.cur wraparound.
- if e.cur > 1<<30 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- e.cur = maxStoreBlockSize
- }
-
- // This check isn't in the Snappy implementation, but there, the caller
- // instead of the callee handles this case.
- if len(src) < minNonLiteralBlockSize {
- // We do not fill the token table.
- // This will be picked up by caller.
- dst.n = uint16(len(src))
- e.cur += maxStoreBlockSize
- e.prev = e.prev[:0]
- return
- }
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := int32(len(src) - inputMargin)
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := int32(0)
- s := int32(0)
- cv := load3232(src, s)
- nextHash := hash(cv)
-
- for {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip := int32(32)
-
- nextS := s
- var candidate tableEntry
- for {
- s = nextS
- bytesBetweenHashLookups := skip >> 5
- nextS = s + bytesBetweenHashLookups
- skip += bytesBetweenHashLookups
- if nextS > sLimit {
- goto emitRemainder
- }
- candidate = e.table[nextHash&tableMask]
- now := load3232(src, nextS)
- e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
- nextHash = hash(now)
-
- offset := s - (candidate.offset - e.cur)
- if offset > maxMatchOffset || cv != candidate.val {
- // Out of range or not matched.
- cv = now
- continue
- }
- break
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
- emitLiteral(dst, src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
-
- // Extend the 4-byte match as long as possible.
- //
- s += 4
- t := candidate.offset - e.cur + 4
- l := e.matchlen(s, t, src)
-
- // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
- dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
- dst.n++
- s += l
- nextEmit = s
- if s >= sLimit {
- t += l
- // Index first pair after match end.
- if int(t+4) < len(src) && t > 0 {
- cv := load3232(src, t)
- e.table[hash(cv)&tableMask] = tableEntry{offset: t + e.cur, val: cv}
- }
- goto emitRemainder
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-1 and at s. If
- // another emitCopy is not our next move, also calculate nextHash
- // at s+1. At least on GOARCH=amd64, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- x := load6432(src, s-1)
- prevHash := hash(uint32(x))
- e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
- x >>= 8
- currHash := hash(uint32(x))
- candidate = e.table[currHash&tableMask]
- e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
-
- offset := s - (candidate.offset - e.cur)
- if offset > maxMatchOffset || uint32(x) != candidate.val {
- cv = uint32(x >> 8)
- nextHash = hash(cv)
- s++
- break
- }
- }
- }
-
-emitRemainder:
- if int(nextEmit) < len(src) {
- emitLiteral(dst, src[nextEmit:])
- }
- e.cur += int32(len(src))
- e.prev = e.prev[:len(src)]
- copy(e.prev, src)
-}
-
-type tableEntryPrev struct {
- Cur tableEntry
- Prev tableEntry
-}
-
-// snappyL3
-type snappyL3 struct {
- snappyGen
- table [tableSize]tableEntryPrev
-}
-
-// Encode uses a similar algorithm to level 2, will check up to two candidates.
-func (e *snappyL3) Encode(dst *tokens, src []byte) {
- const (
- inputMargin = 8 - 1
- minNonLiteralBlockSize = 1 + 1 + inputMargin
- )
-
- // Protect against e.cur wraparound.
- if e.cur > 1<<30 {
- for i := range e.table[:] {
- e.table[i] = tableEntryPrev{}
- }
- e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
- }
-
- // This check isn't in the Snappy implementation, but there, the caller
- // instead of the callee handles this case.
- if len(src) < minNonLiteralBlockSize {
- // We do not fill the token table.
- // This will be picked up by caller.
- dst.n = uint16(len(src))
- e.cur += maxStoreBlockSize
- e.prev = e.prev[:0]
- return
- }
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := int32(len(src) - inputMargin)
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := int32(0)
- s := int32(0)
- cv := load3232(src, s)
- nextHash := hash(cv)
-
- for {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip := int32(32)
-
- nextS := s
- var candidate tableEntry
- for {
- s = nextS
- bytesBetweenHashLookups := skip >> 5
- nextS = s + bytesBetweenHashLookups
- skip += bytesBetweenHashLookups
- if nextS > sLimit {
- goto emitRemainder
- }
- candidates := e.table[nextHash&tableMask]
- now := load3232(src, nextS)
- e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
- nextHash = hash(now)
-
- // Check both candidates
- candidate = candidates.Cur
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- break
- }
- } else {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
- candidate = candidates.Prev
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- break
- }
- }
- }
- cv = now
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
- emitLiteral(dst, src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
-
- // Extend the 4-byte match as long as possible.
- //
- s += 4
- t := candidate.offset - e.cur + 4
- l := e.matchlen(s, t, src)
-
- // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
- dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
- dst.n++
- s += l
- nextEmit = s
- if s >= sLimit {
- t += l
- // Index first pair after match end.
- if int(t+4) < len(src) && t > 0 {
- cv := load3232(src, t)
- nextHash = hash(cv)
- e.table[nextHash&tableMask] = tableEntryPrev{
- Prev: e.table[nextHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + t, val: cv},
- }
- }
- goto emitRemainder
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-3 to s. If
- // another emitCopy is not our next move, also calculate nextHash
- // at s+1. At least on GOARCH=amd64, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- x := load6432(src, s-3)
- prevHash := hash(uint32(x))
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
- }
- x >>= 8
- prevHash = hash(uint32(x))
-
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
- }
- x >>= 8
- prevHash = hash(uint32(x))
-
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
- }
- x >>= 8
- currHash := hash(uint32(x))
- candidates := e.table[currHash&tableMask]
- cv = uint32(x)
- e.table[currHash&tableMask] = tableEntryPrev{
- Prev: candidates.Cur,
- Cur: tableEntry{offset: s + e.cur, val: cv},
- }
-
- // Check both candidates
- candidate = candidates.Cur
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- continue
- }
- } else {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
- candidate = candidates.Prev
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- continue
- }
- }
- }
- cv = uint32(x >> 8)
- nextHash = hash(cv)
- s++
- break
- }
- }
-
-emitRemainder:
- if int(nextEmit) < len(src) {
- emitLiteral(dst, src[nextEmit:])
- }
- e.cur += int32(len(src))
- e.prev = e.prev[:len(src)]
- copy(e.prev, src)
-}
-
-// snappyL4
-type snappyL4 struct {
- snappyL3
-}
-
-// Encode uses a similar algorithm to level 3,
-// but will check up to two candidates if first isn't long enough.
-func (e *snappyL4) Encode(dst *tokens, src []byte) {
- const (
- inputMargin = 8 - 3
- minNonLiteralBlockSize = 1 + 1 + inputMargin
- matchLenGood = 12
- )
-
- // Protect against e.cur wraparound.
- if e.cur > 1<<30 {
- for i := range e.table[:] {
- e.table[i] = tableEntryPrev{}
- }
- e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]}
- }
-
- // This check isn't in the Snappy implementation, but there, the caller
- // instead of the callee handles this case.
- if len(src) < minNonLiteralBlockSize {
- // We do not fill the token table.
- // This will be picked up by caller.
- dst.n = uint16(len(src))
- e.cur += maxStoreBlockSize
- e.prev = e.prev[:0]
- return
- }
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := int32(len(src) - inputMargin)
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := int32(0)
- s := int32(0)
- cv := load3232(src, s)
- nextHash := hash(cv)
-
- for {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip := int32(32)
-
- nextS := s
- var candidate tableEntry
- var candidateAlt tableEntry
- for {
- s = nextS
- bytesBetweenHashLookups := skip >> 5
- nextS = s + bytesBetweenHashLookups
- skip += bytesBetweenHashLookups
- if nextS > sLimit {
- goto emitRemainder
- }
- candidates := e.table[nextHash&tableMask]
- now := load3232(src, nextS)
- e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
- nextHash = hash(now)
-
- // Check both candidates
- candidate = candidates.Cur
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset {
- offset = s - (candidates.Prev.offset - e.cur)
- if cv == candidates.Prev.val && offset < maxMatchOffset {
- candidateAlt = candidates.Prev
- }
- break
- }
- } else {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
- candidate = candidates.Prev
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset {
- break
- }
- }
- }
- cv = now
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
- emitLiteral(dst, src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
-
- // Extend the 4-byte match as long as possible.
- //
- s += 4
- t := candidate.offset - e.cur + 4
- l := e.matchlen(s, t, src)
- // Try alternative candidate if match length < matchLenGood.
- if l < matchLenGood-4 && candidateAlt.offset != 0 {
- t2 := candidateAlt.offset - e.cur + 4
- l2 := e.matchlen(s, t2, src)
- if l2 > l {
- l = l2
- t = t2
- }
- }
- // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
- dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
- dst.n++
- s += l
- nextEmit = s
- if s >= sLimit {
- t += l
- // Index first pair after match end.
- if int(t+4) < len(src) && t > 0 {
- cv := load3232(src, t)
- nextHash = hash(cv)
- e.table[nextHash&tableMask] = tableEntryPrev{
- Prev: e.table[nextHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + t, val: cv},
- }
- }
- goto emitRemainder
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-3 to s. If
- // another emitCopy is not our next move, also calculate nextHash
- // at s+1. At least on GOARCH=amd64, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- x := load6432(src, s-3)
- prevHash := hash(uint32(x))
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
- }
- x >>= 8
- prevHash = hash(uint32(x))
-
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
- }
- x >>= 8
- prevHash = hash(uint32(x))
-
- e.table[prevHash&tableMask] = tableEntryPrev{
- Prev: e.table[prevHash&tableMask].Cur,
- Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
- }
- x >>= 8
- currHash := hash(uint32(x))
- candidates := e.table[currHash&tableMask]
- cv = uint32(x)
- e.table[currHash&tableMask] = tableEntryPrev{
- Prev: candidates.Cur,
- Cur: tableEntry{offset: s + e.cur, val: cv},
- }
-
- // Check both candidates
- candidate = candidates.Cur
- candidateAlt = tableEntry{}
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- offset = s - (candidates.Prev.offset - e.cur)
- if cv == candidates.Prev.val && offset <= maxMatchOffset {
- candidateAlt = candidates.Prev
- }
- continue
- }
- } else {
- // We only check if value mismatches.
- // Offset will always be invalid in other cases.
- candidate = candidates.Prev
- if cv == candidate.val {
- offset := s - (candidate.offset - e.cur)
- if offset <= maxMatchOffset {
- continue
- }
- }
- }
- cv = uint32(x >> 8)
- nextHash = hash(cv)
- s++
- break
- }
- }
-
-emitRemainder:
- if int(nextEmit) < len(src) {
- emitLiteral(dst, src[nextEmit:])
- }
- e.cur += int32(len(src))
- e.prev = e.prev[:len(src)]
- copy(e.prev, src)
-}
-
-func (e *snappyGen) matchlen(s, t int32, src []byte) int32 {
- s1 := int(s) + maxMatchLength - 4
- if s1 > len(src) {
- s1 = len(src)
- }
-
- // If we are inside the current block
- if t >= 0 {
- b := src[t:]
- a := src[s:s1]
- b = b[:len(a)]
- // Extend the match to be as long as possible.
- for i := range a {
- if a[i] != b[i] {
- return int32(i)
- }
- }
- return int32(len(a))
- }
-
- // We found a match in the previous block.
- tp := int32(len(e.prev)) + t
- if tp < 0 {
- return 0
- }
-
- // Extend the match to be as long as possible.
- a := src[s:s1]
- b := e.prev[tp:]
- if len(b) > len(a) {
- b = b[:len(a)]
- }
- a = a[:len(b)]
- for i := range b {
- if a[i] != b[i] {
- return int32(i)
- }
- }
-
- // If we reached our limit, we matched everything we are
- // allowed to in the previous block and we return.
- n := int32(len(b))
- if int(s+n) == s1 {
- return n
- }
-
- // Continue looking for more matches in the current block.
- a = src[s+n : s1]
- b = src[:len(a)]
- for i := range a {
- if a[i] != b[i] {
- return int32(i) + n
- }
- }
- return int32(len(a)) + n
-}
-
-// Reset the encoding table.
-func (e *snappyGen) Reset() {
- e.prev = e.prev[:0]
- e.cur += maxMatchOffset
-}
diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go
new file mode 100644
index 000000000..524ee0ae3
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/stateless.go
@@ -0,0 +1,252 @@
+package flate
+
+import (
+ "io"
+ "math"
+)
+
+const (
+ maxStatelessBlock = math.MaxInt16
+
+ slTableBits = 13
+ slTableSize = 1 << slTableBits
+ slTableShift = 32 - slTableBits
+)
+
+type statelessWriter struct {
+ dst io.Writer
+ closed bool
+}
+
+func (s *statelessWriter) Close() error {
+ if s.closed {
+ return nil
+ }
+ s.closed = true
+ // Emit EOF block
+ return StatelessDeflate(s.dst, nil, true)
+}
+
+func (s *statelessWriter) Write(p []byte) (n int, err error) {
+ err = StatelessDeflate(s.dst, p, false)
+ if err != nil {
+ return 0, err
+ }
+ return len(p), nil
+}
+
+func (s *statelessWriter) Reset(w io.Writer) {
+ s.dst = w
+ s.closed = false
+}
+
+// NewStatelessWriter will do compression but without maintaining any state
+// between Write calls.
+// There will be no memory kept between Write calls,
+// but compression and speed will be suboptimal.
+// Because of this, the size of actual Write calls will affect output size.
+func NewStatelessWriter(dst io.Writer) io.WriteCloser {
+ return &statelessWriter{dst: dst}
+}
+
+// StatelessDeflate allows to compress directly to a Writer without retaining state.
+// When returning everything will be flushed.
+func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
+ var dst tokens
+ bw := newHuffmanBitWriter(out)
+ if eof && len(in) == 0 {
+ // Just write an EOF block.
+ // Could be faster...
+ bw.writeStoredHeader(0, true)
+ bw.flush()
+ return bw.err
+ }
+
+ for len(in) > 0 {
+ todo := in
+ if len(todo) > maxStatelessBlock {
+ todo = todo[:maxStatelessBlock]
+ }
+ in = in[len(todo):]
+ // Compress
+ statelessEnc(&dst, todo)
+ isEof := eof && len(in) == 0
+
+ if dst.n == 0 {
+ bw.writeStoredHeader(len(todo), isEof)
+ if bw.err != nil {
+ return bw.err
+ }
+ bw.writeBytes(todo)
+ } else if int(dst.n) > len(todo)-len(todo)>>4 {
+ // If we removed less than 1/16th, huffman compress the block.
+ bw.writeBlockHuff(isEof, todo, false)
+ } else {
+ bw.writeBlockDynamic(&dst, isEof, todo, false)
+ }
+ if bw.err != nil {
+ return bw.err
+ }
+ dst.Reset()
+ }
+ if !eof {
+ // Align.
+ bw.writeStoredHeader(0, false)
+ }
+ bw.flush()
+ return bw.err
+}
+
+func hashSL(u uint32) uint32 {
+ return (u * 0x1e35a7bd) >> slTableShift
+}
+
+func load3216(b []byte, i int16) uint32 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:4]
+ return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load6416(b []byte, i int16) uint64 {
+ // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
+ b = b[i:]
+ b = b[:8]
+ return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+ uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func statelessEnc(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ type tableEntry struct {
+ offset int16
+ }
+
+ var table [slTableSize]tableEntry
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ s := int16(1)
+ nextEmit := int16(0)
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int16(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load3216(src, s)
+
+ for {
+ const skipLog = 5
+ const doEvery = 2
+
+ nextS := s
+ var candidate tableEntry
+ for {
+ nextHash := hashSL(cv)
+ candidate = table[nextHash]
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit || nextS <= 0 {
+ goto emitRemainder
+ }
+
+ now := load6416(src, nextS)
+ table[nextHash] = tableEntry{offset: s}
+ nextHash = hashSL(uint32(now))
+
+ if cv == load3216(src, candidate.offset) {
+ table[nextHash] = tableEntry{offset: nextS}
+ break
+ }
+
+ // Do one right away...
+ cv = uint32(now)
+ s = nextS
+ nextS++
+ candidate = table[nextHash]
+ now >>= 8
+ table[nextHash] = tableEntry{offset: s}
+
+ if cv == load3216(src, candidate.offset) {
+ table[nextHash] = tableEntry{offset: nextS}
+ break
+ }
+ cv = uint32(now)
+ s = nextS
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+ for {
+ // Invariant: we have a 4-byte match at s, and no need to emit any
+ // literal bytes prior to s.
+
+ // Extend the 4-byte match as long as possible.
+ t := candidate.offset
+ l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ emitLiteral(dst, src[nextEmit:s])
+ }
+
+ // Save the match found
+ dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+ if s >= sLimit {
+ goto emitRemainder
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-2 and at s. If
+ // another emitCopy is not our next move, also calculate nextHash
+ // at s+1. At least on GOARCH=amd64, these three hash calculations
+ // are faster as one load64 call (with some shifts) instead of
+ // three load32 calls.
+ x := load6416(src, s-2)
+ o := s - 2
+ prevHash := hashSL(uint32(x))
+ table[prevHash] = tableEntry{offset: o}
+ x >>= 16
+ currHash := hashSL(uint32(x))
+ candidate = table[currHash]
+ table[currHash] = tableEntry{offset: o + 2}
+
+ if uint32(x) != load3216(src, candidate.offset) {
+ cv = uint32(x >> 8)
+ s++
+ break
+ }
+ }
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
index 141299b97..b3df0d894 100644
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -4,6 +4,14 @@
package flate
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "io"
+ "math"
+)
+
const (
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
@@ -46,6 +54,36 @@ var lengthCodes = [256]uint8{
27, 27, 27, 27, 27, 28,
}
+// lengthCodes1 is length codes, but starting at 1.
+var lengthCodes1 = [256]uint8{
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
+ 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
+ 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
+ 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
+ 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
+ 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
+ 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 29,
+}
+
var offsetCodes = [256]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
@@ -65,19 +103,236 @@ var offsetCodes = [256]uint32{
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
+// offsetCodes14 are offsetCodes, but with 14 added.
+var offsetCodes14 = [256]uint32{
+ 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+ 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+}
+
type token uint32
type tokens struct {
- tokens [maxStoreBlockSize + 1]token
- n uint16 // Must be able to contain maxStoreBlockSize
+ nLits int
+ extraHist [32]uint16 // codes 256->maxnumlit
+ offHist [32]uint16 // offset codes
+ litHist [256]uint16 // codes 0->255
+ n uint16 // Must be able to contain maxStoreBlockSize
+ tokens [maxStoreBlockSize + 1]token
+}
+
+func (t *tokens) Reset() {
+ if t.n == 0 {
+ return
+ }
+ t.n = 0
+ t.nLits = 0
+ for i := range t.litHist[:] {
+ t.litHist[i] = 0
+ }
+ for i := range t.extraHist[:] {
+ t.extraHist[i] = 0
+ }
+ for i := range t.offHist[:] {
+ t.offHist[i] = 0
+ }
+}
+
+func (t *tokens) Fill() {
+ if t.n == 0 {
+ return
+ }
+ for i, v := range t.litHist[:] {
+ if v == 0 {
+ t.litHist[i] = 1
+ t.nLits++
+ }
+ }
+ for i, v := range t.extraHist[:literalCount-256] {
+ if v == 0 {
+ t.nLits++
+ t.extraHist[i] = 1
+ }
+ }
+ for i, v := range t.offHist[:offsetCodeCount] {
+ if v == 0 {
+ t.offHist[i] = 1
+ }
+ }
+}
+
+func indexTokens(in []token) tokens {
+ var t tokens
+ t.indexTokens(in)
+ return t
+}
+
+func (t *tokens) indexTokens(in []token) {
+ t.Reset()
+ for _, tok := range in {
+ if tok < matchType {
+ t.tokens[t.n] = tok
+ t.litHist[tok]++
+ t.n++
+ continue
+ }
+ t.AddMatch(uint32(tok.length()), tok.offset())
+ }
}
-// Convert a literal into a literal token.
-func literalToken(literal uint32) token { return token(literalType + literal) }
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+func emitLiteral(dst *tokens, lit []byte) {
+ ol := int(dst.n)
+ for i, v := range lit {
+ dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
+ dst.litHist[v]++
+ }
+ dst.n += uint16(len(lit))
+ dst.nLits += len(lit)
+}
-// Convert a < xlength, xoffset > pair into a match token.
-func matchToken(xlength uint32, xoffset uint32) token {
- return token(matchType + xlength<<lengthShift + xoffset)
+func (t *tokens) AddLiteral(lit byte) {
+ t.tokens[t.n] = token(lit)
+ t.litHist[lit]++
+ t.n++
+ t.nLits++
+}
+
+// EstimatedBits will return an minimum size estimated by an *optimal*
+// compression of the block.
+// The size of the block
+func (t *tokens) EstimatedBits() int {
+ shannon := float64(0)
+ bits := int(0)
+ nMatches := 0
+ if t.nLits > 0 {
+ invTotal := 1.0 / float64(t.nLits)
+ for _, v := range t.litHist[:] {
+ if v > 0 {
+ n := float64(v)
+ shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+ }
+ }
+ // Just add 15 for EOB
+ shannon += 15
+ for _, v := range t.extraHist[1 : literalCount-256] {
+ if v > 0 {
+ n := float64(v)
+ shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+ bits += int(lengthExtraBits[v&31]) * int(v)
+ nMatches += int(v)
+ }
+ }
+ }
+ if nMatches > 0 {
+ invTotal := 1.0 / float64(nMatches)
+ for _, v := range t.offHist[:offsetCodeCount] {
+ if v > 0 {
+ n := float64(v)
+ shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+ bits += int(offsetExtraBits[v&31]) * int(n)
+ }
+ }
+ }
+
+ return int(shannon) + bits
+}
+
+// AddMatch adds a match to the tokens.
+// This function is very sensitive to inlining and right on the border.
+func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
+ if debugDecode {
+ if xlength >= maxMatchLength+baseMatchLength {
+ panic(fmt.Errorf("invalid length: %v", xlength))
+ }
+ if xoffset >= maxMatchOffset+baseMatchOffset {
+ panic(fmt.Errorf("invalid offset: %v", xoffset))
+ }
+ }
+ t.nLits++
+ lengthCode := lengthCodes1[uint8(xlength)] & 31
+ t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
+ t.extraHist[lengthCode]++
+ t.offHist[offsetCode(xoffset)&31]++
+ t.n++
+}
+
+// AddMatchLong adds a match to the tokens, potentially longer than max match length.
+// Length should NOT have the base subtracted, only offset should.
+func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
+ if debugDecode {
+ if xoffset >= maxMatchOffset+baseMatchOffset {
+ panic(fmt.Errorf("invalid offset: %v", xoffset))
+ }
+ }
+ oc := offsetCode(xoffset) & 31
+ for xlength > 0 {
+ xl := xlength
+ if xl > 258 {
+ // We need to have at least baseMatchLength left over for next loop.
+ xl = 258 - baseMatchLength
+ }
+ xlength -= xl
+ xl -= 3
+ t.nLits++
+ lengthCode := lengthCodes1[uint8(xl)] & 31
+ t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
+ t.extraHist[lengthCode]++
+ t.offHist[oc]++
+ t.n++
+ }
+}
+
+func (t *tokens) AddEOB() {
+ t.tokens[t.n] = token(endBlockMarker)
+ t.extraHist[0]++
+ t.n++
+}
+
+func (t *tokens) Slice() []token {
+ return t.tokens[:t.n]
+}
+
+// VarInt returns the tokens as varint encoded bytes.
+func (t *tokens) VarInt() []byte {
+ var b = make([]byte, binary.MaxVarintLen32*int(t.n))
+ var off int
+ for _, v := range t.tokens[:t.n] {
+ off += binary.PutUvarint(b[off:], uint64(v))
+ }
+ return b[:off]
+}
+
+// FromVarInt restores t to the varint encoded tokens provided.
+// Any data in t is removed.
+func (t *tokens) FromVarInt(b []byte) error {
+ var buf = bytes.NewReader(b)
+ var toks []token
+ for {
+ r, err := binary.ReadUvarint(buf)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return err
+ }
+ toks = append(toks, token(r))
+ }
+ t.indexTokens(toks)
+ return nil
}
// Returns the type of a token
@@ -96,11 +351,17 @@ func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
+ if false {
+ if off < uint32(len(offsetCodes)) {
+ return offsetCodes[off&255]
+ } else if off>>7 < uint32(len(offsetCodes)) {
+ return offsetCodes[(off>>7)&255] + 14
+ } else {
+ return offsetCodes[(off>>14)&255] + 28
+ }
+ }
if off < uint32(len(offsetCodes)) {
- return offsetCodes[off&255]
- } else if off>>7 < uint32(len(offsetCodes)) {
- return offsetCodes[(off>>7)&255] + 14
- } else {
- return offsetCodes[(off>>14)&255] + 28
+ return offsetCodes[uint8(off)]
}
+ return offsetCodes14[uint8(off>>7)]
}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index dd4f7fefb..51e00aaeb 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -54,6 +54,12 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
canReuse = s.canUseTable(s.prevTable)
}
+ // We want the output size to be less than this:
+ wantSize := len(in)
+ if s.WantLogLess > 0 {
+ wantSize -= wantSize >> s.WantLogLess
+ }
+
// Reset for next run.
s.clearCount = true
s.maxCount = 0
@@ -77,7 +83,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
s.cTable = s.prevTable
s.Out, err = compressor(in)
s.cTable = keepTable
- if err == nil && len(s.Out) < len(in) {
+ if err == nil && len(s.Out) < wantSize {
s.OutData = s.Out
return s.Out, true, nil
}
@@ -100,13 +106,16 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
hSize := len(s.Out)
oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
- if oldSize <= hSize+newSize || hSize+12 >= len(in) {
+ if oldSize <= hSize+newSize || hSize+12 >= wantSize {
// Retain cTable even if we re-use.
keepTable := s.cTable
s.cTable = s.prevTable
s.Out, err = compressor(in)
s.cTable = keepTable
- if len(s.Out) >= len(in) {
+ if err != nil {
+ return nil, false, err
+ }
+ if len(s.Out) >= wantSize {
return nil, false, ErrIncompressible
}
s.OutData = s.Out
@@ -128,7 +137,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
s.OutTable = nil
return nil, false, err
}
- if len(s.Out) >= len(in) {
+ if len(s.Out) >= wantSize {
s.OutTable = nil
return nil, false, ErrIncompressible
}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 43b4815b3..7e68a4eb4 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -276,6 +276,7 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
// Use temp table to avoid bound checks/append penalty.
var tmp = s.huffWeight[:256]
var off uint8
+ var decoded int
// Decode 2 values from each decoder/loop.
const bufoff = 256 / 4
@@ -306,6 +307,7 @@ bigloop:
copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4])
off = 0
dstOut = dstOut[bufoff:]
+ decoded += 256
// There must at least be 3 buffers left.
if len(dstOut) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
@@ -321,9 +323,11 @@ bigloop:
copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2])
copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3])
copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
dstOut = dstOut[off:]
}
+ // Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
@@ -335,12 +339,15 @@ bigloop:
dstOut[offset] = decode(br)
offset++
}
+ decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
-
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
return s.Out, nil
}
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index 6f823f94d..6bc23bbf0 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -89,6 +89,12 @@ type Scratch struct {
// Reuse will specify the reuse policy
Reuse ReusePolicy
+ // WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+ // otherwise the block will be returned as incompressible.
+ // The reduction should then at least be (input size >> WantLogLess)
+ // If WantLogLess == 0 any improvement will do.
+ WantLogLess uint8
+
// MaxDecodedSize will set the maximum allowed output size.
// This value will automatically be set to BlockSizeMax if not set.
// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 3e161ea15..47cc21d6d 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -11,6 +11,7 @@ import (
"sync"
"github.com/klauspost/compress/huff0"
+ "github.com/klauspost/compress/zstd/internal/xxhash"
)
type blockType uint8
@@ -160,7 +161,8 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debug {
- println("Reading block:", err)
+ println("Reading block:", err, "(", cSize, ")", len(b.data))
+ printf("%T", br)
}
return err
}
@@ -275,7 +277,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
hist.b = nil
err := b.decodeCompressed(hist)
if debug {
- println("Decompressed to total", len(b.dst), "bytes, error:", err)
+ println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
}
hist.b = b.dst
b.dst = saved
@@ -368,7 +370,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
}
if debug {
- println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize", litCompSize)
+ println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
}
var literals []byte
var huff *huff0.Scratch
@@ -426,7 +428,6 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
literals = in[:litCompSize]
in = in[litCompSize:]
-
huff = huffDecoderPool.Get().(*huff0.Scratch)
var err error
// Ensure we have space to store it.
@@ -637,7 +638,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
hist.huffTree = huff
}
if debug {
- println("Final literals:", len(literals), "and", nSeqs, "sequences.")
+ println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
}
if nSeqs == 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index 9d9151a0e..8383279d2 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
b.coders.llEnc = &fseEncoder{}
b.coders.llPrev = &fseEncoder{}
}
- b.litEnc = &huff0.Scratch{}
+ b.litEnc = &huff0.Scratch{WantLogLess: 4}
b.reset(nil)
}
@@ -391,6 +391,52 @@ func (b *blockEnc) encodeLits() error {
return nil
}
+// fuzzFseEncoder can be used to fuzz the FSE encoder.
+func fuzzFseEncoder(data []byte) int {
+ if len(data) > maxSequences || len(data) < 2 {
+ return 0
+ }
+ enc := fseEncoder{}
+ hist := enc.Histogram()[:256]
+ maxSym := uint8(0)
+ for i, v := range data {
+ v = v & 63
+ data[i] = v
+ hist[v]++
+ if v > maxSym {
+ maxSym = v
+ }
+ }
+ if maxSym == 0 {
+ // All 0
+ return 0
+ }
+ maxCount := func(a []uint32) int {
+ var max uint32
+ for _, v := range a {
+ if v > max {
+ max = v
+ }
+ }
+ return int(max)
+ }
+ cnt := maxCount(hist[:maxSym])
+ if cnt == len(data) {
+ // RLE
+ return 0
+ }
+ enc.HistogramFinished(maxSym, cnt)
+ err := enc.normalizeCount(len(data))
+ if err != nil {
+ return 0
+ }
+ _, err = enc.writeCount(nil)
+ if err != nil {
+ panic(err)
+ }
+ return 1
+}
+
// encode will encode the block and put the output in b.output.
func (b *blockEnc) encode() error {
if len(b.sequences) == 0 {
@@ -415,16 +461,10 @@ func (b *blockEnc) encode() error {
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
- if len(out) > len(b.literals)-len(b.literals)>>4 {
- err = huff0.ErrIncompressible
- }
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
- if len(out) > len(b.literals)-len(b.literals)>>4 {
- err = huff0.ErrIncompressible
- }
} else {
err = huff0.ErrIncompressible
}
@@ -711,7 +751,7 @@ func (b *blockEnc) encode() error {
return nil
}
-var errIncompressible = errors.New("uncompressible")
+var errIncompressible = errors.New("incompressible")
func (b *blockEnc) genCodes() {
if len(b.sequences) == 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index 3538063f1..07321acb1 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -101,6 +101,9 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
dst = make([]byte, n)
}
n2, err := io.ReadFull(r.r, dst[:n])
+ if err == io.EOF && n > 0 {
+ err = io.ErrUnexpectedEOF
+ }
return dst[:n2], err
}
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index f4db3096a..1de94eef0 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -75,6 +75,7 @@ var (
// The Reset function can be used to initiate a new stream, which is will considerably
// reduce the allocations normally caused by NewReader.
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
+ initPredefined()
var d Decoder
d.o.setDefault()
for _, o := range opts {
@@ -123,7 +124,9 @@ func (d *Decoder) Read(p []byte) (int, error) {
if d.current.err != nil {
break
}
- d.nextBlock()
+ if !d.nextBlock(n == 0) {
+ return n, nil
+ }
}
}
if len(d.current.b) > 0 {
@@ -251,7 +254,7 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.current.err != nil {
break
}
- d.nextBlock()
+ d.nextBlock(true)
}
err := d.current.err
if err != nil {
@@ -328,7 +331,10 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// nextBlock returns the next block.
// If an error occurs d.err will be set.
-func (d *Decoder) nextBlock() {
+// Optionally the function can block for new output.
+// If non-blocking mode is used the returned boolean will be false
+// if no data was available without blocking.
+func (d *Decoder) nextBlock(blocking bool) (ok bool) {
if d.current.d != nil {
if debug {
printf("re-adding current decoder %p", d.current.d)
@@ -338,12 +344,22 @@ func (d *Decoder) nextBlock() {
}
if d.current.err != nil {
// Keep error state.
- return
+ return blocking
+ }
+
+ if blocking {
+ d.current.decodeOutput = <-d.current.output
+ } else {
+ select {
+ case d.current.decodeOutput = <-d.current.output:
+ default:
+ return false
+ }
}
- d.current.decodeOutput = <-d.current.output
if debug {
println("got", len(d.current.b), "bytes, error:", d.current.err)
}
+ return true
}
// Close will release all resources.
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index e120625d8..2f41bcd0d 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -235,7 +235,7 @@ encodeLoop:
if debug && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
- if debug {
+ if debugMatches {
println("long match")
}
break
@@ -259,7 +259,7 @@ encodeLoop:
// but the likelihood of both the first 4 bytes and the hash matching should be enough.
t = candidateL.offset - e.cur
s += checkAt
- if debug {
+ if debugMatches {
println("long match (after short)")
}
break
@@ -275,7 +275,7 @@ encodeLoop:
if debug && t < 0 {
panic("t<0")
}
- if debug {
+ if debugMatches {
println("short match")
}
break
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index b7011be29..d79188271 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -59,6 +59,7 @@ type encoderState struct {
// NewWriter will create a new Zstandard encoder.
// If the encoder will be used for encoding blocks a nil writer can be used.
func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) {
+ initPredefined()
var e Encoder
e.o.setDefault()
for _, o := range opts {
@@ -393,12 +394,31 @@ func (e *Encoder) Close() error {
// EncodeAll will encode all input in src and append it to dst.
// This function can be called concurrently, but each call will only run on a single goroutine.
-// If empty input is given, nothing is returned.
+// If empty input is given, nothing is returned, unless WithZeroFrames is specified.
// Encoded blocks can be concatenated and the result will be the combined input stream.
// Data compressed with EncodeAll can be decoded with the Decoder,
// using either a stream or DecodeAll.
func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(src) == 0 {
+ if e.o.fullZero {
+ // Add frame header.
+ fh := frameHeader{
+ ContentSize: 0,
+ WindowSize: MinWindowSize,
+ SingleSegment: true,
+ // Adding a checksum would be a waste of space.
+ Checksum: false,
+ DictID: 0,
+ }
+ dst, _ = fh.appendTo(dst)
+
+ // Write raw block as last one only.
+ var blk blockHeader
+ blk.setSize(0)
+ blk.setType(blockTypeRaw)
+ blk.setLast(true)
+ dst = blk.appendTo(dst)
+ }
return dst
}
e.init.Do(func() {
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index a8559e900..0f83a325a 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -1,6 +1,7 @@
package zstd
import (
+ "errors"
"fmt"
"runtime"
"strings"
@@ -18,6 +19,7 @@ type encoderOptions struct {
blockSize int
windowSize int
level EncoderLevel
+ fullZero bool
}
func (o *encoderOptions) setDefault() {
@@ -63,6 +65,30 @@ func WithEncoderConcurrency(n int) EOption {
}
}
+// WithWindowSize will set the maximum allowed back-reference distance.
+// The value must be a power of two between WindowSizeMin and WindowSizeMax.
+// A larger value will enable better compression but allocate more memory and,
+// for above-default values, take considerably longer.
+// The default value is determined by the compression level.
+func WithWindowSize(n int) EOption {
+ return func(o *encoderOptions) error {
+ switch {
+ case n < MinWindowSize:
+ return fmt.Errorf("window size must be at least %d", MinWindowSize)
+ case n > MaxWindowSize:
+ return fmt.Errorf("window size must be at most %d", MaxWindowSize)
+ case (n & (n - 1)) != 0:
+ return errors.New("window size must be a power of 2")
+ }
+
+ o.windowSize = n
+ if o.blockSize > o.windowSize {
+ o.blockSize = o.windowSize
+ }
+ return nil
+ }
+}
+
// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
// This can be used to obfuscate the exact output size or make blocks of a certain size.
// The contents will be a skippable frame, so it will be invisible by the decoder.
@@ -166,6 +192,16 @@ func WithEncoderLevel(l EncoderLevel) EOption {
}
}
+// WithZeroFrames will encode 0 length input as full frames.
+// This can be needed for compatibility with zstandard usage,
+// but is not needed for this package.
+func WithZeroFrames(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.fullZero = b
+ return nil
+ }
+}
+
// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
// If this flag is set, data must be regenerated within a single continuous memory segment.
// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 839a95fbf..9e00437a2 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -49,7 +49,8 @@ type frameDec struct {
const (
// The minimum Window_Size is 1 KB.
- minWindowSize = 1 << 10
+ MinWindowSize = 1 << 10
+ MaxWindowSize = 1 << 30
)
var (
@@ -60,7 +61,7 @@ var (
func newFrameDec(o decoderOptions) *frameDec {
d := frameDec{
o: o,
- maxWindowSize: 1 << 30,
+ maxWindowSize: MaxWindowSize,
}
if d.maxWindowSize > o.maxDecodedSize {
d.maxWindowSize = o.maxDecodedSize
@@ -193,14 +194,14 @@ func (d *frameDec) reset(br byteBuffer) error {
// When FCS_Field_Size is 2, the offset of 256 is added.
d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
case 4:
- d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3] << 24))
+ d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
case 8:
d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
if debug {
- println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]))
+ println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
}
}
// Move this to shared.
@@ -215,8 +216,8 @@ func (d *frameDec) reset(br byteBuffer) error {
if d.WindowSize == 0 && d.SingleSegment {
// We may not need window in this case.
d.WindowSize = d.FrameContentSize
- if d.WindowSize < minWindowSize {
- d.WindowSize = minWindowSize
+ if d.WindowSize < MinWindowSize {
+ d.WindowSize = MinWindowSize
}
}
@@ -225,7 +226,7 @@ func (d *frameDec) reset(br byteBuffer) error {
return ErrWindowSizeExceeded
}
// The minimum Window_Size is 1 KB.
- if d.WindowSize < minWindowSize {
+ if d.WindowSize < MinWindowSize {
println("got window size: ", d.WindowSize)
return ErrWindowSizeTooSmall
}
@@ -309,7 +310,9 @@ func (d *frameDec) checkCRC() error {
}
return ErrCRCMismatch
}
- println("CRC ok")
+ if debug {
+ println("CRC ok", tmp[:])
+ }
return nil
}
@@ -411,6 +414,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
}
written += int64(len(r.b))
if d.SingleSegment && uint64(written) > d.FrameContentSize {
+ println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize)
r.err = ErrFrameSizeExceeded
output <- r
return
@@ -461,6 +465,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
break
}
if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
+ println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
err = ErrFrameSizeExceeded
break
}
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
index acac32527..4479cfe18 100644
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -5,7 +5,6 @@
package zstd
import (
- "errors"
"fmt"
"io"
"math"
@@ -49,9 +48,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3
dst = append(dst, uint8(windowLog))
}
- if f.SingleSegment && f.ContentSize == 0 {
- return nil, errors.New("single segment, but no size set")
- }
+
switch fcs {
case 0:
if f.SingleSegment {
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index dfa6cf7ce..619836f52 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -502,13 +502,22 @@ func (s *fseEncoder) validateNorm() (err error) {
// writeCount will write the normalized histogram count to header.
// This is read back by readNCount.
func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
+ if s.useRLE {
+ return append(out, s.rleVal), nil
+ }
+ if s.preDefined || s.reUsed {
+ // Never write predefined.
+ return out, nil
+ }
+
var (
tableLog = s.actualTableLog
tableSize = 1 << tableLog
previous0 bool
charnum uint16
- maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+ // maximum header size plus 2 extra bytes for final output if bitCount == 0.
+ maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
// Write Table Size
bitStream = uint32(tableLog - minEncTablelog)
@@ -516,15 +525,12 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
remaining = int16(tableSize + 1) /* +1 for extra accuracy */
threshold = int16(tableSize)
nbBits = uint(tableLog + 1)
+ outP = len(out)
)
- if s.useRLE {
- return append(out, s.rleVal), nil
+ if cap(out) < outP+maxHeaderSize {
+ out = append(out, make([]byte, maxHeaderSize*3)...)
+ out = out[:len(out)-maxHeaderSize*3]
}
- if s.preDefined || s.reUsed {
- // Never write predefined.
- return out, nil
- }
- outP := len(out)
out = out[:outP+maxHeaderSize]
// stops at 1
@@ -594,11 +600,14 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
}
}
+ if outP+2 > len(out) {
+ return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
+ }
out[outP] = byte(bitStream)
out[outP+1] = byte(bitStream >> 8)
outP += int((bitCount + 7) / 8)
- if uint16(charnum) > s.symbolLen {
+ if charnum > s.symbolLen {
return nil, errors.New("internal error: charnum > s.symbolLen")
}
return out[:outP], nil
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
index 5186de802..6c17dc17f 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
@@ -7,6 +7,7 @@ package zstd
import (
"fmt"
"math"
+ "sync"
)
var (
@@ -69,85 +70,89 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) {
}
}
-func init() {
- // Literals length codes
- tmp := make([]baseOffset, 36)
- for i := range tmp[:16] {
- tmp[i] = baseOffset{
- baseLine: uint32(i),
- addBits: 0,
+var predef sync.Once
+
+func initPredefined() {
+ predef.Do(func() {
+ // Literals length codes
+ tmp := make([]baseOffset, 36)
+ for i := range tmp[:16] {
+ tmp[i] = baseOffset{
+ baseLine: uint32(i),
+ addBits: 0,
+ }
}
- }
- fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
- symbolTableX[tableLiteralLengths] = tmp
-
- // Match length codes
- tmp = make([]baseOffset, 53)
- for i := range tmp[:32] {
- tmp[i] = baseOffset{
- // The transformation adds the 3 length.
- baseLine: uint32(i) + 3,
- addBits: 0,
- }
- }
- fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
- symbolTableX[tableMatchLengths] = tmp
-
- // Offset codes
- tmp = make([]baseOffset, maxOffsetBits+1)
- tmp[1] = baseOffset{
- baseLine: 1,
- addBits: 1,
- }
- fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
- symbolTableX[tableOffsets] = tmp
-
- // Fill predefined tables and transform them.
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
- for i := range fsePredef[:] {
- f := &fsePredef[i]
- switch tableIndex(i) {
- case tableLiteralLengths:
- // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243
- f.actualTableLog = 6
- copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
- -1, -1, -1, -1})
- f.symbolLen = 36
- case tableOffsets:
- // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281
- f.actualTableLog = 5
- copy(f.norm[:], []int16{
- 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1})
- f.symbolLen = 29
- case tableMatchLengths:
- //https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304
- f.actualTableLog = 6
- copy(f.norm[:], []int16{
- 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
- -1, -1, -1, -1, -1})
- f.symbolLen = 53
+ fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ symbolTableX[tableLiteralLengths] = tmp
+
+ // Match length codes
+ tmp = make([]baseOffset, 53)
+ for i := range tmp[:32] {
+ tmp[i] = baseOffset{
+ // The transformation adds the 3 length.
+ baseLine: uint32(i) + 3,
+ addBits: 0,
+ }
}
- if err := f.buildDtable(); err != nil {
- panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ symbolTableX[tableMatchLengths] = tmp
+
+ // Offset codes
+ tmp = make([]baseOffset, maxOffsetBits+1)
+ tmp[1] = baseOffset{
+ baseLine: 1,
+ addBits: 1,
}
- if err := f.transform(symbolTableX[i]); err != nil {
- panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
+ symbolTableX[tableOffsets] = tmp
+
+ // Fill predefined tables and transform them.
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
+ for i := range fsePredef[:] {
+ f := &fsePredef[i]
+ switch tableIndex(i) {
+ case tableLiteralLengths:
+ // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243
+ f.actualTableLog = 6
+ copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+ -1, -1, -1, -1})
+ f.symbolLen = 36
+ case tableOffsets:
+ // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281
+ f.actualTableLog = 5
+ copy(f.norm[:], []int16{
+ 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1})
+ f.symbolLen = 29
+ case tableMatchLengths:
+ //https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304
+ f.actualTableLog = 6
+ copy(f.norm[:], []int16{
+ 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
+ -1, -1, -1, -1, -1})
+ f.symbolLen = 53
+ }
+ if err := f.buildDtable(); err != nil {
+ panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ }
+ if err := f.transform(symbolTableX[i]); err != nil {
+ panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ }
+ f.preDefined = true
+
+ // Create encoder as well
+ enc := &fsePredefEnc[i]
+ copy(enc.norm[:], f.norm[:])
+ enc.symbolLen = f.symbolLen
+ enc.actualTableLog = f.actualTableLog
+ if err := enc.buildCTable(); err != nil {
+ panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err))
+ }
+ enc.setBits(bitTables[i])
+ enc.preDefined = true
}
- f.preDefined = true
-
- // Create encoder as well
- enc := &fsePredefEnc[i]
- copy(enc.norm[:], f.norm[:])
- enc.symbolLen = f.symbolLen
- enc.actualTableLog = f.actualTableLog
- if err := enc.buildCTable(); err != nil {
- panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err))
- }
- enc.setBits(bitTables[i])
- enc.preDefined = true
- }
+ })
}
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index e9e518570..a048818f9 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -80,6 +80,7 @@ type SnappyConverter struct {
// If any error is detected on the Snappy stream it is returned.
// The number of bytes written is returned.
func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
+ initPredefined()
r.err = nil
r.r = in
if r.block == nil {
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index b975954c1..57a8a2f5b 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -11,6 +11,7 @@ import (
const debug = false
const debugSequences = false
+const debugMatches = false
// force encoder to use predefined tables.
const forcePreDef = false
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/.gitignore
deleted file mode 100644
index daf913b1b..000000000
--- a/vendor/github.com/klauspost/cpuid/.gitignore
+++ /dev/null
@@ -1,24 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml
deleted file mode 100644
index 630192d59..000000000
--- a/vendor/github.com/klauspost/cpuid/.travis.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-language: go
-
-sudo: false
-
-os:
- - linux
- - osx
-go:
- - 1.8.x
- - 1.9.x
- - 1.10.x
- - master
-
-script:
- - go vet ./...
- - go test -v ./...
- - go test -race ./...
- - diff <(gofmt -d .) <("")
-
-matrix:
- allow_failures:
- - go: 'master'
- fast_finish: true
diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
deleted file mode 100644
index 2ef4714f7..000000000
--- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Developer Certificate of Origin
-Version 1.1
-
-Copyright (C) 2015- Klaus Post & Contributors.
-Email: klauspost@gmail.com
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-
-Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
-(c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
-(d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE
deleted file mode 100644
index 5cec7ee94..000000000
--- a/vendor/github.com/klauspost/cpuid/LICENSE
+++ /dev/null
@@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Klaus Post
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md
deleted file mode 100644
index a7fb41fbe..000000000
--- a/vendor/github.com/klauspost/cpuid/README.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![GoDoc][1]][2] [![Build Status][3]][4]
-
-[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
-[2]: https://godoc.org/github.com/klauspost/cpuid
-[3]: https://travis-ci.org/klauspost/cpuid.svg
-[4]: https://travis-ci.org/klauspost/cpuid
-
-# features
-## CPU Instructions
-* **CMOV** (i686 CMOV)
-* **NX** (NX (No-Execute) bit)
-* **AMD3DNOW** (AMD 3DNOW)
-* **AMD3DNOWEXT** (AMD 3DNowExt)
-* **MMX** (standard MMX)
-* **MMXEXT** (SSE integer functions or AMD MMX ext)
-* **SSE** (SSE functions)
-* **SSE2** (P4 SSE functions)
-* **SSE3** (Prescott SSE3 functions)
-* **SSSE3** (Conroe SSSE3 functions)
-* **SSE4** (Penryn SSE4.1 functions)
-* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
-* **SSE42** (Nehalem SSE4.2 functions)
-* **AVX** (AVX functions)
-* **AVX2** (AVX2 functions)
-* **FMA3** (Intel FMA 3)
-* **FMA4** (Bulldozer FMA4 functions)
-* **XOP** (Bulldozer XOP functions)
-* **F16C** (Half-precision floating-point conversion)
-* **BMI1** (Bit Manipulation Instruction Set 1)
-* **BMI2** (Bit Manipulation Instruction Set 2)
-* **TBM** (AMD Trailing Bit Manipulation)
-* **LZCNT** (LZCNT instruction)
-* **POPCNT** (POPCNT instruction)
-* **AESNI** (Advanced Encryption Standard New Instructions)
-* **CLMUL** (Carry-less Multiplication)
-* **HTT** (Hyperthreading (enabled))
-* **HLE** (Hardware Lock Elision)
-* **RTM** (Restricted Transactional Memory)
-* **RDRAND** (RDRAND instruction is available)
-* **RDSEED** (RDSEED instruction is available)
-* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-* **SHA** (Intel SHA Extensions)
-* **AVX512F** (AVX-512 Foundation)
-* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
-* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
-* **AVX512PF** (AVX-512 Prefetch Instructions)
-* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
-* **AVX512CD** (AVX-512 Conflict Detection Instructions)
-* **AVX512BW** (AVX-512 Byte and Word Instructions)
-* **AVX512VL** (AVX-512 Vector Length Extensions)
-* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
-* **MPX** (Intel MPX (Memory Protection Extensions))
-* **ERMS** (Enhanced REP MOVSB/STOSB)
-* **RDTSCP** (RDTSCP Instruction)
-* **CX16** (CMPXCHG16B Instruction)
-* **SGX** (Software Guard Extensions, with activation details)
-
-## Performance
-* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
-* **SSE2SLOW** (SSE2 is supported, but usually not faster)
-* **SSE3SLOW** (SSE3 is supported, but usually not faster)
-* **ATOM** (Atom processor, some SSSE3 instructions are slower)
-* **Cache line** (Probable size of a cache line).
-* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
-
-## Cpu Vendor/VM
-* **Intel**
-* **AMD**
-* **VIA**
-* **Transmeta**
-* **NSC**
-* **KVM** (Kernel-based Virtual Machine)
-* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
-* **VMware**
-* **XenHVM**
-* **Bhyve**
-* **Hygon**
-
-# installing
-
-```go get github.com/klauspost/cpuid```
-
-# example
-
-```Go
-package main
-
-import (
- "fmt"
- "github.com/klauspost/cpuid"
-)
-
-func main() {
- // Print basic CPU information:
- fmt.Println("Name:", cpuid.CPU.BrandName)
- fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
- fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
- fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
- fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
- fmt.Println("Features:", cpuid.CPU.Features)
- fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
- fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
- fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
- fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
- fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
-
- // Test if we have a specific feature:
- if cpuid.CPU.SSE() {
- fmt.Println("We have Streaming SIMD Extensions")
- }
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
-PhysicalCores: 2
-ThreadsPerCore: 2
-LogicalCores: 4
-Family 6 Model: 42
-Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
-Cacheline bytes: 64
-We have Streaming SIMD Extensions
-```
-
-# private package
-
-In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
-
-For this purpose all exports are removed, and functions and constants are lowercased.
-
-This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go
deleted file mode 100644
index db9591321..000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid.go
+++ /dev/null
@@ -1,1049 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// Package cpuid provides information about the CPU running the current program.
-//
-// CPU features are detected on startup, and kept for fast access through the life of the application.
-// Currently x86 / x64 (AMD64) is supported.
-//
-// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-//
-// Package home: https://github.com/klauspost/cpuid
-package cpuid
-
-import "strings"
-
-// Vendor is a representation of a CPU vendor.
-type Vendor int
-
-const (
- Other Vendor = iota
- Intel
- AMD
- VIA
- Transmeta
- NSC
- KVM // Kernel-based Virtual Machine
- MSVM // Microsoft Hyper-V or Windows Virtual PC
- VMware
- XenHVM
- Bhyve
- Hygon
-)
-
-const (
- CMOV = 1 << iota // i686 CMOV
- NX // NX (No-Execute) bit
- AMD3DNOW // AMD 3DNOW
- AMD3DNOWEXT // AMD 3DNowExt
- MMX // standard MMX
- MMXEXT // SSE integer functions or AMD MMX ext
- SSE // SSE functions
- SSE2 // P4 SSE functions
- SSE3 // Prescott SSE3 functions
- SSSE3 // Conroe SSSE3 functions
- SSE4 // Penryn SSE4.1 functions
- SSE4A // AMD Barcelona microarchitecture SSE4a instructions
- SSE42 // Nehalem SSE4.2 functions
- AVX // AVX functions
- AVX2 // AVX2 functions
- FMA3 // Intel FMA 3
- FMA4 // Bulldozer FMA4 functions
- XOP // Bulldozer XOP functions
- F16C // Half-precision floating-point conversion
- BMI1 // Bit Manipulation Instruction Set 1
- BMI2 // Bit Manipulation Instruction Set 2
- TBM // AMD Trailing Bit Manipulation
- LZCNT // LZCNT instruction
- POPCNT // POPCNT instruction
- AESNI // Advanced Encryption Standard New Instructions
- CLMUL // Carry-less Multiplication
- HTT // Hyperthreading (enabled)
- HLE // Hardware Lock Elision
- RTM // Restricted Transactional Memory
- RDRAND // RDRAND instruction is available
- RDSEED // RDSEED instruction is available
- ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
- SHA // Intel SHA Extensions
- AVX512F // AVX-512 Foundation
- AVX512DQ // AVX-512 Doubleword and Quadword Instructions
- AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
- AVX512PF // AVX-512 Prefetch Instructions
- AVX512ER // AVX-512 Exponential and Reciprocal Instructions
- AVX512CD // AVX-512 Conflict Detection Instructions
- AVX512BW // AVX-512 Byte and Word Instructions
- AVX512VL // AVX-512 Vector Length Extensions
- AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
- MPX // Intel MPX (Memory Protection Extensions)
- ERMS // Enhanced REP MOVSB/STOSB
- RDTSCP // RDTSCP Instruction
- CX16 // CMPXCHG16B Instruction
- SGX // Software Guard Extensions
- IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
- STIBP // Single Thread Indirect Branch Predictors
-
- // Performance indicators
- SSE2SLOW // SSE2 is supported, but usually not faster
- SSE3SLOW // SSE3 is supported, but usually not faster
- ATOM // Atom processor, some SSSE3 instructions are slower
-)
-
-var flagNames = map[Flags]string{
- CMOV: "CMOV", // i686 CMOV
- NX: "NX", // NX (No-Execute) bit
- AMD3DNOW: "AMD3DNOW", // AMD 3DNOW
- AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
- MMX: "MMX", // Standard MMX
- MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext
- SSE: "SSE", // SSE functions
- SSE2: "SSE2", // P4 SSE2 functions
- SSE3: "SSE3", // Prescott SSE3 functions
- SSSE3: "SSSE3", // Conroe SSSE3 functions
- SSE4: "SSE4.1", // Penryn SSE4.1 functions
- SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
- SSE42: "SSE4.2", // Nehalem SSE4.2 functions
- AVX: "AVX", // AVX functions
- AVX2: "AVX2", // AVX functions
- FMA3: "FMA3", // Intel FMA 3
- FMA4: "FMA4", // Bulldozer FMA4 functions
- XOP: "XOP", // Bulldozer XOP functions
- F16C: "F16C", // Half-precision floating-point conversion
- BMI1: "BMI1", // Bit Manipulation Instruction Set 1
- BMI2: "BMI2", // Bit Manipulation Instruction Set 2
- TBM: "TBM", // AMD Trailing Bit Manipulation
- LZCNT: "LZCNT", // LZCNT instruction
- POPCNT: "POPCNT", // POPCNT instruction
- AESNI: "AESNI", // Advanced Encryption Standard New Instructions
- CLMUL: "CLMUL", // Carry-less Multiplication
- HTT: "HTT", // Hyperthreading (enabled)
- HLE: "HLE", // Hardware Lock Elision
- RTM: "RTM", // Restricted Transactional Memory
- RDRAND: "RDRAND", // RDRAND instruction is available
- RDSEED: "RDSEED", // RDSEED instruction is available
- ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
- SHA: "SHA", // Intel SHA Extensions
- AVX512F: "AVX512F", // AVX-512 Foundation
- AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
- AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
- AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions
- AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
- AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions
- AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions
- AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions
- AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
- MPX: "MPX", // Intel MPX (Memory Protection Extensions)
- ERMS: "ERMS", // Enhanced REP MOVSB/STOSB
- RDTSCP: "RDTSCP", // RDTSCP Instruction
- CX16: "CX16", // CMPXCHG16B Instruction
- SGX: "SGX", // Software Guard Extensions
- IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
- STIBP: "STIBP", // Single Thread Indirect Branch Predictors
-
- // Performance indicators
- SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
- SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
- ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower
-
-}
-
-// CPUInfo contains information about the detected system CPU.
-type CPUInfo struct {
- BrandName string // Brand name reported by the CPU
- VendorID Vendor // Comparable CPU vendor ID
- Features Flags // Features of the CPU
- PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
- ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
- LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
- Family int // CPU family number
- Model int // CPU model number
- CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
- Cache struct {
- L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
- L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
- L2 int // L2 Cache (per core or shared). Will be -1 if undetected
- L3 int // L3 Instruction Cache (per core or shared). Will be -1 if undetected
- }
- SGX SGXSupport
- maxFunc uint32
- maxExFunc uint32
-}
-
-var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
-var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-var xgetbv func(index uint32) (eax, edx uint32)
-var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
-
-// CPU contains information about the CPU as detected on startup,
-// or when Detect last was called.
-//
-// Use this as the primary entry point to you data,
-// this way queries are
-var CPU CPUInfo
-
-func init() {
- initCPU()
- Detect()
-}
-
-// Detect will re-detect current CPU info.
-// This will replace the content of the exported CPU variable.
-//
-// Unless you expect the CPU to change while you are running your program
-// you should not need to call this function.
-// If you call this, you must ensure that no other goroutine is accessing the
-// exported CPU variable.
-func Detect() {
- CPU.maxFunc = maxFunctionID()
- CPU.maxExFunc = maxExtendedFunction()
- CPU.BrandName = brandName()
- CPU.CacheLine = cacheLine()
- CPU.Family, CPU.Model = familyModel()
- CPU.Features = support()
- CPU.SGX = hasSGX(CPU.Features&SGX != 0)
- CPU.ThreadsPerCore = threadsPerCore()
- CPU.LogicalCores = logicalCores()
- CPU.PhysicalCores = physicalCores()
- CPU.VendorID = vendorID()
- CPU.cacheSize()
-}
-
-// Generated here: http://play.golang.org/p/BxFH2Gdc0G
-
-// Cmov indicates support of CMOV instructions
-func (c CPUInfo) Cmov() bool {
- return c.Features&CMOV != 0
-}
-
-// Amd3dnow indicates support of AMD 3DNOW! instructions
-func (c CPUInfo) Amd3dnow() bool {
- return c.Features&AMD3DNOW != 0
-}
-
-// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
-func (c CPUInfo) Amd3dnowExt() bool {
- return c.Features&AMD3DNOWEXT != 0
-}
-
-// MMX indicates support of MMX instructions
-func (c CPUInfo) MMX() bool {
- return c.Features&MMX != 0
-}
-
-// MMXExt indicates support of MMXEXT instructions
-// (SSE integer functions or AMD MMX ext)
-func (c CPUInfo) MMXExt() bool {
- return c.Features&MMXEXT != 0
-}
-
-// SSE indicates support of SSE instructions
-func (c CPUInfo) SSE() bool {
- return c.Features&SSE != 0
-}
-
-// SSE2 indicates support of SSE 2 instructions
-func (c CPUInfo) SSE2() bool {
- return c.Features&SSE2 != 0
-}
-
-// SSE3 indicates support of SSE 3 instructions
-func (c CPUInfo) SSE3() bool {
- return c.Features&SSE3 != 0
-}
-
-// SSSE3 indicates support of SSSE 3 instructions
-func (c CPUInfo) SSSE3() bool {
- return c.Features&SSSE3 != 0
-}
-
-// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
-func (c CPUInfo) SSE4() bool {
- return c.Features&SSE4 != 0
-}
-
-// SSE42 indicates support of SSE4.2 instructions
-func (c CPUInfo) SSE42() bool {
- return c.Features&SSE42 != 0
-}
-
-// AVX indicates support of AVX instructions
-// and operating system support of AVX instructions
-func (c CPUInfo) AVX() bool {
- return c.Features&AVX != 0
-}
-
-// AVX2 indicates support of AVX2 instructions
-func (c CPUInfo) AVX2() bool {
- return c.Features&AVX2 != 0
-}
-
-// FMA3 indicates support of FMA3 instructions
-func (c CPUInfo) FMA3() bool {
- return c.Features&FMA3 != 0
-}
-
-// FMA4 indicates support of FMA4 instructions
-func (c CPUInfo) FMA4() bool {
- return c.Features&FMA4 != 0
-}
-
-// XOP indicates support of XOP instructions
-func (c CPUInfo) XOP() bool {
- return c.Features&XOP != 0
-}
-
-// F16C indicates support of F16C instructions
-func (c CPUInfo) F16C() bool {
- return c.Features&F16C != 0
-}
-
-// BMI1 indicates support of BMI1 instructions
-func (c CPUInfo) BMI1() bool {
- return c.Features&BMI1 != 0
-}
-
-// BMI2 indicates support of BMI2 instructions
-func (c CPUInfo) BMI2() bool {
- return c.Features&BMI2 != 0
-}
-
-// TBM indicates support of TBM instructions
-// (AMD Trailing Bit Manipulation)
-func (c CPUInfo) TBM() bool {
- return c.Features&TBM != 0
-}
-
-// Lzcnt indicates support of LZCNT instruction
-func (c CPUInfo) Lzcnt() bool {
- return c.Features&LZCNT != 0
-}
-
-// Popcnt indicates support of POPCNT instruction
-func (c CPUInfo) Popcnt() bool {
- return c.Features&POPCNT != 0
-}
-
-// HTT indicates the processor has Hyperthreading enabled
-func (c CPUInfo) HTT() bool {
- return c.Features&HTT != 0
-}
-
-// SSE2Slow indicates that SSE2 may be slow on this processor
-func (c CPUInfo) SSE2Slow() bool {
- return c.Features&SSE2SLOW != 0
-}
-
-// SSE3Slow indicates that SSE3 may be slow on this processor
-func (c CPUInfo) SSE3Slow() bool {
- return c.Features&SSE3SLOW != 0
-}
-
-// AesNi indicates support of AES-NI instructions
-// (Advanced Encryption Standard New Instructions)
-func (c CPUInfo) AesNi() bool {
- return c.Features&AESNI != 0
-}
-
-// Clmul indicates support of CLMUL instructions
-// (Carry-less Multiplication)
-func (c CPUInfo) Clmul() bool {
- return c.Features&CLMUL != 0
-}
-
-// NX indicates support of NX (No-Execute) bit
-func (c CPUInfo) NX() bool {
- return c.Features&NX != 0
-}
-
-// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
-func (c CPUInfo) SSE4A() bool {
- return c.Features&SSE4A != 0
-}
-
-// HLE indicates support of Hardware Lock Elision
-func (c CPUInfo) HLE() bool {
- return c.Features&HLE != 0
-}
-
-// RTM indicates support of Restricted Transactional Memory
-func (c CPUInfo) RTM() bool {
- return c.Features&RTM != 0
-}
-
-// Rdrand indicates support of RDRAND instruction is available
-func (c CPUInfo) Rdrand() bool {
- return c.Features&RDRAND != 0
-}
-
-// Rdseed indicates support of RDSEED instruction is available
-func (c CPUInfo) Rdseed() bool {
- return c.Features&RDSEED != 0
-}
-
-// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-func (c CPUInfo) ADX() bool {
- return c.Features&ADX != 0
-}
-
-// SHA indicates support of Intel SHA Extensions
-func (c CPUInfo) SHA() bool {
- return c.Features&SHA != 0
-}
-
-// AVX512F indicates support of AVX-512 Foundation
-func (c CPUInfo) AVX512F() bool {
- return c.Features&AVX512F != 0
-}
-
-// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
-func (c CPUInfo) AVX512DQ() bool {
- return c.Features&AVX512DQ != 0
-}
-
-// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
-func (c CPUInfo) AVX512IFMA() bool {
- return c.Features&AVX512IFMA != 0
-}
-
-// AVX512PF indicates support of AVX-512 Prefetch Instructions
-func (c CPUInfo) AVX512PF() bool {
- return c.Features&AVX512PF != 0
-}
-
-// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
-func (c CPUInfo) AVX512ER() bool {
- return c.Features&AVX512ER != 0
-}
-
-// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
-func (c CPUInfo) AVX512CD() bool {
- return c.Features&AVX512CD != 0
-}
-
-// AVX512BW indicates support of AVX-512 Byte and Word Instructions
-func (c CPUInfo) AVX512BW() bool {
- return c.Features&AVX512BW != 0
-}
-
-// AVX512VL indicates support of AVX-512 Vector Length Extensions
-func (c CPUInfo) AVX512VL() bool {
- return c.Features&AVX512VL != 0
-}
-
-// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
-func (c CPUInfo) AVX512VBMI() bool {
- return c.Features&AVX512VBMI != 0
-}
-
-// MPX indicates support of Intel MPX (Memory Protection Extensions)
-func (c CPUInfo) MPX() bool {
- return c.Features&MPX != 0
-}
-
-// ERMS indicates support of Enhanced REP MOVSB/STOSB
-func (c CPUInfo) ERMS() bool {
- return c.Features&ERMS != 0
-}
-
-// RDTSCP Instruction is available.
-func (c CPUInfo) RDTSCP() bool {
- return c.Features&RDTSCP != 0
-}
-
-// CX16 indicates if CMPXCHG16B instruction is available.
-func (c CPUInfo) CX16() bool {
- return c.Features&CX16 != 0
-}
-
-// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
-// So TSX simply checks that.
-func (c CPUInfo) TSX() bool {
- return c.Features&(HLE|RTM) == HLE|RTM
-}
-
-// Atom indicates an Atom processor
-func (c CPUInfo) Atom() bool {
- return c.Features&ATOM != 0
-}
-
-// Intel returns true if vendor is recognized as Intel
-func (c CPUInfo) Intel() bool {
- return c.VendorID == Intel
-}
-
-// AMD returns true if vendor is recognized as AMD
-func (c CPUInfo) AMD() bool {
- return c.VendorID == AMD
-}
-
-// Hygon returns true if vendor is recognized as Hygon
-func (c CPUInfo) Hygon() bool {
- return c.VendorID == Hygon
-}
-
-// Transmeta returns true if vendor is recognized as Transmeta
-func (c CPUInfo) Transmeta() bool {
- return c.VendorID == Transmeta
-}
-
-// NSC returns true if vendor is recognized as National Semiconductor
-func (c CPUInfo) NSC() bool {
- return c.VendorID == NSC
-}
-
-// VIA returns true if vendor is recognized as VIA
-func (c CPUInfo) VIA() bool {
- return c.VendorID == VIA
-}
-
-// RTCounter returns the 64-bit time-stamp counter
-// Uses the RDTSCP instruction. The value 0 is returned
-// if the CPU does not support the instruction.
-func (c CPUInfo) RTCounter() uint64 {
- if !c.RDTSCP() {
- return 0
- }
- a, _, _, d := rdtscpAsm()
- return uint64(a) | (uint64(d) << 32)
-}
-
-// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
-// This variable is OS dependent, but on Linux contains information
-// about the current cpu/core the code is running on.
-// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
-func (c CPUInfo) Ia32TscAux() uint32 {
- if !c.RDTSCP() {
- return 0
- }
- _, _, ecx, _ := rdtscpAsm()
- return ecx
-}
-
-// LogicalCPU will return the Logical CPU the code is currently executing on.
-// This is likely to change when the OS re-schedules the running thread
-// to another CPU.
-// If the current core cannot be detected, -1 will be returned.
-func (c CPUInfo) LogicalCPU() int {
- if c.maxFunc < 1 {
- return -1
- }
- _, ebx, _, _ := cpuid(1)
- return int(ebx >> 24)
-}
-
-// VM Will return true if the cpu id indicates we are in
-// a virtual machine. This is only a hint, and will very likely
-// have many false negatives.
-func (c CPUInfo) VM() bool {
- switch c.VendorID {
- case MSVM, KVM, VMware, XenHVM, Bhyve:
- return true
- }
- return false
-}
-
-// Flags contains detected cpu features and caracteristics
-type Flags uint64
-
-// String returns a string representation of the detected
-// CPU features.
-func (f Flags) String() string {
- return strings.Join(f.Strings(), ",")
-}
-
-// Strings returns and array of the detected features.
-func (f Flags) Strings() []string {
- s := support()
- r := make([]string, 0, 20)
- for i := uint(0); i < 64; i++ {
- key := Flags(1 << i)
- val := flagNames[key]
- if s&key != 0 {
- r = append(r, val)
- }
- }
- return r
-}
-
-func maxExtendedFunction() uint32 {
- eax, _, _, _ := cpuid(0x80000000)
- return eax
-}
-
-func maxFunctionID() uint32 {
- a, _, _, _ := cpuid(0)
- return a
-}
-
-func brandName() string {
- if maxExtendedFunction() >= 0x80000004 {
- v := make([]uint32, 0, 48)
- for i := uint32(0); i < 3; i++ {
- a, b, c, d := cpuid(0x80000002 + i)
- v = append(v, a, b, c, d)
- }
- return strings.Trim(string(valAsString(v...)), " ")
- }
- return "unknown"
-}
-
-func threadsPerCore() int {
- mfi := maxFunctionID()
- if mfi < 0x4 || vendorID() != Intel {
- return 1
- }
-
- if mfi < 0xb {
- _, b, _, d := cpuid(1)
- if (d & (1 << 28)) != 0 {
- // v will contain logical core count
- v := (b >> 16) & 255
- if v > 1 {
- a4, _, _, _ := cpuid(4)
- // physical cores
- v2 := (a4 >> 26) + 1
- if v2 > 0 {
- return int(v) / int(v2)
- }
- }
- }
- return 1
- }
- _, b, _, _ := cpuidex(0xb, 0)
- if b&0xffff == 0 {
- return 1
- }
- return int(b & 0xffff)
-}
-
-func logicalCores() int {
- mfi := maxFunctionID()
- switch vendorID() {
- case Intel:
- // Use this on old Intel processors
- if mfi < 0xb {
- if mfi < 1 {
- return 0
- }
- // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
- // that can be assigned to logical processors in a physical package.
- // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
- _, ebx, _, _ := cpuid(1)
- logical := (ebx >> 16) & 0xff
- return int(logical)
- }
- _, b, _, _ := cpuidex(0xb, 1)
- return int(b & 0xffff)
- case AMD, Hygon:
- _, b, _, _ := cpuid(1)
- return int((b >> 16) & 0xff)
- default:
- return 0
- }
-}
-
-func familyModel() (int, int) {
- if maxFunctionID() < 0x1 {
- return 0, 0
- }
- eax, _, _, _ := cpuid(1)
- family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
- model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
- return int(family), int(model)
-}
-
-func physicalCores() int {
- switch vendorID() {
- case Intel:
- return logicalCores() / threadsPerCore()
- case AMD, Hygon:
- if maxExtendedFunction() >= 0x80000008 {
- _, _, c, _ := cpuid(0x80000008)
- return int(c&0xff) + 1
- }
- }
- return 0
-}
-
-// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
-var vendorMapping = map[string]Vendor{
- "AMDisbetter!": AMD,
- "AuthenticAMD": AMD,
- "CentaurHauls": VIA,
- "GenuineIntel": Intel,
- "TransmetaCPU": Transmeta,
- "GenuineTMx86": Transmeta,
- "Geode by NSC": NSC,
- "VIA VIA VIA ": VIA,
- "KVMKVMKVMKVM": KVM,
- "Microsoft Hv": MSVM,
- "VMwareVMware": VMware,
- "XenVMMXenVMM": XenHVM,
- "bhyve bhyve ": Bhyve,
- "HygonGenuine": Hygon,
-}
-
-func vendorID() Vendor {
- _, b, c, d := cpuid(0)
- v := valAsString(b, d, c)
- vend, ok := vendorMapping[string(v)]
- if !ok {
- return Other
- }
- return vend
-}
-
-func cacheLine() int {
- if maxFunctionID() < 0x1 {
- return 0
- }
-
- _, ebx, _, _ := cpuid(1)
- cache := (ebx & 0xff00) >> 5 // cflush size
- if cache == 0 && maxExtendedFunction() >= 0x80000006 {
- _, _, ecx, _ := cpuid(0x80000006)
- cache = ecx & 0xff // cacheline size
- }
- // TODO: Read from Cache and TLB Information
- return int(cache)
-}
-
-func (c *CPUInfo) cacheSize() {
- c.Cache.L1D = -1
- c.Cache.L1I = -1
- c.Cache.L2 = -1
- c.Cache.L3 = -1
- vendor := vendorID()
- switch vendor {
- case Intel:
- if maxFunctionID() < 4 {
- return
- }
- for i := uint32(0); ; i++ {
- eax, ebx, ecx, _ := cpuidex(4, i)
- cacheType := eax & 15
- if cacheType == 0 {
- break
- }
- cacheLevel := (eax >> 5) & 7
- coherency := int(ebx&0xfff) + 1
- partitions := int((ebx>>12)&0x3ff) + 1
- associativity := int((ebx>>22)&0x3ff) + 1
- sets := int(ecx) + 1
- size := associativity * partitions * coherency * sets
- switch cacheLevel {
- case 1:
- if cacheType == 1 {
- // 1 = Data Cache
- c.Cache.L1D = size
- } else if cacheType == 2 {
- // 2 = Instruction Cache
- c.Cache.L1I = size
- } else {
- if c.Cache.L1D < 0 {
- c.Cache.L1I = size
- }
- if c.Cache.L1I < 0 {
- c.Cache.L1I = size
- }
- }
- case 2:
- c.Cache.L2 = size
- case 3:
- c.Cache.L3 = size
- }
- }
- case AMD, Hygon:
- // Untested.
- if maxExtendedFunction() < 0x80000005 {
- return
- }
- _, _, ecx, edx := cpuid(0x80000005)
- c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
- c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
-
- if maxExtendedFunction() < 0x80000006 {
- return
- }
- _, _, ecx, _ = cpuid(0x80000006)
- c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
- }
-
- return
-}
-
-type SGXSupport struct {
- Available bool
- SGX1Supported bool
- SGX2Supported bool
- MaxEnclaveSizeNot64 int64
- MaxEnclaveSize64 int64
-}
-
-func hasSGX(available bool) (rval SGXSupport) {
- rval.Available = available
-
- if !available {
- return
- }
-
- a, _, _, d := cpuidex(0x12, 0)
- rval.SGX1Supported = a&0x01 != 0
- rval.SGX2Supported = a&0x02 != 0
- rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
- rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
-
- return
-}
-
-func support() Flags {
- mfi := maxFunctionID()
- vend := vendorID()
- if mfi < 0x1 {
- return 0
- }
- rval := uint64(0)
- _, _, c, d := cpuid(1)
- if (d & (1 << 15)) != 0 {
- rval |= CMOV
- }
- if (d & (1 << 23)) != 0 {
- rval |= MMX
- }
- if (d & (1 << 25)) != 0 {
- rval |= MMXEXT
- }
- if (d & (1 << 25)) != 0 {
- rval |= SSE
- }
- if (d & (1 << 26)) != 0 {
- rval |= SSE2
- }
- if (c & 1) != 0 {
- rval |= SSE3
- }
- if (c & 0x00000200) != 0 {
- rval |= SSSE3
- }
- if (c & 0x00080000) != 0 {
- rval |= SSE4
- }
- if (c & 0x00100000) != 0 {
- rval |= SSE42
- }
- if (c & (1 << 25)) != 0 {
- rval |= AESNI
- }
- if (c & (1 << 1)) != 0 {
- rval |= CLMUL
- }
- if c&(1<<23) != 0 {
- rval |= POPCNT
- }
- if c&(1<<30) != 0 {
- rval |= RDRAND
- }
- if c&(1<<29) != 0 {
- rval |= F16C
- }
- if c&(1<<13) != 0 {
- rval |= CX16
- }
- if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
- if threadsPerCore() > 1 {
- rval |= HTT
- }
- }
-
- // Check XGETBV, OXSAVE and AVX bits
- if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
- // Check for OS support
- eax, _ := xgetbv(0)
- if (eax & 0x6) == 0x6 {
- rval |= AVX
- if (c & 0x00001000) != 0 {
- rval |= FMA3
- }
- }
- }
-
- // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
- if mfi >= 7 {
- _, ebx, ecx, edx := cpuidex(7, 0)
- if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
- rval |= AVX2
- }
- if (ebx & 0x00000008) != 0 {
- rval |= BMI1
- if (ebx & 0x00000100) != 0 {
- rval |= BMI2
- }
- }
- if ebx&(1<<2) != 0 {
- rval |= SGX
- }
- if ebx&(1<<4) != 0 {
- rval |= HLE
- }
- if ebx&(1<<9) != 0 {
- rval |= ERMS
- }
- if ebx&(1<<11) != 0 {
- rval |= RTM
- }
- if ebx&(1<<14) != 0 {
- rval |= MPX
- }
- if ebx&(1<<18) != 0 {
- rval |= RDSEED
- }
- if ebx&(1<<19) != 0 {
- rval |= ADX
- }
- if ebx&(1<<29) != 0 {
- rval |= SHA
- }
- if edx&(1<<26) != 0 {
- rval |= IBPB
- }
- if edx&(1<<27) != 0 {
- rval |= STIBP
- }
-
- // Only detect AVX-512 features if XGETBV is supported
- if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
- // Check for OS support
- eax, _ := xgetbv(0)
-
- // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
- // ZMM16-ZMM31 state are enabled by OS)
- /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
- if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
- if ebx&(1<<16) != 0 {
- rval |= AVX512F
- }
- if ebx&(1<<17) != 0 {
- rval |= AVX512DQ
- }
- if ebx&(1<<21) != 0 {
- rval |= AVX512IFMA
- }
- if ebx&(1<<26) != 0 {
- rval |= AVX512PF
- }
- if ebx&(1<<27) != 0 {
- rval |= AVX512ER
- }
- if ebx&(1<<28) != 0 {
- rval |= AVX512CD
- }
- if ebx&(1<<30) != 0 {
- rval |= AVX512BW
- }
- if ebx&(1<<31) != 0 {
- rval |= AVX512VL
- }
- // ecx
- if ecx&(1<<1) != 0 {
- rval |= AVX512VBMI
- }
- }
- }
- }
-
- if maxExtendedFunction() >= 0x80000001 {
- _, _, c, d := cpuid(0x80000001)
- if (c & (1 << 5)) != 0 {
- rval |= LZCNT
- rval |= POPCNT
- }
- if (d & (1 << 31)) != 0 {
- rval |= AMD3DNOW
- }
- if (d & (1 << 30)) != 0 {
- rval |= AMD3DNOWEXT
- }
- if (d & (1 << 23)) != 0 {
- rval |= MMX
- }
- if (d & (1 << 22)) != 0 {
- rval |= MMXEXT
- }
- if (c & (1 << 6)) != 0 {
- rval |= SSE4A
- }
- if d&(1<<20) != 0 {
- rval |= NX
- }
- if d&(1<<27) != 0 {
- rval |= RDTSCP
- }
-
- /* Allow for selectively disabling SSE2 functions on AMD processors
- with SSE2 support but not SSE4a. This includes Athlon64, some
- Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
- than SSE2 often enough to utilize this special-case flag.
- AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
- so that SSE2 is used unless explicitly disabled by checking
- AV_CPU_FLAG_SSE2SLOW. */
- if vendorID() != Intel &&
- rval&SSE2 != 0 && (c&0x00000040) == 0 {
- rval |= SSE2SLOW
- }
-
- /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
- * used unless the OS has AVX support. */
- if (rval & AVX) != 0 {
- if (c & 0x00000800) != 0 {
- rval |= XOP
- }
- if (c & 0x00010000) != 0 {
- rval |= FMA4
- }
- }
-
- if vendorID() == Intel {
- family, model := familyModel()
- if family == 6 && (model == 9 || model == 13 || model == 14) {
- /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
- * 6/14 (core1 "yonah") theoretically support sse2, but it's
- * usually slower than mmx. */
- if (rval & SSE2) != 0 {
- rval |= SSE2SLOW
- }
- if (rval & SSE3) != 0 {
- rval |= SSE3SLOW
- }
- }
- /* The Atom processor has SSSE3 support, which is useful in many cases,
- * but sometimes the SSSE3 version is slower than the SSE2 equivalent
- * on the Atom, but is generally faster on other processors supporting
- * SSSE3. This flag allows for selectively disabling certain SSSE3
- * functions on the Atom. */
- if family == 6 && model == 28 {
- rval |= ATOM
- }
- }
- }
- return Flags(rval)
-}
-
-func valAsString(values ...uint32) []byte {
- r := make([]byte, 4*len(values))
- for i, v := range values {
- dst := r[i*4:]
- dst[0] = byte(v & 0xff)
- dst[1] = byte((v >> 8) & 0xff)
- dst[2] = byte((v >> 16) & 0xff)
- dst[3] = byte((v >> 24) & 0xff)
- switch {
- case dst[0] == 0:
- return r[:i*4]
- case dst[1] == 0:
- return r[:i*4+1]
- case dst[2] == 0:
- return r[:i*4+2]
- case dst[3] == 0:
- return r[:i*4+3]
- }
- }
- return r
-}
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s
deleted file mode 100644
index 4d731711e..000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid_386.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build 386,!gccgo
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORL CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+4(FP)
- MOVL BX, ebx+8(FP)
- MOVL CX, ecx+12(FP)
- MOVL DX, edx+16(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func xgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+4(FP)
- MOVL DX, edx+8(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
deleted file mode 100644
index 3c1d60e42..000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build amd64,!gccgo
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
- XORQ CX, CX
- MOVL op+0(FP), AX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
- MOVL op+0(FP), AX
- MOVL op2+4(FP), CX
- CPUID
- MOVL AX, eax+8(FP)
- MOVL BX, ebx+12(FP)
- MOVL CX, ecx+16(FP)
- MOVL DX, edx+20(FP)
- RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
- MOVL index+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
- MOVL AX, eax+8(FP)
- MOVL DX, edx+12(FP)
- RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
- BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
- MOVL AX, eax+0(FP)
- MOVL BX, ebx+4(FP)
- MOVL CX, ecx+8(FP)
- MOVL DX, edx+12(FP)
- RET
diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go
deleted file mode 100644
index a5f04dd6d..000000000
--- a/vendor/github.com/klauspost/cpuid/detect_intel.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build 386,!gccgo amd64,!gccgo
-
-package cpuid
-
-func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-func asmXgetbv(index uint32) (eax, edx uint32)
-func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-
-func initCPU() {
- cpuid = asmCpuid
- cpuidex = asmCpuidex
- xgetbv = asmXgetbv
- rdtscpAsm = asmRdtscpAsm
-}
diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go
deleted file mode 100644
index 909c5d9a7..000000000
--- a/vendor/github.com/klauspost/cpuid/detect_ref.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// +build !amd64,!386 gccgo
-
-package cpuid
-
-func initCPU() {
- cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-
- cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-
- xgetbv = func(index uint32) (eax, edx uint32) {
- return 0, 0
- }
-
- rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
- return 0, 0, 0, 0
- }
-}
diff --git a/vendor/github.com/klauspost/cpuid/generate.go b/vendor/github.com/klauspost/cpuid/generate.go
deleted file mode 100644
index 90e7a98d2..000000000
--- a/vendor/github.com/klauspost/cpuid/generate.go
+++ /dev/null
@@ -1,4 +0,0 @@
-package cpuid
-
-//go:generate go run private-gen.go
-//go:generate gofmt -w ./private