diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate')
12 files changed, 4848 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/copy.go b/vendor/github.com/klauspost/compress/flate/copy.go new file mode 100644 index 000000000..a3200a8f4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/copy.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// forwardCopy is like the built-in copy function except that it always goes +// forward from the start, even if the dst and src overlap. +// It is equivalent to: +// for i := 0; i < n; i++ { +// mem[dst+i] = mem[src+i] +// } +func forwardCopy(mem []byte, dst, src, n int) { + if dst <= src { + copy(mem[dst:dst+n], mem[src:src+n]) + return + } + for { + if dst >= src+n { + copy(mem[dst:dst+n], mem[src:src+n]) + return + } + // There is some forward overlap. The destination + // will be filled with a repeated pattern of mem[src:src+k]. + // We copy one instance of the pattern here, then repeat. + // Each time around this loop k will double. + k := dst - src + copy(mem[dst:dst+k], mem[src:src+k]) + n -= k + dst += k + } +} diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.go b/vendor/github.com/klauspost/compress/flate/crc32_amd64.go new file mode 100644 index 000000000..8298d309a --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/crc32_amd64.go @@ -0,0 +1,42 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. + +package flate + +import ( + "github.com/klauspost/cpuid" +) + +// crc32sse returns a hash for the first 4 bytes of the slice +// len(a) must be >= 4. +//go:noescape +func crc32sse(a []byte) uint32 + +// crc32sseAll calculates hashes for each 4-byte set in a. +// dst must be east len(a) - 4 in size. +// The size is not checked by the assembly. +//go:noescape +func crc32sseAll(a []byte, dst []uint32) + +// matchLenSSE4 returns the number of matching bytes in a and b +// up to length 'max'. Both slices must be at least 'max' +// bytes in size. +// +// TODO: drop the "SSE4" name, since it doesn't use any SSE instructions. +// +//go:noescape +func matchLenSSE4(a, b []byte, max int) int + +// histogram accumulates a histogram of b in h. +// h must be at least 256 entries in length, +// and must be cleared before calling this function. +//go:noescape +func histogram(b []byte, h []int32) + +// Detect SSE 4.2 feature. +func init() { + useSSE42 = cpuid.CPU.SSE42() +} diff --git a/vendor/github.com/klauspost/compress/flate/crc32_amd64.s b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s new file mode 100644 index 000000000..a79943727 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/crc32_amd64.s @@ -0,0 +1,214 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. + +// func crc32sse(a []byte) uint32 +TEXT ·crc32sse(SB), 4, $0 + MOVQ a+0(FP), R10 + XORQ BX, BX + + // CRC32 dword (R10), EBX + BYTE $0xF2; BYTE $0x41; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0x1a + + MOVL BX, ret+24(FP) + RET + +// func crc32sseAll(a []byte, dst []uint32) +TEXT ·crc32sseAll(SB), 4, $0 + MOVQ a+0(FP), R8 // R8: src + MOVQ a_len+8(FP), R10 // input length + MOVQ dst+24(FP), R9 // R9: dst + SUBQ $4, R10 + JS end + JZ one_crc + MOVQ R10, R13 + SHRQ $2, R10 // len/4 + ANDQ $3, R13 // len&3 + XORQ BX, BX + ADDQ $1, R13 + TESTQ R10, R10 + JZ rem_loop + +crc_loop: + MOVQ (R8), R11 + XORQ BX, BX + XORQ DX, DX + XORQ DI, DI + MOVQ R11, R12 + SHRQ $8, R11 + MOVQ R12, AX + MOVQ R11, CX + SHRQ $16, R12 + SHRQ $16, R11 + MOVQ R12, SI + + // CRC32 EAX, EBX + BYTE $0xF2; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0xd8 + + // CRC32 ECX, EDX + BYTE $0xF2; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0xd1 + + // CRC32 ESI, EDI + BYTE $0xF2; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0xfe + MOVL BX, (R9) + MOVL DX, 4(R9) + MOVL DI, 8(R9) + + XORQ BX, BX + MOVL R11, AX + + // CRC32 EAX, EBX + BYTE $0xF2; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0xd8 + MOVL BX, 12(R9) + + ADDQ $16, R9 + ADDQ $4, R8 + XORQ BX, BX + SUBQ $1, R10 + JNZ crc_loop + +rem_loop: + MOVL (R8), AX + + // CRC32 EAX, EBX + BYTE $0xF2; BYTE $0x0f + BYTE $0x38; BYTE $0xf1; BYTE $0xd8 + + MOVL BX, (R9) + ADDQ $4, R9 + ADDQ $1, R8 + XORQ BX, BX + SUBQ $1, R13 + JNZ rem_loop + +end: + RET + +one_crc: + MOVQ $1, R13 + XORQ BX, BX + JMP rem_loop + +// func matchLenSSE4(a, b []byte, max int) int +TEXT ·matchLenSSE4(SB), 4, $0 + MOVQ a_base+0(FP), SI + MOVQ b_base+24(FP), DI + MOVQ DI, DX + MOVQ max+48(FP), CX + +cmp8: + // As long as we are 8 or more bytes before the end of max, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ CX, $8 + JLT cmp1 + MOVQ (SI), AX + MOVQ (DI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, SI + ADDQ $8, DI + SUBQ $8, CX + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, DI + + // Subtract off &b[0] to convert from &b[ret] to ret, and return. + SUBQ DX, DI + MOVQ DI, ret+56(FP) + RET + +cmp1: + // In the slices' tail, compare 1 byte at a time. + CMPQ CX, $0 + JEQ matchLenEnd + MOVB (SI), AX + MOVB (DI), BX + CMPB AX, BX + JNE matchLenEnd + ADDQ $1, SI + ADDQ $1, DI + SUBQ $1, CX + JMP cmp1 + +matchLenEnd: + // Subtract off &b[0] to convert from &b[ret] to ret, and return. + SUBQ DX, DI + MOVQ DI, ret+56(FP) + RET + +// func histogram(b []byte, h []int32) +TEXT ·histogram(SB), 4, $0 + MOVQ b+0(FP), SI // SI: &b + MOVQ b_len+8(FP), R9 // R9: len(b) + MOVQ h+24(FP), DI // DI: Histogram + MOVQ R9, R8 + SHRQ $3, R8 + JZ hist1 + XORQ R11, R11 + +loop_hist8: + MOVQ (SI), R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + MOVB R10, R11 + INCL (DI)(R11*4) + SHRQ $8, R10 + + INCL (DI)(R10*4) + + ADDQ $8, SI + DECQ R8 + JNZ loop_hist8 + +hist1: + ANDQ $7, R9 + JZ end_hist + XORQ R10, R10 + +loop_hist1: + MOVB (SI), R10 + INCL (DI)(R10*4) + INCQ SI + DECQ R9 + JNZ loop_hist1 + +end_hist: + RET diff --git a/vendor/github.com/klauspost/compress/flate/crc32_noasm.go b/vendor/github.com/klauspost/compress/flate/crc32_noasm.go new file mode 100644 index 000000000..dcf43bd50 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/crc32_noasm.go @@ -0,0 +1,35 @@ +//+build !amd64 noasm appengine gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. + +package flate + +func init() { + useSSE42 = false +} + +// crc32sse should never be called. +func crc32sse(a []byte) uint32 { + panic("no assembler") +} + +// crc32sseAll should never be called. +func crc32sseAll(a []byte, dst []uint32) { + panic("no assembler") +} + +// matchLenSSE4 should never be called. +func matchLenSSE4(a, b []byte, max int) int { + panic("no assembler") + return 0 +} + +// histogram accumulates a histogram of b in h. +// +// len(h) must be >= 256, and h's elements must be all zeroes. +func histogram(b []byte, h []int32) { + h = h[:256] + for _, t := range b { + h[t]++ + } +} diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go new file mode 100644 index 000000000..9e6e7ff0c --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -0,0 +1,1353 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 24 + + skipNever = math.MaxInt32 +) + +var useSSE42 bool + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-4 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + // For levels 5-6 we don't bother trying with lazy matches. + // Lazy matching is at least 30% slower, with 1.5% increase. + {6, 0, 12, 8, 12, 5}, + {8, 0, 24, 16, 16, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 8, 24, 16, skipNever, 7}, + {10, 16, 24, 64, skipNever, 8}, + {32, 258, 258, 4096, skipNever, 9}, +} + +type compressor struct { + compressionLevel + + w *huffmanBitWriter + bulkHasher func([]byte, []uint32) + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + chainHead int + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 + hashOffset int + + // input window: unprocessed data is window[index:windowEnd] + index int + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. + + // queued output tokens + tokens tokens + + // deflate state + length int + offset int + hash uint32 + maxInsertIndex int + err error + ii uint16 // position of last match, intended to overflow to reset. + + snap snappyEnc + hashMatch [maxMatchLength + minMatchLength]uint32 +} + +func (d *compressor) fillDeflate(b []byte) int { + if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window[:], d.window[windowSize:2*windowSize]) + d.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + d.hashOffset += windowSize + if d.hashOffset > maxHashOffset { + delta := d.hashOffset - 1 + d.hashOffset -= delta + d.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range d.hashPrev[:] { + if int(v) > delta { + d.hashPrev[i] = uint32(int(v) - delta) + } else { + d.hashPrev[i] = 0 + } + } + for i, v := range d.hashHead[:] { + if int(v) > delta { + d.hashHead[i] = uint32(int(v) - delta) + } else { + d.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tok.tokens[:tok.n], eof, window) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok.tokens[:tok.n], eof, window) + } + } else { + d.w.writeBlock(tok.tokens[:tok.n], eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only mode, + // use constant or Snappy compression. + switch d.compressionLevel.level { + case 0, 1, 2: + return + } + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + startindex := j * 256 + end := startindex + 256 + minMatchLength - 1 + if end > n { + end = n + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := d.hashMatch[:dstSize] + d.bulkHasher(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = d.hashHead[newH] + // Set the head of the hash chain to us. + d.hashHead[newH] = uint32(di + d.hashOffset) + } + d.hash = newH + } + // Update window information. + d.windowEnd += n + d.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:], wPos, minMatchLook) + + if n > length && (n > minMatchLength || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.hashPrev[i&windowMask]) - d.hashOffset + if i < minIndex || i < 0 { + break + } + } + return +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLenSSE4(win[i:], wPos, minMatchLook) + + if n > length && (n > minMatchLength || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.hashPrev[i&windowMask]) - d.hashOffset + if i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +const hashmul = 0x1e35a7bd + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < minMatchLength { + return + } + hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + dst[0] = (hb * hashmul) >> (32 - hashBits) + end := len(b) - minMatchLength + 1 + for i := 1; i < end; i++ { + hb = (hb << 8) | uint32(b[i+3]) + dst[i] = (hb * hashmul) >> (32 - hashBits) + } +} + +// matchLen returns the number of matching bytes in a and b +// up to length 'max'. Both slices must be at least 'max' +// bytes in size. +func matchLen(a, b []byte, max int) int { + a = a[:max] + b = b[:len(a)] + for i, av := range a { + if b[i] != av { + return i + } + } + return max +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.hashOffset = 1 + d.length = minMatchLength - 1 + d.offset = 0 + d.byteAvailable = false + d.index = 0 + d.hash = 0 + d.chainHead = -1 + d.bulkHasher = bulkHash4 + if useSSE42 { + d.bulkHasher = crc32sseAll + } +} + +// Assumes that d.fastSkipHashing != skipNever, +// otherwise use deflateLazy +func (d *compressor) deflate() { + + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = false + + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + } + + for { + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + if d.tokens.n > 0 { + if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + return + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + ch := d.hashHead[d.hash&hashMask] + d.chainHead = int(ch) + d.hashPrev[d.index&windowMask] = ch + d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset) + } + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if d.length >= minMatchLength { + d.ii = 0 + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + // "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3 + d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)) + d.tokens.n++ + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + if d.length <= d.fastSkipHashing { + var newIndex int + newIndex = d.index + d.length + // Calculate missing hashes + end := newIndex + if end > d.maxInsertIndex { + end = d.maxInsertIndex + } + end += minMatchLength - 1 + startindex := d.index + 1 + if startindex > d.maxInsertIndex { + startindex = d.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := d.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = d.hashHead[newH] + // Set the head of the hash chain to us. + d.hashHead[newH] = uint32(di + d.hashOffset) + } + d.hash = newH + } + d.index = newIndex + } else { + // For matches this long, we don't bother inserting each individual + // item into the table. + d.index += d.length + if d.index < d.maxInsertIndex { + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + } + } + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } else { + d.ii++ + end := d.index + int(d.ii>>uint(d.fastSkipHashing)) + 1 + if end > d.windowEnd { + end = d.windowEnd + } + for i := d.index; i < end; i++ { + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } + d.index = end + } + } +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = false + + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + } + + for { + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + return + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = hash4(d.window[d.index : d.index+minMatchLength]) + ch := d.hashHead[d.hash&hashMask] + d.chainHead = int(ch) + d.hashPrev[d.index&windowMask] = ch + d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset) + } + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if prevLength >= minMatchLength && d.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + d.tokens.n++ + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + var newIndex int + newIndex = d.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > d.maxInsertIndex { + end = d.maxInsertIndex + } + end += minMatchLength - 1 + startindex := d.index + 1 + if startindex > d.maxInsertIndex { + startindex = d.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := d.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = d.hashHead[newH] + // Set the head of the hash chain to us. + d.hashHead[newH] = uint32(di + d.hashOffset) + } + d.hash = newH + } + + d.index = newIndex + d.byteAvailable = false + d.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } else { + // Reset, if we got a match this run. + if d.length >= minMatchLength { + d.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + d.ii++ + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + d.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when d.ii overflows after 64KB. + if d.ii > 31 { + n := int(d.ii >> 5) + for j := 0; j < n; j++ { + if d.index >= d.windowEnd-1 { + break + } + + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + d.index++ + } + // Flush last byte + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + d.byteAvailable = false + // d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } + } else { + d.index++ + d.byteAvailable = true + } + } + } +} + +// Assumes that d.fastSkipHashing != skipNever, +// otherwise use deflateLazySSE +func (d *compressor) deflateSSE() { + + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = false + + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask + } + + for { + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + if d.tokens.n > 0 { + if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + return + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask + ch := d.hashHead[d.hash] + d.chainHead = int(ch) + d.hashPrev[d.index&windowMask] = ch + d.hashHead[d.hash] = uint32(d.index + d.hashOffset) + } + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 { + if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if d.length >= minMatchLength { + d.ii = 0 + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + // "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3 + d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)) + d.tokens.n++ + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + if d.length <= d.fastSkipHashing { + var newIndex int + newIndex = d.index + d.length + // Calculate missing hashes + end := newIndex + if end > d.maxInsertIndex { + end = d.maxInsertIndex + } + end += minMatchLength - 1 + startindex := d.index + 1 + if startindex > d.maxInsertIndex { + startindex = d.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := d.hashMatch[:dstSize] + + crc32sseAll(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = d.hashHead[newH] + // Set the head of the hash chain to us. + d.hashHead[newH] = uint32(di + d.hashOffset) + } + d.hash = newH + } + d.index = newIndex + } else { + // For matches this long, we don't bother inserting each individual + // item into the table. + d.index += d.length + if d.index < d.maxInsertIndex { + d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask + } + } + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } else { + d.ii++ + end := d.index + int(d.ii>>5) + 1 + if end > d.windowEnd { + end = d.windowEnd + } + for i := d.index; i < end; i++ { + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } + d.index = end + } + } +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazySSE() { + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = false + + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask + } + + for { + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + return + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask + ch := d.hashHead[d.hash] + d.chainHead = int(ch) + d.hashPrev[d.index&windowMask] = ch + d.hashHead[d.hash] = uint32(d.index + d.hashOffset) + } + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if prevLength >= minMatchLength && d.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + d.tokens.n++ + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + var newIndex int + newIndex = d.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > d.maxInsertIndex { + end = d.maxInsertIndex + } + end += minMatchLength - 1 + startindex := d.index + 1 + if startindex > d.maxInsertIndex { + startindex = d.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := d.hashMatch[:dstSize] + crc32sseAll(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[di&windowMask] = d.hashHead[newH] + // Set the head of the hash chain to us. + d.hashHead[newH] = uint32(di + d.hashOffset) + } + d.hash = newH + } + + d.index = newIndex + d.byteAvailable = false + d.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } else { + // Reset, if we got a match this run. + if d.length >= minMatchLength { + d.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + d.ii++ + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + d.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when d.ii overflows after 64KB. + if d.ii > 31 { + n := int(d.ii >> 6) + for j := 0; j < n; j++ { + if d.index >= d.windowEnd-1 { + break + } + + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + d.index++ + } + // Flush last byte + d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1])) + d.tokens.n++ + d.byteAvailable = false + // d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens.n = 0 + } + } + } else { + d.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeSnappy() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < maxStoreBlockSize { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.tokens.n = 0 + d.windowEnd = 0 + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + d.err = d.w.err + } + d.tokens.n = 0 + d.windowEnd = 0 + d.snap.Reset() + return + } + } + + d.snap.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if int(d.tokens.n) == d.windowEnd { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd]) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(d.tokens.tokens[:d.tokens.n], false, d.window[:d.windowEnd]) + d.err = d.w.err + } + d.tokens.n = 0 + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level >= 1 && level <= 4: + d.snap = newSnappy(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeSnappy + case level == DefaultCompression: + level = 5 + fallthrough + case 5 <= level && level <= 9: + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + if d.fastSkipHashing == skipNever { + if useSSE42 { + d.step = (*compressor).deflateLazySSE + } else { + d.step = (*compressor).deflateLazy + } + } else { + if useSSE42 { + d.step = (*compressor).deflateSSE + } else { + d.step = (*compressor).deflate + + } + } + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.snap != nil { + d.snap.Reset() + d.windowEnd = 0 + d.tokens.n = 0 + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompresssion. + d.windowEnd = 0 + default: + d.chainHead = -1 + for i := range d.hashHead { + d.hashHead[i] = 0 + } + for i := range d.hashPrev { + d.hashPrev[i] = 0 + } + d.hashOffset = 1 + d.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.n = 0 + d.length = minMatchLength - 1 + d.offset = 0 + d.hash = 0 + d.ii = 0 + d.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + dw := &dictWriter{w} + zw, err := NewWriter(dw, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +type dictWriter struct { + w io.Writer +} + +func (w *dictWriter) Write(b []byte) (n int, err error) { + return w.w.Write(b) +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if dw, ok := w.d.w.writer.(*dictWriter); ok { + // w was created with NewWriterDict + dw.w = dst + w.d.reset(dw) + w.d.fillWindow(w.dict) + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go new file mode 100644 index 000000000..71c75a065 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -0,0 +1,184 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// * Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// * Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..f9b2a699a --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -0,0 +1,701 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 240 + + // bufferSize is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + bufferSize = bufferFlushSize + 8 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = []int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = []uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = []int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = []uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint + bytes [bufferSize]byte + codegenFreq [codegenCodeCount]int32 + nbytes int + literalFreq []int32 + offsetFreq []int32 + codegen []uint8 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error +} + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalFreq: make([]int32, maxNumLit), + offsetFreq: make([]int32, offsetCodeCount), + codegen: make([]uint8, maxNumLit+offsetCodeCount+1), + literalEncoding: newHuffmanEncoder(maxNumLit), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.bytes = [bufferSize]byte{} +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + w.write(w.bytes[:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint) { + if w.err != nil { + return + } + w.bits |= uint64(b) << w.nbits + w.nbits += nb + if w.nbits >= 48 { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + bytes := w.bytes[n : n+6] + bytes[0] = byte(bits) + bytes[1] = byte(bits >> 8) + bytes[2] = byte(bits >> 16) + bytes[3] = byte(bits >> 24) + bytes[4] = byte(bits >> 32) + bytes[5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = uint8(litEnc.codes[i].len) + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = uint8(offEnc.codes[i].len) + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + header := 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7 + size = header + + litEnc.bitLength(w.literalFreq) + + offEnc.bitLength(w.offsetFreq) + + extraBits + + return size, numCodegens +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits +} + +// storedSize calculates the stored size, including header. +// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + if w.err != nil { + return + } + w.bits |= uint64(c.code) << w.nbits + w.nbits += uint(c.len) + if w.nbits >= 48 { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + bytes := w.bytes[n : n+6] + bytes[0] = byte(bits) + bytes[1] = byte(bits >> 8) + bytes[2] = byte(bits >> 16) + bytes[3] = byte(bits >> 24) + bytes[4] = byte(bits >> 32) + bytes[5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n + } +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord int = int(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[uint32(codeWord)]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + break + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + break + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + break + } + } +} + +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. +func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens = append(tokens, endBlockMarker) + numLiterals, numOffsets := w.indexTokens(tokens) + + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + // We only bother calculating the costs of the extra bits required by + // the length of offset fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { + // First eight length codes have extra size = 0. + extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart]) + } + for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { + // First four offset codes have extra size = 0. + extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode]) + } + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = w.fixedSize(extraBits) + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize < size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens = append(tokens, endBlockMarker) + numLiterals, numOffsets := w.indexTokens(tokens) + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0) + + // Store bytes, if we don't get a reasonable improvement. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + + // Write the tokens. + w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes) +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) { + for i := range w.literalFreq { + w.literalFreq[i] = 0 + } + for i := range w.offsetFreq { + w.offsetFreq[i] = 0 + } + + for _, t := range tokens { + if t < matchType { + w.literalFreq[t.literal()]++ + continue + } + length := t.length() + offset := t.offset() + w.literalFreq[lengthCodesStart+lengthCode(length)]++ + w.offsetFreq[offsetCode(offset)]++ + } + + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + return +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + for _, t := range tokens { + if t < matchType { + w.writeCode(leCodes[t.literal()]) + continue + } + // Write the length + length := t.length() + lengthCode := lengthCode(length) + w.writeCode(leCodes[lengthCode+lengthCodesStart]) + extraLengthBits := uint(lengthExtraBits[lengthCode]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + w.writeCode(oeCodes[offsetCode]) + extraOffsetBits := uint(offsetExtraBits[offsetCode]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode]) + w.writeBits(extraOffset, extraOffsetBits) + } + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq, 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq { + w.literalFreq[i] = 0 + } + + // Add everything as literals + histogram(input, w.literalFreq) + + w.literalFreq[endBlockMarker] = 1 + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + + w.literalEncoding.generate(w.literalFreq, 15) + + // Figure out smallest code. + // Always use dynamic Huffman or Store + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0) + + // Store bytes, if we don't get a reasonable improvement. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + encoding := w.literalEncoding.codes[:257] + n := w.nbytes + for _, t := range input { + // Bitwriting inlined, ~30% speedup + c := encoding[t] + w.bits |= uint64(c.code) << w.nbits + w.nbits += uint(c.len) + if w.nbits < 48 { + continue + } + // Store 6 bytes + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + bytes := w.bytes[n : n+6] + bytes[0] = byte(bits) + bytes[1] = byte(bits >> 8) + bytes[2] = byte(bits >> 16) + bytes[3] = byte(bits >> 24) + bytes[4] = byte(bits >> 32) + bytes[5] = byte(bits >> 40) + n += 6 + if n < bufferFlushSize { + continue + } + w.write(w.bytes[:n]) + if w.err != nil { + return // Return early in the event of write failures + } + n = 0 + } + w.nbytes = n + w.writeCode(encoding[endBlockMarker]) +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go new file mode 100644 index 000000000..bdcbd823b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -0,0 +1,344 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "sort" +) + +// hcode is a huffman code with a bit code and bit length. +type hcode struct { + code, len uint16 +} + +type huffmanEncoder struct { + codes []hcode + freqcache []literalNode + bitCount [17]int32 + lns byLiteral // stored to avoid repeated allocation in generate + lfs byFreq // stored to avoid repeated allocation in generate +} + +type literalNode struct { + literal uint16 + freq int32 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint16) { + h.len = length + h.code = code +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + return &huffmanEncoder{codes: make([]hcode, size)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(maxNumLit) + codes := h.codes + var ch uint16 + for ch = 0; ch < maxNumLit; ch++ { + var bits uint16 + var size uint16 + switch { + case ch < 144: + // size 8, 000110000 .. 10111111 + bits = ch + 48 + size = 8 + break + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + break + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + break + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size} + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codes := h.codes + for ch := range codes { + codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5} + } + return h +} + +var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() +var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []int32) int { + var total int + for i, f := range freq { + if f != 0 { + total += int(f) * int(h.codes[i].len) + } + } + return total +} + +const maxBitsLimit = 16 + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: list[1].freq, + nextCharFreq: list[2].freq, + nextPairFreq: list[0].freq + list[1].freq, + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := maxBits + for { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + h.lns.sort(chunk) + for _, node := range chunk { + h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { + if h.freqcache == nil { + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit. + // The largest of these is maxNumLit, so we allocate for that case. + h.freqcache = make([]literalNode, maxNumLit+1) + } + list := h.freqcache[:len(freq)+1] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + list[count] = literalNode{} + h.codes[i].len = 0 + } + } + list[len(freq)] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + h.lfs.sort(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +type byLiteral []literalNode + +func (s *byLiteral) sort(a []literalNode) { + *s = byLiteral(a) + sort.Sort(s) +} + +func (s byLiteral) Len() int { return len(s) } + +func (s byLiteral) Less(i, j int) bool { + return s[i].literal < s[j].literal +} + +func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +type byFreq []literalNode + +func (s *byFreq) sort(a []literalNode) { + *s = byFreq(a) + sort.Sort(s) +} + +func (s byFreq) Len() int { return len(s) } + +func (s byFreq) Less(i, j int) bool { + if s[i].freq == s[j].freq { + return s[i].literal < s[j].literal + } + return s[i].freq < s[j].freq +} + +func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go new file mode 100644 index 000000000..800d0ce9e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -0,0 +1,880 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "io" + "math/bits" + "strconv" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code +) + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks *[huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.chunks == nil { + h.chunks = &[huffmanNumChunks]uint32{} + } + if h.min != 0 { + *h = huffmanDecoder{chunks: h.chunks, links: h.links} + } + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n&maxCodeLenMask]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i&maxCodeLenMask] = code + code += count[i&maxCodeLenMask] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1<<uint(max) && !(code == 1 && max == 1) { + return false + } + + h.min = min + chunks := h.chunks[:] + for i := range chunks { + chunks[i] = 0 + } + + if max > huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + if cap(h.links) < huffmanNumChunks-link { + h.links = make([][]uint32, huffmanNumChunks-link) + } else { + h.links = h.links[:huffmanNumChunks-link] + } + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1)) + if cap(h.links[off]) < numLinks { + h.links[off] = make([]uint32, numLinks) + } else { + links := h.links[off][:0] + h.links[off] = links[:numLinks] + } + } + } else { + h.links = h.links[:0] + } + + for i, n := range lengths { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint32(i<<huffmanValueShift | n) + reverse := int(bits.Reverse16(uint16(code))) + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + final bool + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (f *decompressor) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + flushed := false + for { + if len(f.toRead) > 0 { + n, err := w.Write(f.toRead) + total += int64(n) + if err != nil { + f.err = err + return total, err + } + if n != len(f.toRead) { + return total, io.ErrShortWrite + } + f.toRead = f.toRead[:0] + } + if f.err != nil && flushed { + if f.err == io.EOF { + return total, nil + } + return total, f.err + } + if f.err == nil { + f.step(f) + } + if len(f.toRead) == 0 && f.err != nil && !flushed { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + flushed = true + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the min bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.min < f.bits[endBlockMarker] { + f.h1.min = f.bits[endBlockMarker] + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.min) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/vendor/github.com/klauspost/compress/flate/reverse_bits.go b/vendor/github.com/klauspost/compress/flate/reverse_bits.go new file mode 100644 index 000000000..c1a02720d --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/reverse_bits.go @@ -0,0 +1,48 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +var reverseByte = [256]byte{ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +} + +func reverseUint16(v uint16) uint16 { + return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8 +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return reverseUint16(number << uint8(16-bitLength)) +} diff --git a/vendor/github.com/klauspost/compress/flate/snappy.go b/vendor/github.com/klauspost/compress/flate/snappy.go new file mode 100644 index 000000000..d853320a7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/snappy.go @@ -0,0 +1,900 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + ol := int(dst.n) + for i, v := range lit { + dst.tokens[(i+ol)&maxStoreBlockSize] = token(v) + } + dst.n += uint16(len(lit)) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +func emitCopy(dst *tokens, offset, length int) { + dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize)) + dst.n++ +} + +type snappyEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newSnappy(level int) snappyEnc { + switch level { + case 1: + return &snappyL1{} + case 2: + return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}} + case 3: + return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}} + case 4: + return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 14 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset +) + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func hash(u uint32) uint32 { + return (u * 0x1e35a7bd) >> tableShift +} + +// snappyL1 encapsulates level 1 compression +type snappyL1 struct{} + +func (e *snappyL1) Reset() {} + +func (e *snappyL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 16 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Initialize the hash table. + // + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535. + var table [tableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s)) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS)) + if s-candidate <= maxMatchOffset && load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + emitLiteral(dst, src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. + // + // This is an inlined version of Snappy's: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + s1 := base + maxMatchLength + if s1 > len(src) { + s1 = len(src) + } + a := src[s:s1] + b := src[candidate+4:] + b = b[:len(a)] + l := len(a) + for i := range a { + if a[i] != b[i] { + l = i + break + } + } + s += l + + // matchToken is flate's equivalent of Snappy's emitCopy. + dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)) + dst.n++ + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x >> 0)) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x >> 8)) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if s-candidate > maxMatchOffset || uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x >> 16)) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + emitLiteral(dst, src[nextEmit:]) + } +} + +type tableEntry struct { + val uint32 + offset int32 +} + +func load3232(b []byte, i int32) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6432(b []byte, i int32) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// snappyGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type snappyGen struct { + prev []byte + cur int32 +} + +// snappyGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type snappyL2 struct { + snappyGen + table [tableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *snappyL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + // Protect against e.cur wraparound. + if e.cur > 1<<30 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxStoreBlockSize + } + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + e.cur += maxStoreBlockSize + e.prev = e.prev[:0] + return + } + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := int32(0) + s := int32(0) + cv := load3232(src, s) + nextHash := hash(cv) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := int32(32) + + nextS := s + var candidate tableEntry + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash&tableMask] + now := load3232(src, nextS) + e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv} + nextHash = hash(now) + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || cv != candidate.val { + // Out of range or not matched. + cv = now + continue + } + break + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + emitLiteral(dst, src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + s += 4 + t := candidate.offset - e.cur + 4 + l := e.matchlen(s, t, src) + + // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset) + dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)) + dst.n++ + s += l + nextEmit = s + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + e.table[hash(cv)&tableMask] = tableEntry{offset: t + e.cur, val: cv} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-1) + prevHash := hash(uint32(x)) + e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)} + x >>= 8 + currHash := hash(uint32(x)) + candidate = e.table[currHash&tableMask] + e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x) != candidate.val { + cv = uint32(x >> 8) + nextHash = hash(cv) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + emitLiteral(dst, src[nextEmit:]) + } + e.cur += int32(len(src)) + e.prev = e.prev[:len(src)] + copy(e.prev, src) +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// snappyL3 +type snappyL3 struct { + snappyGen + table [tableSize]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *snappyL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + // Protect against e.cur wraparound. + if e.cur > 1<<30 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]} + } + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + e.cur += maxStoreBlockSize + e.prev = e.prev[:0] + return + } + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := int32(0) + s := int32(0) + cv := load3232(src, s) + nextHash := hash(cv) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := int32(32) + + nextS := s + var candidate tableEntry + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash&tableMask] + now := load3232(src, nextS) + e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + nextHash = hash(now) + + // Check both candidates + candidate = candidates.Cur + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + break + } + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + break + } + } + } + cv = now + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + emitLiteral(dst, src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + s += 4 + t := candidate.offset - e.cur + 4 + l := e.matchlen(s, t, src) + + // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset) + dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)) + dst.n++ + s += l + nextEmit = s + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash = hash(cv) + e.table[nextHash&tableMask] = tableEntryPrev{ + Prev: e.table[nextHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + t, val: cv}, + } + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-3 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-3) + prevHash := hash(uint32(x)) + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + } + x >>= 8 + currHash := hash(uint32(x)) + candidates := e.table[currHash&tableMask] + cv = uint32(x) + e.table[currHash&tableMask] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur, val: cv}, + } + + // Check both candidates + candidate = candidates.Cur + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } + } + cv = uint32(x >> 8) + nextHash = hash(cv) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + emitLiteral(dst, src[nextEmit:]) + } + e.cur += int32(len(src)) + e.prev = e.prev[:len(src)] + copy(e.prev, src) +} + +// snappyL4 +type snappyL4 struct { + snappyL3 +} + +// Encode uses a similar algorithm to level 3, +// but will check up to two candidates if first isn't long enough. +func (e *snappyL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 3 + minNonLiteralBlockSize = 1 + 1 + inputMargin + matchLenGood = 12 + ) + + // Protect against e.cur wraparound. + if e.cur > 1<<30 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.snappyGen = snappyGen{cur: maxStoreBlockSize, prev: e.prev[:0]} + } + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + e.cur += maxStoreBlockSize + e.prev = e.prev[:0] + return + } + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := int32(0) + s := int32(0) + cv := load3232(src, s) + nextHash := hash(cv) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := int32(32) + + nextS := s + var candidate tableEntry + var candidateAlt tableEntry + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash&tableMask] + now := load3232(src, nextS) + e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + nextHash = hash(now) + + // Check both candidates + candidate = candidates.Cur + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset { + offset = s - (candidates.Prev.offset - e.cur) + if cv == candidates.Prev.val && offset < maxMatchOffset { + candidateAlt = candidates.Prev + } + break + } + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset { + break + } + } + } + cv = now + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + emitLiteral(dst, src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + s += 4 + t := candidate.offset - e.cur + 4 + l := e.matchlen(s, t, src) + // Try alternative candidate if match length < matchLenGood. + if l < matchLenGood-4 && candidateAlt.offset != 0 { + t2 := candidateAlt.offset - e.cur + 4 + l2 := e.matchlen(s, t2, src) + if l2 > l { + l = l2 + t = t2 + } + } + // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset) + dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)) + dst.n++ + s += l + nextEmit = s + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash = hash(cv) + e.table[nextHash&tableMask] = tableEntryPrev{ + Prev: e.table[nextHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + t, val: cv}, + } + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-3 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-3) + prevHash := hash(uint32(x)) + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash&tableMask] = tableEntryPrev{ + Prev: e.table[prevHash&tableMask].Cur, + Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + } + x >>= 8 + currHash := hash(uint32(x)) + candidates := e.table[currHash&tableMask] + cv = uint32(x) + e.table[currHash&tableMask] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur, val: cv}, + } + + // Check both candidates + candidate = candidates.Cur + candidateAlt = tableEntry{} + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + offset = s - (candidates.Prev.offset - e.cur) + if cv == candidates.Prev.val && offset <= maxMatchOffset { + candidateAlt = candidates.Prev + } + continue + } + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if cv == candidate.val { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } + } + cv = uint32(x >> 8) + nextHash = hash(cv) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + emitLiteral(dst, src[nextEmit:]) + } + e.cur += int32(len(src)) + e.prev = e.prev[:len(src)] + copy(e.prev, src) +} + +func (e *snappyGen) matchlen(s, t int32, src []byte) int32 { + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // If we are inside the current block + if t >= 0 { + b := src[t:] + a := src[s:s1] + b = b[:len(a)] + // Extend the match to be as long as possible. + for i := range a { + if a[i] != b[i] { + return int32(i) + } + } + return int32(len(a)) + } + + // We found a match in the previous block. + tp := int32(len(e.prev)) + t + if tp < 0 { + return 0 + } + + // Extend the match to be as long as possible. + a := src[s:s1] + b := e.prev[tp:] + if len(b) > len(a) { + b = b[:len(a)] + } + a = a[:len(b)] + for i := range b { + if a[i] != b[i] { + return int32(i) + } + } + + // If we reached our limit, we matched everything we are + // allowed to in the previous block and we return. + n := int32(len(b)) + if int(s+n) == s1 { + return n + } + + // Continue looking for more matches in the current block. + a = src[s+n : s1] + b = src[:len(a)] + for i := range a { + if a[i] != b[i] { + return int32(i) + n + } + } + return int32(len(a)) + n +} + +// Reset the encoding table. +func (e *snappyGen) Reset() { + e.prev = e.prev[:0] + e.cur += maxMatchOffset +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go new file mode 100644 index 000000000..4f275ea61 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -0,0 +1,115 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import "fmt" + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [...]uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +var offsetCodes = [...]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +type token uint32 + +type tokens struct { + tokens [maxStoreBlockSize + 1]token + n uint16 // Must be able to contain maxStoreBlockSize +} + +// Convert a literal into a literal token. +func literalToken(literal uint32) token { return token(literalType + literal) } + +// Convert a < xlength, xoffset > pair into a match token. +func matchToken(xlength uint32, xoffset uint32) token { + return token(matchType + xlength<<lengthShift + xoffset) +} + +func matchTokend(xlength uint32, xoffset uint32) token { + if xlength > maxMatchLength || xoffset > maxMatchOffset { + panic(fmt.Sprintf("Invalid match: len: %d, offset: %d\n", xlength, xoffset)) + return token(matchType) + } + return token(matchType + xlength<<lengthShift + xoffset) +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint32 { return uint32(t - literalType) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) } + +func lengthCode(len uint32) uint32 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[off>>7] + 14 + } else { + return offsetCodes[off>>14] + 28 + } +} |