6 files changed, 384 insertions, 373 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/copy.go b/vendor/github.com/klauspost/compress/flate/copy.go
deleted file mode 100644
index a3200a8f4..000000000
--- a/vendor/github.com/klauspost/compress/flate/copy.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-// forwardCopy is like the built-in copy function except that it always goes
-// forward from the start, even if the dst and src overlap.
-// It is equivalent to:
-//   for i := 0; i < n; i++ {
-//     mem[dst+i] = mem[src+i]
-//   }
-func forwardCopy(mem []byte, dst, src, n int) {
-	if dst <= src {
-		copy(mem[dst:dst+n], mem[src:src+n])
-		return
-	}
-	for {
-		if dst >= src+n {
-			copy(mem[dst:dst+n], mem[src:src+n])
-			return
-		}
-		// There is some forward overlap.  The destination
-		// will be filled with a repeated pattern of mem[src:src+k].
-		// We copy one instance of the pattern here, then repeat.
-		// Each time around this loop k will double.
-		k := dst - src
-		copy(mem[dst:dst+k], mem[src:src+k])
-		n -= k
-		dst += k
-	}
-}
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 9e6e7ff0c..628795120 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -77,16 +77,14 @@ var levels = []compressionLevel{
 	{32, 258, 258, 4096, skipNever, 9},
 }
 
-type compressor struct {
-	compressionLevel
-
-	w          *huffmanBitWriter
-	bulkHasher func([]byte, []uint32)
-
-	// compression algorithm
-	fill func(*compressor, []byte) int // copy data to window
-	step func(*compressor)             // process window
-	sync bool                          // requesting flush
+// advancedState contains state for the advanced levels, with bigger hash tables, etc.
+type advancedState struct {
+	// deflate state
+	length         int
+	offset         int
+	hash           uint32
+	maxInsertIndex int
+	ii             uint16 // position of last match, intended to overflow to reset.
 
 	// Input hash chains
 	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
@@ -99,57 +97,64 @@ type compressor struct {
 	hashOffset int
 
 	// input window: unprocessed data is window[index:windowEnd]
-	index         int
+	index      int
+	bulkHasher func([]byte, []uint32)
+	hashMatch  [maxMatchLength + minMatchLength]uint32
+}
+
+type compressor struct {
+	compressionLevel
+
+	w *huffmanBitWriter
+
+	// compression algorithm
+	fill func(*compressor, []byte) int // copy data to window
+	step func(*compressor)             // process window
+	sync bool                          // requesting flush
+
 	window        []byte
 	windowEnd     int
 	blockStart    int  // window index where current tokens start
 	byteAvailable bool // if true, still need to process window[index-1].
+	err           error
 
 	// queued output tokens
 	tokens tokens
-
-	// deflate state
-	length         int
-	offset         int
-	hash           uint32
-	maxInsertIndex int
-	err            error
-	ii             uint16 // position of last match, intended to overflow to reset.
-
-	snap      snappyEnc
-	hashMatch [maxMatchLength + minMatchLength]uint32
+	snap   fastEnc
+	state  *advancedState
 }
 
 func (d *compressor) fillDeflate(b []byte) int {
-	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
+	s := d.state
+	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
 		// shift the window by windowSize
 		copy(d.window[:], d.window[windowSize:2*windowSize])
-		d.index -= windowSize
+		s.index -= windowSize
 		d.windowEnd -= windowSize
 		if d.blockStart >= windowSize {
 			d.blockStart -= windowSize
 		} else {
 			d.blockStart = math.MaxInt32
 		}
-		d.hashOffset += windowSize
-		if d.hashOffset > maxHashOffset {
-			delta := d.hashOffset - 1
-			d.hashOffset -= delta
-			d.chainHead -= delta
+		s.hashOffset += windowSize
+		if s.hashOffset > maxHashOffset {
+			delta := s.hashOffset - 1
+			s.hashOffset -= delta
+			s.chainHead -= delta
 			// Iterate over slices instead of arrays to avoid copying
 			// the entire table onto the stack (Issue #18625).
-			for i, v := range d.hashPrev[:] {
+			for i, v := range s.hashPrev[:] {
 				if int(v) > delta {
-					d.hashPrev[i] = uint32(int(v) - delta)
+					s.hashPrev[i] = uint32(int(v) - delta)
 				} else {
-					d.hashPrev[i] = 0
+					s.hashPrev[i] = 0
 				}
 			}
-			for i, v := range d.hashHead[:] {
+			for i, v := range s.hashHead[:] {
 				if int(v) > delta {
-					d.hashHead[i] = uint32(int(v) - delta)
+					s.hashHead[i] = uint32(int(v) - delta)
 				} else {
-					d.hashHead[i] = 0
+					s.hashHead[i] = 0
 				}
 			}
 		}
@@ -207,6 +212,7 @@ func (d *compressor) fillWindow(b []byte) {
 	case 0, 1, 2:
 		return
 	}
+	s := d.state
 	// If we are given too much, cut it.
 	if len(b) > windowSize {
 		b = b[len(b)-windowSize:]
@@ -229,28 +235,28 @@ func (d *compressor) fillWindow(b []byte) {
 			continue
 		}
 
-		dst := d.hashMatch[:dstSize]
-		d.bulkHasher(tocheck, dst)
+		dst := s.hashMatch[:dstSize]
+		s.bulkHasher(tocheck, dst)
 		var newH uint32
 		for i, val := range dst {
 			di := i + startindex
 			newH = val & hashMask
 			// Get previous value with the same hash.
 			// Our chain should point to the previous value.
-			d.hashPrev[di&windowMask] = d.hashHead[newH]
+			s.hashPrev[di&windowMask] = s.hashHead[newH]
 			// Set the head of the hash chain to us.
-			d.hashHead[newH] = uint32(di + d.hashOffset)
+			s.hashHead[newH] = uint32(di + s.hashOffset)
 		}
-		d.hash = newH
+		s.hash = newH
 	}
 	// Update window information.
 	d.windowEnd += n
-	d.index = n
+	s.index = n
 }
 
 // Try to find a match starting at index whose length is greater than prevSize.
 // We only look at chainCount possibilities before giving up.
-// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
+// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
 	minMatchLook := maxMatchLength
 	if lookahead < minMatchLook {
@@ -295,7 +301,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 			// hashPrev[i & windowMask] has already been overwritten, so stop now.
 			break
 		}
-		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
 		if i < minIndex || i < 0 {
 			break
 		}
@@ -305,7 +311,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 
 // Try to find a match starting at index whose length is greater than prevSize.
 // We only look at chainCount possibilities before giving up.
-// pos = d.index, prevHead = d.chainHead-d.hashOffset, prevLength=minMatchLength-1, lookahead
+// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
 func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
 	minMatchLook := maxMatchLength
 	if lookahead < minMatchLook {
@@ -350,7 +356,7 @@ func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahe
 			// hashPrev[i & windowMask] has already been overwritten, so stop now.
 			break
 		}
-		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
 		if i < minIndex || i < 0 {
 			break
 		}
@@ -406,52 +412,57 @@ func matchLen(a, b []byte, max int) int {
 
 func (d *compressor) initDeflate() {
 	d.window = make([]byte, 2*windowSize)
-	d.hashOffset = 1
-	d.length = minMatchLength - 1
-	d.offset = 0
 	d.byteAvailable = false
-	d.index = 0
-	d.hash = 0
-	d.chainHead = -1
-	d.bulkHasher = bulkHash4
+	d.err = nil
+	if d.state == nil {
+		return
+	}
+	s := d.state
+	s.index = 0
+	s.hashOffset = 1
+	s.length = minMatchLength - 1
+	s.offset = 0
+	s.hash = 0
+	s.chainHead = -1
+	s.bulkHasher = bulkHash4
 	if useSSE42 {
-		d.bulkHasher = crc32sseAll
+		s.bulkHasher = crc32sseAll
 	}
 }
 
 // Assumes that d.fastSkipHashing != skipNever,
 // otherwise use deflateLazy
 func (d *compressor) deflate() {
-
+	s := d.state
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
 	const sanity = false
 
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
 	}
 
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if s.index < s.maxInsertIndex {
+		s.hash = hash4(d.window[s.index : s.index+minMatchLength])
 	}
 
 	for {
-		if sanity && d.index > d.windowEnd {
+		if sanity && s.index > d.windowEnd {
 			panic("index > windowEnd")
 		}
-		lookahead := d.windowEnd - d.index
+		lookahead := d.windowEnd - s.index
 		if lookahead < minMatchLength+maxMatchLength {
 			if !d.sync {
 				return
 			}
-			if sanity && d.index > d.windowEnd {
+			if sanity && s.index > d.windowEnd {
 				panic("index > windowEnd")
 			}
 			if lookahead == 0 {
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
@@ -459,55 +470,55 @@ func (d *compressor) deflate() {
 				return
 			}
 		}
-		if d.index < d.maxInsertIndex {
+		if s.index < s.maxInsertIndex {
 			// Update the hash
-			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-			ch := d.hashHead[d.hash&hashMask]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
+			s.hash = hash4(d.window[s.index : s.index+minMatchLength])
+			ch := s.hashHead[s.hash&hashMask]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
 		}
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
 		if minIndex < 0 {
 			minIndex = 0
 		}
 
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
+			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
 			}
 		}
-		if d.length >= minMatchLength {
-			d.ii = 0
+		if s.length >= minMatchLength {
+			s.ii = 0
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
+			// "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
 			d.tokens.n++
 			// Insert in the hash table all strings up to the end of the match.
 			// index and index-1 are already inserted. If there is not enough
 			// lookahead, the last two strings are not inserted into the hash
 			// table.
-			if d.length <= d.fastSkipHashing {
+			if s.length <= d.fastSkipHashing {
 				var newIndex int
-				newIndex = d.index + d.length
+				newIndex = s.index + s.length
 				// Calculate missing hashes
 				end := newIndex
-				if end > d.maxInsertIndex {
-					end = d.maxInsertIndex
+				if end > s.maxInsertIndex {
+					end = s.maxInsertIndex
 				}
 				end += minMatchLength - 1
-				startindex := d.index + 1
-				if startindex > d.maxInsertIndex {
-					startindex = d.maxInsertIndex
+				startindex := s.index + 1
+				if startindex > s.maxInsertIndex {
+					startindex = s.maxInsertIndex
 				}
 				tocheck := d.window[startindex:end]
 				dstSize := len(tocheck) - minMatchLength + 1
 				if dstSize > 0 {
-					dst := d.hashMatch[:dstSize]
+					dst := s.hashMatch[:dstSize]
 					bulkHash4(tocheck, dst)
 					var newH uint32
 					for i, val := range dst {
@@ -515,35 +526,35 @@ func (d *compressor) deflate() {
 						newH = val & hashMask
 						// Get previous value with the same hash.
 						// Our chain should point to the previous value.
-						d.hashPrev[di&windowMask] = d.hashHead[newH]
+						s.hashPrev[di&windowMask] = s.hashHead[newH]
 						// Set the head of the hash chain to us.
-						d.hashHead[newH] = uint32(di + d.hashOffset)
+						s.hashHead[newH] = uint32(di + s.hashOffset)
 					}
-					d.hash = newH
+					s.hash = newH
 				}
-				d.index = newIndex
+				s.index = newIndex
 			} else {
 				// For matches this long, we don't bother inserting each individual
 				// item into the table.
-				d.index += d.length
-				if d.index < d.maxInsertIndex {
-					d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+				s.index += s.length
+				if s.index < s.maxInsertIndex {
+					s.hash = hash4(d.window[s.index : s.index+minMatchLength])
 				}
 			}
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+				if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
 					return
 				}
 				d.tokens.n = 0
 			}
 		} else {
-			d.ii++
-			end := d.index + int(d.ii>>uint(d.fastSkipHashing)) + 1
+			s.ii++
+			end := s.index + int(s.ii>>uint(d.fastSkipHashing)) + 1
 			if end > d.windowEnd {
 				end = d.windowEnd
 			}
-			for i := d.index; i < end; i++ {
+			for i := s.index; i < end; i++ {
 				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
 				d.tokens.n++
 				if d.tokens.n == maxFlateBlockTokens {
@@ -553,7 +564,7 @@ func (d *compressor) deflate() {
 					d.tokens.n = 0
 				}
 			}
-			d.index = end
+			s.index = end
 		}
 	}
 }
@@ -561,42 +572,43 @@ func (d *compressor) deflate() {
 // deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
 // meaning it always has lazy matching on.
 func (d *compressor) deflateLazy() {
+	s := d.state
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
 	const sanity = false
 
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
 	}
 
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if s.index < s.maxInsertIndex {
+		s.hash = hash4(d.window[s.index : s.index+minMatchLength])
 	}
 
 	for {
-		if sanity && d.index > d.windowEnd {
+		if sanity && s.index > d.windowEnd {
 			panic("index > windowEnd")
 		}
-		lookahead := d.windowEnd - d.index
+		lookahead := d.windowEnd - s.index
 		if lookahead < minMatchLength+maxMatchLength {
 			if !d.sync {
 				return
 			}
-			if sanity && d.index > d.windowEnd {
+			if sanity && s.index > d.windowEnd {
 				panic("index > windowEnd")
 			}
 			if lookahead == 0 {
 				// Flush current output block if any.
 				if d.byteAvailable {
 					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 					d.tokens.n++
 					d.byteAvailable = false
 				}
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
@@ -604,30 +616,30 @@ func (d *compressor) deflateLazy() {
 				return
 			}
 		}
-		if d.index < d.maxInsertIndex {
+		if s.index < s.maxInsertIndex {
 			// Update the hash
-			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-			ch := d.hashHead[d.hash&hashMask]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash&hashMask] = uint32(d.index + d.hashOffset)
+			s.hash = hash4(d.window[s.index : s.index+minMatchLength])
+			ch := s.hashHead[s.hash&hashMask]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
 		}
-		prevLength := d.length
-		prevOffset := d.offset
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
+		prevLength := s.length
+		prevOffset := s.offset
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
 		if minIndex < 0 {
 			minIndex = 0
 		}
 
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
-			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
 			}
 		}
-		if prevLength >= minMatchLength && d.length <= prevLength {
+		if prevLength >= minMatchLength && s.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
 			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
@@ -638,21 +650,21 @@ func (d *compressor) deflateLazy() {
 			// lookahead, the last two strings are not inserted into the hash
 			// table.
 			var newIndex int
-			newIndex = d.index + prevLength - 1
+			newIndex = s.index + prevLength - 1
 			// Calculate missing hashes
 			end := newIndex
-			if end > d.maxInsertIndex {
-				end = d.maxInsertIndex
+			if end > s.maxInsertIndex {
+				end = s.maxInsertIndex
 			}
 			end += minMatchLength - 1
-			startindex := d.index + 1
-			if startindex > d.maxInsertIndex {
-				startindex = d.maxInsertIndex
+			startindex := s.index + 1
+			if startindex > s.maxInsertIndex {
+				startindex = s.maxInsertIndex
 			}
 			tocheck := d.window[startindex:end]
 			dstSize := len(tocheck) - minMatchLength + 1
 			if dstSize > 0 {
-				dst := d.hashMatch[:dstSize]
+				dst := s.hashMatch[:dstSize]
 				bulkHash4(tocheck, dst)
 				var newH uint32
 				for i, val := range dst {
@@ -660,74 +672,74 @@ func (d *compressor) deflateLazy() {
 					newH = val & hashMask
 					// Get previous value with the same hash.
 					// Our chain should point to the previous value.
-					d.hashPrev[di&windowMask] = d.hashHead[newH]
+					s.hashPrev[di&windowMask] = s.hashHead[newH]
 					// Set the head of the hash chain to us.
-					d.hashHead[newH] = uint32(di + d.hashOffset)
+					s.hashHead[newH] = uint32(di + s.hashOffset)
 				}
-				d.hash = newH
+				s.hash = newH
 			}
 
-			d.index = newIndex
+			s.index = newIndex
 			d.byteAvailable = false
-			d.length = minMatchLength - 1
+			s.length = minMatchLength - 1
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+				if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 					return
 				}
 				d.tokens.n = 0
 			}
 		} else {
 			// Reset, if we got a match this run.
-			if d.length >= minMatchLength {
-				d.ii = 0
+			if s.length >= minMatchLength {
+				s.ii = 0
 			}
 			// We have a byte waiting. Emit it.
 			if d.byteAvailable {
-				d.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+				s.ii++
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 				d.tokens.n++
 				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
 				}
-				d.index++
+				s.index++
 
 				// If we have a long run of no matches, skip additional bytes
-				// Resets when d.ii overflows after 64KB.
-				if d.ii > 31 {
-					n := int(d.ii >> 5)
+				// Resets when s.ii overflows after 64KB.
+				if s.ii > 31 {
+					n := int(s.ii >> 5)
 					for j := 0; j < n; j++ {
-						if d.index >= d.windowEnd-1 {
+						if s.index >= d.windowEnd-1 {
 							break
 						}
 
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 						d.tokens.n++
 						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+							if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 								return
 							}
 							d.tokens.n = 0
 						}
-						d.index++
+						s.index++
 					}
 					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 					d.tokens.n++
 					d.byteAvailable = false
-					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
+					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
 					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 							return
 						}
 						d.tokens.n = 0
 					}
 				}
 			} else {
-				d.index++
+				s.index++
 				d.byteAvailable = true
 			}
 		}
@@ -737,36 +749,36 @@ func (d *compressor) deflateLazy() {
 // Assumes that d.fastSkipHashing != skipNever,
 // otherwise use deflateLazySSE
 func (d *compressor) deflateSSE() {
-
+	s := d.state
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
 	const sanity = false
 
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
 	}
 
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if s.index < s.maxInsertIndex {
+		s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
 	}
 
 	for {
-		if sanity && d.index > d.windowEnd {
+		if sanity && s.index > d.windowEnd {
 			panic("index > windowEnd")
 		}
-		lookahead := d.windowEnd - d.index
+		lookahead := d.windowEnd - s.index
 		if lookahead < minMatchLength+maxMatchLength {
 			if !d.sync {
 				return
 			}
-			if sanity && d.index > d.windowEnd {
+			if sanity && s.index > d.windowEnd {
 				panic("index > windowEnd")
 			}
 			if lookahead == 0 {
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
@@ -774,55 +786,55 @@ func (d *compressor) deflateSSE() {
 				return
 			}
 		}
-		if d.index < d.maxInsertIndex {
+		if s.index < s.maxInsertIndex {
 			// Update the hash
-			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-			ch := d.hashHead[d.hash]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+			s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
+			ch := s.hashHead[s.hash]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
 		}
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
 		if minIndex < 0 {
 			minIndex = 0
 		}
 
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
+			if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
 			}
 		}
-		if d.length >= minMatchLength {
-			d.ii = 0
+		if s.length >= minMatchLength {
+			s.ii = 0
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			// "d.length-3" should NOT be "d.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize))
+			// "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
+			d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
 			d.tokens.n++
 			// Insert in the hash table all strings up to the end of the match.
 			// index and index-1 are already inserted. If there is not enough
 			// lookahead, the last two strings are not inserted into the hash
 			// table.
-			if d.length <= d.fastSkipHashing {
+			if s.length <= d.fastSkipHashing {
 				var newIndex int
-				newIndex = d.index + d.length
+				newIndex = s.index + s.length
 				// Calculate missing hashes
 				end := newIndex
-				if end > d.maxInsertIndex {
-					end = d.maxInsertIndex
+				if end > s.maxInsertIndex {
+					end = s.maxInsertIndex
 				}
 				end += minMatchLength - 1
-				startindex := d.index + 1
-				if startindex > d.maxInsertIndex {
-					startindex = d.maxInsertIndex
+				startindex := s.index + 1
+				if startindex > s.maxInsertIndex {
+					startindex = s.maxInsertIndex
 				}
 				tocheck := d.window[startindex:end]
 				dstSize := len(tocheck) - minMatchLength + 1
 				if dstSize > 0 {
-					dst := d.hashMatch[:dstSize]
+					dst := s.hashMatch[:dstSize]
 
 					crc32sseAll(tocheck, dst)
 					var newH uint32
@@ -831,35 +843,35 @@ func (d *compressor) deflateSSE() {
 						newH = val & hashMask
 						// Get previous value with the same hash.
 						// Our chain should point to the previous value.
-						d.hashPrev[di&windowMask] = d.hashHead[newH]
+						s.hashPrev[di&windowMask] = s.hashHead[newH]
 						// Set the head of the hash chain to us.
-						d.hashHead[newH] = uint32(di + d.hashOffset)
+						s.hashHead[newH] = uint32(di + s.hashOffset)
 					}
-					d.hash = newH
+					s.hash = newH
 				}
-				d.index = newIndex
+				s.index = newIndex
 			} else {
 				// For matches this long, we don't bother inserting each individual
 				// item into the table.
-				d.index += d.length
-				if d.index < d.maxInsertIndex {
-					d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+				s.index += s.length
+				if s.index < s.maxInsertIndex {
+					s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
 				}
 			}
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, d.index, false); d.err != nil {
+				if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
 					return
 				}
 				d.tokens.n = 0
 			}
 		} else {
-			d.ii++
-			end := d.index + int(d.ii>>5) + 1
+			s.ii++
+			end := s.index + int(s.ii>>5) + 1
 			if end > d.windowEnd {
 				end = d.windowEnd
 			}
-			for i := d.index; i < end; i++ {
+			for i := s.index; i < end; i++ {
 				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
 				d.tokens.n++
 				if d.tokens.n == maxFlateBlockTokens {
@@ -869,7 +881,7 @@ func (d *compressor) deflateSSE() {
 					d.tokens.n = 0
 				}
 			}
-			d.index = end
+			s.index = end
 		}
 	}
 }
@@ -877,42 +889,43 @@ func (d *compressor) deflateSSE() {
 // deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
 // meaning it always has lazy matching on.
 func (d *compressor) deflateLazySSE() {
+	s := d.state
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
 	const sanity = false
 
-	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
 	}
 
-	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if d.index < d.maxInsertIndex {
-		d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
+	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+	if s.index < s.maxInsertIndex {
+		s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
 	}
 
 	for {
-		if sanity && d.index > d.windowEnd {
+		if sanity && s.index > d.windowEnd {
 			panic("index > windowEnd")
 		}
-		lookahead := d.windowEnd - d.index
+		lookahead := d.windowEnd - s.index
 		if lookahead < minMatchLength+maxMatchLength {
 			if !d.sync {
 				return
 			}
-			if sanity && d.index > d.windowEnd {
+			if sanity && s.index > d.windowEnd {
 				panic("index > windowEnd")
 			}
 			if lookahead == 0 {
 				// Flush current output block if any.
 				if d.byteAvailable {
 					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 					d.tokens.n++
 					d.byteAvailable = false
 				}
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
@@ -920,30 +933,30 @@ func (d *compressor) deflateLazySSE() {
 				return
 			}
 		}
-		if d.index < d.maxInsertIndex {
+		if s.index < s.maxInsertIndex {
 			// Update the hash
-			d.hash = crc32sse(d.window[d.index:d.index+minMatchLength]) & hashMask
-			ch := d.hashHead[d.hash]
-			d.chainHead = int(ch)
-			d.hashPrev[d.index&windowMask] = ch
-			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+			s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
+			ch := s.hashHead[s.hash]
+			s.chainHead = int(ch)
+			s.hashPrev[s.index&windowMask] = ch
+			s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
 		}
-		prevLength := d.length
-		prevOffset := d.offset
-		d.length = minMatchLength - 1
-		d.offset = 0
-		minIndex := d.index - windowSize
+		prevLength := s.length
+		prevOffset := s.offset
+		s.length = minMatchLength - 1
+		s.offset = 0
+		minIndex := s.index - windowSize
 		if minIndex < 0 {
 			minIndex = 0
 		}
 
-		if d.chainHead-d.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
-			if newLength, newOffset, ok := d.findMatchSSE(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
-				d.length = newLength
-				d.offset = newOffset
+		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
+			if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
+				s.length = newLength
+				s.offset = newOffset
 			}
 		}
-		if prevLength >= minMatchLength && d.length <= prevLength {
+		if prevLength >= minMatchLength && s.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
 			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
@@ -954,21 +967,21 @@ func (d *compressor) deflateLazySSE() {
 			// lookahead, the last two strings are not inserted into the hash
 			// table.
 			var newIndex int
-			newIndex = d.index + prevLength - 1
+			newIndex = s.index + prevLength - 1
 			// Calculate missing hashes
 			end := newIndex
-			if end > d.maxInsertIndex {
-				end = d.maxInsertIndex
+			if end > s.maxInsertIndex {
+				end = s.maxInsertIndex
 			}
 			end += minMatchLength - 1
-			startindex := d.index + 1
-			if startindex > d.maxInsertIndex {
-				startindex = d.maxInsertIndex
+			startindex := s.index + 1
+			if startindex > s.maxInsertIndex {
+				startindex = s.maxInsertIndex
 			}
 			tocheck := d.window[startindex:end]
 			dstSize := len(tocheck) - minMatchLength + 1
 			if dstSize > 0 {
-				dst := d.hashMatch[:dstSize]
+				dst := s.hashMatch[:dstSize]
 				crc32sseAll(tocheck, dst)
 				var newH uint32
 				for i, val := range dst {
@@ -976,74 +989,74 @@ func (d *compressor) deflateLazySSE() {
 					newH = val & hashMask
 					// Get previous value with the same hash.
 					// Our chain should point to the previous value.
-					d.hashPrev[di&windowMask] = d.hashHead[newH]
+					s.hashPrev[di&windowMask] = s.hashHead[newH]
 					// Set the head of the hash chain to us.
-					d.hashHead[newH] = uint32(di + d.hashOffset)
+					s.hashHead[newH] = uint32(di + s.hashOffset)
 				}
-				d.hash = newH
+				s.hash = newH
 			}
 
-			d.index = newIndex
+			s.index = newIndex
 			d.byteAvailable = false
-			d.length = minMatchLength - 1
+			s.length = minMatchLength - 1
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+				if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 					return
 				}
 				d.tokens.n = 0
 			}
 		} else {
 			// Reset, if we got a match this run.
-			if d.length >= minMatchLength {
-				d.ii = 0
+			if s.length >= minMatchLength {
+				s.ii = 0
 			}
 			// We have a byte waiting. Emit it.
 			if d.byteAvailable {
-				d.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+				s.ii++
+				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 				d.tokens.n++
 				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 						return
 					}
 					d.tokens.n = 0
 				}
-				d.index++
+				s.index++
 
 				// If we have a long run of no matches, skip additional bytes
-				// Resets when d.ii overflows after 64KB.
-				if d.ii > 31 {
-					n := int(d.ii >> 6)
+				// Resets when s.ii overflows after 64KB.
+				if s.ii > 31 {
+					n := int(s.ii >> 6)
 					for j := 0; j < n; j++ {
-						if d.index >= d.windowEnd-1 {
+						if s.index >= d.windowEnd-1 {
 							break
 						}
 
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 						d.tokens.n++
 						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+							if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 								return
 							}
 							d.tokens.n = 0
 						}
-						d.index++
+						s.index++
 					}
 					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[d.index-1]))
+					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
 					d.tokens.n++
 					d.byteAvailable = false
-					// d.length = minMatchLength - 1 // not needed, since d.ii is reset above, so it should never be > minMatchLength
+					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
 					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+						if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
 							return
 						}
 						d.tokens.n = 0
 					}
 				}
 			} else {
-				d.index++
+				s.index++
 				d.byteAvailable = true
 			}
 		}
@@ -1167,7 +1180,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeHuff
 	case level >= 1 && level <= 4:
-		d.snap = newSnappy(level)
+		d.snap = newFastEnc(level)
 		d.window = make([]byte, maxStoreBlockSize)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeSnappy
@@ -1175,6 +1188,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		level = 5
 		fallthrough
 	case 5 <= level && level <= 9:
+		d.state = &advancedState{}
 		d.compressionLevel = levels[level]
 		d.initDeflate()
 		d.fill = (*compressor).fillDeflate
@@ -1215,22 +1229,23 @@ func (d *compressor) reset(w io.Writer) {
 		// level was NoCompression or ConstantCompresssion.
 		d.windowEnd = 0
 	default:
-		d.chainHead = -1
-		for i := range d.hashHead {
-			d.hashHead[i] = 0
+		s := d.state
+		s.chainHead = -1
+		for i := range s.hashHead {
+			s.hashHead[i] = 0
 		}
-		for i := range d.hashPrev {
-			d.hashPrev[i] = 0
+		for i := range s.hashPrev {
+			s.hashPrev[i] = 0
 		}
-		d.hashOffset = 1
-		d.index, d.windowEnd = 0, 0
+		s.hashOffset = 1
+		s.index, d.windowEnd = 0, 0
 		d.blockStart, d.byteAvailable = 0, false
 		d.tokens.n = 0
-		d.length = minMatchLength - 1
-		d.offset = 0
-		d.hash = 0
-		d.ii = 0
-		d.maxInsertIndex = 0
+		s.length = minMatchLength - 1
+		s.offset = 0
+		s.hash = 0
+		s.ii = 0
+		s.maxInsertIndex = 0
 	}
 }
 
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index f9b2a699a..f46c65418 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -35,7 +35,7 @@ const (
 )
 
 // The number of extra bits needed by length code X - LENGTH_CODES_START.
-var lengthExtraBits = []int8{
+var lengthExtraBits = [32]int8{
 	/* 257 */ 0, 0, 0,
 	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
 	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
@@ -43,14 +43,14 @@ var lengthExtraBits = []int8{
 }
 
 // The length indicated by length code X - LENGTH_CODES_START.
-var lengthBase = []uint32{
+var lengthBase = [32]uint8{
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
 	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
 	64, 80, 96, 112, 128, 160, 192, 224, 255,
 }
 
 // offset code word extra bits.
-var offsetExtraBits = []int8{
+var offsetExtraBits = [64]int8{
 	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
 	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
 	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
@@ -58,7 +58,7 @@ var offsetExtraBits = []int8{
 	14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
 }
 
-var offsetBase = []uint32{
+var offsetBase = [64]uint32{
 	/* normal deflate */
 	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
 	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
@@ -86,9 +86,9 @@ type huffmanBitWriter struct {
 	// and then the low nbits of bits.
 	bits            uint64
 	nbits           uint
-	bytes           [bufferSize]byte
+	bytes           [256]byte
 	codegenFreq     [codegenCodeCount]int32
-	nbytes          int
+	nbytes          uint8
 	literalFreq     []int32
 	offsetFreq      []int32
 	codegen         []uint8
@@ -101,8 +101,8 @@ type huffmanBitWriter struct {
 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 	return &huffmanBitWriter{
 		writer:          w,
-		literalFreq:     make([]int32, maxNumLit),
-		offsetFreq:      make([]int32, offsetCodeCount),
+		literalFreq:     make([]int32, lengthCodesStart+32),
+		offsetFreq:      make([]int32, 32),
 		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
 		literalEncoding: newHuffmanEncoder(maxNumLit),
 		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
@@ -113,7 +113,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 func (w *huffmanBitWriter) reset(writer io.Writer) {
 	w.writer = writer
 	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
-	w.bytes = [bufferSize]byte{}
+	w.bytes = [256]byte{}
 }
 
 func (w *huffmanBitWriter) flush() {
@@ -145,9 +145,6 @@ func (w *huffmanBitWriter) write(b []byte) {
 }
 
 func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
-	if w.err != nil {
-		return
-	}
 	w.bits |= uint64(b) << w.nbits
 	w.nbits += nb
 	if w.nbits >= 48 {
@@ -155,15 +152,18 @@ func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
 		w.bits >>= 48
 		w.nbits -= 48
 		n := w.nbytes
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
+			if w.err != nil {
+				n = 0
+				return
+			}
 			w.write(w.bytes[:n])
 			n = 0
 		}
@@ -333,9 +333,6 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
 }
 
 func (w *huffmanBitWriter) writeCode(c hcode) {
-	if w.err != nil {
-		return
-	}
 	w.bits |= uint64(c.code) << w.nbits
 	w.nbits += uint(c.len)
 	if w.nbits >= 48 {
@@ -343,15 +340,18 @@ func (w *huffmanBitWriter) writeCode(c hcode) {
 		w.bits >>= 48
 		w.nbits -= 48
 		n := w.nbytes
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
+			if w.err != nil {
+				n = 0
+				return
+			}
 			w.write(w.bytes[:n])
 			n = 0
 		}
@@ -460,7 +460,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
 		}
 		for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
 			// First four offset codes have extra size = 0.
-			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
+			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode&63])
 		}
 	}
 
@@ -548,15 +548,30 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
 		w.offsetFreq[i] = 0
 	}
 
+	if len(tokens) == 0 {
+		return
+	}
+
+	// Only last token should be endBlockMarker.
+	if tokens[len(tokens)-1] == endBlockMarker {
+		w.literalFreq[endBlockMarker]++
+		tokens = tokens[:len(tokens)-1]
+	}
+
+	// Create slices up to the next power of two to avoid bounds checks.
+	lits := w.literalFreq[:256]
+	offs := w.offsetFreq[:32]
+	lengths := w.literalFreq[lengthCodesStart:]
+	lengths = lengths[:32]
 	for _, t := range tokens {
-		if t < matchType {
-			w.literalFreq[t.literal()]++
+		if t < endBlockMarker {
+			lits[t.literal()]++
 			continue
 		}
 		length := t.length()
 		offset := t.offset()
-		w.literalFreq[lengthCodesStart+lengthCode(length)]++
-		w.offsetFreq[offsetCode(offset)]++
+		lengths[lengthCode(length)&31]++
+		offs[offsetCode(offset)&31]++
 	}
 
 	// get the number of literals
@@ -575,8 +590,8 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
 		w.offsetFreq[0] = 1
 		numOffsets = 1
 	}
-	w.literalEncoding.generate(w.literalFreq, 15)
-	w.offsetEncoding.generate(w.offsetFreq, 15)
+	w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)
+	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
 	return
 }
 
@@ -586,30 +601,50 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 	if w.err != nil {
 		return
 	}
+	if len(tokens) == 0 {
+		return
+	}
+
+	// Only last token should be endBlockMarker.
+	var deferEOB bool
+	if tokens[len(tokens)-1] == endBlockMarker {
+		tokens = tokens[:len(tokens)-1]
+		deferEOB = true
+	}
+
+	// Create slices up to the next power of two to avoid bounds checks.
+	lits := leCodes[:256]
+	offs := oeCodes[:32]
+	lengths := leCodes[lengthCodesStart:]
+	lengths = lengths[:32]
 	for _, t := range tokens {
 		if t < matchType {
-			w.writeCode(leCodes[t.literal()])
+			w.writeCode(lits[t.literal()])
 			continue
 		}
+
 		// Write the length
 		length := t.length()
 		lengthCode := lengthCode(length)
-		w.writeCode(leCodes[lengthCode+lengthCodesStart])
-		extraLengthBits := uint(lengthExtraBits[lengthCode])
+		w.writeCode(lengths[lengthCode&31])
+		extraLengthBits := uint(lengthExtraBits[lengthCode&31])
 		if extraLengthBits > 0 {
-			extraLength := int32(length - lengthBase[lengthCode])
+			extraLength := int32(length - lengthBase[lengthCode&31])
 			w.writeBits(extraLength, extraLengthBits)
 		}
 		// Write the offset
 		offset := t.offset()
 		offsetCode := offsetCode(offset)
-		w.writeCode(oeCodes[offsetCode])
-		extraOffsetBits := uint(offsetExtraBits[offsetCode])
+		w.writeCode(offs[offsetCode&31])
+		extraOffsetBits := uint(offsetExtraBits[offsetCode&63])
 		if extraOffsetBits > 0 {
-			extraOffset := int32(offset - offsetBase[offsetCode])
+			extraOffset := int32(offset - offsetBase[offsetCode&63])
 			w.writeBits(extraOffset, extraOffsetBits)
 		}
 	}
+	if deferEOB {
+		w.writeCode(leCodes[endBlockMarker])
+	}
 }
 
 // huffOffset is a static offset encoder used for huffman only encoding.
@@ -620,7 +655,7 @@ func init() {
 	w := newHuffmanBitWriter(nil)
 	w.offsetFreq[0] = 1
 	huffOffset = newHuffmanEncoder(offsetCodeCount)
-	huffOffset.generate(w.offsetFreq, 15)
+	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
 }
 
 // writeBlockHuff encodes a block of bytes as either
@@ -644,7 +679,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
 	const numLiterals = endBlockMarker + 1
 	const numOffsets = 1
 
-	w.literalEncoding.generate(w.literalFreq, 15)
+	w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)
 
 	// Figure out smallest code.
 	// Always use dynamic Huffman or Store
@@ -679,13 +714,12 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
 		bits := w.bits
 		w.bits >>= 48
 		w.nbits -= 48
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n < bufferFlushSize {
 			continue
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index bdcbd823b..f65f79336 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -6,6 +6,7 @@ package flate
 
 import (
 	"math"
+	"math/bits"
 	"sort"
 )
 
@@ -56,7 +57,9 @@ func (h *hcode) set(code uint16, length uint16) {
 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
 
 func newHuffmanEncoder(size int) *huffmanEncoder {
-	return &huffmanEncoder{codes: make([]hcode, size)}
+	// Make capacity to next power of two.
+	c := uint(bits.Len32(uint32(size - 1)))
+	return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
 }
 
 // Generates a HuffmanCode corresponding to the fixed literal table
diff --git a/vendor/github.com/klauspost/compress/flate/snappy.go b/vendor/github.com/klauspost/compress/flate/snappy.go
index d853320a7..aebebd524 100644
--- a/vendor/github.com/klauspost/compress/flate/snappy.go
+++ b/vendor/github.com/klauspost/compress/flate/snappy.go
@@ -20,12 +20,12 @@ func emitCopy(dst *tokens, offset, length int) {
 	dst.n++
 }
 
-type snappyEnc interface {
+type fastEnc interface {
 	Encode(dst *tokens, src []byte)
 	Reset()
 }
 
-func newSnappy(level int) snappyEnc {
+func newFastEnc(level int) fastEnc {
 	switch level {
 	case 1:
 		return &snappyL1{}
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
index 4f275ea61..141299b97 100644
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -4,8 +4,6 @@
 
 package flate
 
-import "fmt"
-
 const (
 	// 2 bits:   type   0 = literal  1=EOF  2=Match   3=Unused
 	// 8 bits:   xlength = length - MIN_MATCH_LENGTH
@@ -19,7 +17,7 @@ const (
 
 // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
 // is lengthCodes[length - MIN_MATCH_LENGTH]
-var lengthCodes = [...]uint32{
+var lengthCodes = [256]uint8{
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
 	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
 	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
@@ -48,7 +46,7 @@ var lengthCodes = [...]uint32{
 	27, 27, 27, 27, 27, 28,
 }
 
-var offsetCodes = [...]uint32{
+var offsetCodes = [256]uint32{
 	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
 	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
@@ -82,34 +80,27 @@ func matchToken(xlength uint32, xoffset uint32) token {
 	return token(matchType + xlength<<lengthShift + xoffset)
 }
 
-func matchTokend(xlength uint32, xoffset uint32) token {
-	if xlength > maxMatchLength || xoffset > maxMatchOffset {
-		panic(fmt.Sprintf("Invalid match: len: %d, offset: %d\n", xlength, xoffset))
-		return token(matchType)
-	}
-	return token(matchType + xlength<<lengthShift + xoffset)
-}
-
 // Returns the type of a token
 func (t token) typ() uint32 { return uint32(t) & typeMask }
 
 // Returns the literal of a literal token
-func (t token) literal() uint32 { return uint32(t - literalType) }
+func (t token) literal() uint8 { return uint8(t) }
 
 // Returns the extra offset of a match token
 func (t token) offset() uint32 { return uint32(t) & offsetMask }
 
-func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
+func (t token) length() uint8 { return uint8(t >> lengthShift) }
 
-func lengthCode(len uint32) uint32 { return lengthCodes[len] }
+// The code is never more than 8 bits, but is returned as uint32 for convenience.
+func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
 
 // Returns the offset code corresponding to a specific offset
 func offsetCode(off uint32) uint32 {
 	if off < uint32(len(offsetCodes)) {
-		return offsetCodes[off]
+		return offsetCodes[off&255]
 	} else if off>>7 < uint32(len(offsetCodes)) {
-		return offsetCodes[off>>7] + 14
+		return offsetCodes[(off>>7)&255] + 14
 	} else {
-		return offsetCodes[off>>14] + 28
+		return offsetCodes[(off>>14)&255] + 28
 	}
 }