25 files changed, 1522 insertions, 234 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index d9948ab40..2b101d26b 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -48,6 +48,8 @@ const (
 	maxHashOffset       = 1 << 24
 
 	skipNever = math.MaxInt32
+
+	debugDeflate = false
 )
 
 type compressionLevel struct {
@@ -59,15 +61,13 @@ type compressionLevel struct {
 // See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
 var levels = []compressionLevel{
 	{}, // 0
-	// Level 1-4 uses specialized algorithm - values not used
+	// Level 1-6 uses specialized algorithm - values not used
 	{0, 0, 0, 0, 0, 1},
 	{0, 0, 0, 0, 0, 2},
 	{0, 0, 0, 0, 0, 3},
 	{0, 0, 0, 0, 0, 4},
-	// For levels 5-6 we don't bother trying with lazy matches.
-	// Lazy matching is at least 30% slower, with 1.5% increase.
-	{6, 0, 12, 8, 12, 5},
-	{8, 0, 24, 16, 16, 6},
+	{0, 0, 0, 0, 0, 5},
+	{0, 0, 0, 0, 0, 6},
 	// Levels 7-9 use increasingly more lazy matching
 	// and increasingly stringent conditions for "good enough".
 	{8, 8, 24, 16, skipNever, 7},
@@ -203,9 +203,8 @@ func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
 // This is much faster than doing a full encode.
 // Should only be used after a start/reset.
 func (d *compressor) fillWindow(b []byte) {
-	// Do not fill window if we are in store-only mode,
-	// use constant or Snappy compression.
-	if d.level == 0 {
+	// Do not fill window if we are in store-only or huffman mode.
+	if d.level <= 0 {
 		return
 	}
 	if d.fast != nil {
@@ -368,7 +367,7 @@ func (d *compressor) deflateLazy() {
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
+	const sanity = debugDeflate
 
 	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
@@ -667,6 +666,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 	default:
 		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
 	}
+	d.level = level
 	return nil
 }
 
@@ -720,6 +720,7 @@ func (d *compressor) close() error {
 		return d.w.err
 	}
 	d.w.flush()
+	d.w.reset(nil)
 	return d.w.err
 }
 
@@ -750,8 +751,7 @@ func NewWriter(w io.Writer, level int) (*Writer, error) {
 // can only be decompressed by a Reader initialized with the
 // same dictionary.
 func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
-	dw := &dictWriter{w}
-	zw, err := NewWriter(dw, level)
+	zw, err := NewWriter(w, level)
 	if err != nil {
 		return nil, err
 	}
@@ -760,14 +760,6 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
 	return zw, err
 }
 
-type dictWriter struct {
-	w io.Writer
-}
-
-func (w *dictWriter) Write(b []byte) (n int, err error) {
-	return w.w.Write(b)
-}
-
 // A Writer takes data written to it and writes the compressed
 // form of that data to an underlying writer (see NewWriter).
 type Writer struct {
@@ -805,11 +797,12 @@ func (w *Writer) Close() error {
 // the result of NewWriter or NewWriterDict called with dst
 // and w's level and dictionary.
 func (w *Writer) Reset(dst io.Writer) {
-	if dw, ok := w.d.w.writer.(*dictWriter); ok {
+	if len(w.dict) > 0 {
 		// w was created with NewWriterDict
-		dw.w = dst
-		w.d.reset(dw)
-		w.d.fillWindow(w.dict)
+		w.d.reset(dst)
+		if dst != nil {
+			w.d.fillWindow(w.dict)
+		}
 	} else {
 		// w was created with NewWriter
 		w.d.reset(dst)
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 3d2fdcd77..6d4c1e98b 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -35,16 +35,16 @@ func newFastEnc(level int) fastEnc {
 }
 
 const (
-	tableBits       = 16             // Bits used in the table
+	tableBits       = 15             // Bits used in the table
 	tableSize       = 1 << tableBits // Size of the table
 	tableShift      = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
 	baseMatchOffset = 1              // The smallest match offset
 	baseMatchLength = 3              // The smallest match length per the RFC section 3.2.5
 	maxMatchOffset  = 1 << 15        // The largest match offset
 
-	bTableBits   = 18                                               // Bits used in the big tables
+	bTableBits   = 17                                               // Bits used in the big tables
 	bTableSize   = 1 << bTableBits                                  // Size of the table
-	allocHistory = maxStoreBlockSize * 20                           // Size to preallocate for history.
+	allocHistory = maxStoreBlockSize * 10                           // Size to preallocate for history.
 	bufferReset  = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
 )
 
@@ -92,7 +92,6 @@ func hash(u uint32) uint32 {
 }
 
 type tableEntry struct {
-	val    uint32
 	offset int32
 }
 
diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go
new file mode 100644
index 000000000..c74a95fe7
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/gen_inflate.go
@@ -0,0 +1,274 @@
+// +build generate
+
+//go:generate go run $GOFILE && gofmt -w inflate_gen.go
+
+package main
+
+import (
+	"os"
+	"strings"
+)
+
+// main writes inflate_gen.go: one copy of the Huffman block decoder per concrete
+// reader type in types, plus huffmanBlockDecoder, which picks one by f.r's type.
+func main() {
+	f, err := os.Create("inflate_gen.go")
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+	types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"}
+	names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"}
+	imports := []string{"bytes", "bufio", "fmt", "strings", "math/bits"} // the template uses fmt (debug prints) and never io
+	f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT.
+
+package flate
+
+import (
+`)
+
+	for _, imp := range imports {
+		f.WriteString("\t\"" + imp + "\"\n")
+	}
+	f.WriteString(")\n\n")
+
+	template := `
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) $FUNCNAME$() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.($TYPE$)
+	moreBits := func() error {
+		c, err := fr.ReadByte()
+		if err != nil {
+			return noEOF(err)
+		}
+		f.roffset++
+		f.b |= uint32(c) << f.nb
+		f.nb += 8
+		return nil
+	}
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var n uint // number of bits extra
+		var length int
+		var err error
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).$FUNCNAME$
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				if debugDecode {
+					fmt.Println("huffsym:", err)
+				}
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			if debugDecode {
+				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+`
+	for i, t := range types {
+		s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1)
+		s = strings.Replace(s, "$TYPE$", t, -1)
+		f.WriteString(s)
+	}
+	f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
+	f.WriteString("\tswitch f.r.(type) {\n")
+	for i, t := range types {
+		f.WriteString("\t\tcase " + t + ":\n")
+		f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
+	}
+	f.WriteString("\t\tdefault:\n\t\t\treturn f.huffmanBlockGeneric\n\t}\n}\n")
+}
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 56ee6dc8b..53fe1d06e 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -484,6 +484,9 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
 	}
 }
 
+// writeStoredHeader will write a stored header.
+// If the stored block is only used for EOF,
+// it is replaced with a fixed huffman block.
 func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
 	if w.err != nil {
 		return
@@ -493,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
 		w.writeCode(w.literalEncoding.codes[endBlockMarker])
 		w.lastHeader = 0
 	}
+
+	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
+	if length == 0 && isEof {
+		w.writeFixedHeader(isEof)
+		// EOB: 7 bits, value: 0
+		w.writeBits(0, 7)
+		w.flush()
+		return
+	}
+
 	var flag int32
 	if isEof {
 		flag = 1
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index 9d8e81ad6..4c39a3018 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -109,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
 	return h
 }
 
-var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
-var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
+var fixedLiteralEncoding = generateFixedLiteralEncoding()
+var fixedOffsetEncoding = generateFixedOffsetEncoding()
 
 func (h *huffmanEncoder) bitLength(freq []uint16) int {
 	var total int
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 6dc5b5d06..7f175a4ec 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -106,7 +106,7 @@ const (
 )
 
 type huffmanDecoder struct {
-	min      int                       // the minimum code length
+	maxRead  int                       // the maximum number of bits we can read and not overread
 	chunks   *[huffmanNumChunks]uint16 // chunks as described above
 	links    [][]uint16                // overflow links
 	linkMask uint32                    // mask the width of the link table
@@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	if h.chunks == nil {
 		h.chunks = &[huffmanNumChunks]uint16{}
 	}
-	if h.min != 0 {
+	if h.maxRead != 0 {
 		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
 	}
 
 	// Count number of codes of each length,
-	// compute min and max length.
+	// compute min (later stored as maxRead) and max length.
 	var count [maxCodeLen]int
 	var min, max int
 	for _, n := range lengths {
@@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 		return false
 	}
 
-	h.min = min
+	h.maxRead = min
 	chunks := h.chunks[:]
 	for i := range chunks {
 		chunks[i] = 0
@@ -342,7 +342,7 @@ func (f *decompressor) nextBlock() {
 		// compressed, fixed Huffman tables
 		f.hl = &fixedHuffmanDecoder
 		f.hd = nil
-		f.huffmanBlock()
+		f.huffmanBlockDecoder()()
 	case 2:
 		// compressed, dynamic Huffman tables
 		if f.err = f.readHuffman(); f.err != nil {
@@ -350,7 +350,7 @@ func (f *decompressor) nextBlock() {
 		}
 		f.hl = &f.h1
 		f.hd = &f.h2
-		f.huffmanBlock()
+		f.huffmanBlockDecoder()()
 	default:
 		// 3 is reserved.
 		if debugDecode {
@@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error {
 		return CorruptInputError(f.roffset)
 	}
 
-	// As an optimization, we can initialize the min bits to read at a time
+	// As an optimization, we can initialize maxRead (the bits to read at a time)
 	// for the HLIT tree to the length of the EOB marker since we know that
 	// every block must terminate with one. This preserves the property that
 	// we never read any extra bytes after the end of the DEFLATE stream.
-	if f.h1.min < f.bits[endBlockMarker] {
-		f.h1.min = f.bits[endBlockMarker]
+	if f.h1.maxRead < f.bits[endBlockMarker] {
+		f.h1.maxRead = f.bits[endBlockMarker]
+	}
+	if !f.final {
+		// If not the final block, the smallest block possible is
+		// a predefined table, BTYPE=01, with a single EOB marker.
+		// This will take up 3 + 7 bits.
+		f.h1.maxRead += 10
 	}
 
 	return nil
@@ -558,7 +564,7 @@ func (f *decompressor) readHuffman() error {
 // hl and hd are the Huffman states for the lit/length values
 // and the distance values, respectively. If hd == nil, using the
 // fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) huffmanBlock() {
+func (f *decompressor) huffmanBlockGeneric() {
 	const (
 		stateInit = iota // Zero value must be stateInit
 		stateDict
@@ -574,19 +580,64 @@ func (f *decompressor) huffmanBlock() {
 readLiteral:
 	// Read literal and/or (length, distance) according to RFC section 3.2.3.
 	{
-		v, err := f.huffSym(f.hl)
-		if err != nil {
-			f.err = err
-			return
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := f.r.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
 		}
+
 		var n uint // number of bits extra
 		var length int
+		var err error
 		switch {
 		case v < 256:
 			f.dict.writeByte(byte(v))
 			if f.dict.availWrite() == 0 {
 				f.toRead = f.dict.readFlush()
-				f.step = (*decompressor).huffmanBlock
+				f.step = (*decompressor).huffmanBlockGeneric
 				f.stepState = stateInit
 				return
 			}
@@ -714,7 +765,7 @@ copyHistory:
 
 		if f.dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = f.dict.readFlush()
-			f.step = (*decompressor).huffmanBlock // We need to continue this work
+			f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
 			f.stepState = stateDict
 			return
 		}
@@ -726,21 +777,33 @@ copyHistory:
 func (f *decompressor) dataBlock() {
 	// Uncompressed.
 	// Discard current half-byte.
-	f.nb = 0
-	f.b = 0
+	left := (f.nb) & 7
+	f.nb -= left
+	f.b >>= left
+
+	offBytes := f.nb >> 3
+	// Unfilled values will be overwritten.
+	f.buf[0] = uint8(f.b)
+	f.buf[1] = uint8(f.b >> 8)
+	f.buf[2] = uint8(f.b >> 16)
+	f.buf[3] = uint8(f.b >> 24)
+
+	f.roffset += int64(offBytes)
+	f.nb, f.b = 0, 0
 
 	// Length then ones-complement of length.
-	nr, err := io.ReadFull(f.r, f.buf[0:4])
+	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
 	f.roffset += int64(nr)
 	if err != nil {
 		f.err = noEOF(err)
 		return
 	}
-	n := int(f.buf[0]) | int(f.buf[1])<<8
-	nn := int(f.buf[2]) | int(f.buf[3])<<8
-	if uint16(nn) != uint16(^n) {
+	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
+	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
+	if nn != ^n {
 		if debugDecode {
-			fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
+			ncomp := ^n
+			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
 		}
 		f.err = CorruptInputError(f.roffset)
 		return
@@ -752,7 +815,7 @@ func (f *decompressor) dataBlock() {
 		return
 	}
 
-	f.copyLen = n
+	f.copyLen = int(n)
 	f.copyData()
 }
 
@@ -816,7 +879,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
 	// with single element, huffSym must error on these two edge cases. In both
 	// cases, the chunks slice will be 0 for the invalid sequence, leading it
 	// satisfy the n == 0 check below.
-	n := uint(h.min)
+	n := uint(h.maxRead)
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
new file mode 100644
index 000000000..397dc1b1a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -0,0 +1,922 @@
+// Code generated by go generate gen_inflate.go. DO NOT EDIT.
+
+package flate
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"math/bits"
+	"strings"
+)
+
+// huffmanBytesBuffer decodes a single Huffman block from f, reading f.r as a
+// *bytes.Buffer. f.hl and f.hd are the Huffman decoders for the lit/length
+// values and the distance values, respectively. If f.hd == nil, distances use
+// the fixed 5-bit encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesBuffer() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bytes.Buffer) // unchecked assertion: panics if f.r holds any other type
+	moreBits := func() error {
+		c, err := fr.ReadByte()
+		if err != nil {
+			return noEOF(err)
+		}
+		f.roffset++
+		f.b |= uint32(c) << f.nb
+		f.nb += 8
+		return nil
+	}
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// to satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var n uint // number of bits extra
+		var length int
+		var err error
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanBytesBuffer
+				f.stepState = stateInit // resume at readLiteral on re-entry
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) // low 5 bits of f.b, bit-reversed
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				if debugDecode {
+					fmt.Println("huffsym:", err)
+				}
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			if debugDecode {
+				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
+			f.stepState = stateDict // resume at copyHistory on re-entry
+			return
+		}
+		goto readLiteral
+	}
+}
+
+// huffmanBytesReader decodes a single Huffman block from f, reading f.r as a
+// *bytes.Reader. f.hl and f.hd are the Huffman decoders for the lit/length
+// values and the distance values, respectively. If f.hd == nil, distances use
+// the fixed 5-bit encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBytesReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bytes.Reader) // unchecked assertion: panics if f.r holds any other type
+	moreBits := func() error {
+		c, err := fr.ReadByte()
+		if err != nil {
+			return noEOF(err)
+		}
+		f.roffset++
+		f.b |= uint32(c) << f.nb
+		f.nb += 8
+		return nil
+	}
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// to satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var n uint // number of bits extra
+		var length int
+		var err error
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanBytesReader
+				f.stepState = stateInit // resume at readLiteral on re-entry
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) // low 5 bits of f.b, bit-reversed
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				if debugDecode {
+					fmt.Println("huffsym:", err)
+				}
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			if debugDecode {
+				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanBytesReader // We need to continue this work
+			f.stepState = stateDict // resume at copyHistory on re-entry
+			return
+		}
+		goto readLiteral
+	}
+}
+
+// huffmanBufioReader decodes a single Huffman block from f, pulling input
+// bytes from f.r as a *bufio.Reader. f.hl and f.hd are the Huffman states for
+// the lit/length values and the distance values, respectively. If f.hd == nil,
+// the fixed 5-bit distance encoding associated with fixed Huffman blocks is used.
+func (f *decompressor) huffmanBufioReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*bufio.Reader) // unchecked assertion: panics if f.r has any other type
+	moreBits := func() error { // appends one input byte above the f.nb bits held in f.b
+		c, err := fr.ReadByte()
+		if err != nil {
+			return noEOF(err)
+		}
+		f.roffset++
+		f.b |= uint32(c) << f.nb
+		f.nb += 8
+		return nil
+	}
+
+	switch f.stepState { // dispatch on the saved state: stateInit starts decoding, stateDict resumes a copy
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// to satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var n uint // number of bits extra
+		var length int
+		var err error
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanBufioReader
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256: // symbol 256 finishes the block
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) // & and << share precedence in Go: (f.b&0x1F)<<3, then bit-reversed into a 5-bit value
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				if debugDecode {
+					fmt.Println("huffsym:", err)
+				}
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			if debugDecode {
+				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanBufioReader // We need to continue this work
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+// huffmanStringsReader decodes a single Huffman block from f, pulling input
+// bytes from f.r as a *strings.Reader. f.hl and f.hd are the Huffman states for
+// the lit/length values and the distance values, respectively. If f.hd == nil,
+// the fixed 5-bit distance encoding associated with fixed Huffman blocks is used.
+func (f *decompressor) huffmanStringsReader() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+	fr := f.r.(*strings.Reader) // unchecked assertion: panics if f.r has any other type
+	moreBits := func() error { // appends one input byte above the f.nb bits held in f.b
+		c, err := fr.ReadByte()
+		if err != nil {
+			return noEOF(err)
+		}
+		f.roffset++
+		f.b |= uint32(c) << f.nb
+		f.nb += 8
+		return nil
+	}
+
+	switch f.stepState { // dispatch on the saved state: stateInit starts decoding, stateDict resumes a copy
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// to satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := fr.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
+		}
+
+		var n uint // number of bits extra
+		var length int
+		var err error
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanStringsReader
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256: // symbol 256 finishes the block
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			if debugDecode {
+				fmt.Println(v, ">= maxNumLit")
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits n>0:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<5:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) // & and << share precedence in Go: (f.b&0x1F)<<3, then bit-reversed into a 5-bit value
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				if debugDecode {
+					fmt.Println("huffsym:", err)
+				}
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = moreBits(); err != nil {
+					if debugDecode {
+						fmt.Println("morebits f.nb<nb:", err)
+					}
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			if debugDecode {
+				fmt.Println("dist too big:", dist, maxNumDist)
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			if debugDecode {
+				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+			}
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+func (f *decompressor) huffmanBlockDecoder() func() {
+	switch f.r.(type) { // choose the decoder specialized for f.r's concrete type
+	case *bytes.Buffer:
+		return f.huffmanBytesBuffer
+	case *bytes.Reader:
+		return f.huffmanBytesReader
+	case *bufio.Reader:
+		return f.huffmanBufioReader
+	case *strings.Reader:
+		return f.huffmanStringsReader
+	default: // no specialization listed for this reader type
+		return f.huffmanBlockGeneric
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
index 102fc74c7..1e5eea396 100644
--- a/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -16,7 +16,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 
@@ -81,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			}
 
 			now := load6432(src, nextS)
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 			nextHash = hash(uint32(now))
 
 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 
@@ -96,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			nextS++
 			candidate = e.table[nextHash]
 			now >>= 8
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 
 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 			cv = uint32(now)
@@ -139,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 				// Index first pair after match end.
 				if int(s+l+4) < len(src) {
 					cv := load3232(src, s)
-					e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
+					e.table[hash(cv)] = tableEntry{offset: s + e.cur}
 				}
 				goto emitRemainder
 			}
@@ -153,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			x := load6432(src, s-2)
 			o := e.cur + s - 2
 			prevHash := hash(uint32(x))
-			e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
+			e.table[prevHash] = tableEntry{offset: o}
 			x >>= 16
 			currHash := hash(uint32(x))
 			candidate = e.table[currHash]
-			e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
+			e.table[currHash] = tableEntry{offset: o + 2}
 
 			offset := s - (candidate.offset - e.cur)
-			if offset > maxMatchOffset || uint32(x) != candidate.val {
+			if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
 				cv = uint32(x >> 8)
 				s++
 				break
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
index dc6b1d314..5b986a194 100644
--- a/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -18,7 +18,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
 
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 
@@ -83,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			}
 			candidate = e.table[nextHash]
 			now := load6432(src, nextS)
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 			nextHash = hash4u(uint32(now), bTableBits)
 
 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 
@@ -98,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			nextS++
 			candidate = e.table[nextHash]
 			now >>= 8
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 
 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
 				break
 			}
 			cv = uint32(now)
@@ -148,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 				// Index first pair after match end.
 				if int(s+l+4) < len(src) {
 					cv := load3232(src, s)
-					e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
+					e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
 				}
 				goto emitRemainder
 			}
@@ -157,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			for i := s - l + 2; i < s-5; i += 7 {
 				x := load6432(src, int32(i))
 				nextHash := hash4u(uint32(x), bTableBits)
-				e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
+				e.table[nextHash] = tableEntry{offset: e.cur + i}
 				// Skip one
 				x >>= 16
 				nextHash = hash4u(uint32(x), bTableBits)
-				e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
 				// Skip one
 				x >>= 16
 				nextHash = hash4u(uint32(x), bTableBits)
-				e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
+				e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
 			}
 
 			// We could immediately start working at s now, but to improve
@@ -178,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			o := e.cur + s - 2
 			prevHash := hash4u(uint32(x), bTableBits)
 			prevHash2 := hash4u(uint32(x>>8), bTableBits)
-			e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
-			e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
+			e.table[prevHash] = tableEntry{offset: o}
+			e.table[prevHash2] = tableEntry{offset: o + 1}
 			currHash := hash4u(uint32(x>>16), bTableBits)
 			candidate = e.table[currHash]
-			e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
+			e.table[currHash] = tableEntry{offset: o + 2}
 
 			offset := s - (candidate.offset - e.cur)
-			if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
+			if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
 				cv = uint32(x >> 24)
 				s++
 				break
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
index 1a3ff9b6b..c22b4244a 100644
--- a/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -15,7 +15,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
 
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 
@@ -81,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			}
 			candidates := e.table[nextHash]
 			now := load3232(src, nextS)
-			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
+
+			// Safe offset distance until s + 4...
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
 
 			// Check both candidates
 			candidate = candidates.Cur
-			offset := s - (candidate.offset - e.cur)
-			if cv == candidate.val {
-				if offset > maxMatchOffset {
-					cv = now
-					// Previous will also be invalid, we have nothing.
-					continue
-				}
-				o2 := s - (candidates.Prev.offset - e.cur)
-				if cv != candidates.Prev.val || o2 > maxMatchOffset {
+			if candidate.offset < minOffset {
+				cv = now
+				// Previous will also be invalid, we have nothing.
+				continue
+			}
+
+			if cv == load3232(src, candidate.offset-e.cur) {
+				if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
 					break
 				}
 				// Both match and are valid, pick longest.
+				offset := s - (candidate.offset - e.cur)
+				o2 := s - (candidates.Prev.offset - e.cur)
 				l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
 				if l2 > l1 {
 					candidate = candidates.Prev
@@ -106,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				// We only check if value mismatches.
 				// Offset will always be invalid in other cases.
 				candidate = candidates.Prev
-				if cv == candidate.val {
-					offset := s - (candidate.offset - e.cur)
-					if offset <= maxMatchOffset {
-						break
-					}
+				if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+					break
 				}
 			}
 			cv = now
@@ -158,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 					nextHash := hash(cv)
 					e.table[nextHash] = tableEntryPrev{
 						Prev: e.table[nextHash].Cur,
-						Cur:  tableEntry{offset: e.cur + t, val: cv},
+						Cur:  tableEntry{offset: e.cur + t},
 					}
 				}
 				goto emitRemainder
@@ -170,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			prevHash := hash(uint32(x))
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 3},
 			}
 			x >>= 8
 			prevHash = hash(uint32(x))
 
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 2},
 			}
 			x >>= 8
 			prevHash = hash(uint32(x))
 
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 1},
 			}
 			x >>= 8
 			currHash := hash(uint32(x))
@@ -192,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			cv = uint32(x)
 			e.table[currHash] = tableEntryPrev{
 				Prev: candidates.Cur,
-				Cur:  tableEntry{offset: s + e.cur, val: cv},
+				Cur:  tableEntry{offset: s + e.cur},
 			}
 
 			// Check both candidates
 			candidate = candidates.Cur
-			if cv == candidate.val {
-				offset := s - (candidate.offset - e.cur)
-				if offset <= maxMatchOffset {
-					continue
-				}
-			} else {
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+
+			if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
 				// We only check if value mismatches.
 				// Offset will always be invalid in other cases.
 				candidate = candidates.Prev
-				if cv == candidate.val {
+				if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
 					offset := s - (candidate.offset - e.cur)
 					if offset <= maxMatchOffset {
 						continue
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
index f3ecc9c4d..e62f0c02b 100644
--- a/vendor/github.com/klauspost/compress/flate/level4.go
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -13,7 +13,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 	// Protect against e.cur wraparound.
@@ -92,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			sCandidate := e.table[nextHashS]
 			lCandidate := e.bTable[nextHashL]
 			next := load6432(src, nextS)
-			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			entry := tableEntry{offset: s + e.cur}
 			e.table[nextHashS] = entry
 			e.bTable[nextHashL] = entry
 
 			t = lCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
 				// We got a long match. Use that.
 				break
 			}
 
 			t = sCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
 				// Found a 4 match...
 				lCandidate = e.bTable[hash7(next, tableBits)]
 
 				// If the next long is a candidate, check if we should use that instead...
 				lOff := nextS - (lCandidate.offset - e.cur)
-				if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
+				if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
 					l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
 					if l2 > l1 {
 						s = nextS
@@ -137,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		if nextEmit < s {
 			emitLiteral(dst, src[nextEmit:s])
 		}
-		if false {
+		if debugDeflate {
 			if t >= s {
 				panic("s-t")
 			}
@@ -160,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			// Index first pair after match end.
 			if int(s+8) < len(src) {
 				cv := load6432(src, s)
-				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
-				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
+				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -171,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			i := nextS
 			if i < s-1 {
 				cv := load6432(src, i)
-				t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-				t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
 				e.bTable[hash7(cv, tableBits)] = t
 				e.bTable[hash7(cv>>8, tableBits)] = t2
-				e.table[hash4u(t2.val, tableBits)] = t2
+				e.table[hash4u(uint32(cv>>8), tableBits)] = t2
 
 				i += 3
 				for ; i < s-1; i += 3 {
 					cv := load6432(src, i)
-					t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-					t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
 					e.bTable[hash7(cv, tableBits)] = t
 					e.bTable[hash7(cv>>8, tableBits)] = t2
-					e.table[hash4u(t2.val, tableBits)] = t2
+					e.table[hash4u(uint32(cv>>8), tableBits)] = t2
 				}
 			}
 		}
@@ -195,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		o := e.cur + s - 1
 		prevHashS := hash4x64(x, tableBits)
 		prevHashL := hash7(x, tableBits)
-		e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
-		e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
+		e.table[prevHashS] = tableEntry{offset: o}
+		e.bTable[prevHashL] = tableEntry{offset: o}
 		cv = x >> 8
 	}
 
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
index 4e3916825..d513f1ffd 100644
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -13,7 +13,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 
@@ -100,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			sCandidate := e.table[nextHashS]
 			lCandidate := e.bTable[nextHashL]
 			next := load6432(src, nextS)
-			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			entry := tableEntry{offset: s + e.cur}
 			e.table[nextHashS] = entry
 			eLong := &e.bTable[nextHashL]
 			eLong.Cur, eLong.Prev = entry, eLong.Cur
@@ -110,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 
 			t = lCandidate.Cur.offset - e.cur
 			if s-t < maxMatchOffset {
-				if uint32(cv) == lCandidate.Cur.val {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
 					// Store the next match
-					e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 					eLong := &e.bTable[nextHashL]
-					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 
 					t2 := lCandidate.Prev.offset - e.cur
-					if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
 						l = e.matchlen(s+4, t+4, src) + 4
 						ml1 := e.matchlen(s+4, t2+4, src) + 4
 						if ml1 > l {
@@ -129,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 					break
 				}
 				t = lCandidate.Prev.offset - e.cur
-				if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
 					// Store the next match
-					e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 					eLong := &e.bTable[nextHashL]
-					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 					break
 				}
 			}
 
 			t = sCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
 				// Found a 4 match...
 				l = e.matchlen(s+4, t+4, src) + 4
 				lCandidate = e.bTable[nextHashL]
 				// Store the next match
 
-				e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 				eLong := &e.bTable[nextHashL]
-				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 
 				// If the next long is a candidate, use that...
 				t2 := lCandidate.Cur.offset - e.cur
 				if nextS-t2 < maxMatchOffset {
-					if lCandidate.Cur.val == uint32(next) {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
 						ml := e.matchlen(nextS+4, t2+4, src) + 4
 						if ml > l {
 							t = t2
@@ -163,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 					}
 					// If the previous long is a candidate, use that...
 					t2 = lCandidate.Prev.offset - e.cur
-					if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
 						ml := e.matchlen(nextS+4, t2+4, src) + 4
 						if ml > l {
 							t = t2
@@ -197,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		if nextEmit < s {
 			emitLiteral(dst, src[nextEmit:s])
 		}
-		if false {
+		if debugDeflate {
 			if t >= s {
 				panic(fmt.Sprintln("s-t", s, t))
 			}
@@ -226,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			i := s - l + 1
 			if i < s-1 {
 				cv := load6432(src, i)
-				t := tableEntry{offset: i + e.cur, val: uint32(cv)}
+				t := tableEntry{offset: i + e.cur}
 				e.table[hash4x64(cv, tableBits)] = t
 				eLong := &e.bTable[hash7(cv, tableBits)]
 				eLong.Cur, eLong.Prev = t, eLong.Cur
 
 				// Do an long at i+1
 				cv >>= 8
-				t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
+				t = tableEntry{offset: t.offset + 1}
 				eLong = &e.bTable[hash7(cv, tableBits)]
 				eLong.Cur, eLong.Prev = t, eLong.Cur
 
 				// We only have enough bits for a short entry at i+2
 				cv >>= 8
-				t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
+				t = tableEntry{offset: t.offset + 1}
 				e.table[hash4x64(cv, tableBits)] = t
 
 				// Skip one - otherwise we risk hitting 's'
 				i += 4
 				for ; i < s-1; i += hashEvery {
 					cv := load6432(src, i)
-					t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-					t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
 					eLong := &e.bTable[hash7(cv, tableBits)]
 					eLong.Cur, eLong.Prev = t, eLong.Cur
-					e.table[hash4u(t2.val, tableBits)] = t2
+					e.table[hash4u(uint32(cv>>8), tableBits)] = t2
 				}
 			}
 		}
@@ -261,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		o := e.cur + s - 1
 		prevHashS := hash4x64(x, tableBits)
 		prevHashL := hash7(x, tableBits)
-		e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
+		e.table[prevHashS] = tableEntry{offset: o}
 		eLong := &e.bTable[prevHashL]
-		eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
+		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
 		cv = x >> 8
 	}
 
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
index 00a311977..a52c80ea4 100644
--- a/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -13,7 +13,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 
@@ -101,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			sCandidate := e.table[nextHashS]
 			lCandidate := e.bTable[nextHashL]
 			next := load6432(src, nextS)
-			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			entry := tableEntry{offset: s + e.cur}
 			e.table[nextHashS] = entry
 			eLong := &e.bTable[nextHashL]
 			eLong.Cur, eLong.Prev = entry, eLong.Cur
@@ -112,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 
 			t = lCandidate.Cur.offset - e.cur
 			if s-t < maxMatchOffset {
-				if uint32(cv) == lCandidate.Cur.val {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
 					// Long candidate matches at least 4 bytes.
 
 					// Store the next match
-					e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 					eLong := &e.bTable[nextHashL]
-					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 
 					// Check the previous long candidate as well.
 					t2 := lCandidate.Prev.offset - e.cur
-					if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
 						l = e.matchlen(s+4, t+4, src) + 4
 						ml1 := e.matchlen(s+4, t2+4, src) + 4
 						if ml1 > l {
@@ -135,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 				}
 				// Current value did not match, but check if previous long value does.
 				t = lCandidate.Prev.offset - e.cur
-				if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
 					// Store the next match
-					e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 					eLong := &e.bTable[nextHashL]
-					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 					break
 				}
 			}
 
 			t = sCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
 				// Found a 4 match...
 				l = e.matchlen(s+4, t+4, src) + 4
 
@@ -153,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 				lCandidate = e.bTable[nextHashL]
 
 				// Store the next match
-				e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 				eLong := &e.bTable[nextHashL]
-				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
 
 				// Check repeat at s + repOff
 				const repOff = 1
@@ -174,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 				// If the next long is a candidate, use that...
 				t2 = lCandidate.Cur.offset - e.cur
 				if nextS-t2 < maxMatchOffset {
-					if lCandidate.Cur.val == uint32(next) {
+					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
 						ml := e.matchlen(nextS+4, t2+4, src) + 4
 						if ml > l {
 							t = t2
@@ -185,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 					}
 					// If the previous long is a candidate, use that...
 					t2 = lCandidate.Prev.offset - e.cur
-					if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
+					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
 						ml := e.matchlen(nextS+4, t2+4, src) + 4
 						if ml > l {
 							t = t2
@@ -244,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			// Index after match end.
 			for i := nextS + 1; i < int32(len(src))-8; i += 2 {
 				cv := load6432(src, i)
-				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
+				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
 				eLong := &e.bTable[hash7(cv, tableBits)]
-				eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
+				eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
 			}
 			goto emitRemainder
 		}
@@ -255,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 		if true {
 			for i := nextS + 1; i < s-1; i += 2 {
 				cv := load6432(src, i)
-				t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-				t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
 				eLong := &e.bTable[hash7(cv, tableBits)]
 				eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
 				e.table[hash4x64(cv, tableBits)] = t
diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go
index 099c0ddbc..f9abf606d 100644
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@@ -262,7 +262,7 @@ func (t *tokens) EstimatedBits() int {
 // AddMatch adds a match to the tokens.
 // This function is very sensitive to inlining and right on the border.
 func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
-	if debugDecode {
+	if debugDeflate {
 		if xlength >= maxMatchLength+baseMatchLength {
 			panic(fmt.Errorf("invalid length: %v", xlength))
 		}
@@ -281,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
 // AddMatchLong adds a match to the tokens, potentially longer than max match length.
 // Length should NOT have the base subtracted, only offset should.
 func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
-	if debugDecode {
+	if debugDeflate {
 		if xoffset >= maxMatchOffset+baseMatchOffset {
 			panic(fmt.Errorf("invalid offset: %v", xoffset))
 		}
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index 507757d52..4f0eba22f 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -806,7 +806,7 @@ func (b *blockEnc) genCodes() {
 		mlH[v]++
 		if v > mlMax {
 			mlMax = v
-			if debug && mlMax > maxMatchLengthSymbol {
+			if debugAsserts && mlMax > maxMatchLengthSymbol {
 				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
 			}
 		}
@@ -821,13 +821,13 @@ func (b *blockEnc) genCodes() {
 		}
 		return int(max)
 	}
-	if mlMax > maxMatchLengthSymbol {
+	if debugAsserts && mlMax > maxMatchLengthSymbol {
 		panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
 	}
-	if ofMax > maxOffsetBits {
+	if debugAsserts && ofMax > maxOffsetBits {
 		panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
 	}
-	if llMax > maxLiteralLengthSymbol {
+	if debugAsserts && llMax > maxLiteralLengthSymbol {
 		panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
 	}
 
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index 07321acb1..658ef7838 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -30,7 +30,7 @@ type byteBuffer interface {
 type byteBuf []byte
 
 func (b *byteBuf) readSmall(n int) []byte {
-	if debug && n > 8 {
+	if debugAsserts && n > 8 {
 		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
 	}
 	bb := *b
@@ -82,7 +82,7 @@ type readerWrapper struct {
 }
 
 func (r *readerWrapper) readSmall(n int) []byte {
-	if debug && n > 8 {
+	if debugAsserts && n > 8 {
 		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
 	}
 	n2, err := io.ReadFull(r.r, r.tmp[:n])
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 35a3cda91..73ac3c630 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -315,7 +315,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			if size > 1<<20 {
 				size = 1 << 20
 			}
-			dst = make([]byte, 0, frame.WindowSize)
+			dst = make([]byte, 0, size)
 		}
 
 		dst, err = frame.runDecoder(dst, block)
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index ee3b09b02..0ffea7655 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -4,6 +4,8 @@
 
 package zstd
 
+import "fmt"
+
 const (
 	dFastLongTableBits = 17                      // Bits used in the long match table
 	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
@@ -29,7 +31,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur > (1<<30)+e.maxMatchOff {
+	for e.cur >= bufferReset {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -61,6 +63,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 			e.longTable[i].offset = v
 		}
 		e.cur = e.maxMatchOff
+		break
 	}
 
 	s := e.addBlock(src)
@@ -110,7 +113,7 @@ encodeLoop:
 		canRepeat := len(blk.sequences) > 2
 
 		for {
-			if debug && canRepeat && offset1 == 0 {
+			if debugAsserts && canRepeat && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -229,10 +232,10 @@ encodeLoop:
 				// Reference encoder checks all 8 bytes, we only check 4,
 				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
 				t = candidateL.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
 				if debugMatches {
@@ -266,13 +269,13 @@ encodeLoop:
 				}
 
 				t = candidateS.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
-				if debug && t < 0 {
+				if debugAsserts && t < 0 {
 					panic("t<0")
 				}
 				if debugMatches {
@@ -294,11 +297,11 @@ encodeLoop:
 		offset2 = offset1
 		offset1 = s - t
 
-		if debug && s <= t {
-			panic("s <= t")
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debug && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && canRepeat && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -424,7 +427,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	if e.cur > (1<<30)+e.maxMatchOff {
+	if e.cur >= bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -545,10 +548,10 @@ encodeLoop:
 				// Reference encoder checks all 8 bytes, we only check 4,
 				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
 				t = candidateL.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
 				if debugMatches {
@@ -582,13 +585,13 @@ encodeLoop:
 				}
 
 				t = candidateS.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
-				if debug && t < 0 {
+				if debugAsserts && t < 0 {
 					panic("t<0")
 				}
 				if debugMatches {
@@ -610,8 +613,8 @@ encodeLoop:
 		offset2 = offset1
 		offset1 = s - t
 
-		if debug && s <= t {
-			panic("s <= t")
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
 		// Extend the 4-byte match as long as possible.
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index 0bdddac5b..28134b158 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -5,6 +5,7 @@
 package zstd
 
 import (
+	"fmt"
 	"math/bits"
 
 	"github.com/klauspost/compress/zstd/internal/xxhash"
@@ -74,7 +75,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur > (1<<30)+e.maxMatchOff {
+	for e.cur >= bufferReset {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -94,6 +95,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 			e.table[i].offset = v
 		}
 		e.cur = e.maxMatchOff
+		break
 	}
 
 	s := e.addBlock(src)
@@ -151,7 +153,7 @@ encodeLoop:
 		canRepeat := len(blk.sequences) > 2
 
 		for {
-			if debug && canRepeat && offset1 == 0 {
+			if debugAsserts && canRepeat && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
@@ -212,10 +214,10 @@ encodeLoop:
 			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
 				// found a regular match
 				t = candidate.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
 				break
@@ -225,13 +227,13 @@ encodeLoop:
 				// found a regular match
 				t = candidate2.offset - e.cur
 				s++
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
-				if debug && t < 0 {
+				if debugAsserts && t < 0 {
 					panic("t<0")
 				}
 				break
@@ -246,11 +248,11 @@ encodeLoop:
 		offset2 = offset1
 		offset1 = s - t
 
-		if debug && s <= t {
-			panic("s <= t")
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
-		if debug && canRepeat && int(offset1) > len(src) {
+		if debugAsserts && canRepeat && int(offset1) > len(src) {
 			panic("invalid offset")
 		}
 
@@ -343,7 +345,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 		}
 	}
 	// Protect against e.cur wraparound.
-	if e.cur > (1<<30)+e.maxMatchOff {
+	if e.cur >= bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -456,10 +458,10 @@ encodeLoop:
 			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
 				// found a regular match
 				t = candidate.offset - e.cur
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
 				break
@@ -469,13 +471,13 @@ encodeLoop:
 				// found a regular match
 				t = candidate2.offset - e.cur
 				s++
-				if debug && s <= t {
-					panic("s <= t")
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 				}
-				if debug && s-t > e.maxMatchOff {
+				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
 				}
-				if debug && t < 0 {
+				if debugAsserts && t < 0 {
 					panic("t<0")
 				}
 				break
@@ -490,8 +492,8 @@ encodeLoop:
 		offset2 = offset1
 		offset1 = s - t
 
-		if debug && s <= t {
-			panic("s <= t")
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
 		}
 
 		// Extend the 4-byte match as long as possible.
@@ -570,6 +572,9 @@ encodeLoop:
 }
 
 func (e *fastEncoder) addBlock(src []byte) int32 {
+	if debugAsserts && e.cur > bufferReset {
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+	}
 	// check if we have space already
 	if len(e.hist)+len(src) > cap(e.hist) {
 		if cap(e.hist) == 0 {
@@ -608,15 +613,18 @@ func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 {
 }
 
 func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
-	if debug {
+	if debugAsserts {
 		if s < 0 {
-			panic("s<0")
+			err := fmt.Sprintf("s (%d) < 0", s)
+			panic(err)
 		}
 		if t < 0 {
-			panic("t<0")
+			err := fmt.Sprintf("t (%d) < 0", t)
+			panic(err)
 		}
 		if s-t > e.maxMatchOff {
-			panic(s - t)
+			err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
+			panic(err)
 		}
 	}
 	s1 := int(s) + maxMatchLength - 4
@@ -650,7 +658,10 @@ func (e *fastEncoder) Reset() {
 		}
 		e.hist = make([]byte, 0, l)
 	}
-	// We offset current position so everything will be out of reach
-	e.cur += e.maxMatchOff + int32(len(e.hist))
+	// We offset current position so everything will be out of reach.
+	// If above reset line, history will be purged.
+	if e.cur < bufferReset {
+		e.cur += e.maxMatchOff + int32(len(e.hist))
+	}
 	e.hist = e.hist[:0]
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 366dd66bd..4032fb9fc 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -156,7 +156,7 @@ func (e *Encoder) Write(p []byte) (n int, err error) {
 		if err != nil {
 			return n, err
 		}
-		if debug && len(s.filling) > 0 {
+		if debugAsserts && len(s.filling) > 0 {
 			panic(len(s.filling))
 		}
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 40790747a..cda590b5f 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -50,7 +50,7 @@ type frameDec struct {
 const (
 	// The minimum Window_Size is 1 KB.
 	MinWindowSize = 1 << 10
-	MaxWindowSize = 1 << 30
+	MaxWindowSize = 1 << 29
 )
 
 var (
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index 9efe34feb..e002be98b 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -118,7 +118,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
 
 		if int32(bitStream)&(threshold-1) < max {
 			count = int32(bitStream) & (threshold - 1)
-			if debug && nbBits < 1 {
+			if debugAsserts && nbBits < 1 {
 				panic("nbBits underflow")
 			}
 			bitCount += nbBits - 1
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index 619836f52..aa9eba88b 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -327,7 +327,7 @@ func (s *fseEncoder) normalizeCount(length int) error {
 		if err != nil {
 			return err
 		}
-		if debug {
+		if debugAsserts {
 			err = s.validateNorm()
 			if err != nil {
 				return err
@@ -336,7 +336,7 @@ func (s *fseEncoder) normalizeCount(length int) error {
 		return s.buildCTable()
 	}
 	s.norm[largest] += stillToDistribute
-	if debug {
+	if debugAsserts {
 		err := s.validateNorm()
 		if err != nil {
 			return err
@@ -619,7 +619,7 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
 func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
 	minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16
 	threshold := (minNbBits + 1) << 16
-	if debug {
+	if debugAsserts {
 		if !(s.actualTableLog < 16) {
 			panic("!s.actualTableLog < 16")
 		}
@@ -633,7 +633,7 @@ func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
 	// linear interpolation (very approximate)
 	normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog
 	bitMultiplier := uint32(1) << accuracyLog
-	if debug {
+	if debugAsserts {
 		if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold {
 			panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold")
 		}
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
index d580e32ae..2c9c5357a 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -179,13 +179,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40
 	MOVQ ·prime2v(SB), R14
 
 	// Load slice.
-	MOVQ b_base+8(FP), CX
-	MOVQ b_len+16(FP), DX
+	MOVQ arg1_base+8(FP), CX
+	MOVQ arg1_len+16(FP), DX
 	LEAQ (CX)(DX*1), BX
 	SUBQ $32, BX
 
 	// Load vN from d.
-	MOVQ d+0(FP), AX
+	MOVQ arg+0(FP), AX
 	MOVQ 0(AX), R8   // v1
 	MOVQ 8(AX), R9   // v2
 	MOVQ 16(AX), R10 // v3
@@ -209,7 +209,7 @@ blockLoop:
 	MOVQ R11, 24(AX)
 
 	// The number of bytes written is CX minus the old base pointer.
-	SUBQ b_base+8(FP), CX
+	SUBQ arg1_base+8(FP), CX
 	MOVQ CX, ret+32(FP)
 
 	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 57a8a2f5b..5e0b64ccc 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -6,11 +6,20 @@ package zstd
 import (
 	"errors"
 	"log"
+	"math"
 	"math/bits"
 )
 
+// Enable debug printing.
 const debug = false
+
+// Enable extra assertions.
+const debugAsserts = debug || false
+
+// Print sequence details.
 const debugSequences = false
+
+// Print detailed matching information.
 const debugMatches = false
 
 // force encoder to use predefined tables.
@@ -19,6 +28,9 @@ const forcePreDef = false
 // zstdMinMatch is the minimum zstd match length.
 const zstdMinMatch = 3
 
+// Reset the buffer offset when reaching this.
+const bufferReset = math.MaxInt32 - MaxWindowSize
+
 var (
 	// ErrReservedBlockType is returned when a reserved block type is found.
 	// Typically this indicates wrong or corrupted input.