24 files changed, 668 insertions, 489 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 6d4c1e98b..4a73e1bdd 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -127,7 +127,7 @@ func (e *fastGen) addBlock(src []byte) int32 {
 // hash4 returns the hash of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <32.
 func hash4u(u uint32, h uint8) uint32 {
-	return (u * prime4bytes) >> ((32 - h) & 31)
+	return (u * prime4bytes) >> ((32 - h) & reg8SizeMask32)
 }
 
 type tableEntryPrev struct {
@@ -138,25 +138,25 @@ type tableEntryPrev struct {
 // hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <32.
 func hash4x64(u uint64, h uint8) uint32 {
-	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
+	return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
 }
 
 // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <64.
 func hash7(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
+	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
 }
 
 // hash8 returns the hash of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <64.
 func hash8(u uint64, h uint8) uint32 {
-	return uint32((u * prime8bytes) >> ((64 - h) & 63))
+	return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
 }
 
 // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <64.
 func hash6(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
+	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
 }
 
 // matchlen will return the match length between offsets and t in src.
diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go
index c74a95fe7..b26d19ec2 100644
--- a/vendor/github.com/klauspost/compress/flate/gen_inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/gen_inflate.go
@@ -85,7 +85,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -104,7 +104,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -167,15 +167,15 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<5:", err)
 					}
@@ -183,17 +183,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -202,9 +204,9 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<nb:", err)
 					}
@@ -212,10 +214,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -225,7 +227,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -233,7 +235,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 53fe1d06e..208d66711 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -206,7 +206,7 @@ func (w *huffmanBitWriter) write(b []byte) {
 }
 
 func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
-	w.bits |= uint64(b) << (w.nbits & 63)
+	w.bits |= uint64(b) << (w.nbits & reg16SizeMask64)
 	w.nbits += nb
 	if w.nbits >= 48 {
 		w.writeOutBits()
@@ -759,7 +759,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 		} else {
 			// inlined
 			c := lengths[lengthCode&31]
-			w.bits |= uint64(c.code) << (w.nbits & 63)
+			w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
 			w.nbits += c.len
 			if w.nbits >= 48 {
 				w.writeOutBits()
@@ -779,7 +779,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 		} else {
 			// inlined
 			c := offs[offsetCode&31]
-			w.bits |= uint64(c.code) << (w.nbits & 63)
+			w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
 			w.nbits += c.len
 			if w.nbits >= 48 {
 				w.writeOutBits()
@@ -878,7 +878,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
 	for _, t := range input {
 		// Bitwriting inlined, ~30% speedup
 		c := encoding[t]
-		w.bits |= uint64(c.code) << ((w.nbits) & 63)
+		w.bits |= uint64(c.code) << ((w.nbits) & reg16SizeMask64)
 		w.nbits += c.len
 		if w.nbits >= 48 {
 			bits := w.bits
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 3e4259f15..189e9fe0b 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -522,8 +522,8 @@ func (f *decompressor) readHuffman() error {
 				return err
 			}
 		}
-		rep += int(f.b & uint32(1<<nb-1))
-		f.b >>= nb
+		rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
+		f.b >>= nb & regSizeMaskUint32
 		f.nb -= nb
 		if i+rep > n {
 			if debugDecode {
@@ -603,7 +603,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -622,7 +622,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -685,12 +685,12 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
 				if err = f.moreBits(); err != nil {
@@ -701,17 +701,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -720,7 +722,7 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
 				if err = f.moreBits(); err != nil {
 					if debugDecode {
@@ -730,10 +732,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -743,7 +745,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -751,7 +753,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
@@ -869,7 +871,7 @@ func (f *decompressor) moreBits() error {
 		return noEOF(err)
 	}
 	f.roffset++
-	f.b |= uint32(c) << f.nb
+	f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
 	f.nb += 8
 	return nil
 }
@@ -894,7 +896,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
 				return 0, noEOF(err)
 			}
 			f.roffset++
-			b |= uint32(c) << (nb & 31)
+			b |= uint32(c) << (nb & regSizeMaskUint32)
 			nb += 8
 		}
 		chunk := h.chunks[b&(huffmanNumChunks-1)]
@@ -913,7 +915,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
 				f.err = CorruptInputError(f.roffset)
 				return 0, f.err
 			}
-			f.b = b >> (n & 31)
+			f.b = b >> (n & regSizeMaskUint32)
 			f.nb = nb - n
 			return int(chunk >> huffmanValueShift), nil
 		}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
index 397dc1b1a..9a92a1b30 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -63,7 +63,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -82,7 +82,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -145,15 +145,15 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<5:", err)
 					}
@@ -161,17 +161,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -180,9 +182,9 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<nb:", err)
 					}
@@ -190,10 +192,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -203,7 +205,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -211,7 +213,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
@@ -287,7 +289,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -306,7 +308,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -369,15 +371,15 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<5:", err)
 					}
@@ -385,17 +387,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -404,9 +408,9 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<nb:", err)
 					}
@@ -414,10 +418,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -427,7 +431,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -435,7 +439,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
@@ -511,7 +515,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -530,7 +534,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -593,15 +597,15 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<5:", err)
 					}
@@ -609,17 +613,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -628,9 +634,9 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<nb:", err)
 					}
@@ -638,10 +644,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -651,7 +657,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -659,7 +665,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
@@ -735,7 +741,7 @@ readLiteral:
 						return
 					}
 					f.roffset++
-					b |= uint32(c) << (nb & 31)
+					b |= uint32(c) << (nb & regSizeMaskUint32)
 					nb += 8
 				}
 				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
@@ -754,7 +760,7 @@ readLiteral:
 						f.err = CorruptInputError(f.roffset)
 						return
 					}
-					f.b = b >> (n & 31)
+					f.b = b >> (n & regSizeMaskUint32)
 					f.nb = nb - n
 					v = int(chunk >> huffmanValueShift)
 					break
@@ -817,15 +823,15 @@ readLiteral:
 					return
 				}
 			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
+			length += int(f.b & uint32(1<<(n&regSizeMaskUint32)-1))
+			f.b >>= n & regSizeMaskUint32
 			f.nb -= n
 		}
 
-		var dist int
+		var dist uint32
 		if f.hd == nil {
 			for f.nb < 5 {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<5:", err)
 					}
@@ -833,17 +839,19 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			dist = uint32(bits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
+			sym, err := f.huffSym(f.hd)
+			if err != nil {
 				if debugDecode {
 					fmt.Println("huffsym:", err)
 				}
 				f.err = err
 				return
 			}
+			dist = uint32(sym)
 		}
 
 		switch {
@@ -852,9 +860,9 @@ readLiteral:
 		case dist < maxNumDist:
 			nb := uint(dist-2) >> 1
 			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
+			extra := (dist & 1) << (nb & regSizeMaskUint32)
 			for f.nb < nb {
-				if err = moreBits(); err != nil {
+				if err = f.moreBits(); err != nil {
 					if debugDecode {
 						fmt.Println("morebits f.nb<nb:", err)
 					}
@@ -862,10 +870,10 @@ readLiteral:
 					return
 				}
 			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
+			extra |= f.b & uint32(1<<(nb&regSizeMaskUint32)-1)
+			f.b >>= nb & regSizeMaskUint32
 			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
+			dist = 1<<((nb+1)&regSizeMaskUint32) + 1 + extra
 		default:
 			if debugDecode {
 				fmt.Println("dist too big:", dist, maxNumDist)
@@ -875,7 +883,7 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
+		if dist > uint32(f.dict.histSize()) {
 			if debugDecode {
 				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
 			}
@@ -883,7 +891,7 @@ readLiteral:
 			return
 		}
 
-		f.copyLen, f.copyDist = length, dist
+		f.copyLen, f.copyDist = length, int(dist)
 		goto copyHistory
 	}
 
diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
new file mode 100644
index 000000000..6ed28061b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go
@@ -0,0 +1,37 @@
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 7
+	reg8SizeMask16 = 15
+	reg8SizeMask32 = 31
+	reg8SizeMask64 = 63
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = reg8SizeMask8
+	reg16SizeMask16 = reg8SizeMask16
+	reg16SizeMask32 = reg8SizeMask32
+	reg16SizeMask64 = reg8SizeMask64
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = reg8SizeMask8
+	reg32SizeMask16 = reg8SizeMask16
+	reg32SizeMask32 = reg8SizeMask32
+	reg32SizeMask64 = reg8SizeMask64
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = reg8SizeMask8
+	reg64SizeMask16 = reg8SizeMask16
+	reg64SizeMask32 = reg8SizeMask32
+	reg64SizeMask64 = reg8SizeMask64
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = reg8SizeMask8
+	regSizeMaskUint16 = reg8SizeMask16
+	regSizeMaskUint32 = reg8SizeMask32
+	regSizeMaskUint64 = reg8SizeMask64
+)
diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go
new file mode 100644
index 000000000..f477a5d6e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go
@@ -0,0 +1,39 @@
+//+build !amd64
+
+package flate
+
+const (
+	// Masks for shifts with register sizes of the shift value.
+	// This can be used to work around the x86 design of shifting by mod register size.
+	// It can be used when a variable shift is always smaller than the register size.
+
+	// reg8SizeMaskX - shift value is 8 bits, shifted is X
+	reg8SizeMask8  = 0xff
+	reg8SizeMask16 = 0xff
+	reg8SizeMask32 = 0xff
+	reg8SizeMask64 = 0xff
+
+	// reg16SizeMaskX - shift value is 16 bits, shifted is X
+	reg16SizeMask8  = 0xffff
+	reg16SizeMask16 = 0xffff
+	reg16SizeMask32 = 0xffff
+	reg16SizeMask64 = 0xffff
+
+	// reg32SizeMaskX - shift value is 32 bits, shifted is X
+	reg32SizeMask8  = 0xffffffff
+	reg32SizeMask16 = 0xffffffff
+	reg32SizeMask32 = 0xffffffff
+	reg32SizeMask64 = 0xffffffff
+
+	// reg64SizeMaskX - shift value is 64 bits, shifted is X
+	reg64SizeMask8  = 0xffffffffffffffff
+	reg64SizeMask16 = 0xffffffffffffffff
+	reg64SizeMask32 = 0xffffffffffffffff
+	reg64SizeMask64 = 0xffffffffffffffff
+
+	// regSizeMaskUintX - shift value is uint, shifted is X
+	regSizeMaskUint8  = ^uint(0)
+	regSizeMaskUint16 = ^uint(0)
+	regSizeMaskUint32 = ^uint(0)
+	regSizeMaskUint64 = ^uint(0)
+)
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index 5dd66854b..7ec2022b6 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -119,6 +119,16 @@ type Scratch struct {
 	huffWeight     [maxSymbolValue + 1]byte
 }
 
+// TransferCTable will transfer the previously used compression table.
+func (s *Scratch) TransferCTable(src *Scratch) {
+	if cap(s.prevTable) < len(src.prevTable) {
+		s.prevTable = make(cTable, 0, maxSymbolValue+1)
+	}
+	s.prevTable = s.prevTable[:len(src.prevTable)]
+	copy(s.prevTable, src.prevTable)
+	s.prevTableLog = src.prevTableLog
+}
+
 func (s *Scratch) prepare(in []byte) (*Scratch, error) {
 	if len(in) > BlockSizeMax {
 		return nil, ErrTooBig
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index ac3640dc9..ea3e51082 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -5,7 +5,6 @@ It offers a very wide range of compression / speed trade-off, while being backed
 A high performance compression algorithm is implemented. For now focused on speed. 
 
 This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. 
-Note that custom dictionaries are only supported for decompression.
 
 This package is pure Go and without use of "unsafe". 
 
@@ -232,41 +231,6 @@ nyc-taxi-data-10M.csv   gzstd   1   3325605752  928656485   23876   132.83
 nyc-taxi-data-10M.csv   gzkp    1   3325605752  924718719   16388   193.53
 ```
 
-### Converters
-
-As part of the development process a *Snappy* -> *Zstandard* converter was also built.
-
-This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. 
-Note that a single block is not framed.
-
-Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
-Therefore the compression ratio is much less than what can be done by a full decompression
-and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without
-any errors being generated.
-No CRC value is being generated and not all CRC values of the Snappy stream are checked.
-However, it provides really fast re-compression of Snappy streams.
-
-
-```
-BenchmarkSnappy_ConvertSilesia-8           1  1156001600 ns/op   183.35 MB/s
-Snappy len 103008711 -> zstd len 82687318
-
-BenchmarkSnappy_Enwik9-8           1  6472998400 ns/op   154.49 MB/s
-Snappy len 508028601 -> zstd len 390921079
-```
-
-
-```Go
-    s := zstd.SnappyConverter{}
-    n, err = s.Convert(input, output)
-    if err != nil {
-        fmt.Println("Re-compressed stream to", n, "bytes")
-    }
-```
-
-The converter `s` can be reused to avoid allocations, even after errors.
-
-
 ## Decompressor
 
 Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
@@ -337,6 +301,21 @@ A re-used Decoder will still contain the dictionaries registered.
 
 When registering multiple dictionaries with the same ID, the last one will be used.
 
+It is possible to use dictionaries when compressing data.
+
+To enable a dictionary use `WithEncoderDict(dict []byte)`. Here only one dictionary will be used 
+and it will likely be used even if it doesn't improve compression. 
+
+The used dictionary must be used to decompress the content.
+
+For any real gains, the dictionary should be built with similar data. 
+If an unsuitable dictionary is used the output may be slightly larger than using no dictionary.
+Use the [zstd commandline tool](https://github.com/facebook/zstd/releases) to build a dictionary from sample data.
+For information see [zstd dictionary information](https://github.com/facebook/zstd#the-case-for-small-data-compression). 
+
+For now there is a fixed startup performance penalty for compressing content with dictionaries. 
+This will likely be improved over time. Just be aware to test performance when implementing.  
+
 ### Allocation-less operation
 
 The decoder has been designed to operate without allocations after a warmup. 
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index c8ec6e331..4733ea876 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -646,7 +646,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		}
 	} else {
 		if hist.huffTree != nil && huff != nil {
-			if hist.dict == nil || hist.dict.litDec != hist.huffTree {
+			if hist.dict == nil || hist.dict.litEnc != hist.huffTree {
 				huffDecoderPool.Put(hist.huffTree)
 			}
 			hist.huffTree = nil
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index be718afd4..083fbb502 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -14,12 +14,13 @@ import (
 )
 
 type blockEnc struct {
-	size      int
-	literals  []byte
-	sequences []seq
-	coders    seqCoders
-	litEnc    *huff0.Scratch
-	wr        bitWriter
+	size       int
+	literals   []byte
+	sequences  []seq
+	coders     seqCoders
+	litEnc     *huff0.Scratch
+	dictLitEnc *huff0.Scratch
+	wr         bitWriter
 
 	extraLits int
 	last      bool
@@ -314,19 +315,19 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
 }
 
 // encodeLits can be used if the block is only litLen.
-func (b *blockEnc) encodeLits(raw bool) error {
+func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
 	var bh blockHeader
 	bh.setLast(b.last)
-	bh.setSize(uint32(len(b.literals)))
+	bh.setSize(uint32(len(lits)))
 
 	// Don't compress extremely small blocks
-	if len(b.literals) < 32 || raw {
+	if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
 		if debug {
-			println("Adding RAW block, length", len(b.literals), "last:", b.last)
+			println("Adding RAW block, length", len(lits), "last:", b.last)
 		}
 		bh.setType(blockTypeRaw)
 		b.output = bh.appendTo(b.output)
-		b.output = append(b.output, b.literals...)
+		b.output = append(b.output, lits...)
 		return nil
 	}
 
@@ -335,13 +336,18 @@ func (b *blockEnc) encodeLits(raw bool) error {
 		reUsed, single bool
 		err            error
 	)
-	if len(b.literals) >= 1024 {
+	if b.dictLitEnc != nil {
+		b.litEnc.TransferCTable(b.dictLitEnc)
+		b.litEnc.Reuse = huff0.ReusePolicyAllow
+		b.dictLitEnc = nil
+	}
+	if len(lits) >= 1024 {
 		// Use 4 Streams.
-		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-	} else if len(b.literals) > 32 {
+		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
+	} else if len(lits) > 32 {
 		// Use 1 stream
 		single = true
-		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
 	} else {
 		err = huff0.ErrIncompressible
 	}
@@ -349,19 +355,19 @@ func (b *blockEnc) encodeLits(raw bool) error {
 	switch err {
 	case huff0.ErrIncompressible:
 		if debug {
-			println("Adding RAW block, length", len(b.literals), "last:", b.last)
+			println("Adding RAW block, length", len(lits), "last:", b.last)
 		}
 		bh.setType(blockTypeRaw)
 		b.output = bh.appendTo(b.output)
-		b.output = append(b.output, b.literals...)
+		b.output = append(b.output, lits...)
 		return nil
 	case huff0.ErrUseRLE:
 		if debug {
-			println("Adding RLE block, length", len(b.literals))
+			println("Adding RLE block, length", len(lits))
 		}
 		bh.setType(blockTypeRLE)
 		b.output = bh.appendTo(b.output)
-		b.output = append(b.output, b.literals[0])
+		b.output = append(b.output, lits[0])
 		return nil
 	default:
 		return err
@@ -384,7 +390,7 @@ func (b *blockEnc) encodeLits(raw bool) error {
 		lh.setType(literalsBlockCompressed)
 	}
 	// Set sizes
-	lh.setSizes(len(out), len(b.literals), single)
+	lh.setSizes(len(out), len(lits), single)
 	bh.setSize(uint32(len(out) + lh.size() + 1))
 
 	// Write block headers.
@@ -444,13 +450,19 @@ func fuzzFseEncoder(data []byte) int {
 }
 
 // encode will encode the block and append the output in b.output.
-func (b *blockEnc) encode(raw, rawAllLits bool) error {
+// Previous offset codes must be pushed if more blocks are expected.
+func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if len(b.sequences) == 0 {
-		return b.encodeLits(rawAllLits)
+		return b.encodeLits(b.literals, rawAllLits)
 	}
-	// We want some difference
-	if len(b.literals) > (b.size - (b.size >> 5)) {
-		return errIncompressible
+	// We want some difference to at least account for the headers.
+	saved := b.size - len(b.literals) - (b.size >> 5)
+	if saved < 16 {
+		if org == nil {
+			return errIncompressible
+		}
+		b.popOffsets()
+		return b.encodeLits(org, rawAllLits)
 	}
 
 	var bh blockHeader
@@ -466,6 +478,11 @@ func (b *blockEnc) encode(raw, rawAllLits bool) error {
 		reUsed, single bool
 		err            error
 	)
+	if b.dictLitEnc != nil {
+		b.litEnc.TransferCTable(b.dictLitEnc)
+		b.litEnc.Reuse = huff0.ReusePolicyAllow
+		b.dictLitEnc = nil
+	}
 	if len(b.literals) >= 1024 && !raw {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 66b51bf2d..d78be6d42 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -187,7 +187,7 @@ func (d *Decoder) Reset(r io.Reader) error {
 		d.current.err = err
 		d.current.flushed = true
 		if debug {
-			println("sync decode to ", len(dst), "bytes, err:", err)
+			println("sync decode to", len(dst), "bytes, err:", err)
 		}
 		return nil
 	}
@@ -303,6 +303,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		frame.history.reset()
 		err := frame.reset(&frame.bBuf)
 		if err == io.EOF {
+			if debug {
+				println("frame reset return EOF")
+			}
 			return dst, nil
 		}
 		if frame.DictionaryID != nil {
@@ -341,6 +344,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			return dst, err
 		}
 		if len(frame.bBuf) == 0 {
+			if debug {
+				println("frame dbuf empty")
+			}
 			break
 		}
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
index 8eb6f6ba3..fa25a18d8 100644
--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -13,14 +13,31 @@ import (
 type dict struct {
 	id uint32
 
-	litDec              *huff0.Scratch
+	litEnc              *huff0.Scratch
 	llDec, ofDec, mlDec sequenceDec
-	offsets             [3]int
-	content             []byte
+	//llEnc, ofEnc, mlEnc []*fseEncoder
+	offsets [3]int
+	content []byte
 }
 
 var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
 
+// ID returns the dictionary id or 0 if d is nil.
+func (d *dict) ID() uint32 {
+	if d == nil {
+		return 0
+	}
+	return d.id
+}
+
+// DictContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) DictContentSize() int {
+	if d == nil {
+		return 0
+	}
+	return len(d.content)
+}
+
 // Load a dictionary as described in
 // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 func loadDict(b []byte) (*dict, error) {
@@ -43,10 +60,11 @@ func loadDict(b []byte) (*dict, error) {
 
 	// Read literal table
 	var err error
-	d.litDec, b, err = huff0.ReadTable(b[8:], nil)
+	d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
 	if err != nil {
 		return nil, err
 	}
+	d.litEnc.Reuse = huff0.ReusePolicyMust
 
 	br := byteReader{
 		b:   b,
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
new file mode 100644
index 000000000..b1b7c6e6a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -0,0 +1,155 @@
+package zstd
+
+import (
+	"fmt"
+	"math/bits"
+
+	"github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+type fastBase struct {
+	// cur is the offset at the start of hist
+	cur int32
+	// maximum offset. Should be at least 2x block size.
+	maxMatchOff int32
+	hist        []byte
+	crc         *xxhash.Digest
+	tmp         [8]byte
+	blk         *blockEnc
+	lastDictID  uint32
+}
+
+// CRC returns the underlying CRC writer.
+func (e *fastBase) CRC() *xxhash.Digest {
+	return e.crc
+}
+
+// AppendCRC will append the CRC to the destination slice and return it.
+func (e *fastBase) AppendCRC(dst []byte) []byte {
+	crc := e.crc.Sum(e.tmp[:0])
+	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
+	return dst
+}
+
+// WindowSize returns the window size of the encoder,
+// or a window size small enough to contain the input size, if > 0.
+func (e *fastBase) WindowSize(size int) int32 {
+	if size > 0 && size < int(e.maxMatchOff) {
+		b := int32(1) << uint(bits.Len(uint(size)))
+		// Keep minimum window.
+		if b < 1024 {
+			b = 1024
+		}
+		return b
+	}
+	return e.maxMatchOff
+}
+
+// Block returns the current block.
+func (e *fastBase) Block() *blockEnc {
+	return e.blk
+}
+
+func (e *fastBase) addBlock(src []byte) int32 {
+	if debugAsserts && e.cur > bufferReset {
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+	}
+	// check if we have space already
+	if len(e.hist)+len(src) > cap(e.hist) {
+		if cap(e.hist) == 0 {
+			l := e.maxMatchOff * 2
+			// Make it at least 1MB.
+			if l < 1<<20 {
+				l = 1 << 20
+			}
+			e.hist = make([]byte, 0, l)
+		} else {
+			if cap(e.hist) < int(e.maxMatchOff*2) {
+				panic("unexpected buffer size")
+			}
+			// Move down
+			offset := int32(len(e.hist)) - e.maxMatchOff
+			copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
+			e.cur += offset
+			e.hist = e.hist[:e.maxMatchOff]
+		}
+	}
+	s := int32(len(e.hist))
+	e.hist = append(e.hist, src...)
+	return s
+}
+
+// useBlock will replace the block with the provided one,
+// but transfer recent offsets from the previous.
+func (e *fastBase) UseBlock(enc *blockEnc) {
+	enc.reset(e.blk)
+	e.blk = enc
+}
+
+func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
+
+func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
+	if debugAsserts {
+		if s < 0 {
+			err := fmt.Sprintf("s (%d) < 0", s)
+			panic(err)
+		}
+		if t < 0 {
+			err := fmt.Sprintf("s (%d) < 0", s)
+			panic(err)
+		}
+		if s-t > e.maxMatchOff {
+			err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
+			panic(err)
+		}
+		if len(src)-int(s) > maxCompressedBlockSize {
+			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
+		}
+	}
+
+	// Extend the match to be as long as possible.
+	return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastBase) resetBase(d *dict, singleBlock bool) {
+	if e.blk == nil {
+		e.blk = &blockEnc{}
+		e.blk.init()
+	} else {
+		e.blk.reset(nil)
+	}
+	e.blk.initNewEncode()
+	if e.crc == nil {
+		e.crc = xxhash.New()
+	} else {
+		e.crc.Reset()
+	}
+	if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() {
+		l := e.maxMatchOff*2 + int32(d.DictContentSize())
+		// Make it at least 1MB.
+		if l < 1<<20 {
+			l = 1 << 20
+		}
+		e.hist = make([]byte, 0, l)
+	}
+	// We offset current position so everything will be out of reach.
+	// If above reset line, history will be purged.
+	if e.cur < bufferReset {
+		e.cur += e.maxMatchOff + int32(len(e.hist))
+	}
+	e.hist = e.hist[:0]
+	if d != nil {
+		// Set offsets (currently not used)
+		for i, off := range d.offsets {
+			e.blk.recentOffsets[i] = uint32(off)
+			e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
+		}
+		// Transfer litenc.
+		e.blk.dictLitEnc = d.litEnc
+		e.hist = append(e.hist, d.content...)
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index c120d9054..94a5343d0 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -31,8 +31,10 @@ type prevEntry struct {
 // and that it is longer (lazy matching).
 type betterFastEncoder struct {
 	fastBase
-	table     [betterShortTableSize]tableEntry
-	longTable [betterLongTableSize]prevEntry
+	table         [betterShortTableSize]tableEntry
+	longTable     [betterLongTableSize]prevEntry
+	dictTable     []tableEntry
+	dictLongTable []prevEntry
 }
 
 // Encode improves compression...
@@ -516,3 +518,78 @@ encodeLoop:
 func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	e.Encode(blk, src)
 }
+
+// ResetDict will reset and set a dictionary if not nil
+func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
+	e.resetBase(d, singleBlock)
+	if d == nil {
+		return
+	}
+	// Init or copy dict table
+	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+		if len(e.dictTable) != len(e.table) {
+			e.dictTable = make([]tableEntry, len(e.table))
+		}
+		end := int32(len(d.content)) - 8 + e.maxMatchOff
+		for i := e.maxMatchOff; i < end; i += 4 {
+			const hashLog = betterShortTableBits
+
+			cv := load6432(d.content, i-e.maxMatchOff)
+			nextHash := hash5(cv, hashLog)      // 0 -> 4
+			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
+			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
+			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
+			e.dictTable[nextHash] = tableEntry{
+				val:    uint32(cv),
+				offset: i,
+			}
+			e.dictTable[nextHash1] = tableEntry{
+				val:    uint32(cv >> 8),
+				offset: i + 1,
+			}
+			e.dictTable[nextHash2] = tableEntry{
+				val:    uint32(cv >> 16),
+				offset: i + 2,
+			}
+			e.dictTable[nextHash3] = tableEntry{
+				val:    uint32(cv >> 24),
+				offset: i + 3,
+			}
+		}
+		e.lastDictID = d.id
+	}
+
+	// Init or copy dict table
+	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+		if len(e.dictLongTable) != len(e.longTable) {
+			e.dictLongTable = make([]prevEntry, len(e.longTable))
+		}
+		if len(d.content) >= 8 {
+			cv := load6432(d.content, 0)
+			h := hash8(cv, betterLongTableBits)
+			e.dictLongTable[h] = prevEntry{
+				offset: e.maxMatchOff,
+				prev:   e.dictLongTable[h].offset,
+			}
+
+			end := int32(len(d.content)) - 8 + e.maxMatchOff
+			off := 8 // First to read
+			for i := e.maxMatchOff + 1; i < end; i++ {
+				cv = cv>>8 | (uint64(d.content[off]) << 56)
+				h := hash8(cv, betterLongTableBits)
+				e.dictLongTable[h] = prevEntry{
+					offset: i,
+					prev:   e.dictLongTable[h].offset,
+				}
+				off++
+			}
+		}
+		e.lastDictID = d.id
+	}
+	// Reset table to initial state
+	copy(e.longTable[:], e.dictLongTable)
+
+	e.cur = e.maxMatchOff
+	// Reset table to initial state
+	copy(e.table[:], e.dictTable)
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index 50276bcde..19eebf66e 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -18,7 +18,8 @@ const (
 
 type doubleFastEncoder struct {
 	fastEncoder
-	longTable [dFastLongTableSize]tableEntry
+	longTable     [dFastLongTableSize]tableEntry
+	dictLongTable []tableEntry
 }
 
 // Encode mimmics functionality in zstd_dfast.c
@@ -494,7 +495,7 @@ encodeLoop:
 				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
 				t = candidateL.offset - e.cur
 				if debugAsserts && s <= t {
-					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
 				}
 				if debugAsserts && s-t > e.maxMatchOff {
 					panic("s - t >e.maxMatchOff")
@@ -676,3 +677,37 @@ encodeLoop:
 		e.cur += int32(len(src))
 	}
 }
+
+// ResetDict will reset and set a dictionary if not nil
+func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
+	e.fastEncoder.Reset(d, singleBlock)
+	if d == nil {
+		return
+	}
+
+	// Init or copy dict table
+	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+		if len(e.dictLongTable) != len(e.longTable) {
+			e.dictLongTable = make([]tableEntry, len(e.longTable))
+		}
+		if len(d.content) >= 8 {
+			cv := load6432(d.content, 0)
+			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+				val:    uint32(cv),
+				offset: e.maxMatchOff,
+			}
+			end := int32(len(d.content)) - 8 + e.maxMatchOff
+			for i := e.maxMatchOff + 1; i < end; i++ {
+				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
+				e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+					val:    uint32(cv),
+					offset: i,
+				}
+			}
+		}
+		e.lastDictID = d.id
+	}
+	// Reset table to initial state
+	e.cur = e.maxMatchOff
+	copy(e.longTable[:], e.dictLongTable)
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index 4104b456c..0b301df43 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -8,8 +8,6 @@ import (
 	"fmt"
 	"math"
 	"math/bits"
-
-	"github.com/klauspost/compress/zstd/internal/xxhash"
 )
 
 const (
@@ -24,51 +22,10 @@ type tableEntry struct {
 	offset int32
 }
 
-type fastBase struct {
-	// cur is the offset at the start of hist
-	cur int32
-	// maximum offset. Should be at least 2x block size.
-	maxMatchOff int32
-	hist        []byte
-	crc         *xxhash.Digest
-	tmp         [8]byte
-	blk         *blockEnc
-}
-
 type fastEncoder struct {
 	fastBase
-	table [tableSize]tableEntry
-}
-
-// CRC returns the underlying CRC writer.
-func (e *fastBase) CRC() *xxhash.Digest {
-	return e.crc
-}
-
-// AppendCRC will append the CRC to the destination slice and return it.
-func (e *fastBase) AppendCRC(dst []byte) []byte {
-	crc := e.crc.Sum(e.tmp[:0])
-	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
-	return dst
-}
-
-// WindowSize returns the window size of the encoder,
-// or a window size small enough to contain the input size, if > 0.
-func (e *fastBase) WindowSize(size int) int32 {
-	if size > 0 && size < int(e.maxMatchOff) {
-		b := int32(1) << uint(bits.Len(uint(size)))
-		// Keep minimum window.
-		if b < 1024 {
-			b = 1024
-		}
-		return b
-	}
-	return e.maxMatchOff
-}
-
-// Block returns the current block.
-func (e *fastBase) Block() *blockEnc {
-	return e.blk
+	table     [tableSize]tableEntry
+	dictTable []tableEntry
 }
 
 // Encode mimmics functionality in zstd_fast.c
@@ -660,96 +617,45 @@ encodeLoop:
 	}
 }
 
-func (e *fastBase) addBlock(src []byte) int32 {
-	if debugAsserts && e.cur > bufferReset {
-		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
-	}
-	// check if we have space already
-	if len(e.hist)+len(src) > cap(e.hist) {
-		if cap(e.hist) == 0 {
-			l := e.maxMatchOff * 2
-			// Make it at least 1MB.
-			if l < 1<<20 {
-				l = 1 << 20
-			}
-			e.hist = make([]byte, 0, l)
-		} else {
-			if cap(e.hist) < int(e.maxMatchOff*2) {
-				panic("unexpected buffer size")
-			}
-			// Move down
-			offset := int32(len(e.hist)) - e.maxMatchOff
-			copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
-			e.cur += offset
-			e.hist = e.hist[:e.maxMatchOff]
-		}
+// ResetDict will reset and set a dictionary if not nil
+func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
+	e.resetBase(d, singleBlock)
+	if d == nil {
+		return
 	}
-	s := int32(len(e.hist))
-	e.hist = append(e.hist, src...)
-	return s
-}
 
-// useBlock will replace the block with the provided one,
-// but transfer recent offsets from the previous.
-func (e *fastBase) UseBlock(enc *blockEnc) {
-	enc.reset(e.blk)
-	e.blk = enc
-}
-
-func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
-	// Extend the match to be as long as possible.
-	return int32(matchLen(src[s:], src[t:]))
-}
-
-func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
-	if debugAsserts {
-		if s < 0 {
-			err := fmt.Sprintf("s (%d) < 0", s)
-			panic(err)
-		}
-		if t < 0 {
-			err := fmt.Sprintf("s (%d) < 0", s)
-			panic(err)
-		}
-		if s-t > e.maxMatchOff {
-			err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
-			panic(err)
-		}
-		if len(src)-int(s) > maxCompressedBlockSize {
-			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
+	// Init or copy dict table
+	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+		if len(e.dictTable) != len(e.table) {
+			e.dictTable = make([]tableEntry, len(e.table))
+		}
+		if true {
+			end := e.maxMatchOff + int32(len(d.content)) - 8
+			for i := e.maxMatchOff; i < end; i += 3 {
+				const hashLog = tableBits
+
+				cv := load6432(d.content, i-e.maxMatchOff)
+				nextHash := hash6(cv, hashLog)      // 0 -> 5
+				nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
+				nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
+				e.dictTable[nextHash] = tableEntry{
+					val:    uint32(cv),
+					offset: i,
+				}
+				e.dictTable[nextHash1] = tableEntry{
+					val:    uint32(cv >> 8),
+					offset: i + 1,
+				}
+				e.dictTable[nextHash2] = tableEntry{
+					val:    uint32(cv >> 16),
+					offset: i + 2,
+				}
+			}
 		}
+		e.lastDictID = d.id
 	}
 
-	// Extend the match to be as long as possible.
-	return int32(matchLen(src[s:], src[t:]))
-}
-
-// Reset the encoding table.
-func (e *fastBase) Reset(singleBlock bool) {
-	if e.blk == nil {
-		e.blk = &blockEnc{}
-		e.blk.init()
-	} else {
-		e.blk.reset(nil)
-	}
-	e.blk.initNewEncode()
-	if e.crc == nil {
-		e.crc = xxhash.New()
-	} else {
-		e.crc.Reset()
-	}
-	if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
-		l := e.maxMatchOff * 2
-		// Make it at least 1MB.
-		if l < 1<<20 {
-			l = 1 << 20
-		}
-		e.hist = make([]byte, 0, l)
-	}
-	// We offset current position so everything will be out of reach.
-	// If above reset line, history will be purged.
-	if e.cur < bufferReset {
-		e.cur += e.maxMatchOff + int32(len(e.hist))
-	}
-	e.hist = e.hist[:0]
+	e.cur = e.maxMatchOff
+	// Reset table to initial state
+	copy(e.table[:], e.dictTable)
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_params.go b/vendor/github.com/klauspost/compress/zstd/enc_params.go
deleted file mode 100644
index d874116f7..000000000
--- a/vendor/github.com/klauspost/compress/zstd/enc_params.go
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright 2019+ Klaus Post. All rights reserved.
-// License information can be found in the LICENSE file.
-// Based on work by Yann Collet, released under BSD License.
-
-package zstd
-
-/*
-// encParams are not really used, just here for reference.
-type encParams struct {
-	// largest match distance : larger == more compression, more memory needed during decompression
-	windowLog uint8
-
-	// fully searched segment : larger == more compression, slower, more memory (useless for fast)
-	chainLog uint8
-
-	//  dispatch table : larger == faster, more memory
-	hashLog uint8
-
-	// < nb of searches : larger == more compression, slower
-	searchLog uint8
-
-	// < match length searched : larger == faster decompression, sometimes less compression
-	minMatch uint8
-
-	// acceptable match size for optimal parser (only) : larger == more compression, slower
-	targetLength uint32
-
-	// see ZSTD_strategy definition above
-	strategy strategy
-}
-
-// strategy defines the algorithm to use when generating sequences.
-type strategy uint8
-
-const (
-	// Compression strategies, listed from fastest to strongest
-	strategyFast strategy = iota + 1
-	strategyDfast
-	strategyGreedy
-	strategyLazy
-	strategyLazy2
-	strategyBtlazy2
-	strategyBtopt
-	strategyBtultra
-	strategyBtultra2
-	// note : new strategies _might_ be added in the future.
-	//   Only the order (from fast to strong) is guaranteed
-
-)
-
-var defEncParams = [4][]encParams{
-	{ // "default" - for any srcSize > 256 KB
-		// W,  C,  H,  S,  L, TL, strat
-		{19, 12, 13, 1, 6, 1, strategyFast},       // base for negative levels
-		{19, 13, 14, 1, 7, 0, strategyFast},       // level  1
-		{20, 15, 16, 1, 6, 0, strategyFast},       // level  2
-		{21, 16, 17, 1, 5, 1, strategyDfast},      // level  3
-		{21, 18, 18, 1, 5, 1, strategyDfast},      // level  4
-		{21, 18, 19, 2, 5, 2, strategyGreedy},     // level  5
-		{21, 19, 19, 3, 5, 4, strategyGreedy},     // level  6
-		{21, 19, 19, 3, 5, 8, strategyLazy},       // level  7
-		{21, 19, 19, 3, 5, 16, strategyLazy2},     // level  8
-		{21, 19, 20, 4, 5, 16, strategyLazy2},     // level  9
-		{22, 20, 21, 4, 5, 16, strategyLazy2},     // level 10
-		{22, 21, 22, 4, 5, 16, strategyLazy2},     // level 11
-		{22, 21, 22, 5, 5, 16, strategyLazy2},     // level 12
-		{22, 21, 22, 5, 5, 32, strategyBtlazy2},   // level 13
-		{22, 22, 23, 5, 5, 32, strategyBtlazy2},   // level 14
-		{22, 23, 23, 6, 5, 32, strategyBtlazy2},   // level 15
-		{22, 22, 22, 5, 5, 48, strategyBtopt},     // level 16
-		{23, 23, 22, 5, 4, 64, strategyBtopt},     // level 17
-		{23, 23, 22, 6, 3, 64, strategyBtultra},   // level 18
-		{23, 24, 22, 7, 3, 256, strategyBtultra2}, // level 19
-		{25, 25, 23, 7, 3, 256, strategyBtultra2}, // level 20
-		{26, 26, 24, 7, 3, 512, strategyBtultra2}, // level 21
-		{27, 27, 25, 9, 3, 999, strategyBtultra2}, // level 22
-	},
-	{ // for srcSize <= 256 KB
-		// W,  C,  H,  S,  L,  T, strat
-		{18, 12, 13, 1, 5, 1, strategyFast},        // base for negative levels
-		{18, 13, 14, 1, 6, 0, strategyFast},        // level  1
-		{18, 14, 14, 1, 5, 1, strategyDfast},       // level  2
-		{18, 16, 16, 1, 4, 1, strategyDfast},       // level  3
-		{18, 16, 17, 2, 5, 2, strategyGreedy},      // level  4.
-		{18, 18, 18, 3, 5, 2, strategyGreedy},      // level  5.
-		{18, 18, 19, 3, 5, 4, strategyLazy},        // level  6.
-		{18, 18, 19, 4, 4, 4, strategyLazy},        // level  7
-		{18, 18, 19, 4, 4, 8, strategyLazy2},       // level  8
-		{18, 18, 19, 5, 4, 8, strategyLazy2},       // level  9
-		{18, 18, 19, 6, 4, 8, strategyLazy2},       // level 10
-		{18, 18, 19, 5, 4, 12, strategyBtlazy2},    // level 11.
-		{18, 19, 19, 7, 4, 12, strategyBtlazy2},    // level 12.
-		{18, 18, 19, 4, 4, 16, strategyBtopt},      // level 13
-		{18, 18, 19, 4, 3, 32, strategyBtopt},      // level 14.
-		{18, 18, 19, 6, 3, 128, strategyBtopt},     // level 15.
-		{18, 19, 19, 6, 3, 128, strategyBtultra},   // level 16.
-		{18, 19, 19, 8, 3, 256, strategyBtultra},   // level 17.
-		{18, 19, 19, 6, 3, 128, strategyBtultra2},  // level 18.
-		{18, 19, 19, 8, 3, 256, strategyBtultra2},  // level 19.
-		{18, 19, 19, 10, 3, 512, strategyBtultra2}, // level 20.
-		{18, 19, 19, 12, 3, 512, strategyBtultra2}, // level 21.
-		{18, 19, 19, 13, 3, 999, strategyBtultra2}, // level 22.
-	},
-	{ // for srcSize <= 128 KB
-		// W,  C,  H,  S,  L,  T, strat
-		{17, 12, 12, 1, 5, 1, strategyFast},        // base for negative levels
-		{17, 12, 13, 1, 6, 0, strategyFast},        // level  1
-		{17, 13, 15, 1, 5, 0, strategyFast},        // level  2
-		{17, 15, 16, 2, 5, 1, strategyDfast},       // level  3
-		{17, 17, 17, 2, 4, 1, strategyDfast},       // level  4
-		{17, 16, 17, 3, 4, 2, strategyGreedy},      // level  5
-		{17, 17, 17, 3, 4, 4, strategyLazy},        // level  6
-		{17, 17, 17, 3, 4, 8, strategyLazy2},       // level  7
-		{17, 17, 17, 4, 4, 8, strategyLazy2},       // level  8
-		{17, 17, 17, 5, 4, 8, strategyLazy2},       // level  9
-		{17, 17, 17, 6, 4, 8, strategyLazy2},       // level 10
-		{17, 17, 17, 5, 4, 8, strategyBtlazy2},     // level 11
-		{17, 18, 17, 7, 4, 12, strategyBtlazy2},    // level 12
-		{17, 18, 17, 3, 4, 12, strategyBtopt},      // level 13.
-		{17, 18, 17, 4, 3, 32, strategyBtopt},      // level 14.
-		{17, 18, 17, 6, 3, 256, strategyBtopt},     // level 15.
-		{17, 18, 17, 6, 3, 128, strategyBtultra},   // level 16.
-		{17, 18, 17, 8, 3, 256, strategyBtultra},   // level 17.
-		{17, 18, 17, 10, 3, 512, strategyBtultra},  // level 18.
-		{17, 18, 17, 5, 3, 256, strategyBtultra2},  // level 19.
-		{17, 18, 17, 7, 3, 512, strategyBtultra2},  // level 20.
-		{17, 18, 17, 9, 3, 512, strategyBtultra2},  // level 21.
-		{17, 18, 17, 11, 3, 999, strategyBtultra2}, // level 22.
-	},
-	{ // for srcSize <= 16 KB
-		// W,  C,  H,  S,  L,  T, strat
-		{14, 12, 13, 1, 5, 1, strategyFast},        // base for negative levels
-		{14, 14, 15, 1, 5, 0, strategyFast},        // level  1
-		{14, 14, 15, 1, 4, 0, strategyFast},        // level  2
-		{14, 14, 15, 2, 4, 1, strategyDfast},       // level  3
-		{14, 14, 14, 4, 4, 2, strategyGreedy},      // level  4
-		{14, 14, 14, 3, 4, 4, strategyLazy},        // level  5.
-		{14, 14, 14, 4, 4, 8, strategyLazy2},       // level  6
-		{14, 14, 14, 6, 4, 8, strategyLazy2},       // level  7
-		{14, 14, 14, 8, 4, 8, strategyLazy2},       // level  8.
-		{14, 15, 14, 5, 4, 8, strategyBtlazy2},     // level  9.
-		{14, 15, 14, 9, 4, 8, strategyBtlazy2},     // level 10.
-		{14, 15, 14, 3, 4, 12, strategyBtopt},      // level 11.
-		{14, 15, 14, 4, 3, 24, strategyBtopt},      // level 12.
-		{14, 15, 14, 5, 3, 32, strategyBtultra},    // level 13.
-		{14, 15, 15, 6, 3, 64, strategyBtultra},    // level 14.
-		{14, 15, 15, 7, 3, 256, strategyBtultra},   // level 15.
-		{14, 15, 15, 5, 3, 48, strategyBtultra2},   // level 16.
-		{14, 15, 15, 6, 3, 128, strategyBtultra2},  // level 17.
-		{14, 15, 15, 7, 3, 256, strategyBtultra2},  // level 18.
-		{14, 15, 15, 8, 3, 256, strategyBtultra2},  // level 19.
-		{14, 15, 15, 8, 3, 512, strategyBtultra2},  // level 20.
-		{14, 15, 15, 9, 3, 512, strategyBtultra2},  // level 21.
-		{14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22.
-	},
-}
-*/
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 95ebc3d84..f5759211d 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -35,7 +35,7 @@ type encoder interface {
 	AppendCRC([]byte) []byte
 	WindowSize(size int) int32
 	UseBlock(*blockEnc)
-	Reset(singleBlock bool)
+	Reset(d *dict, singleBlock bool)
 }
 
 type encoderState struct {
@@ -83,8 +83,6 @@ func (e *Encoder) initialize() {
 	e.encoders = make(chan encoder, e.o.concurrent)
 	for i := 0; i < e.o.concurrent; i++ {
 		enc := e.o.encoder()
-		// If not single block, history will be allocated on first use.
-		enc.Reset(true)
 		e.encoders <- enc
 	}
 }
@@ -115,7 +113,7 @@ func (e *Encoder) Reset(w io.Writer) {
 	s.filling = s.filling[:0]
 	s.current = s.current[:0]
 	s.previous = s.previous[:0]
-	s.encoder.Reset(false)
+	s.encoder.Reset(e.o.dict, false)
 	s.headerWritten = false
 	s.eofWritten = false
 	s.fullFrameWritten = false
@@ -200,8 +198,9 @@ func (e *Encoder) nextBlock(final bool) error {
 			WindowSize:    uint32(s.encoder.WindowSize(0)),
 			SingleSegment: false,
 			Checksum:      e.o.crc,
-			DictID:        0,
+			DictID:        e.o.dict.ID(),
 		}
+
 		dst, err := fh.appendTo(tmp[:0])
 		if err != nil {
 			return err
@@ -281,7 +280,7 @@ func (e *Encoder) nextBlock(final bool) error {
 			// If we got the exact same number of literals as input,
 			// assume the literals cannot be compressed.
 			if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-				err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy)
+				err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
 			}
 			switch err {
 			case errIncompressible:
@@ -311,7 +310,13 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
 	if debug {
 		println("Using ReadFrom")
 	}
-	// Maybe handle stuff queued?
+
+	// Flush any current writes.
+	if len(e.state.filling) > 0 {
+		if err := e.nextBlock(false); err != nil {
+			return 0, err
+		}
+	}
 	e.state.filling = e.state.filling[:e.o.blockSize]
 	src := e.state.filling
 	for {
@@ -328,7 +333,7 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
 			if debug {
 				println("ReadFrom: got EOF final block:", len(e.state.filling))
 			}
-			return n, e.nextBlock(true)
+			return n, nil
 		default:
 			if debug {
 				println("ReadFrom: got error:", err)
@@ -450,7 +455,6 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	defer func() {
 		// Release encoder reference to last block.
 		// If a non-single block is needed the encoder will reset again.
-		enc.Reset(true)
 		e.encoders <- enc
 	}()
 	// Use single segments when above minimum window and below 1MB.
@@ -463,7 +467,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		WindowSize:    uint32(enc.WindowSize(len(src))),
 		SingleSegment: single,
 		Checksum:      e.o.crc,
-		DictID:        0,
+		DictID:        e.o.dict.ID(),
 	}
 
 	// If less than 1MB, allocate a buffer up front.
@@ -477,13 +481,18 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 
 	// If we can do everything in one block, prefer that.
 	if len(src) <= maxCompressedBlockSize {
+		enc.Reset(e.o.dict, true)
 		// Slightly faster with no history and everything in one block.
 		if e.o.crc {
 			_, _ = enc.CRC().Write(src)
 		}
 		blk := enc.Block()
 		blk.last = true
-		enc.EncodeNoHist(blk, src)
+		if e.o.dict == nil {
+			enc.EncodeNoHist(blk, src)
+		} else {
+			enc.Encode(blk, src)
+		}
 
 		// If we got the exact same number of literals as input,
 		// assume the literals cannot be compressed.
@@ -492,7 +501,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
 			// Output directly to dst
 			blk.output = dst
-			err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy)
+			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
 		}
 
 		switch err {
@@ -508,7 +517,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		}
 		blk.output = oldout
 	} else {
-		enc.Reset(false)
+		enc.Reset(e.o.dict, false)
 		blk := enc.Block()
 		for len(src) > 0 {
 			todo := src
@@ -519,7 +528,6 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			if e.o.crc {
 				_, _ = enc.CRC().Write(todo)
 			}
-			blk.reset(nil)
 			blk.pushOffsets()
 			enc.Encode(blk, todo)
 			if len(src) == 0 {
@@ -529,7 +537,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			// If we got the exact same number of literals as input,
 			// assume the literals cannot be compressed.
 			if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
-				err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy)
+				err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
 			}
 
 			switch err {
@@ -544,6 +552,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			default:
 				panic(err)
 			}
+			blk.reset(nil)
 		}
 	}
 	if e.o.crc {
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index dfac14ddd..579206163 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -24,6 +24,7 @@ type encoderOptions struct {
 	allLitEntropy   bool
 	customWindow    bool
 	customALEntropy bool
+	dict            *dict
 }
 
 func (o *encoderOptions) setDefault() {
@@ -265,3 +266,16 @@ func WithSingleSegment(b bool) EOption {
 		return nil
 	}
 }
+
+// WithEncoderDict allows to register a dictionary that will be used for the encode.
+// The encoder *may* choose to use no dictionary instead for certain payloads.
+func WithEncoderDict(dict []byte) EOption {
+	return func(o *encoderOptions) error {
+		d, err := loadDict(dict)
+		if err != nil {
+			return err
+		}
+		o.dict = d
+		return nil
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
index 4479cfe18..4ef7f5a3e 100644
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -5,6 +5,7 @@
 package zstd
 
 import (
+	"encoding/binary"
 	"fmt"
 	"io"
 	"math"
@@ -16,7 +17,7 @@ type frameHeader struct {
 	WindowSize    uint32
 	SingleSegment bool
 	Checksum      bool
-	DictID        uint32 // Not stored.
+	DictID        uint32
 }
 
 const maxHeaderSize = 14
@@ -30,6 +31,24 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 	if f.SingleSegment {
 		fhd |= 1 << 5
 	}
+
+	var dictIDContent []byte
+	if f.DictID > 0 {
+		var tmp [4]byte
+		if f.DictID < 256 {
+			fhd |= 1
+			tmp[0] = uint8(f.DictID)
+			dictIDContent = tmp[:1]
+		} else if f.DictID < 1<<16 {
+			fhd |= 2
+			binary.LittleEndian.PutUint16(tmp[:2], uint16(f.DictID))
+			dictIDContent = tmp[:2]
+		} else {
+			fhd |= 3
+			binary.LittleEndian.PutUint32(tmp[:4], f.DictID)
+			dictIDContent = tmp[:4]
+		}
+	}
 	var fcs uint8
 	if f.ContentSize >= 256 {
 		fcs++
@@ -40,6 +59,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 	if f.ContentSize >= 0xffffffff {
 		fcs++
 	}
+
 	fhd |= fcs << 6
 
 	dst = append(dst, fhd)
@@ -48,7 +68,9 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 		windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3
 		dst = append(dst, uint8(windowLog))
 	}
-
+	if f.DictID > 0 {
+		dst = append(dst, dictIDContent...)
+	}
 	switch fcs {
 	case 0:
 		if f.SingleSegment {
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index f418f50fc..f783e32d2 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -37,7 +37,7 @@ func (h *history) reset() {
 	}
 	h.decoders = sequenceDecs{}
 	if h.huffTree != nil {
-		if h.dict == nil || h.dict.litDec != h.huffTree {
+		if h.dict == nil || h.dict.litEnc != h.huffTree {
 			huffDecoderPool.Put(h.huffTree)
 		}
 	}
@@ -55,7 +55,7 @@ func (h *history) setDict(dict *dict) {
 	h.decoders.offsets = dict.ofDec
 	h.decoders.matchLengths = dict.mlDec
 	h.recentOffsets = dict.offsets
-	h.huffTree = dict.litDec
+	h.huffTree = dict.litEnc
 }
 
 // append bytes to history.
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index 7ff870400..b5c8ef133 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -196,6 +196,10 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 		s.literals = s.literals[ll:]
 		out := s.out
 
+		if mo == 0 && ml > 0 {
+			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+		}
+
 		if mo > len(s.out)+len(hist) || mo > s.windowSize {
 			if len(s.dict) == 0 {
 				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
@@ -218,10 +222,6 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 			}
 		}
 
-		if mo == 0 && ml > 0 {
-			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
-		}
-
 		// Copy from history.
 		// TODO: Blocks without history could be made to ignore this completely.
 		if v := mo - len(s.out); v > 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index 690428cd2..841fd95ac 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -111,7 +111,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 			// Add empty last block
 			r.block.reset(nil)
 			r.block.last = true
-			err := r.block.encodeLits(false)
+			err := r.block.encodeLits(r.block.literals, false)
 			if err != nil {
 				return written, err
 			}
@@ -178,7 +178,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 				r.err = ErrSnappyCorrupt
 				return written, r.err
 			}
-			err = r.block.encode(false, false)
+			err = r.block.encode(nil, false, false)
 			switch err {
 			case errIncompressible:
 				r.block.popOffsets()
@@ -188,7 +188,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 					println("snappy.Decode:", err)
 					return written, err
 				}
-				err = r.block.encodeLits(false)
+				err = r.block.encodeLits(r.block.literals, false)
 				if err != nil {
 					return written, err
 				}
@@ -235,7 +235,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 				r.err = ErrSnappyCorrupt
 				return written, r.err
 			}
-			err := r.block.encodeLits(false)
+			err := r.block.encodeLits(r.block.literals, false)
 			if err != nil {
 				return written, err
 			}