1 files changed, 68 insertions, 610 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 628795120..20c94f596 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -50,8 +50,6 @@ const (
 	skipNever = math.MaxInt32
 )
 
-var useSSE42 bool
-
 type compressionLevel struct {
 	good, lazy, nice, chain, fastSkipHashing, level int
 }
@@ -97,9 +95,8 @@ type advancedState struct {
 	hashOffset int
 
 	// input window: unprocessed data is window[index:windowEnd]
-	index      int
-	bulkHasher func([]byte, []uint32)
-	hashMatch  [maxMatchLength + minMatchLength]uint32
+	index     int
+	hashMatch [maxMatchLength + minMatchLength]uint32
 }
 
 type compressor struct {
@@ -120,7 +117,7 @@ type compressor struct {
 
 	// queued output tokens
 	tokens tokens
-	snap   fastEnc
+	fast   fastEnc
 	state  *advancedState
 }
 
@@ -164,14 +161,14 @@ func (d *compressor) fillDeflate(b []byte) int {
 	return n
 }
 
-func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
 	if index > 0 || eof {
 		var window []byte
 		if d.blockStart <= index {
 			window = d.window[d.blockStart:index]
 		}
 		d.blockStart = index
-		d.w.writeBlock(tok.tokens[:tok.n], eof, window)
+		d.w.writeBlock(tok, eof, window)
 		return d.w.err
 	}
 	return nil
@@ -180,20 +177,20 @@ func (d *compressor) writeBlock(tok tokens, index int, eof bool) error {
 // writeBlockSkip writes the current block and uses the number of tokens
 // to determine if the block should be stored on no matches, or
 // only huffman encoded.
-func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
+func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
 	if index > 0 || eof {
 		if d.blockStart <= index {
 			window := d.window[d.blockStart:index]
 			// If we removed less than a 64th of all literals
 			// we huffman compress the block.
 			if int(tok.n) > len(window)-int(tok.n>>6) {
-				d.w.writeBlockHuff(eof, window)
+				d.w.writeBlockHuff(eof, window, d.sync)
 			} else {
 				// Write a dynamic huffman block.
-				d.w.writeBlockDynamic(tok.tokens[:tok.n], eof, window)
+				d.w.writeBlockDynamic(tok, eof, window, d.sync)
 			}
 		} else {
-			d.w.writeBlock(tok.tokens[:tok.n], eof, nil)
+			d.w.writeBlock(tok, eof, nil)
 		}
 		d.blockStart = index
 		return d.w.err
@@ -208,8 +205,16 @@ func (d *compressor) writeBlockSkip(tok tokens, index int, eof bool) error {
 func (d *compressor) fillWindow(b []byte) {
 	// Do not fill window if we are in store-only mode,
 	// use constant or Snappy compression.
-	switch d.compressionLevel.level {
-	case 0, 1, 2:
+	if d.level == 0 {
+		return
+	}
+	if d.fast != nil {
+		// Encode only the trailing data (at most maxMatchOffset bytes), then discard the resulting tokens.
+		if len(b) > maxMatchOffset {
+			b = b[len(b)-maxMatchOffset:]
+		}
+		d.fast.Encode(&d.tokens, b)
+		d.tokens.Reset()
 		return
 	}
 	s := d.state
@@ -236,7 +241,7 @@ func (d *compressor) fillWindow(b []byte) {
 		}
 
 		dst := s.hashMatch[:dstSize]
-		s.bulkHasher(tocheck, dst)
+		bulkHash4(tocheck, dst)
 		var newH uint32
 		for i, val := range dst {
 			di := i + startindex
@@ -284,62 +289,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 
 	for i := prevHead; tries > 0; tries-- {
 		if wEnd == win[i+length] {
-			n := matchLen(win[i:], wPos, minMatchLook)
-
-			if n > length && (n > minMatchLength || pos-i <= 4096) {
-				length = n
-				offset = pos - i
-				ok = true
-				if n >= nice {
-					// The match is good enough that we don't try to find a better one.
-					break
-				}
-				wEnd = win[pos+n]
-			}
-		}
-		if i == minIndex {
-			// hashPrev[i & windowMask] has already been overwritten, so stop now.
-			break
-		}
-		i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
-		if i < minIndex || i < 0 {
-			break
-		}
-	}
-	return
-}
-
-// Try to find a match starting at index whose length is greater than prevSize.
-// We only look at chainCount possibilities before giving up.
-// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
-func (d *compressor) findMatchSSE(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
-	minMatchLook := maxMatchLength
-	if lookahead < minMatchLook {
-		minMatchLook = lookahead
-	}
-
-	win := d.window[0 : pos+minMatchLook]
-
-	// We quit when we get a match that's at least nice long
-	nice := len(win) - pos
-	if d.nice < nice {
-		nice = d.nice
-	}
-
-	// If we've got a match that's good enough, only look in 1/4 the chain.
-	tries := d.chain
-	length = prevLength
-	if length >= d.good {
-		tries >>= 2
-	}
-
-	wEnd := win[pos+length]
-	wPos := win[pos:]
-	minIndex := pos - windowSize
-
-	for i := prevHead; tries > 0; tries-- {
-		if wEnd == win[i+length] {
-			n := matchLenSSE4(win[i:], wPos, minMatchLook)
+			n := matchLen(win[i:i+minMatchLook], wPos)
 
 			if n > length && (n > minMatchLength || pos-i <= 4096) {
 				length = n
@@ -372,42 +322,27 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
 	return d.w.err
 }
 
-const hashmul = 0x1e35a7bd
-
 // hash4 returns a hash representation of the first 4 bytes
 // of the supplied slice.
 // The caller must ensure that len(b) >= 4.
 func hash4(b []byte) uint32 {
-	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+	b = b[:4]
+	return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
 }
 
 // bulkHash4 will compute hashes using the same
 // algorithm as hash4
 func bulkHash4(b []byte, dst []uint32) {
-	if len(b) < minMatchLength {
+	if len(b) < 4 {
 		return
 	}
 	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
-	dst[0] = (hb * hashmul) >> (32 - hashBits)
-	end := len(b) - minMatchLength + 1
+	dst[0] = hash4u(hb, hashBits)
+	end := len(b) - 4 + 1
 	for i := 1; i < end; i++ {
 		hb = (hb << 8) | uint32(b[i+3])
-		dst[i] = (hb * hashmul) >> (32 - hashBits)
-	}
-}
-
-// matchLen returns the number of matching bytes in a and b
-// up to length 'max'. Both slices must be at least 'max'
-// bytes in size.
-func matchLen(a, b []byte, max int) int {
-	a = a[:max]
-	b = b[:len(a)]
-	for i, av := range a {
-		if b[i] != av {
-			return i
-		}
+		dst[i] = hash4u(hb, hashBits)
 	}
-	return max
 }
 
 func (d *compressor) initDeflate() {
@@ -424,149 +359,6 @@ func (d *compressor) initDeflate() {
 	s.offset = 0
 	s.hash = 0
 	s.chainHead = -1
-	s.bulkHasher = bulkHash4
-	if useSSE42 {
-		s.bulkHasher = crc32sseAll
-	}
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazy
-func (d *compressor) deflate() {
-	s := d.state
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
-		return
-	}
-
-	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if s.index < s.maxInsertIndex {
-		s.hash = hash4(d.window[s.index : s.index+minMatchLength])
-	}
-
-	for {
-		if sanity && s.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - s.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && s.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if s.index < s.maxInsertIndex {
-			// Update the hash
-			s.hash = hash4(d.window[s.index : s.index+minMatchLength])
-			ch := s.hashHead[s.hash&hashMask]
-			s.chainHead = int(ch)
-			s.hashPrev[s.index&windowMask] = ch
-			s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
-		}
-		s.length = minMatchLength - 1
-		s.offset = 0
-		minIndex := s.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
-				s.length = newLength
-				s.offset = newOffset
-			}
-		}
-		if s.length >= minMatchLength {
-			s.ii = 0
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			// "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
-			d.tokens.n++
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			if s.length <= d.fastSkipHashing {
-				var newIndex int
-				newIndex = s.index + s.length
-				// Calculate missing hashes
-				end := newIndex
-				if end > s.maxInsertIndex {
-					end = s.maxInsertIndex
-				}
-				end += minMatchLength - 1
-				startindex := s.index + 1
-				if startindex > s.maxInsertIndex {
-					startindex = s.maxInsertIndex
-				}
-				tocheck := d.window[startindex:end]
-				dstSize := len(tocheck) - minMatchLength + 1
-				if dstSize > 0 {
-					dst := s.hashMatch[:dstSize]
-					bulkHash4(tocheck, dst)
-					var newH uint32
-					for i, val := range dst {
-						di := i + startindex
-						newH = val & hashMask
-						// Get previous value with the same hash.
-						// Our chain should point to the previous value.
-						s.hashPrev[di&windowMask] = s.hashHead[newH]
-						// Set the head of the hash chain to us.
-						s.hashHead[newH] = uint32(di + s.hashOffset)
-					}
-					s.hash = newH
-				}
-				s.index = newIndex
-			} else {
-				// For matches this long, we don't bother inserting each individual
-				// item into the table.
-				s.index += s.length
-				if s.index < s.maxInsertIndex {
-					s.hash = hash4(d.window[s.index : s.index+minMatchLength])
-				}
-			}
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			s.ii++
-			end := s.index + int(s.ii>>uint(d.fastSkipHashing)) + 1
-			if end > d.windowEnd {
-				end = d.windowEnd
-			}
-			for i := s.index; i < end; i++ {
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-			}
-			s.index = end
-		}
-	}
 }
 
 // deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
@@ -603,15 +395,14 @@ func (d *compressor) deflateLazy() {
 				// Flush current output block if any.
 				if d.byteAvailable {
 					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-					d.tokens.n++
+					d.tokens.AddLiteral(d.window[s.index-1])
 					d.byteAvailable = false
 				}
 				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 						return
 					}
-					d.tokens.n = 0
+					d.tokens.Reset()
 				}
 				return
 			}
@@ -642,8 +433,7 @@ func (d *compressor) deflateLazy() {
 		if prevLength >= minMatchLength && s.length <= prevLength {
 			// There was a match at the previous step, and the current match is
 			// not better. Output the previous match.
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
-			d.tokens.n++
+			d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
 
 			// Insert in the hash table all strings up to the end of the match.
 			// index and index-1 are already inserted. If there is not enough
@@ -684,10 +474,10 @@ func (d *compressor) deflateLazy() {
 			s.length = minMatchLength - 1
 			if d.tokens.n == maxFlateBlockTokens {
 				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+				if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 					return
 				}
-				d.tokens.n = 0
+				d.tokens.Reset()
 			}
 		} else {
 			// Reset, if we got a match this run.
@@ -697,13 +487,12 @@ func (d *compressor) deflateLazy() {
 			// We have a byte waiting. Emit it.
 			if d.byteAvailable {
 				s.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-				d.tokens.n++
+				d.tokens.AddLiteral(d.window[s.index-1])
 				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+					if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 						return
 					}
-					d.tokens.n = 0
+					d.tokens.Reset()
 				}
 				s.index++
 
@@ -716,343 +505,24 @@ func (d *compressor) deflateLazy() {
 							break
 						}
 
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-						d.tokens.n++
+						d.tokens.AddLiteral(d.window[s.index-1])
 						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+							if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 								return
 							}
-							d.tokens.n = 0
+							d.tokens.Reset()
 						}
 						s.index++
 					}
 					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-					d.tokens.n++
+					d.tokens.AddLiteral(d.window[s.index-1])
 					d.byteAvailable = false
 					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
 					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
+						if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
 							return
 						}
-						d.tokens.n = 0
-					}
-				}
-			} else {
-				s.index++
-				d.byteAvailable = true
-			}
-		}
-	}
-}
-
-// Assumes that d.fastSkipHashing != skipNever,
-// otherwise use deflateLazySSE
-func (d *compressor) deflateSSE() {
-	s := d.state
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
-		return
-	}
-
-	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if s.index < s.maxInsertIndex {
-		s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
-	}
-
-	for {
-		if sanity && s.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - s.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && s.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if s.index < s.maxInsertIndex {
-			// Update the hash
-			s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
-			ch := s.hashHead[s.hash]
-			s.chainHead = int(ch)
-			s.hashPrev[s.index&windowMask] = ch
-			s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
-		}
-		s.length = minMatchLength - 1
-		s.offset = 0
-		minIndex := s.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if s.chainHead-s.hashOffset >= minIndex && lookahead > minMatchLength-1 {
-			if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
-				s.length = newLength
-				s.offset = newOffset
-			}
-		}
-		if s.length >= minMatchLength {
-			s.ii = 0
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			// "s.length-3" should NOT be "s.length-minMatchLength", since the format always assume 3
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(s.length-3), uint32(s.offset-minOffsetSize))
-			d.tokens.n++
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			if s.length <= d.fastSkipHashing {
-				var newIndex int
-				newIndex = s.index + s.length
-				// Calculate missing hashes
-				end := newIndex
-				if end > s.maxInsertIndex {
-					end = s.maxInsertIndex
-				}
-				end += minMatchLength - 1
-				startindex := s.index + 1
-				if startindex > s.maxInsertIndex {
-					startindex = s.maxInsertIndex
-				}
-				tocheck := d.window[startindex:end]
-				dstSize := len(tocheck) - minMatchLength + 1
-				if dstSize > 0 {
-					dst := s.hashMatch[:dstSize]
-
-					crc32sseAll(tocheck, dst)
-					var newH uint32
-					for i, val := range dst {
-						di := i + startindex
-						newH = val & hashMask
-						// Get previous value with the same hash.
-						// Our chain should point to the previous value.
-						s.hashPrev[di&windowMask] = s.hashHead[newH]
-						// Set the head of the hash chain to us.
-						s.hashHead[newH] = uint32(di + s.hashOffset)
-					}
-					s.hash = newH
-				}
-				s.index = newIndex
-			} else {
-				// For matches this long, we don't bother inserting each individual
-				// item into the table.
-				s.index += s.length
-				if s.index < s.maxInsertIndex {
-					s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
-				}
-			}
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlockSkip(d.tokens, s.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			s.ii++
-			end := s.index + int(s.ii>>5) + 1
-			if end > d.windowEnd {
-				end = d.windowEnd
-			}
-			for i := s.index; i < end; i++ {
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[i]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlockSkip(d.tokens, i+1, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-			}
-			s.index = end
-		}
-	}
-}
-
-// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
-// meaning it always has lazy matching on.
-func (d *compressor) deflateLazySSE() {
-	s := d.state
-	// Sanity enables additional runtime tests.
-	// It's intended to be used during development
-	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
-		return
-	}
-
-	s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
-	if s.index < s.maxInsertIndex {
-		s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
-	}
-
-	for {
-		if sanity && s.index > d.windowEnd {
-			panic("index > windowEnd")
-		}
-		lookahead := d.windowEnd - s.index
-		if lookahead < minMatchLength+maxMatchLength {
-			if !d.sync {
-				return
-			}
-			if sanity && s.index > d.windowEnd {
-				panic("index > windowEnd")
-			}
-			if lookahead == 0 {
-				// Flush current output block if any.
-				if d.byteAvailable {
-					// There is still one pending token that needs to be flushed
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-					d.tokens.n++
-					d.byteAvailable = false
-				}
-				if d.tokens.n > 0 {
-					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				return
-			}
-		}
-		if s.index < s.maxInsertIndex {
-			// Update the hash
-			s.hash = crc32sse(d.window[s.index:s.index+minMatchLength]) & hashMask
-			ch := s.hashHead[s.hash]
-			s.chainHead = int(ch)
-			s.hashPrev[s.index&windowMask] = ch
-			s.hashHead[s.hash] = uint32(s.index + s.hashOffset)
-		}
-		prevLength := s.length
-		prevOffset := s.offset
-		s.length = minMatchLength - 1
-		s.offset = 0
-		minIndex := s.index - windowSize
-		if minIndex < 0 {
-			minIndex = 0
-		}
-
-		if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
-			if newLength, newOffset, ok := d.findMatchSSE(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
-				s.length = newLength
-				s.offset = newOffset
-			}
-		}
-		if prevLength >= minMatchLength && s.length <= prevLength {
-			// There was a match at the previous step, and the current match is
-			// not better. Output the previous match.
-			d.tokens.tokens[d.tokens.n] = matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
-			d.tokens.n++
-
-			// Insert in the hash table all strings up to the end of the match.
-			// index and index-1 are already inserted. If there is not enough
-			// lookahead, the last two strings are not inserted into the hash
-			// table.
-			var newIndex int
-			newIndex = s.index + prevLength - 1
-			// Calculate missing hashes
-			end := newIndex
-			if end > s.maxInsertIndex {
-				end = s.maxInsertIndex
-			}
-			end += minMatchLength - 1
-			startindex := s.index + 1
-			if startindex > s.maxInsertIndex {
-				startindex = s.maxInsertIndex
-			}
-			tocheck := d.window[startindex:end]
-			dstSize := len(tocheck) - minMatchLength + 1
-			if dstSize > 0 {
-				dst := s.hashMatch[:dstSize]
-				crc32sseAll(tocheck, dst)
-				var newH uint32
-				for i, val := range dst {
-					di := i + startindex
-					newH = val & hashMask
-					// Get previous value with the same hash.
-					// Our chain should point to the previous value.
-					s.hashPrev[di&windowMask] = s.hashHead[newH]
-					// Set the head of the hash chain to us.
-					s.hashHead[newH] = uint32(di + s.hashOffset)
-				}
-				s.hash = newH
-			}
-
-			s.index = newIndex
-			d.byteAvailable = false
-			s.length = minMatchLength - 1
-			if d.tokens.n == maxFlateBlockTokens {
-				// The block includes the current character
-				if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
-					return
-				}
-				d.tokens.n = 0
-			}
-		} else {
-			// Reset, if we got a match this run.
-			if s.length >= minMatchLength {
-				s.ii = 0
-			}
-			// We have a byte waiting. Emit it.
-			if d.byteAvailable {
-				s.ii++
-				d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-				d.tokens.n++
-				if d.tokens.n == maxFlateBlockTokens {
-					if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
-						return
-					}
-					d.tokens.n = 0
-				}
-				s.index++
-
-				// If we have a long run of no matches, skip additional bytes
-				// Resets when s.ii overflows after 64KB.
-				if s.ii > 31 {
-					n := int(s.ii >> 6)
-					for j := 0; j < n; j++ {
-						if s.index >= d.windowEnd-1 {
-							break
-						}
-
-						d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-						d.tokens.n++
-						if d.tokens.n == maxFlateBlockTokens {
-							if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
-								return
-							}
-							d.tokens.n = 0
-						}
-						s.index++
-					}
-					// Flush last byte
-					d.tokens.tokens[d.tokens.n] = literalToken(uint32(d.window[s.index-1]))
-					d.tokens.n++
-					d.byteAvailable = false
-					// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
-					if d.tokens.n == maxFlateBlockTokens {
-						if d.err = d.writeBlock(d.tokens, s.index, false); d.err != nil {
-							return
-						}
-						d.tokens.n = 0
+						d.tokens.Reset()
 					}
 				}
 			} else {
@@ -1085,17 +555,17 @@ func (d *compressor) storeHuff() {
 	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
 		return
 	}
-	d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+	d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
 	d.err = d.w.err
 	d.windowEnd = 0
 }
 
-// storeHuff will compress and store the currently added data,
+// storeFast will compress and store the currently added data,
 // if enough has been accumulated or we at the end of the stream.
 // Any error that occurred will be in d.err
-func (d *compressor) storeSnappy() {
+func (d *compressor) storeFast() {
 	// We only compress if we have maxStoreBlockSize.
-	if d.windowEnd < maxStoreBlockSize {
+	if d.windowEnd < len(d.window) {
 		if !d.sync {
 			return
 		}
@@ -1106,32 +576,30 @@ func (d *compressor) storeSnappy() {
 			}
 			if d.windowEnd <= 32 {
 				d.err = d.writeStoredBlock(d.window[:d.windowEnd])
-				d.tokens.n = 0
-				d.windowEnd = 0
 			} else {
-				d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+				d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
 				d.err = d.w.err
 			}
-			d.tokens.n = 0
+			d.tokens.Reset()
 			d.windowEnd = 0
-			d.snap.Reset()
+			d.fast.Reset()
 			return
 		}
 	}
 
-	d.snap.Encode(&d.tokens, d.window[:d.windowEnd])
+	d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
 	// If we made zero matches, store the block as is.
-	if int(d.tokens.n) == d.windowEnd {
+	if d.tokens.n == 0 {
 		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
 		// If we removed less than 1/16th, huffman compress the block.
 	} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
-		d.w.writeBlockHuff(false, d.window[:d.windowEnd])
+		d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
 		d.err = d.w.err
 	} else {
-		d.w.writeBlockDynamic(d.tokens.tokens[:d.tokens.n], false, d.window[:d.windowEnd])
+		d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
 		d.err = d.w.err
 	}
-	d.tokens.n = 0
+	d.tokens.Reset()
 	d.windowEnd = 0
 }
 
@@ -1176,36 +644,26 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).store
 	case level == ConstantCompression:
+		d.w.logReusePenalty = uint(4)
 		d.window = make([]byte, maxStoreBlockSize)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeHuff
-	case level >= 1 && level <= 4:
-		d.snap = newFastEnc(level)
-		d.window = make([]byte, maxStoreBlockSize)
-		d.fill = (*compressor).fillBlock
-		d.step = (*compressor).storeSnappy
 	case level == DefaultCompression:
 		level = 5
 		fallthrough
-	case 5 <= level && level <= 9:
+	case level >= 1 && level <= 6:
+		d.w.logReusePenalty = uint(level + 1)
+		d.fast = newFastEnc(level)
+		d.window = make([]byte, maxStoreBlockSize)
+		d.fill = (*compressor).fillBlock
+		d.step = (*compressor).storeFast
+	case 7 <= level && level <= 9:
+		d.w.logReusePenalty = uint(level)
 		d.state = &advancedState{}
 		d.compressionLevel = levels[level]
 		d.initDeflate()
 		d.fill = (*compressor).fillDeflate
-		if d.fastSkipHashing == skipNever {
-			if useSSE42 {
-				d.step = (*compressor).deflateLazySSE
-			} else {
-				d.step = (*compressor).deflateLazy
-			}
-		} else {
-			if useSSE42 {
-				d.step = (*compressor).deflateSSE
-			} else {
-				d.step = (*compressor).deflate
-
-			}
-		}
+		d.step = (*compressor).deflateLazy
 	default:
 		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
 	}
@@ -1218,10 +676,10 @@ func (d *compressor) reset(w io.Writer) {
 	d.sync = false
 	d.err = nil
 	// We only need to reset a few things for Snappy.
-	if d.snap != nil {
-		d.snap.Reset()
+	if d.fast != nil {
+		d.fast.Reset()
 		d.windowEnd = 0
-		d.tokens.n = 0
+		d.tokens.Reset()
 		return
 	}
 	switch d.compressionLevel.chain {
@@ -1240,7 +698,7 @@ func (d *compressor) reset(w io.Writer) {
 		s.hashOffset = 1
 		s.index, d.windowEnd = 0, 0
 		d.blockStart, d.byteAvailable = 0, false
-		d.tokens.n = 0
+		d.tokens.Reset()
 		s.length = minMatchLength - 1
 		s.offset = 0
 		s.hash = 0