diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate/huffman_code.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/flate/huffman_code.go | 67 |
1 files changed, 52 insertions, 15 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index f65f79336..d0099599c 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -10,6 +10,12 @@ import ( "sort" ) +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + // hcode is a huffman code with a bit code and bit length. type hcode struct { code, len uint16 @@ -25,7 +31,7 @@ type huffmanEncoder struct { type literalNode struct { literal uint16 - freq int32 + freq uint16 } // A levelInfo describes the state of the constructed tree for a given depth. @@ -54,7 +60,11 @@ func (h *hcode) set(code uint16, length uint16) { h.code = code } -func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } func newHuffmanEncoder(size int) *huffmanEncoder { // Make capacity to next power of two. @@ -64,10 +74,10 @@ func newHuffmanEncoder(size int) *huffmanEncoder { // Generates a HuffmanCode corresponding to the fixed literal table func generateFixedLiteralEncoding() *huffmanEncoder { - h := newHuffmanEncoder(maxNumLit) + h := newHuffmanEncoder(literalCount) codes := h.codes var ch uint16 - for ch = 0; ch < maxNumLit; ch++ { + for ch = 0; ch < literalCount; ch++ { var bits uint16 var size uint16 switch { @@ -108,7 +118,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder { var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() -func (h *huffmanEncoder) bitLength(freq []int32) int { +func (h *huffmanEncoder) bitLength(freq []uint16) int { var total int for i, f := range freq { if f != 0 { @@ -118,8 +128,6 @@ func (h *huffmanEncoder) bitLength(freq []int32) int { return total } -const maxBitsLimit = 16 - // Return the number of literals assigned to each bit size in the Huffman encoding // // This method is only called when list.length >= 3 @@ -163,9 +171,9 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { // We initialize the levels as if we had already figured this out. levels[level] = levelInfo{ level: level, - lastFreq: list[1].freq, - nextCharFreq: list[2].freq, - nextPairFreq: list[0].freq + list[1].freq, + lastFreq: int32(list[1].freq), + nextCharFreq: int32(list[2].freq), + nextPairFreq: int32(list[0].freq) + int32(list[1].freq), } leafCounts[level][level] = 2 if level == 1 { @@ -197,7 +205,12 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { l.lastFreq = l.nextCharFreq // Lower leafCounts are the same of the previous node. leafCounts[level][level] = n - l.nextCharFreq = list[n].freq + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } } else { // The next item on this row is a pair from the previous row. // nextPairFreq isn't valid until we generate two @@ -273,12 +286,12 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. // maxBits The maximum number of bits to use for any literal. -func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { +func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { if h.freqcache == nil { // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit. - // The largest of these is maxNumLit, so we allocate for that case. - h.freqcache = make([]literalNode, maxNumLit+1) + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + h.freqcache = make([]literalNode, literalCount+1) } list := h.freqcache[:len(freq)+1] // Number of non-zero literals @@ -345,3 +358,27 @@ func (s byFreq) Less(i, j int) bool { } func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +// histogramSize accumulates a histogram of b in h. +// An estimated size in bits is returned. +// Unassigned values are assigned '1' in the histogram. +// len(h) must be >= 256, and h's elements must be all zeroes. +func histogramSize(b []byte, h []uint16, fill bool) int { + h = h[:256] + for _, t := range b { + h[t]++ + } + invTotal := 1.0 / float64(len(b)) + shannon := 0.0 + single := math.Ceil(-math.Log2(invTotal)) + for i, v := range h[:] { + if v > 0 { + n := float64(v) + shannon += math.Ceil(-math.Log2(n*invTotal) * n) + } else if fill { + shannon += single + h[i] = 1 + } + } + return int(shannon + 0.99) +} |