summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/flate/huffman_code.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate/huffman_code.go')
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_code.go67
1 files changed, 52 insertions, 15 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index f65f79336..d0099599c 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -10,6 +10,12 @@ import (
"sort"
)
+const (
+ maxBitsLimit = 16
+ // number of valid literals
+ literalCount = 286
+)
+
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
@@ -25,7 +31,7 @@ type huffmanEncoder struct {
type literalNode struct {
literal uint16
- freq int32
+ freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
@@ -54,7 +60,11 @@ func (h *hcode) set(code uint16, length uint16) {
h.code = code
}
-func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
+func reverseBits(number uint16, bitLength byte) uint16 {
+ return bits.Reverse16(number << ((16 - bitLength) & 15))
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Make capacity to next power of two.
@@ -64,10 +74,10 @@ func newHuffmanEncoder(size int) *huffmanEncoder {
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
- h := newHuffmanEncoder(maxNumLit)
+ h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
- for ch = 0; ch < maxNumLit; ch++ {
+ for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint16
switch {
@@ -108,7 +118,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
-func (h *huffmanEncoder) bitLength(freq []int32) int {
+func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
@@ -118,8 +128,6 @@ func (h *huffmanEncoder) bitLength(freq []int32) int {
return total
}
-const maxBitsLimit = 16
-
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
@@ -163,9 +171,9 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
- lastFreq: list[1].freq,
- nextCharFreq: list[2].freq,
- nextPairFreq: list[0].freq + list[1].freq,
+ lastFreq: int32(list[1].freq),
+ nextCharFreq: int32(list[2].freq),
+ nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
}
leafCounts[level][level] = 2
if level == 1 {
@@ -197,7 +205,12 @@ func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
- l.nextCharFreq = list[n].freq
+ e := list[n]
+ if e.literal < math.MaxUint16 {
+ l.nextCharFreq = int32(e.freq)
+ } else {
+ l.nextCharFreq = math.MaxInt32
+ }
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
@@ -273,12 +286,12 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
-func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
+func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
- // Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
- // The largest of these is maxNumLit, so we allocate for that case.
- h.freqcache = make([]literalNode, maxNumLit+1)
+ // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+ // The largest of these is literalCount, so we allocate for that case.
+ h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
@@ -345,3 +358,27 @@ func (s byFreq) Less(i, j int) bool {
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+// histogramSize accumulates a histogram of b in h.
+// An estimated size in bits is returned.
+// Unassigned values are assigned '1' in the histogram.
+// len(h) must be >= 256, and h's elements must be all zeroes.
+func histogramSize(b []byte, h []uint16, fill bool) int {
+ h = h[:256]
+ for _, t := range b {
+ h[t]++
+ }
+ invTotal := 1.0 / float64(len(b))
+ shannon := 0.0
+ single := math.Ceil(-math.Log2(invTotal))
+ for i, v := range h[:] {
+ if v > 0 {
+ n := float64(v)
+ shannon += math.Ceil(-math.Log2(n*invTotal) * n)
+ } else if fill {
+ shannon += single
+ h[i] = 1
+ }
+ }
+ return int(shannon + 0.99)
+}