diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate')
6 files changed, 470 insertions, 85 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 20c94f596..d9948ab40 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -644,7 +644,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.fill = (*compressor).fillBlock d.step = (*compressor).store case level == ConstantCompression: - d.w.logReusePenalty = uint(4) + d.w.logNewTablePenalty = 4 d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeHuff @@ -652,13 +652,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) { level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logReusePenalty = uint(level + 1) + d.w.logNewTablePenalty = 6 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeFast case 7 <= level && level <= 9: - d.w.logReusePenalty = uint(level) + d.w.logNewTablePenalty = 10 d.state = &advancedState{} d.compressionLevel = levels[level] d.initDeflate() diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index dd74ffb87..9feea87a3 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -93,12 +93,12 @@ type huffmanBitWriter struct { err error lastHeader int // Set between 0 (reused block can be up to 2x the size) - logReusePenalty uint - lastHuffMan bool - bytes [256]byte - literalFreq [lengthCodesStart + 32]uint16 - offsetFreq [32]uint16 - codegenFreq [codegenCodeCount]uint16 + logNewTablePenalty uint + lastHuffMan bool + bytes [256]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 // codegen must have an extra space for the final symbol. codegen [literalCount + offsetCodeCount + 1]uint8 @@ -119,7 +119,7 @@ type huffmanBitWriter struct { // If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. // // An incoming block estimates the output size of a new table using a 'fresh' by calculating the -// optimal size and adding a penalty in 'logReusePenalty'. +// optimal size and adding a penalty in 'logNewTablePenalty'. // A Huffman table is not optimal, which is why we add a penalty, and generating a new table // is slower both for compression and decompression. @@ -350,6 +350,13 @@ func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { } // dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { header, numCodegens := w.headerSize() size = header + @@ -451,12 +458,12 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n i := 0 for { - var codeWord int = int(w.codegen[i]) + var codeWord = uint32(w.codegen[i]) i++ if codeWord == badCode { break } - w.writeCode(w.codegenEncoding.codes[uint32(codeWord)]) + w.writeCode(w.codegenEncoding.codes[codeWord]) switch codeWord { case 16: @@ -602,14 +609,14 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b var size int // Check if we should reuse. if w.lastHeader > 0 { - // Estimate size for using a new table + // Estimate size for using a new table. + // Use the previous header size as the best estimate. newSize := w.lastHeader + tokens.EstimatedBits() + newSize += newSize >> w.logNewTablePenalty // The estimated size is calculated as an optimal table. // We add a penalty to make it more realistic and re-use a bit more. - newSize += newSize >> (w.logReusePenalty & 31) - extra := w.extraBitSize() - reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra) + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() // Check if a new table is better. if newSize < reuseSize { @@ -801,21 +808,30 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { } // Add everything as literals - estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15 + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) + estBits += w.lastHeader + 15 + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty // Store bytes, if we don't get a reasonable improvement. ssize, storable := w.storedSize(input) - if storable && ssize < (estBits+estBits>>4) { + if storable && ssize < estBits { w.writeStoredHeader(len(input), eof) w.writeBytes(input) return } if w.lastHeader > 0 { - size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader) - estBits += estBits >> (w.logReusePenalty) + reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) + estBits += estExtra - if estBits < size { + if estBits < reuseSize { // We owe an EOB w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index 1810c6898..9d8e81ad6 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -7,7 +7,6 @@ package flate import ( "math" "math/bits" - "sort" ) const ( @@ -25,8 +24,6 @@ type huffmanEncoder struct { codes []hcode freqcache []literalNode bitCount [17]int32 - lns byLiteral // stored to avoid repeated allocation in generate - lfs byFreq // stored to avoid repeated allocation in generate } type literalNode struct { @@ -270,7 +267,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // assigned in literal order (not frequency order). chunk := list[len(list)-int(bits):] - h.lns.sort(chunk) + sortByLiteral(chunk) for _, node := range chunk { h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} code++ @@ -315,7 +312,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { } return } - h.lfs.sort(list) + sortByFreq(list) // Get the number of literals for each bit count bitCount := h.bitCounts(list, maxBits) @@ -323,59 +320,44 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } -type byLiteral []literalNode - -func (s *byLiteral) sort(a []literalNode) { - *s = byLiteral(a) - sort.Sort(s) -} - -func (s byLiteral) Len() int { return len(s) } - -func (s byLiteral) Less(i, j int) bool { - return s[i].literal < s[j].literal -} - -func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -type byFreq []literalNode - -func (s *byFreq) sort(a []literalNode) { - *s = byFreq(a) - sort.Sort(s) -} - -func (s byFreq) Len() int { return len(s) } - -func (s byFreq) Less(i, j int) bool { - if s[i].freq == s[j].freq { - return s[i].literal < s[j].literal +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 } - return s[i].freq < s[j].freq + return v } -func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - // histogramSize accumulates a histogram of b in h. // An estimated size in bits is returned. // Unassigned values are assigned '1' in the histogram. // len(h) must be >= 256, and h's elements must be all zeroes. -func histogramSize(b []byte, h []uint16, fill bool) int { +func histogramSize(b []byte, h []uint16, fill bool) (int, int) { h = h[:256] for _, t := range b { h[t]++ } - invTotal := 1.0 / float64(len(b)) - shannon := 0.0 - single := math.Ceil(-math.Log2(invTotal)) - for i, v := range h[:] { - if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - } else if fill { - shannon += single - h[i] = 1 + invTotal := 1.0 / float32(len(b)) + shannon := float32(0.0) + var extra float32 + if fill { + oneBits := atLeastOne(-mFastLog2(invTotal)) + for i, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } else { + h[i] = 1 + extra += oneBits + } + } + } else { + for _, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } } } - return int(shannon + 0.99) + + return int(shannon + 0.99), int(extra + 0.99) } diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 000000000..207780299 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 000000000..93f1aea10 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index b3df0d894..099c0ddbc 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -184,9 +184,7 @@ func (t *tokens) indexTokens(in []token) { t.Reset() for _, tok := range in { if tok < matchType { - t.tokens[t.n] = tok - t.litHist[tok]++ - t.n++ + t.AddLiteral(tok.literal()) continue } t.AddMatch(uint32(tok.length()), tok.offset()) @@ -211,43 +209,53 @@ func (t *tokens) AddLiteral(lit byte) { t.nLits++ } +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + // EstimatedBits will return an minimum size estimated by an *optimal* // compression of the block. // The size of the block func (t *tokens) EstimatedBits() int { - shannon := float64(0) + shannon := float32(0) bits := int(0) nMatches := 0 if t.nLits > 0 { - invTotal := 1.0 / float64(t.nLits) + invTotal := 1.0 / float32(t.nLits) for _, v := range t.litHist[:] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n } } // Just add 15 for EOB shannon += 15 - for _, v := range t.extraHist[1 : literalCount-256] { + for i, v := range t.extraHist[1 : literalCount-256] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - bits += int(lengthExtraBits[v&31]) * int(v) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(lengthExtraBits[i&31]) * int(v) nMatches += int(v) } } } if nMatches > 0 { - invTotal := 1.0 / float64(nMatches) - for _, v := range t.offHist[:offsetCodeCount] { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { if v > 0 { - n := float64(v) - shannon += math.Ceil(-math.Log2(n*invTotal) * n) - bits += int(offsetExtraBits[v&31]) * int(n) + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(offsetExtraBits[i&31]) * int(v) } } } - return int(shannon) + bits } |