From 9d6dce119949d340ebb42becae018a3c11148690 Mon Sep 17 00:00:00 2001 From: Daniel J Walsh Date: Thu, 1 Aug 2019 03:46:14 -0400 Subject: github.com/containers/storage v1.12.13 Signed-off-by: Daniel J Walsh --- vendor/github.com/klauspost/compress/fse/README.md | 79 +++ .../github.com/klauspost/compress/fse/bitreader.go | 107 ++++ .../github.com/klauspost/compress/fse/bitwriter.go | 168 +++++ .../klauspost/compress/fse/bytereader.go | 56 ++ .../github.com/klauspost/compress/fse/compress.go | 684 +++++++++++++++++++++ .../klauspost/compress/fse/decompress.go | 370 +++++++++++ vendor/github.com/klauspost/compress/fse/fse.go | 143 +++++ 7 files changed, 1607 insertions(+) create mode 100644 vendor/github.com/klauspost/compress/fse/README.md create mode 100644 vendor/github.com/klauspost/compress/fse/bitreader.go create mode 100644 vendor/github.com/klauspost/compress/fse/bitwriter.go create mode 100644 vendor/github.com/klauspost/compress/fse/bytereader.go create mode 100644 vendor/github.com/klauspost/compress/fse/compress.go create mode 100644 vendor/github.com/klauspost/compress/fse/decompress.go create mode 100644 vendor/github.com/klauspost/compress/fse/fse.go (limited to 'vendor/github.com/klauspost/compress/fse') diff --git a/vendor/github.com/klauspost/compress/fse/README.md b/vendor/github.com/klauspost/compress/fse/README.md new file mode 100644 index 000000000..ea7324da6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/README.md @@ -0,0 +1,79 @@ +# Finite State Entropy + +This package provides Finite State Entropy encoding and decoding. + +Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) +encoding provides a fast near-optimal symbol encoding/decoding +for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd). + +This can be used for compressing input with a lot of similar input values to the smallest number of bytes. +This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, +but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. + +* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse) + +## News + + * Feb 2018: First implementation released. Consider this beta software for now. + +# Usage + +This package provides a low level interface that allows to compress single independent blocks. + +Each block is separate, and there is no built in integrity checks. +This means that the caller should keep track of block sizes and also do checksums if needed. + +Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function. +You must provide input and will receive the output and maybe an error. + +These error values can be returned: + +| Error | Description | +|---------------------|-----------------------------------------------------------------------------| +| `` | Everything ok, output is returned | +| `ErrIncompressible` | Returned when input is judged to be too hard to compress | +| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | +| `(error)` | An internal error occurred. | + +As can be seen above there are errors that will be returned even under normal operation so it is important to handle these. + +To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object +that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same +object can be used for both. + +Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this +you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. + +Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function. +You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back +your input was likely corrupted. + +It is important to note that a successful decoding does *not* mean your output matches your original input. +There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. + +For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples). + +# Performance + +A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors. +All compression functions are currently only running on the calling goroutine so only one core will be used per block. + +The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input +is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be +beneficial to transpose all your input values down by 64. + +With moderate block sizes around 64k speed are typically 200MB/s per core for compression and +around 300MB/s decompression speed. + +The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. + +# Plans + +At one point, more internals will be exposed to facilitate more "expert" usage of the components. + +A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261). + +# Contributing + +Contributions are always welcome. Be aware that adding public functions will require good justification and breaking +changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go new file mode 100644 index 000000000..b9db204f5 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bitreader.go @@ -0,0 +1,107 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import ( + "errors" + "io" +) + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReader struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReader) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + b.fill() + b.fill() + b.bitsRead += 8 - uint8(highBits(uint32(v))) + return nil +} + +// getBits will return n bits. n can be 0. +func (b *bitReader) getBits(n uint8) uint16 { + if n == 0 || b.bitsRead >= 64 { + return 0 + } + return b.getBitsFast(n) +} + +// getBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReader) getBitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return v +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReader) fillFast() { + if b.bitsRead < 32 { + return + } + // Do single re-slice to avoid bounds checks. + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReader) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value = (b.value << 8) | uint64(b.in[b.off-1]) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReader) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReader) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go new file mode 100644 index 000000000..43e463611 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go @@ -0,0 +1,168 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import "fmt" + +// bitWriter will write bits. +// First bit will be LSB of the first byte of output. +type bitWriter struct { + bitContainer uint64 + nBits uint8 + out []byte +} + +// bitMask16 is bitmasks. Has extra to avoid bounds check. +var bitMask16 = [32]uint16{ + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF} /* up to 16 bits */ + +// addBits16NC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16NC(value uint16, bits uint8) { + b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16ZeroNC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +// This is fastest if bits can be zero. +func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { + if bits == 0 { + return + } + value <<= (16 - bits) & 15 + value >>= (16 - bits) & 15 + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// flush will flush all pending full bytes. +// There will be at least 56 bits available for writing when this has been called. +// Using flush32 is faster, but leaves less space for writing. +func (b *bitWriter) flush() { + v := b.nBits >> 3 + switch v { + case 0: + case 1: + b.out = append(b.out, + byte(b.bitContainer), + ) + case 2: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + ) + case 3: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + ) + case 4: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + ) + case 5: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + ) + case 6: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + ) + case 7: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + ) + case 8: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + byte(b.bitContainer>>56), + ) + default: + panic(fmt.Errorf("bits (%d) > 64", b.nBits)) + } + b.bitContainer >>= v << 3 + b.nBits &= 7 +} + +// flush32 will flush out, so there are at least 32 bits available for writing. +func (b *bitWriter) flush32() { + if b.nBits < 32 { + return + } + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24)) + b.nBits -= 32 + b.bitContainer >>= 32 +} + +// flushAlign will flush remaining full bytes and align to next byte boundary. +func (b *bitWriter) flushAlign() { + nbBytes := (b.nBits + 7) >> 3 + for i := uint8(0); i < nbBytes; i++ { + b.out = append(b.out, byte(b.bitContainer>>(i*8))) + } + b.nBits = 0 + b.bitContainer = 0 +} + +// close will write the alignment bit and write the final byte(s) +// to the output. +func (b *bitWriter) close() error { + // End mark + b.addBits16Clean(1, 1) + // flush until next byte. + b.flushAlign() + return nil +} + +// reset and continue writing by appending to out. +func (b *bitWriter) reset(out []byte) { + b.bitContainer = 0 + b.nBits = 0 + b.out = out +} diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go new file mode 100644 index 000000000..f228a46cd --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bytereader.go @@ -0,0 +1,56 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +// byteReader provides a byte reader that reads +// little endian values from a byte stream. +// The input stream is manually advanced. +// The reader performs no bounds checks. +type byteReader struct { + b []byte + off int +} + +// init will initialize the reader and set the input. +func (b *byteReader) init(in []byte) { + b.b = in + b.off = 0 +} + +// advance the stream b n bytes. +func (b *byteReader) advance(n uint) { + b.off += int(n) +} + +// Int32 returns a little endian int32 starting at current offset. +func (b byteReader) Int32() int32 { + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := int32(b2[3]) + v2 := int32(b2[2]) + v1 := int32(b2[1]) + v0 := int32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint32 returns a little endian uint32 starting at current offset. +func (b byteReader) Uint32() uint32 { + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// unread returns the unread portion of the input. +func (b byteReader) unread() []byte { + return b.b[b.off:] +} + +// remain will return the number of bytes remaining. +func (b byteReader) remain() int { + return len(b.b) - b.off +} diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go new file mode 100644 index 000000000..b69237c9b --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -0,0 +1,684 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import ( + "errors" + "fmt" +) + +// Compress the input bytes. Input must be < 2GB. +// Provide a Scratch buffer to avoid memory allocations. +// Note that the output is also kept in the scratch buffer. +// If input is too hard to compress, ErrIncompressible is returned. +// If input is a single byte value repeated ErrUseRLE is returned. +func Compress(in []byte, s *Scratch) ([]byte, error) { + if len(in) <= 1 { + return nil, ErrIncompressible + } + if len(in) > (2<<30)-1 { + return nil, errors.New("input too big, must be < 2GB") + } + s, err := s.prepare(in) + if err != nil { + return nil, err + } + + // Create histogram, if none was provided. + maxCount := s.maxCount + if maxCount == 0 { + maxCount = s.countSimple(in) + } + // Reset for next run. + s.clearCount = true + s.maxCount = 0 + if maxCount == len(in) { + // One symbol, use RLE + return nil, ErrUseRLE + } + if maxCount == 1 || maxCount < (len(in)>>7) { + // Each symbol present maximum once or too well distributed. + return nil, ErrIncompressible + } + s.optimalTableLog() + err = s.normalizeCount() + if err != nil { + return nil, err + } + err = s.writeCount() + if err != nil { + return nil, err + } + + if false { + err = s.validateNorm() + if err != nil { + return nil, err + } + } + + err = s.buildCTable() + if err != nil { + return nil, err + } + err = s.compress(in) + if err != nil { + return nil, err + } + s.Out = s.bw.out + // Check if we compressed. + if len(s.Out) >= len(in) { + return nil, ErrIncompressible + } + return s.Out, nil +} + +// cState contains the compression state of a stream. +type cState struct { + bw *bitWriter + stateTable []uint16 + state uint16 +} + +// init will initialize the compression state to the first symbol of the stream. +func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { + c.bw = bw + c.stateTable = ct.stateTable + + nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 + im := int32((nbBitsOut << 16) - first.deltaNbBits) + lu := (im >> nbBitsOut) + first.deltaFindState + c.state = c.stateTable[lu] + return +} + +// encode the output symbol provided and write it to the bitstream. +func (c *cState) encode(symbolTT symbolTransform) { + nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 + dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState + c.bw.addBits16NC(c.state, uint8(nbBitsOut)) + c.state = c.stateTable[dstState] +} + +// encode the output symbol provided and write it to the bitstream. +func (c *cState) encodeZero(symbolTT symbolTransform) { + nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 + dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState + c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) + c.state = c.stateTable[dstState] +} + +// flush will write the tablelog to the output and flush the remaining full bytes. +func (c *cState) flush(tableLog uint8) { + c.bw.flush32() + c.bw.addBits16NC(c.state, tableLog) + c.bw.flush() +} + +// compress is the main compression loop that will encode the input from the last byte to the first. +func (s *Scratch) compress(src []byte) error { + if len(src) <= 2 { + return errors.New("compress: src too small") + } + tt := s.ct.symbolTT[:256] + s.bw.reset(s.Out) + + // Our two states each encodes every second byte. + // Last byte encoded (first byte decoded) will always be encoded by c1. + var c1, c2 cState + + // Encode so remaining size is divisible by 4. + ip := len(src) + if ip&1 == 1 { + c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) + c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) + c1.encodeZero(tt[src[ip-3]]) + ip -= 3 + } else { + c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) + c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) + ip -= 2 + } + if ip&2 != 0 { + c2.encodeZero(tt[src[ip-1]]) + c1.encodeZero(tt[src[ip-2]]) + ip -= 2 + } + + // Main compression loop. + switch { + case !s.zeroBits && s.actualTableLog <= 8: + // We can encode 4 symbols without requiring a flush. + // We do not need to check if any output is 0 bits. + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encode(tt[v0]) + c1.encode(tt[v1]) + c2.encode(tt[v2]) + c1.encode(tt[v3]) + ip -= 4 + } + case !s.zeroBits: + // We do not need to check if any output is 0 bits. + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encode(tt[v0]) + c1.encode(tt[v1]) + s.bw.flush32() + c2.encode(tt[v2]) + c1.encode(tt[v3]) + ip -= 4 + } + case s.actualTableLog <= 8: + // We can encode 4 symbols without requiring a flush + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encodeZero(tt[v0]) + c1.encodeZero(tt[v1]) + c2.encodeZero(tt[v2]) + c1.encodeZero(tt[v3]) + ip -= 4 + } + default: + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encodeZero(tt[v0]) + c1.encodeZero(tt[v1]) + s.bw.flush32() + c2.encodeZero(tt[v2]) + c1.encodeZero(tt[v3]) + ip -= 4 + } + } + + // Flush final state. + // Used to initialize state when decoding. + c2.flush(s.actualTableLog) + c1.flush(s.actualTableLog) + + return s.bw.close() +} + +// writeCount will write the normalized histogram count to header. +// This is read back by readNCount. +func (s *Scratch) writeCount() error { + var ( + tableLog = s.actualTableLog + tableSize = 1 << tableLog + previous0 bool + charnum uint16 + + maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + + // Write Table Size + bitStream = uint32(tableLog - minTablelog) + bitCount = uint(4) + remaining = int16(tableSize + 1) /* +1 for extra accuracy */ + threshold = int16(tableSize) + nbBits = uint(tableLog + 1) + ) + if cap(s.Out) < maxHeaderSize { + s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize) + } + outP := uint(0) + out := s.Out[:maxHeaderSize] + + // stops at 1 + for remaining > 1 { + if previous0 { + start := charnum + for s.norm[charnum] == 0 { + charnum++ + } + for charnum >= start+24 { + start += 24 + bitStream += uint32(0xFFFF) << bitCount + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + } + for charnum >= start+3 { + start += 3 + bitStream += 3 << bitCount + bitCount += 2 + } + bitStream += uint32(charnum-start) << bitCount + bitCount += 2 + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + count := s.norm[charnum] + charnum++ + max := (2*threshold - 1) - remaining + if count < 0 { + remaining += count + } else { + remaining -= count + } + count++ // +1 for extra accuracy + if count >= threshold { + count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ + } + bitStream += uint32(count) << bitCount + bitCount += nbBits + if count < max { + bitCount-- + } + + previous0 = count == 1 + if remaining < 1 { + return errors.New("internal error: remaining<1") + } + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += (bitCount + 7) / 8 + + if uint16(charnum) > s.symbolLen { + return errors.New("internal error: charnum > s.symbolLen") + } + s.Out = out[:outP] + return nil +} + +// symbolTransform contains the state transform for a symbol. +type symbolTransform struct { + deltaFindState int32 + deltaNbBits uint32 +} + +// String prints values as a human readable string. +func (s symbolTransform) String() string { + return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) +} + +// cTable contains tables used for compression. +type cTable struct { + tableSymbol []byte + stateTable []uint16 + symbolTT []symbolTransform +} + +// allocCtable will allocate tables needed for compression. +// If existing tables a re big enough, they are simply re-used. +func (s *Scratch) allocCtable() { + tableSize := 1 << s.actualTableLog + // get tableSymbol that is big enough. + if cap(s.ct.tableSymbol) < int(tableSize) { + s.ct.tableSymbol = make([]byte, tableSize) + } + s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] + + ctSize := tableSize + if cap(s.ct.stateTable) < ctSize { + s.ct.stateTable = make([]uint16, ctSize) + } + s.ct.stateTable = s.ct.stateTable[:ctSize] + + if cap(s.ct.symbolTT) < 256 { + s.ct.symbolTT = make([]symbolTransform, 256) + } + s.ct.symbolTT = s.ct.symbolTT[:256] +} + +// buildCTable will populate the compression table so it is ready to be used. +func (s *Scratch) buildCTable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + var cumul [maxSymbolValue + 2]int16 + + s.allocCtable() + tableSymbol := s.ct.tableSymbol[:tableSize] + // symbol start positions + { + cumul[0] = 0 + for ui, v := range s.norm[:s.symbolLen-1] { + u := byte(ui) // one less than reference + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = u + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + } + // Encode last symbol separately to avoid overflowing u + u := int(s.symbolLen - 1) + v := s.norm[s.symbolLen-1] + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = byte(u) + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + if uint32(cumul[s.symbolLen]) != tableSize { + return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) + } + cumul[s.symbolLen] = int16(tableSize) + 1 + } + // Spread symbols + s.zeroBits = false + { + step := tableStep(tableSize) + tableMask := tableSize - 1 + var position uint32 + // if any symbol > largeLimit, we may have 0 bits output. + largeLimit := int16(1 << (s.actualTableLog - 1)) + for ui, v := range s.norm[:s.symbolLen] { + symbol := byte(ui) + if v > largeLimit { + s.zeroBits = true + } + for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { + tableSymbol[position] = symbol + position = (position + step) & tableMask + for position > highThreshold { + position = (position + step) & tableMask + } /* Low proba area */ + } + } + + // Check if we have gone through all positions + if position != 0 { + return errors.New("position!=0") + } + } + + // Build table + table := s.ct.stateTable + { + tsi := int(tableSize) + for u, v := range tableSymbol { + // TableU16 : sorted by symbol order; gives next state value + table[cumul[v]] = uint16(tsi + u) + cumul[v]++ + } + } + + // Build Symbol Transformation Table + { + total := int16(0) + symbolTT := s.ct.symbolTT[:s.symbolLen] + tableLog := s.actualTableLog + tl := (uint32(tableLog) << 16) - (1 << tableLog) + for i, v := range s.norm[:s.symbolLen] { + switch v { + case 0: + case -1, 1: + symbolTT[i].deltaNbBits = tl + symbolTT[i].deltaFindState = int32(total - 1) + total++ + default: + maxBitsOut := uint32(tableLog) - highBits(uint32(v-1)) + minStatePlus := uint32(v) << maxBitsOut + symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus + symbolTT[i].deltaFindState = int32(total - v) + total += v + } + } + if total != int16(tableSize) { + return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) + } + } + return nil +} + +// countSimple will create a simple histogram in s.count. +// Returns the biggest count. +// Does not update s.clearCount. +func (s *Scratch) countSimple(in []byte) (max int) { + for _, v := range in { + s.count[v]++ + } + m := uint32(0) + for i, v := range s.count[:] { + if v > m { + m = v + } + if v > 0 { + s.symbolLen = uint16(i) + 1 + } + } + return int(m) +} + +// minTableLog provides the minimum logSize to safely represent a distribution. +func (s *Scratch) minTableLog() uint8 { + minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 + minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 + if minBitsSrc < minBitsSymbols { + return uint8(minBitsSrc) + } + return uint8(minBitsSymbols) +} + +// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog +func (s *Scratch) optimalTableLog() { + tableLog := s.TableLog + minBits := s.minTableLog() + maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2 + if maxBitsSrc < tableLog { + // Accuracy can be reduced + tableLog = maxBitsSrc + } + if minBits > tableLog { + tableLog = minBits + } + // Need a minimum to safely represent all symbol values + if tableLog < minTablelog { + tableLog = minTablelog + } + if tableLog > maxTableLog { + tableLog = maxTableLog + } + s.actualTableLog = tableLog +} + +var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} + +// normalizeCount will normalize the count of the symbols so +// the total is equal to the table size. +func (s *Scratch) normalizeCount() error { + var ( + tableLog = s.actualTableLog + scale = 62 - uint64(tableLog) + step = (1 << 62) / uint64(s.br.remain()) + vStep = uint64(1) << (scale - 20) + stillToDistribute = int16(1 << tableLog) + largest int + largestP int16 + lowThreshold = (uint32)(s.br.remain() >> tableLog) + ) + + for i, cnt := range s.count[:s.symbolLen] { + // already handled + // if (count[s] == s.length) return 0; /* rle special case */ + + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + stillToDistribute-- + } else { + proba := (int16)((uint64(cnt) * step) >> scale) + if proba < 8 { + restToBeat := vStep * uint64(rtbTable[proba]) + v := uint64(cnt)*step - (uint64(proba) << scale) + if v > restToBeat { + proba++ + } + } + if proba > largestP { + largestP = proba + largest = i + } + s.norm[i] = proba + stillToDistribute -= proba + } + } + + if -stillToDistribute >= (s.norm[largest] >> 1) { + // corner case, need another normalization method + return s.normalizeCount2() + } + s.norm[largest] += stillToDistribute + return nil +} + +// Secondary normalization method. +// To be used when primary method fails. +func (s *Scratch) normalizeCount2() error { + const notYetAssigned = -2 + var ( + distributed uint32 + total = uint32(s.br.remain()) + tableLog = s.actualTableLog + lowThreshold = uint32(total >> tableLog) + lowOne = uint32((total * 3) >> (tableLog + 1)) + ) + for i, cnt := range s.count[:s.symbolLen] { + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + distributed++ + total -= cnt + continue + } + if cnt <= lowOne { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + s.norm[i] = notYetAssigned + } + toDistribute := (1 << tableLog) - distributed + + if (total / toDistribute) > lowOne { + // risk of rounding to zero + lowOne = uint32((total * 3) / (toDistribute * 2)) + for i, cnt := range s.count[:s.symbolLen] { + if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + } + toDistribute = (1 << tableLog) - distributed + } + if distributed == uint32(s.symbolLen)+1 { + // all values are pretty poor; + // probably incompressible data (should have already been detected); + // find max, then give all remaining points to max + var maxV int + var maxC uint32 + for i, cnt := range s.count[:s.symbolLen] { + if cnt > maxC { + maxV = i + maxC = cnt + } + } + s.norm[maxV] += int16(toDistribute) + return nil + } + + if total == 0 { + // all of the symbols were low enough for the lowOne or lowThreshold + for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { + if s.norm[i] > 0 { + toDistribute-- + s.norm[i]++ + } + } + return nil + } + + var ( + vStepLog = 62 - uint64(tableLog) + mid = uint64((1 << (vStepLog - 1)) - 1) + rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining + tmpTotal = mid + ) + for i, cnt := range s.count[:s.symbolLen] { + if s.norm[i] == notYetAssigned { + var ( + end = tmpTotal + uint64(cnt)*rStep + sStart = uint32(tmpTotal >> vStepLog) + sEnd = uint32(end >> vStepLog) + weight = sEnd - sStart + ) + if weight < 1 { + return errors.New("weight < 1") + } + s.norm[i] = int16(weight) + tmpTotal = end + } + } + return nil +} + +// validateNorm validates the normalized histogram table. +func (s *Scratch) validateNorm() (err error) { + var total int + for _, v := range s.norm[:s.symbolLen] { + if v >= 0 { + total += int(v) + } else { + total -= int(v) + } + } + defer func() { + if err == nil { + return + } + fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) + for i, v := range s.norm[:s.symbolLen] { + fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) + } + }() + if total != (1 << s.actualTableLog) { + return fmt.Errorf("warning: Total == %d != %d", total, 1< tablelogAbsoluteMax { + return errors.New("tableLog too large") + } + bitStream >>= 4 + bitCount := uint(4) + + s.actualTableLog = uint8(nbBits) + remaining := int32((1 << nbBits) + 1) + threshold := int32(1 << nbBits) + gotTotal := int32(0) + nbBits++ + + for remaining > 1 { + if previous0 { + n0 := charnum + for (bitStream & 0xFFFF) == 0xFFFF { + n0 += 24 + if b.off < iend-5 { + b.advance(2) + bitStream = b.Uint32() >> bitCount + } else { + bitStream >>= 16 + bitCount += 16 + } + } + for (bitStream & 3) == 3 { + n0 += 3 + bitStream >>= 2 + bitCount += 2 + } + n0 += uint16(bitStream & 3) + bitCount += 2 + if n0 > maxSymbolValue { + return errors.New("maxSymbolValue too small") + } + for charnum < n0 { + s.norm[charnum&0xff] = 0 + charnum++ + } + + if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { + b.advance(bitCount >> 3) + bitCount &= 7 + bitStream = b.Uint32() >> bitCount + } else { + bitStream >>= 2 + } + } + + max := (2*(threshold) - 1) - (remaining) + var count int32 + + if (int32(bitStream) & (threshold - 1)) < max { + count = int32(bitStream) & (threshold - 1) + bitCount += nbBits - 1 + } else { + count = int32(bitStream) & (2*threshold - 1) + if count >= threshold { + count -= max + } + bitCount += nbBits + } + + count-- // extra accuracy + if count < 0 { + // -1 means +1 + remaining += count + gotTotal -= count + } else { + remaining -= count + gotTotal += count + } + s.norm[charnum&0xff] = int16(count) + charnum++ + previous0 = count == 0 + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { + b.advance(bitCount >> 3) + bitCount &= 7 + } else { + bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) + b.off = len(b.b) - 4 + } + bitStream = b.Uint32() >> (bitCount & 31) + } + s.symbolLen = charnum + + if s.symbolLen <= 1 { + return fmt.Errorf("symbolLen (%d) too small", s.symbolLen) + } + if s.symbolLen > maxSymbolValue+1 { + return fmt.Errorf("symbolLen (%d) too big", s.symbolLen) + } + if remaining != 1 { + return fmt.Errorf("corruption detected (remaining %d != 1)", remaining) + } + if bitCount > 32 { + return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount) + } + if gotTotal != 1<> 3) + return nil +} + +// decSymbol contains information about a state entry, +// Including the state offset base, the output symbol and +// the number of bits to read for the low part of the destination state. +type decSymbol struct { + newState uint16 + symbol uint8 + nbBits uint8 +} + +// allocDtable will allocate decoding tables if they are not big enough. +func (s *Scratch) allocDtable() { + tableSize := 1 << s.actualTableLog + if cap(s.decTable) < int(tableSize) { + s.decTable = make([]decSymbol, tableSize) + } + s.decTable = s.decTable[:tableSize] + + if cap(s.ct.tableSymbol) < 256 { + s.ct.tableSymbol = make([]byte, 256) + } + s.ct.tableSymbol = s.ct.tableSymbol[:256] + + if cap(s.ct.stateTable) < 256 { + s.ct.stateTable = make([]uint16, 256) + } + s.ct.stateTable = s.ct.stateTable[:256] +} + +// buildDtable will build the decoding table. +func (s *Scratch) buildDtable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + s.allocDtable() + symbolNext := s.ct.stateTable[:256] + + // Init, lay down lowprob symbols + s.zeroBits = false + { + largeLimit := int16(1 << (s.actualTableLog - 1)) + for i, v := range s.norm[:s.symbolLen] { + if v == -1 { + s.decTable[highThreshold].symbol = uint8(i) + highThreshold-- + symbolNext[i] = 1 + } else { + if v >= largeLimit { + s.zeroBits = true + } + symbolNext[i] = uint16(v) + } + } + } + // Spread symbols + { + tableMask := tableSize - 1 + step := tableStep(tableSize) + position := uint32(0) + for ss, v := range s.norm[:s.symbolLen] { + for i := 0; i < int(v); i++ { + s.decTable[position].symbol = uint8(ss) + position = (position + step) & tableMask + for position > highThreshold { + // lowprob area + position = (position + step) & tableMask + } + } + } + if position != 0 { + // position must reach all cells once, otherwise normalizedCounter is incorrect + return errors.New("corrupted input (position != 0)") + } + } + + // Build Decoding table + { + tableSize := uint16(1 << s.actualTableLog) + for u, v := range s.decTable { + symbol := v.symbol + nextState := symbolNext[symbol] + symbolNext[symbol] = nextState + 1 + nBits := s.actualTableLog - byte(highBits(uint32(nextState))) + s.decTable[u].nbBits = nBits + newState := (nextState << nBits) - tableSize + if newState > tableSize { + return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) + } + if newState == uint16(u) && nBits == 0 { + // Seems weird that this is possible with nbits > 0. + return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) + } + s.decTable[u].newState = newState + } + } + return nil +} + +// decompress will decompress the bitstream. +// If the buffer is over-read an error is returned. +func (s *Scratch) decompress() error { + br := &s.bits + br.init(s.br.unread()) + + var s1, s2 decoder + // Initialize and decode first state and symbol. + s1.init(br, s.decTable, s.actualTableLog) + s2.init(br, s.decTable, s.actualTableLog) + + // Use temp table to avoid bound checks/append penalty. + var tmp = s.ct.tableSymbol[:256] + var off uint8 + + // Main part + if !s.zeroBits { + for br.off >= 8 { + br.fillFast() + tmp[off+0] = s1.nextFast() + tmp[off+1] = s2.nextFast() + br.fillFast() + tmp[off+2] = s1.nextFast() + tmp[off+3] = s2.nextFast() + off += 4 + if off == 0 { + s.Out = append(s.Out, tmp...) + } + } + } else { + for br.off >= 8 { + br.fillFast() + tmp[off+0] = s1.next() + tmp[off+1] = s2.next() + br.fillFast() + tmp[off+2] = s1.next() + tmp[off+3] = s2.next() + off += 4 + if off == 0 { + s.Out = append(s.Out, tmp...) + off = 0 + if len(s.Out) >= s.DecompressLimit { + return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) + } + } + } + } + s.Out = append(s.Out, tmp[:off]...) + + // Final bits, a bit more expensive check + for { + if s1.finished() { + s.Out = append(s.Out, s1.final(), s2.final()) + break + } + br.fill() + s.Out = append(s.Out, s1.next()) + if s2.finished() { + s.Out = append(s.Out, s2.final(), s1.final()) + break + } + s.Out = append(s.Out, s2.next()) + if len(s.Out) >= s.DecompressLimit { + return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) + } + } + return br.close() +} + +// decoder keeps track of the current state and updates it from the bitstream. +type decoder struct { + state uint16 + br *bitReader + dt []decSymbol +} + +// init will initialize the decoder and read the first state from the stream. +func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { + d.dt = dt + d.br = in + d.state = uint16(in.getBits(tableLog)) +} + +// next returns the next symbol and sets the next state. +// At least tablelog bits must be available in the bit reader. +func (d *decoder) next() uint8 { + n := &d.dt[d.state] + lowBits := d.br.getBits(n.nbBits) + d.state = n.newState + lowBits + return n.symbol +} + +// finished returns true if all bits have been read from the bitstream +// and the next state would require reading bits from the input. +func (d *decoder) finished() bool { + return d.br.finished() && d.dt[d.state].nbBits > 0 +} + +// final returns the current state symbol without decoding the next. +func (d *decoder) final() uint8 { + return d.dt[d.state].symbol +} + +// nextFast returns the next symbol and sets the next state. +// This can only be used if no symbols are 0 bits. +// At least tablelog bits must be available in the bit reader. +func (d *decoder) nextFast() uint8 { + n := d.dt[d.state] + lowBits := d.br.getBitsFast(n.nbBits) + d.state = n.newState + lowBits + return n.symbol +} diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go new file mode 100644 index 000000000..075357b5b --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/fse.go @@ -0,0 +1,143 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +// Package fse provides Finite State Entropy encoding and decoding. +// +// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding +// for byte blocks as implemented in zstd. +// +// See https://github.com/klauspost/compress/tree/master/fse for more information. +package fse + +import ( + "errors" + "fmt" + "math/bits" +) + +const ( + /*!MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ + maxMemoryUsage = 14 + defaultMemoryUsage = 13 + + maxTableLog = maxMemoryUsage - 2 + maxTablesize = 1 << maxTableLog + defaultTablelog = defaultMemoryUsage - 2 + minTablelog = 5 + maxSymbolValue = 255 +) + +var ( + // ErrIncompressible is returned when input is judged to be too hard to compress. + ErrIncompressible = errors.New("input is not compressible") + + // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. + ErrUseRLE = errors.New("input is single value repeated") +) + +// Scratch provides temporary storage for compression and decompression. +type Scratch struct { + // Private + count [maxSymbolValue + 1]uint32 + norm [maxSymbolValue + 1]int16 + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + br byteReader + bits bitReader + bw bitWriter + ct cTable // Compression tables. + decTable []decSymbol // Decompression table. + zeroBits bool // no bits has prob > 50%. + clearCount bool // clear count + maxCount int // count of the most probable symbol + + // Per block parameters. + // These can be used to override compression parameters of the block. + // Do not touch, unless you know what you are doing. + + // Out is output buffer. + // If the scratch is re-used before the caller is done processing the output, + // set this field to nil. + // Otherwise the output buffer will be re-used for next Compression/Decompression step + // and allocation will be avoided. + Out []byte + + // MaxSymbolValue will override the maximum symbol value of the next block. + MaxSymbolValue uint8 + + // TableLog will attempt to override the tablelog for the next block. + TableLog uint8 + + // DecompressLimit limits the maximum decoded size acceptable. + // If > 0 decompression will stop when approximately this many bytes + // has been decoded. + // If 0, maximum size will be 2GB. + DecompressLimit int +} + +// Histogram allows to populate the histogram and skip that step in the compression, +// It otherwise allows to inspect the histogram when compression is done. +// To indicate that you have populated the histogram call HistogramFinished +// with the value of the highest populated symbol, as well as the number of entries +// in the most populated entry. These are accepted at face value. +// The returned slice will always be length 256. +func (s *Scratch) Histogram() []uint32 { + return s.count[:] +} + +// HistogramFinished can be called to indicate that the histogram has been populated. +// maxSymbol is the index of the highest set symbol of the next data segment. +// maxCount is the number of entries in the most populated entry. +// These are accepted at face value. +func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) { + s.maxCount = maxCount + s.symbolLen = uint16(maxSymbol) + 1 + s.clearCount = maxCount != 0 +} + +// prepare will prepare and allocate scratch tables used for both compression and decompression. +func (s *Scratch) prepare(in []byte) (*Scratch, error) { + if s == nil { + s = &Scratch{} + } + if s.MaxSymbolValue == 0 { + s.MaxSymbolValue = 255 + } + if s.TableLog == 0 { + s.TableLog = defaultTablelog + } + if s.TableLog > maxTableLog { + return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog) + } + if cap(s.Out) == 0 { + s.Out = make([]byte, 0, len(in)) + } + if s.clearCount && s.maxCount == 0 { + for i := range s.count { + s.count[i] = 0 + } + s.clearCount = false + } + s.br.init(in) + if s.DecompressLimit == 0 { + // Max size 2GB. + s.DecompressLimit = (2 << 30) - 1 + } + + return s, nil +} + +// tableStep returns the next table index. +func tableStep(tableSize uint32) uint32 { + return (tableSize >> 1) + (tableSize >> 3) + 3 +} + +func highBits(val uint32) (n uint32) { + return uint32(bits.Len32(val) - 1) +} -- cgit v1.2.3-54-g00ecf