aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/internal/snapref/encode.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/internal/snapref/encode.go')
-rw-r--r--vendor/github.com/klauspost/compress/internal/snapref/encode.go289
1 files changed, 289 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode.go b/vendor/github.com/klauspost/compress/internal/snapref/encode.go
new file mode 100644
index 000000000..13c6040a5
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/snapref/encode.go
@@ -0,0 +1,289 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snapref
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+)
+
+// Encode returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Encode handles the Snappy block format, not the Snappy stream format.
+func Encode(dst, src []byte) []byte {
+ if n := MaxEncodedLen(len(src)); n < 0 {
+ panic(ErrTooLarge)
+ } else if len(dst) < n {
+ dst = make([]byte, n)
+ }
+
+ // The block starts with the varint-encoded length of the decompressed bytes.
+ d := binary.PutUvarint(dst, uint64(len(src)))
+
+ for len(src) > 0 {
+ p := src
+ src = nil
+ if len(p) > maxBlockSize {
+ p, src = p[:maxBlockSize], p[maxBlockSize:]
+ }
+ if len(p) < minNonLiteralBlockSize {
+ d += emitLiteral(dst[d:], p)
+ } else {
+ d += encodeBlock(dst[d:], p)
+ }
+ }
+ return dst[:d]
+}
+
+// inputMargin is the minimum number of extra input bytes to keep, inside
+// encodeBlock's inner loop. On some architectures, this margin lets us
+// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
+// literals can be implemented as a single load to and store from a 16-byte
+// register. That literal's actual length can be as short as 1 byte, so this
+// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
+// the encoding loop will fix up the copy overrun, and this inputMargin ensures
+// that we don't overrun the dst and src buffers.
+const inputMargin = 16 - 1
+
+// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
+// could be encoded with a copy tag. This is the minimum with respect to the
+// algorithm used by encodeBlock, not a minimum enforced by the file format.
+//
+// The encoded output must start with at least a 1 byte literal, as there are
+// no previous bytes to copy. A minimal (1 byte) copy after that, generated
+// from an emitCopy call in encodeBlock's main loop, would require at least
+// another inputMargin bytes, for the reason above: we want any emitLiteral
+// calls inside encodeBlock's main loop to use the fast path if possible, which
+// requires being able to overrun by inputMargin bytes. Thus,
+// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
+//
+// The C++ code doesn't use this exact threshold, but it could, as discussed at
+// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
+// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
+// optimization. It should not affect the encoded form. This is tested by
+// TestSameEncodingAsCppShortCopies.
+const minNonLiteralBlockSize = 1 + 1 + inputMargin
+
+// MaxEncodedLen returns the maximum length of a snappy block, given its
+// uncompressed length.
+//
+// It will return a negative value if srcLen is too large to encode.
+func MaxEncodedLen(srcLen int) int {
+ n := uint64(srcLen)
+ if n > 0xffffffff {
+ return -1
+ }
+ // Compressed data can be defined as:
+ // compressed := item* literal*
+ // item := literal* copy
+ //
+ // The trailing literal sequence has a space blowup of at most 62/60
+ // since a literal of length 60 needs one tag byte + one extra byte
+ // for length information.
+ //
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
+ // 4 bytes of data. Because of a special check in the encoding code,
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
+ // the copy op takes 3 bytes to encode, and this type of item leads
+ // to at most the 62/60 blowup for representing literals.
+ //
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
+ // worst case here is a one-byte literal followed by a five-byte copy.
+ // That is, 6 bytes of input turn into 7 bytes of "compressed" data.
+ //
+ // This last factor dominates the blowup, so the final estimate is:
+ n = 32 + n + n/6
+ if n > 0xffffffff {
+ return -1
+ }
+ return int(n)
+}
+
+var errClosed = errors.New("snappy: Writer is closed")
+
+// NewWriter returns a new Writer that compresses to w.
+//
+// The Writer returned does not buffer writes. There is no need to Flush or
+// Close such a Writer.
+//
+// Deprecated: the Writer returned is not suitable for many small writes, only
+// for few large writes. Use NewBufferedWriter instead, which is efficient
+// regardless of the frequency and shape of the writes, and remember to Close
+// that Writer when done.
+func NewWriter(w io.Writer) *Writer {
+ return &Writer{
+ w: w,
+ obuf: make([]byte, obufLen),
+ }
+}
+
+// NewBufferedWriter returns a new Writer that compresses to w, using the
+// framing format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+//
+// The Writer returned buffers writes. Users must call Close to guarantee all
+// data has been forwarded to the underlying io.Writer. They may also call
+// Flush zero or more times before calling Close.
+func NewBufferedWriter(w io.Writer) *Writer {
+ return &Writer{
+ w: w,
+ ibuf: make([]byte, 0, maxBlockSize),
+ obuf: make([]byte, obufLen),
+ }
+}
+
+// Writer is an io.Writer that can write Snappy-compressed bytes.
+//
+// Writer handles the Snappy stream format, not the Snappy block format.
+type Writer struct {
+ w io.Writer
+ err error
+
+ // ibuf is a buffer for the incoming (uncompressed) bytes.
+ //
+ // Its use is optional. For backwards compatibility, Writers created by the
+ // NewWriter function have ibuf == nil, do not buffer incoming bytes, and
+ // therefore do not need to be Flush'ed or Close'd.
+ ibuf []byte
+
+ // obuf is a buffer for the outgoing (compressed) bytes.
+ obuf []byte
+
+ // wroteStreamHeader is whether we have written the stream header.
+ wroteStreamHeader bool
+}
+
+// Reset discards the writer's state and switches the Snappy writer to write to
+// w. This permits reusing a Writer rather than allocating a new one.
+func (w *Writer) Reset(writer io.Writer) {
+ w.w = writer
+ w.err = nil
+ if w.ibuf != nil {
+ w.ibuf = w.ibuf[:0]
+ }
+ w.wroteStreamHeader = false
+}
+
+// Write satisfies the io.Writer interface.
+func (w *Writer) Write(p []byte) (nRet int, errRet error) {
+ if w.ibuf == nil {
+ // Do not buffer incoming bytes. This does not perform or compress well
+ // if the caller of Writer.Write writes many small slices. This
+ // behavior is therefore deprecated, but still supported for backwards
+ // compatibility with code that doesn't explicitly Flush or Close.
+ return w.write(p)
+ }
+
+ // The remainder of this method is based on bufio.Writer.Write from the
+ // standard library.
+
+ for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
+ var n int
+ if len(w.ibuf) == 0 {
+ // Large write, empty buffer.
+ // Write directly from p to avoid copy.
+ n, _ = w.write(p)
+ } else {
+ n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+ w.ibuf = w.ibuf[:len(w.ibuf)+n]
+ w.Flush()
+ }
+ nRet += n
+ p = p[n:]
+ }
+ if w.err != nil {
+ return nRet, w.err
+ }
+ n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+ w.ibuf = w.ibuf[:len(w.ibuf)+n]
+ nRet += n
+ return nRet, nil
+}
+
+func (w *Writer) write(p []byte) (nRet int, errRet error) {
+ if w.err != nil {
+ return 0, w.err
+ }
+ for len(p) > 0 {
+ obufStart := len(magicChunk)
+ if !w.wroteStreamHeader {
+ w.wroteStreamHeader = true
+ copy(w.obuf, magicChunk)
+ obufStart = 0
+ }
+
+ var uncompressed []byte
+ if len(p) > maxBlockSize {
+ uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
+ } else {
+ uncompressed, p = p, nil
+ }
+ checksum := crc(uncompressed)
+
+ // Compress the buffer, discarding the result if the improvement
+ // isn't at least 12.5%.
+ compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
+ chunkType := uint8(chunkTypeCompressedData)
+ chunkLen := 4 + len(compressed)
+ obufEnd := obufHeaderLen + len(compressed)
+ if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
+ chunkType = chunkTypeUncompressedData
+ chunkLen = 4 + len(uncompressed)
+ obufEnd = obufHeaderLen
+ }
+
+ // Fill in the per-chunk header that comes before the body.
+ w.obuf[len(magicChunk)+0] = chunkType
+ w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
+ w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
+ w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
+ w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
+ w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
+ w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
+ w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
+
+ if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
+ w.err = err
+ return nRet, err
+ }
+ if chunkType == chunkTypeUncompressedData {
+ if _, err := w.w.Write(uncompressed); err != nil {
+ w.err = err
+ return nRet, err
+ }
+ }
+ nRet += len(uncompressed)
+ }
+ return nRet, nil
+}
+
+// Flush flushes the Writer to its underlying io.Writer.
+func (w *Writer) Flush() error {
+ if w.err != nil {
+ return w.err
+ }
+ if len(w.ibuf) == 0 {
+ return nil
+ }
+ w.write(w.ibuf)
+ w.ibuf = w.ibuf[:0]
+ return w.err
+}
+
+// Close calls Flush and then closes the Writer.
+func (w *Writer) Close() error {
+ w.Flush()
+ ret := w.err
+ if w.err == nil {
+ w.err = errClosed
+ }
+ return ret
+}