diff options
Diffstat (limited to 'vendor/github.com/ulikunitz/xz/lzma')
27 files changed, 4616 insertions, 0 deletions
diff --git a/vendor/github.com/ulikunitz/xz/lzma/bintree.go b/vendor/github.com/ulikunitz/xz/lzma/bintree.go new file mode 100644 index 000000000..a781bd195 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bintree.go @@ -0,0 +1,523 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "bufio" + "errors" + "fmt" + "io" + "unicode" +) + +// node represents a node in the binary tree. +type node struct { + // x is the search value + x uint32 + // p parent node + p uint32 + // l left child + l uint32 + // r right child + r uint32 +} + +// wordLen is the number of bytes represented by the v field of a node. +const wordLen = 4 + +// binTree supports the identification of the next operation based on a +// binary tree. +// +// Nodes will be identified by their index into the ring buffer. +type binTree struct { + dict *encoderDict + // ring buffer of nodes + node []node + // absolute offset of the entry for the next node. Position 4 + // byte larger. + hoff int64 + // front position in the node ring buffer + front uint32 + // index of the root node + root uint32 + // current x value + x uint32 + // preallocated array + data []byte +} + +// null represents the nonexistent index. We can't use zero because it +// would always exist or we would need to decrease the index for each +// reference. +const null uint32 = 1<<32 - 1 + +// newBinTree initializes the binTree structure. The capacity defines +// the size of the buffer and defines the maximum distance for which +// matches will be found. +func newBinTree(capacity int) (t *binTree, err error) { + if capacity < 1 { + return nil, errors.New( + "newBinTree: capacity must be larger than zero") + } + if int64(capacity) >= int64(null) { + return nil, errors.New( + "newBinTree: capacity must less 2^{32}-1") + } + t = &binTree{ + node: make([]node, capacity), + hoff: -int64(wordLen), + root: null, + data: make([]byte, maxMatchLen), + } + return t, nil +} + +func (t *binTree) SetDict(d *encoderDict) { t.dict = d } + +// WriteByte writes a single byte into the binary tree. +func (t *binTree) WriteByte(c byte) error { + t.x = (t.x << 8) | uint32(c) + t.hoff++ + if t.hoff < 0 { + return nil + } + v := t.front + if int64(v) < t.hoff { + // We are overwriting old nodes stored in the tree. + t.remove(v) + } + t.node[v].x = t.x + t.add(v) + t.front++ + if int64(t.front) >= int64(len(t.node)) { + t.front = 0 + } + return nil +} + +// Writes writes a sequence of bytes into the binTree structure. +func (t *binTree) Write(p []byte) (n int, err error) { + for _, c := range p { + t.WriteByte(c) + } + return len(p), nil +} + +// add puts the node v into the tree. The node must not be part of the +// tree before. +func (t *binTree) add(v uint32) { + vn := &t.node[v] + // Set left and right to null indices. + vn.l, vn.r = null, null + // If the binary tree is empty make v the root. + if t.root == null { + t.root = v + vn.p = null + return + } + x := vn.x + p := t.root + // Search for the right leave link and add the new node. + for { + pn := &t.node[p] + if x <= pn.x { + if pn.l == null { + pn.l = v + vn.p = p + return + } + p = pn.l + } else { + if pn.r == null { + pn.r = v + vn.p = p + return + } + p = pn.r + } + } +} + +// parent returns the parent node index of v and the pointer to v value +// in the parent. +func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) { + if t.root == v { + return null, &t.root + } + p = t.node[v].p + if t.node[p].l == v { + ptr = &t.node[p].l + } else { + ptr = &t.node[p].r + } + return +} + +// Remove node v. +func (t *binTree) remove(v uint32) { + vn := &t.node[v] + p, ptr := t.parent(v) + l, r := vn.l, vn.r + if l == null { + // Move the right child up. + *ptr = r + if r != null { + t.node[r].p = p + } + return + } + if r == null { + // Move the left child up. + *ptr = l + t.node[l].p = p + return + } + + // Search the in-order predecessor u. + un := &t.node[l] + ur := un.r + if ur == null { + // In order predecessor is l. Move it up. + un.r = r + t.node[r].p = l + un.p = p + *ptr = l + return + } + var u uint32 + for { + // Look for the max value in the tree where l is root. + u = ur + ur = t.node[u].r + if ur == null { + break + } + } + // replace u with ul + un = &t.node[u] + ul := un.l + up := un.p + t.node[up].r = ul + if ul != null { + t.node[ul].p = up + } + + // replace v by u + un.l, un.r = l, r + t.node[l].p = u + t.node[r].p = u + *ptr = u + un.p = p +} + +// search looks for the node that have the value x or for the nodes that +// brace it. The node highest in the tree with the value x will be +// returned. All other nodes with the same value live in left subtree of +// the returned node. +func (t *binTree) search(v uint32, x uint32) (a, b uint32) { + a, b = null, null + if v == null { + return + } + for { + vn := &t.node[v] + if x <= vn.x { + if x == vn.x { + return v, v + } + b = v + if vn.l == null { + return + } + v = vn.l + } else { + a = v + if vn.r == null { + return + } + v = vn.r + } + } +} + +// max returns the node with maximum value in the subtree with v as +// root. +func (t *binTree) max(v uint32) uint32 { + if v == null { + return null + } + for { + r := t.node[v].r + if r == null { + return v + } + v = r + } +} + +// min returns the node with the minimum value in the subtree with v as +// root. +func (t *binTree) min(v uint32) uint32 { + if v == null { + return null + } + for { + l := t.node[v].l + if l == null { + return v + } + v = l + } +} + +// pred returns the in-order predecessor of node v. +func (t *binTree) pred(v uint32) uint32 { + if v == null { + return null + } + u := t.max(t.node[v].l) + if u != null { + return u + } + for { + p := t.node[v].p + if p == null { + return null + } + if t.node[p].r == v { + return p + } + v = p + } +} + +// succ returns the in-order successor of node v. +func (t *binTree) succ(v uint32) uint32 { + if v == null { + return null + } + u := t.min(t.node[v].r) + if u != null { + return u + } + for { + p := t.node[v].p + if p == null { + return null + } + if t.node[p].l == v { + return p + } + v = p + } +} + +// xval converts the first four bytes of a into an 32-bit unsigned +// integer in big-endian order. +func xval(a []byte) uint32 { + var x uint32 + switch len(a) { + default: + x |= uint32(a[3]) + fallthrough + case 3: + x |= uint32(a[2]) << 8 + fallthrough + case 2: + x |= uint32(a[1]) << 16 + fallthrough + case 1: + x |= uint32(a[0]) << 24 + case 0: + } + return x +} + +// dumpX converts value x into a four-letter string. +func dumpX(x uint32) string { + a := make([]byte, 4) + for i := 0; i < 4; i++ { + c := byte(x >> uint((3-i)*8)) + if unicode.IsGraphic(rune(c)) { + a[i] = c + } else { + a[i] = '.' + } + } + return string(a) +} + +// dumpNode writes a representation of the node v into the io.Writer. +func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) { + if v == null { + return + } + + vn := &t.node[v] + + t.dumpNode(w, vn.r, indent+2) + + for i := 0; i < indent; i++ { + fmt.Fprint(w, " ") + } + if vn.p == null { + fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x)) + } else { + fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p) + } + + t.dumpNode(w, vn.l, indent+2) +} + +// dump prints a representation of the binary tree into the writer. +func (t *binTree) dump(w io.Writer) error { + bw := bufio.NewWriter(w) + t.dumpNode(bw, t.root, 0) + return bw.Flush() +} + +func (t *binTree) distance(v uint32) int { + dist := int(t.front) - int(v) + if dist <= 0 { + dist += len(t.node) + } + return dist +} + +type matchParams struct { + rep [4]uint32 + // length when match will be accepted + nAccept int + // nodes to check + check int + // finish if length get shorter + stopShorter bool +} + +func (t *binTree) match(m match, distIter func() (int, bool), p matchParams, +) (r match, checked int, accepted bool) { + buf := &t.dict.buf + for { + if checked >= p.check { + return m, checked, true + } + dist, ok := distIter() + if !ok { + return m, checked, false + } + checked++ + if m.n > 0 { + i := buf.rear - dist + m.n - 1 + if i < 0 { + i += len(buf.data) + } else if i >= len(buf.data) { + i -= len(buf.data) + } + if buf.data[i] != t.data[m.n-1] { + if p.stopShorter { + return m, checked, false + } + continue + } + } + n := buf.matchLen(dist, t.data) + switch n { + case 0: + if p.stopShorter { + return m, checked, false + } + continue + case 1: + if uint32(dist-minDistance) != p.rep[0] { + continue + } + } + if n < m.n || (n == m.n && int64(dist) >= m.distance) { + continue + } + m = match{int64(dist), n} + if n >= p.nAccept { + return m, checked, true + } + } +} + +func (t *binTree) NextOp(rep [4]uint32) operation { + // retrieve maxMatchLen data + n, _ := t.dict.buf.Peek(t.data[:maxMatchLen]) + if n == 0 { + panic("no data in buffer") + } + t.data = t.data[:n] + + var ( + m match + x, u, v uint32 + iterPred, iterSucc func() (int, bool) + ) + p := matchParams{ + rep: rep, + nAccept: maxMatchLen, + check: 32, + } + i := 4 + iterSmall := func() (dist int, ok bool) { + i-- + if i <= 0 { + return 0, false + } + return i, true + } + m, checked, accepted := t.match(m, iterSmall, p) + if accepted { + goto end + } + p.check -= checked + x = xval(t.data) + u, v = t.search(t.root, x) + if u == v && len(t.data) == 4 { + iter := func() (dist int, ok bool) { + if u == null { + return 0, false + } + dist = t.distance(u) + u, v = t.search(t.node[u].l, x) + if u != v { + u = null + } + return dist, true + } + m, _, _ = t.match(m, iter, p) + goto end + } + p.stopShorter = true + iterSucc = func() (dist int, ok bool) { + if v == null { + return 0, false + } + dist = t.distance(v) + v = t.succ(v) + return dist, true + } + m, checked, accepted = t.match(m, iterSucc, p) + if accepted { + goto end + } + p.check -= checked + iterPred = func() (dist int, ok bool) { + if u == null { + return 0, false + } + dist = t.distance(u) + u = t.pred(u) + return dist, true + } + m, _, _ = t.match(m, iterPred, p) +end: + if m.n == 0 { + return lit{t.data[0]} + } + return m +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/bitops.go b/vendor/github.com/ulikunitz/xz/lzma/bitops.go new file mode 100644 index 000000000..e9bab0199 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bitops.go @@ -0,0 +1,45 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +/* Naming conventions follows the CodeReviewComments in the Go Wiki. */ + +// ntz32Const is used by the functions NTZ and NLZ. +const ntz32Const = 0x04d7651f + +// ntz32Table is a helper table for de Bruijn algorithm by Danny Dubé. +// See Henry S. Warren, Jr. "Hacker's Delight" section 5-1 figure 5-26. +var ntz32Table = [32]int8{ + 0, 1, 2, 24, 3, 19, 6, 25, + 22, 4, 20, 10, 16, 7, 12, 26, + 31, 23, 18, 5, 21, 9, 15, 11, + 30, 17, 8, 14, 29, 13, 28, 27, +} + +// ntz32 computes the number of trailing zeros for an unsigned 32-bit integer. +func ntz32(x uint32) int { + if x == 0 { + return 32 + } + x = (x & -x) * ntz32Const + return int(ntz32Table[x>>27]) +} + +// nlz32 computes the number of leading zeros for an unsigned 32-bit integer. +func nlz32(x uint32) int { + // Smear left most bit to the right + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + // Use ntz mechanism to calculate nlz. + x++ + if x == 0 { + return 0 + } + x *= ntz32Const + return 32 - int(ntz32Table[x>>27]) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/breader.go b/vendor/github.com/ulikunitz/xz/lzma/breader.go new file mode 100644 index 000000000..5350d814f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/breader.go @@ -0,0 +1,39 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// breader provides the ReadByte function for a Reader. It doesn't read +// more data from the reader than absolutely necessary. +type breader struct { + io.Reader + // helper slice to save allocations + p []byte +} + +// ByteReader converts an io.Reader into an io.ByteReader. +func ByteReader(r io.Reader) io.ByteReader { + br, ok := r.(io.ByteReader) + if !ok { + return &breader{r, make([]byte, 1)} + } + return br +} + +// ReadByte read byte function. +func (r *breader) ReadByte() (c byte, err error) { + n, err := r.Reader.Read(r.p) + if n < 1 { + if err == nil { + err = errors.New("breader.ReadByte: no data") + } + return 0, err + } + return r.p[0], nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/buffer.go b/vendor/github.com/ulikunitz/xz/lzma/buffer.go new file mode 100644 index 000000000..50e0b6d57 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/buffer.go @@ -0,0 +1,171 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" +) + +// buffer provides a circular buffer of bytes. If the front index equals +// the rear index the buffer is empty. As a consequence front cannot be +// equal rear for a full buffer. So a full buffer has a length that is +// one byte less the the length of the data slice. +type buffer struct { + data []byte + front int + rear int +} + +// newBuffer creates a buffer with the given size. +func newBuffer(size int) *buffer { + return &buffer{data: make([]byte, size+1)} +} + +// Cap returns the capacity of the buffer. +func (b *buffer) Cap() int { + return len(b.data) - 1 +} + +// Resets the buffer. The front and rear index are set to zero. +func (b *buffer) Reset() { + b.front = 0 + b.rear = 0 +} + +// Buffered returns the number of bytes buffered. +func (b *buffer) Buffered() int { + delta := b.front - b.rear + if delta < 0 { + delta += len(b.data) + } + return delta +} + +// Available returns the number of bytes available for writing. +func (b *buffer) Available() int { + delta := b.rear - 1 - b.front + if delta < 0 { + delta += len(b.data) + } + return delta +} + +// addIndex adds a non-negative integer to the index i and returns the +// resulting index. The function takes care of wrapping the index as +// well as potential overflow situations. +func (b *buffer) addIndex(i int, n int) int { + // subtraction of len(b.data) prevents overflow + i += n - len(b.data) + if i < 0 { + i += len(b.data) + } + return i +} + +// Read reads bytes from the buffer into p and returns the number of +// bytes read. The function never returns an error but might return less +// data than requested. +func (b *buffer) Read(p []byte) (n int, err error) { + n, err = b.Peek(p) + b.rear = b.addIndex(b.rear, n) + return n, err +} + +// Peek reads bytes from the buffer into p without changing the buffer. +// Peek will never return an error but might return less data than +// requested. +func (b *buffer) Peek(p []byte) (n int, err error) { + m := b.Buffered() + n = len(p) + if m < n { + n = m + p = p[:n] + } + k := copy(p, b.data[b.rear:]) + if k < n { + copy(p[k:], b.data) + } + return n, nil +} + +// Discard skips the n next bytes to read from the buffer, returning the +// bytes discarded. +// +// If Discards skips fewer than n bytes, it returns an error. +func (b *buffer) Discard(n int) (discarded int, err error) { + if n < 0 { + return 0, errors.New("buffer.Discard: negative argument") + } + m := b.Buffered() + if m < n { + n = m + err = errors.New( + "buffer.Discard: discarded less bytes then requested") + } + b.rear = b.addIndex(b.rear, n) + return n, err +} + +// ErrNoSpace indicates that there is insufficient space for the Write +// operation. +var ErrNoSpace = errors.New("insufficient space") + +// Write puts data into the buffer. If less bytes are written than +// requested ErrNoSpace is returned. +func (b *buffer) Write(p []byte) (n int, err error) { + m := b.Available() + n = len(p) + if m < n { + n = m + p = p[:m] + err = ErrNoSpace + } + k := copy(b.data[b.front:], p) + if k < n { + copy(b.data, p[k:]) + } + b.front = b.addIndex(b.front, n) + return n, err +} + +// WriteByte writes a single byte into the buffer. The error ErrNoSpace +// is returned if no single byte is available in the buffer for writing. +func (b *buffer) WriteByte(c byte) error { + if b.Available() < 1 { + return ErrNoSpace + } + b.data[b.front] = c + b.front = b.addIndex(b.front, 1) + return nil +} + +// prefixLen returns the length of the common prefix of a and b. +func prefixLen(a, b []byte) int { + if len(a) > len(b) { + a, b = b, a + } + for i, c := range a { + if b[i] != c { + return i + } + } + return len(a) +} + +// matchLen returns the length of the common prefix for the given +// distance from the rear and the byte slice p. +func (b *buffer) matchLen(distance int, p []byte) int { + var n int + i := b.rear - distance + if i < 0 { + if n = prefixLen(p, b.data[len(b.data)+i:]); n < -i { + return n + } + p = p[n:] + i = 0 + } + n += prefixLen(p, b.data[i:]) + return n +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go new file mode 100644 index 000000000..a3696ba08 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/bytewriter.go @@ -0,0 +1,37 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// ErrLimit indicates that the limit of the LimitedByteWriter has been +// reached. +var ErrLimit = errors.New("limit reached") + +// LimitedByteWriter provides a byte writer that can be written until a +// limit is reached. The field N provides the number of remaining +// bytes. +type LimitedByteWriter struct { + BW io.ByteWriter + N int64 +} + +// WriteByte writes a single byte to the limited byte writer. It returns +// ErrLimit if the limit has been reached. If the byte is successfully +// written the field N of the LimitedByteWriter will be decremented by +// one. +func (l *LimitedByteWriter) WriteByte(c byte) error { + if l.N <= 0 { + return ErrLimit + } + if err := l.BW.WriteByte(c); err != nil { + return err + } + l.N-- + return nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoder.go b/vendor/github.com/ulikunitz/xz/lzma/decoder.go new file mode 100644 index 000000000..16e14db39 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/decoder.go @@ -0,0 +1,277 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +// decoder decodes a raw LZMA stream without any header. +type decoder struct { + // dictionary; the rear pointer of the buffer will be used for + // reading the data. + Dict *decoderDict + // decoder state + State *state + // range decoder + rd *rangeDecoder + // start stores the head value of the dictionary for the LZMA + // stream + start int64 + // size of uncompressed data + size int64 + // end-of-stream encountered + eos bool + // EOS marker found + eosMarker bool +} + +// newDecoder creates a new decoder instance. The parameter size provides +// the expected byte size of the decompressed data. If the size is +// unknown use a negative value. In that case the decoder will look for +// a terminating end-of-stream marker. +func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) { + rd, err := newRangeDecoder(br) + if err != nil { + return nil, err + } + d = &decoder{ + State: state, + Dict: dict, + rd: rd, + size: size, + start: dict.pos(), + } + return d, nil +} + +// Reopen restarts the decoder with a new byte reader and a new size. Reopen +// resets the Decompressed counter to zero. +func (d *decoder) Reopen(br io.ByteReader, size int64) error { + var err error + if d.rd, err = newRangeDecoder(br); err != nil { + return err + } + d.start = d.Dict.pos() + d.size = size + d.eos = false + return nil +} + +// decodeLiteral decodes a single literal from the LZMA stream. +func (d *decoder) decodeLiteral() (op operation, err error) { + litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head) + match := d.Dict.byteAt(int(d.State.rep[0]) + 1) + s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState) + if err != nil { + return nil, err + } + return lit{s}, nil +} + +// errEOS indicates that an EOS marker has been found. +var errEOS = errors.New("EOS marker found") + +// readOp decodes the next operation from the compressed stream. It +// returns the operation. If an explicit end of stream marker is +// identified the eos error is returned. +func (d *decoder) readOp() (op operation, err error) { + // Value of the end of stream (EOS) marker + const eosDist = 1<<32 - 1 + + state, state2, posState := d.State.states(d.Dict.head) + + b, err := d.State.isMatch[state2].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + // literal + op, err := d.decodeLiteral() + if err != nil { + return nil, err + } + d.State.updateStateLiteral() + return op, nil + } + b, err = d.State.isRep[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + // simple match + d.State.rep[3], d.State.rep[2], d.State.rep[1] = + d.State.rep[2], d.State.rep[1], d.State.rep[0] + + d.State.updateStateMatch() + // The length decoder returns the length offset. + n, err := d.State.lenCodec.Decode(d.rd, posState) + if err != nil { + return nil, err + } + // The dist decoder returns the distance offset. The actual + // distance is 1 higher. + d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n) + if err != nil { + return nil, err + } + if d.State.rep[0] == eosDist { + d.eosMarker = true + return nil, errEOS + } + op = match{n: int(n) + minMatchLen, + distance: int64(d.State.rep[0]) + minDistance} + return op, nil + } + b, err = d.State.isRepG0[state].Decode(d.rd) + if err != nil { + return nil, err + } + dist := d.State.rep[0] + if b == 0 { + // rep match 0 + b, err = d.State.isRepG0Long[state2].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + d.State.updateStateShortRep() + op = match{n: 1, distance: int64(dist) + minDistance} + return op, nil + } + } else { + b, err = d.State.isRepG1[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + dist = d.State.rep[1] + } else { + b, err = d.State.isRepG2[state].Decode(d.rd) + if err != nil { + return nil, err + } + if b == 0 { + dist = d.State.rep[2] + } else { + dist = d.State.rep[3] + d.State.rep[3] = d.State.rep[2] + } + d.State.rep[2] = d.State.rep[1] + } + d.State.rep[1] = d.State.rep[0] + d.State.rep[0] = dist + } + n, err := d.State.repLenCodec.Decode(d.rd, posState) + if err != nil { + return nil, err + } + d.State.updateStateRep() + op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance} + return op, nil +} + +// apply takes the operation and transforms the decoder dictionary accordingly. +func (d *decoder) apply(op operation) error { + var err error + switch x := op.(type) { + case match: + err = d.Dict.writeMatch(x.distance, x.n) + case lit: + err = d.Dict.WriteByte(x.b) + default: + panic("op is neither a match nor a literal") + } + return err +} + +// decompress fills the dictionary unless no space for new data is +// available. If the end of the LZMA stream has been reached io.EOF will +// be returned. +func (d *decoder) decompress() error { + if d.eos { + return io.EOF + } + for d.Dict.Available() >= maxMatchLen { + op, err := d.readOp() + switch err { + case nil: + break + case errEOS: + d.eos = true + if !d.rd.possiblyAtEnd() { + return errDataAfterEOS + } + if d.size >= 0 && d.size != d.Decompressed() { + return errSize + } + return io.EOF + case io.EOF: + d.eos = true + return io.ErrUnexpectedEOF + default: + return err + } + if err = d.apply(op); err != nil { + return err + } + if d.size >= 0 && d.Decompressed() >= d.size { + d.eos = true + if d.Decompressed() > d.size { + return errSize + } + if !d.rd.possiblyAtEnd() { + switch _, err = d.readOp(); err { + case nil: + return errSize + case io.EOF: + return io.ErrUnexpectedEOF + case errEOS: + break + default: + return err + } + } + return io.EOF + } + } + return nil +} + +// Errors that may be returned while decoding data. +var ( + errDataAfterEOS = errors.New("lzma: data after end of stream marker") + errSize = errors.New("lzma: wrong uncompressed data size") +) + +// Read reads data from the buffer. If no more data is available io.EOF is +// returned. +func (d *decoder) Read(p []byte) (n int, err error) { + var k int + for { + // Read of decoder dict never returns an error. + k, err = d.Dict.Read(p[n:]) + if err != nil { + panic(fmt.Errorf("dictionary read error %s", err)) + } + if k == 0 && d.eos { + return n, io.EOF + } + n += k + if n >= len(p) { + return n, nil + } + if err = d.decompress(); err != nil && err != io.EOF { + return n, err + } + } +} + +// Decompressed returns the number of bytes decompressed by the decoder. +func (d *decoder) Decompressed() int64 { + return d.Dict.pos() - d.start +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go new file mode 100644 index 000000000..564a12b83 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/decoderdict.go @@ -0,0 +1,135 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// decoderDict provides the dictionary for the decoder. The whole +// dictionary is used as reader buffer. +type decoderDict struct { + buf buffer + head int64 +} + +// newDecoderDict creates a new decoder dictionary. The whole dictionary +// will be used as reader buffer. +func newDecoderDict(dictCap int) (d *decoderDict, err error) { + // lower limit supports easy test cases + if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) { + return nil, errors.New("lzma: dictCap out of range") + } + d = &decoderDict{buf: *newBuffer(dictCap)} + return d, nil +} + +// Reset clears the dictionary. The read buffer is not changed, so the +// buffered data can still be read. +func (d *decoderDict) Reset() { + d.head = 0 +} + +// WriteByte writes a single byte into the dictionary. It is used to +// write literals into the dictionary. +func (d *decoderDict) WriteByte(c byte) error { + if err := d.buf.WriteByte(c); err != nil { + return err + } + d.head++ + return nil +} + +// pos returns the position of the dictionary head. +func (d *decoderDict) pos() int64 { return d.head } + +// dictLen returns the actual length of the dictionary. +func (d *decoderDict) dictLen() int { + capacity := d.buf.Cap() + if d.head >= int64(capacity) { + return capacity + } + return int(d.head) +} + +// byteAt returns a byte stored in the dictionary. If the distance is +// non-positive or exceeds the current length of the dictionary the zero +// byte is returned. +func (d *decoderDict) byteAt(dist int) byte { + if !(0 < dist && dist <= d.dictLen()) { + return 0 + } + i := d.buf.front - dist + if i < 0 { + i += len(d.buf.data) + } + return d.buf.data[i] +} + +// writeMatch writes the match at the top of the dictionary. The given +// distance must point in the current dictionary and the length must not +// exceed the maximum length 273 supported in LZMA. +// +// The error value ErrNoSpace indicates that no space is available in +// the dictionary for writing. You need to read from the dictionary +// first. +func (d *decoderDict) writeMatch(dist int64, length int) error { + if !(0 < dist && dist <= int64(d.dictLen())) { + return errors.New("writeMatch: distance out of range") + } + if !(0 < length && length <= maxMatchLen) { + return errors.New("writeMatch: length out of range") + } + if length > d.buf.Available() { + return ErrNoSpace + } + d.head += int64(length) + + i := d.buf.front - int(dist) + if i < 0 { + i += len(d.buf.data) + } + for length > 0 { + var p []byte + if i >= d.buf.front { + p = d.buf.data[i:] + i = 0 + } else { + p = d.buf.data[i:d.buf.front] + i = d.buf.front + } + if len(p) > length { + p = p[:length] + } + if _, err := d.buf.Write(p); err != nil { + panic(fmt.Errorf("d.buf.Write returned error %s", err)) + } + length -= len(p) + } + return nil +} + +// Write writes the given bytes into the dictionary and advances the +// head. +func (d *decoderDict) Write(p []byte) (n int, err error) { + n, err = d.buf.Write(p) + d.head += int64(n) + return n, err +} + +// Available returns the number of available bytes for writing into the +// decoder dictionary. +func (d *decoderDict) Available() int { return d.buf.Available() } + +// Read reads data from the buffer contained in the decoder dictionary. +func (d *decoderDict) Read(p []byte) (n int, err error) { return d.buf.Read(p) } + +// Buffered returns the number of bytes currently buffered in the +// decoder dictionary. +func (d *decoderDict) buffered() int { return d.buf.Buffered() } + +// Peek gets data from the buffer without advancing the rear index. +func (d *decoderDict) peek(p []byte) (n int, err error) { return d.buf.Peek(p) } diff --git a/vendor/github.com/ulikunitz/xz/lzma/directcodec.go b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go new file mode 100644 index 000000000..e08eb989f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/directcodec.go @@ -0,0 +1,49 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "fmt" + +// directCodec allows the encoding and decoding of values with a fixed number +// of bits. The number of bits must be in the range [1,32]. +type directCodec byte + +// makeDirectCodec creates a directCodec. The function panics if the number of +// bits is not in the range [1,32]. +func makeDirectCodec(bits int) directCodec { + if !(1 <= bits && bits <= 32) { + panic(fmt.Errorf("bits=%d out of range", bits)) + } + return directCodec(bits) +} + +// Bits returns the number of bits supported by this codec. +func (dc directCodec) Bits() int { + return int(dc) +} + +// Encode uses the range encoder to encode a value with the fixed number of +// bits. The most-significant bit is encoded first. +func (dc directCodec) Encode(e *rangeEncoder, v uint32) error { + for i := int(dc) - 1; i >= 0; i-- { + if err := e.DirectEncodeBit(v >> uint(i)); err != nil { + return err + } + } + return nil +} + +// Decode uses the range decoder to decode a value with the given number of +// given bits. The most-significant bit is decoded first. +func (dc directCodec) Decode(d *rangeDecoder) (v uint32, err error) { + for i := int(dc) - 1; i >= 0; i-- { + x, err := d.DirectDecodeBit() + if err != nil { + return 0, err + } + v = (v << 1) | x + } + return v, nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/distcodec.go b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go new file mode 100644 index 000000000..b053a2dce --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/distcodec.go @@ -0,0 +1,156 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// Constants used by the distance codec. +const ( + // minimum supported distance + minDistance = 1 + // maximum supported distance, value is used for the eos marker. + maxDistance = 1 << 32 + // number of the supported len states + lenStates = 4 + // start for the position models + startPosModel = 4 + // first index with align bits support + endPosModel = 14 + // bits for the position slots + posSlotBits = 6 + // number of align bits + alignBits = 4 + // maximum position slot + maxPosSlot = 63 +) + +// distCodec provides encoding and decoding of distance values. +type distCodec struct { + posSlotCodecs [lenStates]treeCodec + posModel [endPosModel - startPosModel]treeReverseCodec + alignCodec treeReverseCodec +} + +// deepcopy initializes dc as deep copy of the source. +func (dc *distCodec) deepcopy(src *distCodec) { + if dc == src { + return + } + for i := range dc.posSlotCodecs { + dc.posSlotCodecs[i].deepcopy(&src.posSlotCodecs[i]) + } + for i := range dc.posModel { + dc.posModel[i].deepcopy(&src.posModel[i]) + } + dc.alignCodec.deepcopy(&src.alignCodec) +} + +// distBits returns the number of bits required to encode dist. +func distBits(dist uint32) int { + if dist < startPosModel { + return 6 + } + // slot s > 3, dist d + // s = 2(bits(d)-1) + bit(d, bits(d)-2) + // s>>1 = bits(d)-1 + // bits(d) = 32-nlz32(d) + // s>>1=31-nlz32(d) + // n = 5 + (s>>1) = 36 - nlz32(d) + return 36 - nlz32(dist) +} + +// newDistCodec creates a new distance codec. +func (dc *distCodec) init() { + for i := range dc.posSlotCodecs { + dc.posSlotCodecs[i] = makeTreeCodec(posSlotBits) + } + for i := range dc.posModel { + posSlot := startPosModel + i + bits := (posSlot >> 1) - 1 + dc.posModel[i] = makeTreeReverseCodec(bits) + } + dc.alignCodec = makeTreeReverseCodec(alignBits) +} + +// lenState converts the value l to a supported lenState value. +func lenState(l uint32) uint32 { + if l >= lenStates { + l = lenStates - 1 + } + return l +} + +// Encode encodes the distance using the parameter l. Dist can have values from +// the full range of uint32 values. To get the distance offset the actual match +// distance has to be decreased by 1. A distance offset of 0xffffffff (eos) +// indicates the end of the stream. +func (dc *distCodec) Encode(e *rangeEncoder, dist uint32, l uint32) (err error) { + // Compute the posSlot using nlz32 + var posSlot uint32 + var bits uint32 + if dist < startPosModel { + posSlot = dist + } else { + bits = uint32(30 - nlz32(dist)) + posSlot = startPosModel - 2 + (bits << 1) + posSlot += (dist >> uint(bits)) & 1 + } + + if err = dc.posSlotCodecs[lenState(l)].Encode(e, posSlot); err != nil { + return + } + + switch { + case posSlot < startPosModel: + return nil + case posSlot < endPosModel: + tc := &dc.posModel[posSlot-startPosModel] + return tc.Encode(dist, e) + } + dic := directCodec(bits - alignBits) + if err = dic.Encode(e, dist>>alignBits); err != nil { + return + } + return dc.alignCodec.Encode(dist, e) +} + +// Decode decodes the distance offset using the parameter l. The dist value +// 0xffffffff (eos) indicates the end of the stream. Add one to the distance +// offset to get the actual match distance. +func (dc *distCodec) Decode(d *rangeDecoder, l uint32) (dist uint32, err error) { + posSlot, err := dc.posSlotCodecs[lenState(l)].Decode(d) + if err != nil { + return + } + + // posSlot equals distance + if posSlot < startPosModel { + return posSlot, nil + } + + // posSlot uses the individual models + bits := (posSlot >> 1) - 1 + dist = (2 | (posSlot & 1)) << bits + var u uint32 + if posSlot < endPosModel { + tc := &dc.posModel[posSlot-startPosModel] + if u, err = tc.Decode(d); err != nil { + return 0, err + } + dist += u + return dist, nil + } + + // posSlots use direct encoding and a single model for the four align + // bits. + dic := directCodec(bits - alignBits) + if u, err = dic.Decode(d); err != nil { + return 0, err + } + dist += u << alignBits + if u, err = dc.alignCodec.Decode(d); err != nil { + return 0, err + } + dist += u + return dist, nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoder.go b/vendor/github.com/ulikunitz/xz/lzma/encoder.go new file mode 100644 index 000000000..18ce00992 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/encoder.go @@ -0,0 +1,268 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "fmt" + "io" +) + +// opLenMargin provides the upper limit of the number of bytes required +// to encode a single operation. +const opLenMargin = 10 + +// compressFlags control the compression process. +type compressFlags uint32 + +// Values for compressFlags. +const ( + // all data should be compressed, even if compression is not + // optimal. + all compressFlags = 1 << iota +) + +// encoderFlags provide the flags for an encoder. +type encoderFlags uint32 + +// Flags for the encoder. +const ( + // eosMarker requests an EOS marker to be written. + eosMarker encoderFlags = 1 << iota +) + +// Encoder compresses data buffered in the encoder dictionary and writes +// it into a byte writer. +type encoder struct { + dict *encoderDict + state *state + re *rangeEncoder + start int64 + // generate eos marker + marker bool + limit bool + margin int +} + +// newEncoder creates a new encoder. If the byte writer must be +// limited use LimitedByteWriter provided by this package. The flags +// argument supports the eosMarker flag, controlling whether a +// terminating end-of-stream marker must be written. +func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict, + flags encoderFlags) (e *encoder, err error) { + + re, err := newRangeEncoder(bw) + if err != nil { + return nil, err + } + e = &encoder{ + dict: dict, + state: state, + re: re, + marker: flags&eosMarker != 0, + start: dict.Pos(), + margin: opLenMargin, + } + if e.marker { + e.margin += 5 + } + return e, nil +} + +// Write writes the bytes from p into the dictionary. If not enough +// space is available the data in the dictionary buffer will be +// compressed to make additional space available. If the limit of the +// underlying writer has been reached ErrLimit will be returned. +func (e *encoder) Write(p []byte) (n int, err error) { + for { + k, err := e.dict.Write(p[n:]) + n += k + if err == ErrNoSpace { + if err = e.compress(0); err != nil { + return n, err + } + continue + } + return n, err + } +} + +// Reopen reopens the encoder with a new byte writer. +func (e *encoder) Reopen(bw io.ByteWriter) error { + var err error + if e.re, err = newRangeEncoder(bw); err != nil { + return err + } + e.start = e.dict.Pos() + e.limit = false + return nil +} + +// writeLiteral writes a literal into the LZMA stream +func (e *encoder) writeLiteral(l lit) error { + var err error + state, state2, _ := e.state.states(e.dict.Pos()) + if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil { + return err + } + litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos()) + match := e.dict.ByteAt(int(e.state.rep[0]) + 1) + err = e.state.litCodec.Encode(e.re, l.b, state, match, litState) + if err != nil { + return err + } + e.state.updateStateLiteral() + return nil +} + +// iverson implements the Iverson operator as proposed by Donald Knuth in his +// book Concrete Mathematics. +func iverson(ok bool) uint32 { + if ok { + return 1 + } + return 0 +} + +// writeMatch writes a repetition operation into the operation stream +func (e *encoder) writeMatch(m match) error { + var err error + if !(minDistance <= m.distance && m.distance <= maxDistance) { + panic(fmt.Errorf("match distance %d out of range", m.distance)) + } + dist := uint32(m.distance - minDistance) + if !(minMatchLen <= m.n && m.n <= maxMatchLen) && + !(dist == e.state.rep[0] && m.n == 1) { + panic(fmt.Errorf( + "match length %d out of range; dist %d rep[0] %d", + m.n, dist, e.state.rep[0])) + } + state, state2, posState := e.state.states(e.dict.Pos()) + if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil { + return err + } + g := 0 + for ; g < 4; g++ { + if e.state.rep[g] == dist { + break + } + } + b := iverson(g < 4) + if err = e.state.isRep[state].Encode(e.re, b); err != nil { + return err + } + n := uint32(m.n - minMatchLen) + if b == 0 { + // simple match + e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] = + e.state.rep[2], e.state.rep[1], e.state.rep[0], dist + e.state.updateStateMatch() + if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil { + return err + } + return e.state.distCodec.Encode(e.re, dist, n) + } + b = iverson(g != 0) + if err = e.state.isRepG0[state].Encode(e.re, b); err != nil { + return err + } + if b == 0 { + // g == 0 + b = iverson(m.n != 1) + if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil { + return err + } + if b == 0 { + e.state.updateStateShortRep() + return nil + } + } else { + // g in {1,2,3} + b = iverson(g != 1) + if err = e.state.isRepG1[state].Encode(e.re, b); err != nil { + return err + } + if b == 1 { + // g in {2,3} + b = iverson(g != 2) + err = e.state.isRepG2[state].Encode(e.re, b) + if err != nil { + return err + } + if b == 1 { + e.state.rep[3] = e.state.rep[2] + } + e.state.rep[2] = e.state.rep[1] + } + e.state.rep[1] = e.state.rep[0] + e.state.rep[0] = dist + } + e.state.updateStateRep() + return e.state.repLenCodec.Encode(e.re, n, posState) +} + +// writeOp writes a single operation to the range encoder. The function +// checks whether there is enough space available to close the LZMA +// stream. +func (e *encoder) writeOp(op operation) error { + if e.re.Available() < int64(e.margin) { + return ErrLimit + } + switch x := op.(type) { + case lit: + return e.writeLiteral(x) + case match: + return e.writeMatch(x) + default: + panic("unexpected operation") + } +} + +// compress compressed data from the dictionary buffer. If the flag all +// is set, all data in the dictionary buffer will be compressed. The +// function returns ErrLimit if the underlying writer has reached its +// limit. +func (e *encoder) compress(flags compressFlags) error { + n := 0 + if flags&all == 0 { + n = maxMatchLen - 1 + } + d := e.dict + m := d.m + for d.Buffered() > n { + op := m.NextOp(e.state.rep) + if err := e.writeOp(op); err != nil { + return err + } + d.Discard(op.Len()) + } + return nil +} + +// eosMatch is a pseudo operation that indicates the end of the stream. +var eosMatch = match{distance: maxDistance, n: minMatchLen} + +// Close terminates the LZMA stream. If requested the end-of-stream +// marker will be written. If the byte writer limit has been or will be +// reached during compression of the remaining data in the buffer the +// LZMA stream will be closed and data will remain in the buffer. +func (e *encoder) Close() error { + err := e.compress(all) + if err != nil && err != ErrLimit { + return err + } + if e.marker { + if err := e.writeMatch(eosMatch); err != nil { + return err + } + } + err = e.re.Close() + return err +} + +// Compressed returns the number bytes of the input data that been +// compressed. +func (e *encoder) Compressed() int64 { + return e.dict.Pos() - e.start +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go new file mode 100644 index 000000000..9d0fbc703 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/encoderdict.go @@ -0,0 +1,149 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +// matcher is an interface that supports the identification of the next +// operation. +type matcher interface { + io.Writer + SetDict(d *encoderDict) + NextOp(rep [4]uint32) operation +} + +// encoderDict provides the dictionary of the encoder. It includes an +// addtional buffer atop of the actual dictionary. +type encoderDict struct { + buf buffer + m matcher + head int64 + capacity int + // preallocated array + data [maxMatchLen]byte +} + +// newEncoderDict creates the encoder dictionary. The argument bufSize +// defines the size of the additional buffer. +func newEncoderDict(dictCap, bufSize int, m matcher) (d *encoderDict, err error) { + if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) { + return nil, errors.New( + "lzma: dictionary capacity out of range") + } + if bufSize < 1 { + return nil, errors.New( + "lzma: buffer size must be larger than zero") + } + d = &encoderDict{ + buf: *newBuffer(dictCap + bufSize), + capacity: dictCap, + m: m, + } + m.SetDict(d) + return d, nil +} + +// Discard discards n bytes. Note that n must not be larger than +// MaxMatchLen. +func (d *encoderDict) Discard(n int) { + p := d.data[:n] + k, _ := d.buf.Read(p) + if k < n { + panic(fmt.Errorf("lzma: can't discard %d bytes", n)) + } + d.head += int64(n) + d.m.Write(p) +} + +// Len returns the data available in the encoder dictionary. +func (d *encoderDict) Len() int { + n := d.buf.Available() + if int64(n) > d.head { + return int(d.head) + } + return n +} + +// DictLen returns the actual length of data in the dictionary. +func (d *encoderDict) DictLen() int { + if d.head < int64(d.capacity) { + return int(d.head) + } + return d.capacity +} + +// Available returns the number of bytes that can be written by a +// following Write call. +func (d *encoderDict) Available() int { + return d.buf.Available() - d.DictLen() +} + +// Write writes data into the dictionary buffer. Note that the position +// of the dictionary head will not be moved. If there is not enough +// space in the buffer ErrNoSpace will be returned. +func (d *encoderDict) Write(p []byte) (n int, err error) { + m := d.Available() + if len(p) > m { + p = p[:m] + err = ErrNoSpace + } + var e error + if n, e = d.buf.Write(p); e != nil { + err = e + } + return n, err +} + +// Pos returns the position of the head. +func (d *encoderDict) Pos() int64 { return d.head } + +// ByteAt returns the byte at the given distance. +func (d *encoderDict) ByteAt(distance int) byte { + if !(0 < distance && distance <= d.Len()) { + return 0 + } + i := d.buf.rear - distance + if i < 0 { + i += len(d.buf.data) + } + return d.buf.data[i] +} + +// CopyN copies the last n bytes from the dictionary into the provided +// writer. This is used for copying uncompressed data into an +// uncompressed segment. +func (d *encoderDict) CopyN(w io.Writer, n int) (written int, err error) { + if n <= 0 { + return 0, nil + } + m := d.Len() + if n > m { + n = m + err = ErrNoSpace + } + i := d.buf.rear - n + var e error + if i < 0 { + i += len(d.buf.data) + if written, e = w.Write(d.buf.data[i:]); e != nil { + return written, e + } + i = 0 + } + var k int + k, e = w.Write(d.buf.data[i:d.buf.rear]) + written += k + if e != nil { + err = e + } + return written, err +} + +// Buffered returns the number of bytes in the buffer. +func (d *encoderDict) Buffered() int { return d.buf.Buffered() } diff --git a/vendor/github.com/ulikunitz/xz/lzma/hashtable.go b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go new file mode 100644 index 000000000..d786a9745 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/hashtable.go @@ -0,0 +1,309 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + + "github.com/ulikunitz/xz/internal/hash" +) + +/* For compression we need to find byte sequences that match the byte + * sequence at the dictionary head. A hash table is a simple method to + * provide this capability. + */ + +// maxMatches limits the number of matches requested from the Matches +// function. This controls the speed of the overall encoding. +const maxMatches = 16 + +// shortDists defines the number of short distances supported by the +// implementation. +const shortDists = 8 + +// The minimum is somehow arbitrary but the maximum is limited by the +// memory requirements of the hash table. +const ( + minTableExponent = 9 + maxTableExponent = 20 +) + +// newRoller contains the function used to create an instance of the +// hash.Roller. +var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) } + +// hashTable stores the hash table including the rolling hash method. +// +// We implement chained hashing into a circular buffer. Each entry in +// the circular buffer stores the delta distance to the next position with a +// word that has the same hash value. +type hashTable struct { + dict *encoderDict + // actual hash table + t []int64 + // circular list data with the offset to the next word + data []uint32 + front int + // mask for computing the index for the hash table + mask uint64 + // hash offset; initial value is -int64(wordLen) + hoff int64 + // length of the hashed word + wordLen int + // hash roller for computing the hash values for the Write + // method + wr hash.Roller + // hash roller for computing arbitrary hashes + hr hash.Roller + // preallocated slices + p [maxMatches]int64 + distances [maxMatches + shortDists]int +} + +// hashTableExponent derives the hash table exponent from the dictionary +// capacity. +func hashTableExponent(n uint32) int { + e := 30 - nlz32(n) + switch { + case e < minTableExponent: + e = minTableExponent + case e > maxTableExponent: + e = maxTableExponent + } + return e +} + +// newHashTable creates a new hash table for words of length wordLen +func newHashTable(capacity int, wordLen int) (t *hashTable, err error) { + if !(0 < capacity) { + return nil, errors.New( + "newHashTable: capacity must not be negative") + } + exp := hashTableExponent(uint32(capacity)) + if !(1 <= wordLen && wordLen <= 4) { + return nil, errors.New("newHashTable: " + + "argument wordLen out of range") + } + n := 1 << uint(exp) + if n <= 0 { + panic("newHashTable: exponent is too large") + } + t = &hashTable{ + t: make([]int64, n), + data: make([]uint32, capacity), + mask: (uint64(1) << uint(exp)) - 1, + hoff: -int64(wordLen), + wordLen: wordLen, + wr: newRoller(wordLen), + hr: newRoller(wordLen), + } + return t, nil +} + +func (t *hashTable) SetDict(d *encoderDict) { t.dict = d } + +// buffered returns the number of bytes that are currently hashed. +func (t *hashTable) buffered() int { + n := t.hoff + 1 + switch { + case n <= 0: + return 0 + case n >= int64(len(t.data)): + return len(t.data) + } + return int(n) +} + +// addIndex adds n to an index ensuring that is stays inside the +// circular buffer for the hash chain. +func (t *hashTable) addIndex(i, n int) int { + i += n - len(t.data) + if i < 0 { + i += len(t.data) + } + return i +} + +// putDelta puts the delta instance at the current front of the circular +// chain buffer. +func (t *hashTable) putDelta(delta uint32) { + t.data[t.front] = delta + t.front = t.addIndex(t.front, 1) +} + +// putEntry puts a new entry into the hash table. If there is already a +// value stored it is moved into the circular chain buffer. +func (t *hashTable) putEntry(h uint64, pos int64) { + if pos < 0 { + return + } + i := h & t.mask + old := t.t[i] - 1 + t.t[i] = pos + 1 + var delta int64 + if old >= 0 { + delta = pos - old + if delta > 1<<32-1 || delta > int64(t.buffered()) { + delta = 0 + } + } + t.putDelta(uint32(delta)) +} + +// WriteByte converts a single byte into a hash and puts them into the hash +// table. +func (t *hashTable) WriteByte(b byte) error { + h := t.wr.RollByte(b) + t.hoff++ + t.putEntry(h, t.hoff) + return nil +} + +// Write converts the bytes provided into hash tables and stores the +// abbreviated offsets into the hash table. The method will never return an +// error. +func (t *hashTable) Write(p []byte) (n int, err error) { + for _, b := range p { + // WriteByte doesn't generate an error. + t.WriteByte(b) + } + return len(p), nil +} + +// getMatches the matches for a specific hash. The functions returns the +// number of positions found. +// +// TODO: Make a getDistances because that we are actually interested in. +func (t *hashTable) getMatches(h uint64, positions []int64) (n int) { + if t.hoff < 0 || len(positions) == 0 { + return 0 + } + buffered := t.buffered() + tailPos := t.hoff + 1 - int64(buffered) + rear := t.front - buffered + if rear >= 0 { + rear -= len(t.data) + } + // get the slot for the hash + pos := t.t[h&t.mask] - 1 + delta := pos - tailPos + for { + if delta < 0 { + return n + } + positions[n] = tailPos + delta + n++ + if n >= len(positions) { + return n + } + i := rear + int(delta) + if i < 0 { + i += len(t.data) + } + u := t.data[i] + if u == 0 { + return n + } + delta -= int64(u) + } +} + +// hash computes the rolling hash for the word stored in p. For correct +// results its length must be equal to t.wordLen. +func (t *hashTable) hash(p []byte) uint64 { + var h uint64 + for _, b := range p { + h = t.hr.RollByte(b) + } + return h +} + +// Matches fills the positions slice with potential matches. The +// functions returns the number of positions filled into positions. The +// byte slice p must have word length of the hash table. +func (t *hashTable) Matches(p []byte, positions []int64) int { + if len(p) != t.wordLen { + panic(fmt.Errorf( + "byte slice must have length %d", t.wordLen)) + } + h := t.hash(p) + return t.getMatches(h, positions) +} + +// NextOp identifies the next operation using the hash table. +// +// TODO: Use all repetitions to find matches. +func (t *hashTable) NextOp(rep [4]uint32) operation { + // get positions + data := t.dict.data[:maxMatchLen] + n, _ := t.dict.buf.Peek(data) + data = data[:n] + var p []int64 + if n < t.wordLen { + p = t.p[:0] + } else { + p = t.p[:maxMatches] + n = t.Matches(data[:t.wordLen], p) + p = p[:n] + } + + // convert positions in potential distances + head := t.dict.head + dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8) + for _, pos := range p { + dis := int(head - pos) + if dis > shortDists { + dists = append(dists, dis) + } + } + + // check distances + var m match + dictLen := t.dict.DictLen() + for _, dist := range dists { + if dist > dictLen { + continue + } + + // Here comes a trick. We are only interested in matches + // that are longer than the matches we have been found + // before. So before we test the whole byte sequence at + // the given distance, we test the first byte that would + // make the match longer. If it doesn't match the byte + // to match, we don't to care any longer. + i := t.dict.buf.rear - dist + m.n + if i < 0 { + i += len(t.dict.buf.data) + } + if t.dict.buf.data[i] != data[m.n] { + // We can't get a longer match. Jump to the next + // distance. + continue + } + + n := t.dict.buf.matchLen(dist, data) + switch n { + case 0: + continue + case 1: + if uint32(dist-minDistance) != rep[0] { + continue + } + } + if n > m.n { + m = match{int64(dist), n} + if n == len(data) { + // No better match will be found. + break + } + } + } + + if m.n == 0 { + return lit{data[0]} + } + return m +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/header.go b/vendor/github.com/ulikunitz/xz/lzma/header.go new file mode 100644 index 000000000..bc708969f --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/header.go @@ -0,0 +1,167 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// uint32LE reads an uint32 integer from a byte slice +func uint32LE(b []byte) uint32 { + x := uint32(b[3]) << 24 + x |= uint32(b[2]) << 16 + x |= uint32(b[1]) << 8 + x |= uint32(b[0]) + return x +} + +// uint64LE converts the uint64 value stored as little endian to an uint64 +// value. +func uint64LE(b []byte) uint64 { + x := uint64(b[7]) << 56 + x |= uint64(b[6]) << 48 + x |= uint64(b[5]) << 40 + x |= uint64(b[4]) << 32 + x |= uint64(b[3]) << 24 + x |= uint64(b[2]) << 16 + x |= uint64(b[1]) << 8 + x |= uint64(b[0]) + return x +} + +// putUint32LE puts an uint32 integer into a byte slice that must have at least +// a length of 4 bytes. +func putUint32LE(b []byte, x uint32) { + b[0] = byte(x) + b[1] = byte(x >> 8) + b[2] = byte(x >> 16) + b[3] = byte(x >> 24) +} + +// putUint64LE puts the uint64 value into the byte slice as little endian +// value. The byte slice b must have at least place for 8 bytes. +func putUint64LE(b []byte, x uint64) { + b[0] = byte(x) + b[1] = byte(x >> 8) + b[2] = byte(x >> 16) + b[3] = byte(x >> 24) + b[4] = byte(x >> 32) + b[5] = byte(x >> 40) + b[6] = byte(x >> 48) + b[7] = byte(x >> 56) +} + +// noHeaderSize defines the value of the length field in the LZMA header. +const noHeaderSize uint64 = 1<<64 - 1 + +// HeaderLen provides the length of the LZMA file header. +const HeaderLen = 13 + +// header represents the header of an LZMA file. +type header struct { + properties Properties + dictCap int + // uncompressed size; negative value if no size is given + size int64 +} + +// marshalBinary marshals the header. +func (h *header) marshalBinary() (data []byte, err error) { + if err = h.properties.verify(); err != nil { + return nil, err + } + if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) { + return nil, fmt.Errorf("lzma: DictCap %d out of range", + h.dictCap) + } + + data = make([]byte, 13) + + // property byte + data[0] = h.properties.Code() + + // dictionary capacity + putUint32LE(data[1:5], uint32(h.dictCap)) + + // uncompressed size + var s uint64 + if h.size > 0 { + s = uint64(h.size) + } else { + s = noHeaderSize + } + putUint64LE(data[5:], s) + + return data, nil +} + +// unmarshalBinary unmarshals the header. +func (h *header) unmarshalBinary(data []byte) error { + if len(data) != HeaderLen { + return errors.New("lzma.unmarshalBinary: data has wrong length") + } + + // properties + var err error + if h.properties, err = PropertiesForCode(data[0]); err != nil { + return err + } + + // dictionary capacity + h.dictCap = int(uint32LE(data[1:])) + if h.dictCap < 0 { + return errors.New( + "LZMA header: dictionary capacity exceeds maximum " + + "integer") + } + + // uncompressed size + s := uint64LE(data[5:]) + if s == noHeaderSize { + h.size = -1 + } else { + h.size = int64(s) + if h.size < 0 { + return errors.New( + "LZMA header: uncompressed size " + + "out of int64 range") + } + } + + return nil +} + +// validDictCap checks whether the dictionary capacity is correct. This +// is used to weed out wrong file headers. +func validDictCap(dictcap int) bool { + if int64(dictcap) == MaxDictCap { + return true + } + for n := uint(10); n < 32; n++ { + if dictcap == 1<<n { + return true + } + if dictcap == 1<<n+1<<(n-1) { + return true + } + } + return false +} + +// ValidHeader checks for a valid LZMA file header. It allows only +// dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If +// there is an explicit size it must not exceed 256 GiB. The length of +// the data argument must be HeaderLen. +func ValidHeader(data []byte) bool { + var h header + if err := h.unmarshalBinary(data); err != nil { + return false + } + if !validDictCap(h.dictCap) { + return false + } + return h.size < 0 || h.size <= 1<<38 +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/header2.go b/vendor/github.com/ulikunitz/xz/lzma/header2.go new file mode 100644 index 000000000..ac6a71a5a --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/header2.go @@ -0,0 +1,398 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "io" +) + +const ( + // maximum size of compressed data in a chunk + maxCompressed = 1 << 16 + // maximum size of uncompressed data in a chunk + maxUncompressed = 1 << 21 +) + +// chunkType represents the type of an LZMA2 chunk. Note that this +// value is an internal representation and no actual encoding of a LZMA2 +// chunk header. +type chunkType byte + +// Possible values for the chunk type. +const ( + // end of stream + cEOS chunkType = iota + // uncompressed; reset dictionary + cUD + // uncompressed; no reset of dictionary + cU + // LZMA compressed; no reset + cL + // LZMA compressed; reset state + cLR + // LZMA compressed; reset state; new property value + cLRN + // LZMA compressed; reset state; new property value; reset dictionary + cLRND +) + +// chunkTypeStrings provide a string representation for the chunk types. +var chunkTypeStrings = [...]string{ + cEOS: "EOS", + cU: "U", + cUD: "UD", + cL: "L", + cLR: "LR", + cLRN: "LRN", + cLRND: "LRND", +} + +// String returns a string representation of the chunk type. +func (c chunkType) String() string { + if !(cEOS <= c && c <= cLRND) { + return "unknown" + } + return chunkTypeStrings[c] +} + +// Actual encodings for the chunk types in the value. Note that the high +// uncompressed size bits are stored in the header byte additionally. +const ( + hEOS = 0 + hUD = 1 + hU = 2 + hL = 1 << 7 + hLR = 1<<7 | 1<<5 + hLRN = 1<<7 | 1<<6 + hLRND = 1<<7 | 1<<6 | 1<<5 +) + +// errHeaderByte indicates an unsupported value for the chunk header +// byte. These bytes starts the variable-length chunk header. +var errHeaderByte = errors.New("lzma: unsupported chunk header byte") + +// headerChunkType converts the header byte into a chunk type. It +// ignores the uncompressed size bits in the chunk header byte. +func headerChunkType(h byte) (c chunkType, err error) { + if h&hL == 0 { + // no compression + switch h { + case hEOS: + c = cEOS + case hUD: + c = cUD + case hU: + c = cU + default: + return 0, errHeaderByte + } + return + } + switch h & hLRND { + case hL: + c = cL + case hLR: + c = cLR + case hLRN: + c = cLRN + case hLRND: + c = cLRND + default: + return 0, errHeaderByte + } + return +} + +// uncompressedHeaderLen provides the length of an uncompressed header +const uncompressedHeaderLen = 3 + +// headerLen returns the length of the LZMA2 header for a given chunk +// type. +func headerLen(c chunkType) int { + switch c { + case cEOS: + return 1 + case cU, cUD: + return uncompressedHeaderLen + case cL, cLR: + return 5 + case cLRN, cLRND: + return 6 + } + panic(fmt.Errorf("unsupported chunk type %d", c)) +} + +// chunkHeader represents the contents of a chunk header. +type chunkHeader struct { + ctype chunkType + uncompressed uint32 + compressed uint16 + props Properties +} + +// String returns a string representation of the chunk header. +func (h *chunkHeader) String() string { + return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed, + h.compressed, &h.props) +} + +// UnmarshalBinary reads the content of the chunk header from the data +// slice. The slice must have the correct length. +func (h *chunkHeader) UnmarshalBinary(data []byte) error { + if len(data) == 0 { + return errors.New("no data") + } + c, err := headerChunkType(data[0]) + if err != nil { + return err + } + + n := headerLen(c) + if len(data) < n { + return errors.New("incomplete data") + } + if len(data) > n { + return errors.New("invalid data length") + } + + *h = chunkHeader{ctype: c} + if c == cEOS { + return nil + } + + h.uncompressed = uint32(uint16BE(data[1:3])) + if c <= cU { + return nil + } + h.uncompressed |= uint32(data[0]&^hLRND) << 16 + + h.compressed = uint16BE(data[3:5]) + if c <= cLR { + return nil + } + + h.props, err = PropertiesForCode(data[5]) + return err +} + +// MarshalBinary encodes the chunk header value. The function checks +// whether the content of the chunk header is correct. +func (h *chunkHeader) MarshalBinary() (data []byte, err error) { + if h.ctype > cLRND { + return nil, errors.New("invalid chunk type") + } + if err = h.props.verify(); err != nil { + return nil, err + } + + data = make([]byte, headerLen(h.ctype)) + + switch h.ctype { + case cEOS: + return data, nil + case cUD: + data[0] = hUD + case cU: + data[0] = hU + case cL: + data[0] = hL + case cLR: + data[0] = hLR + case cLRN: + data[0] = hLRN + case cLRND: + data[0] = hLRND + } + + putUint16BE(data[1:3], uint16(h.uncompressed)) + if h.ctype <= cU { + return data, nil + } + data[0] |= byte(h.uncompressed>>16) &^ hLRND + + putUint16BE(data[3:5], h.compressed) + if h.ctype <= cLR { + return data, nil + } + + data[5] = h.props.Code() + return data, nil +} + +// readChunkHeader reads the chunk header from the IO reader. +func readChunkHeader(r io.Reader) (h *chunkHeader, err error) { + p := make([]byte, 1, 6) + if _, err = io.ReadFull(r, p); err != nil { + return + } + c, err := headerChunkType(p[0]) + if err != nil { + return + } + p = p[:headerLen(c)] + if _, err = io.ReadFull(r, p[1:]); err != nil { + return + } + h = new(chunkHeader) + if err = h.UnmarshalBinary(p); err != nil { + return nil, err + } + return h, nil +} + +// uint16BE converts a big-endian uint16 representation to an uint16 +// value. +func uint16BE(p []byte) uint16 { + return uint16(p[0])<<8 | uint16(p[1]) +} + +// putUint16BE puts the big-endian uint16 presentation into the given +// slice. +func putUint16BE(p []byte, x uint16) { + p[0] = byte(x >> 8) + p[1] = byte(x) +} + +// chunkState is used to manage the state of the chunks +type chunkState byte + +// start and stop define the initial and terminating state of the chunk +// state +const ( + start chunkState = 'S' + stop = 'T' +) + +// errors for the chunk state handling +var ( + errChunkType = errors.New("lzma: unexpected chunk type") + errState = errors.New("lzma: wrong chunk state") +) + +// next transitions state based on chunk type input +func (c *chunkState) next(ctype chunkType) error { + switch *c { + // start state + case 'S': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cLRND: + *c = 'L' + default: + return errChunkType + } + // normal LZMA mode + case 'L': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cU: + *c = 'U' + case cL, cLR, cLRN, cLRND: + break + default: + return errChunkType + } + // reset required + case 'R': + switch ctype { + case cEOS: + *c = 'T' + case cUD, cU: + break + case cLRN, cLRND: + *c = 'L' + default: + return errChunkType + } + // uncompressed + case 'U': + switch ctype { + case cEOS: + *c = 'T' + case cUD: + *c = 'R' + case cU: + break + case cL, cLR, cLRN, cLRND: + *c = 'L' + default: + return errChunkType + } + // terminal state + case 'T': + return errChunkType + default: + return errState + } + return nil +} + +// defaultChunkType returns the default chunk type for each chunk state. +func (c chunkState) defaultChunkType() chunkType { + switch c { + case 'S': + return cLRND + case 'L', 'U': + return cL + case 'R': + return cLRN + default: + // no error + return cEOS + } +} + +// maxDictCap defines the maximum dictionary capacity supported by the +// LZMA2 dictionary capacity encoding. +const maxDictCap = 1<<32 - 1 + +// maxDictCapCode defines the maximum dictionary capacity code. +const maxDictCapCode = 40 + +// The function decodes the dictionary capacity byte, but doesn't change +// for the correct range of the given byte. +func decodeDictCap(c byte) int64 { + return (2 | int64(c)&1) << (11 + (c>>1)&0x1f) +} + +// DecodeDictCap decodes the encoded dictionary capacity. The function +// returns an error if the code is out of range. +func DecodeDictCap(c byte) (n int64, err error) { + if c >= maxDictCapCode { + if c == maxDictCapCode { + return maxDictCap, nil + } + return 0, errors.New("lzma: invalid dictionary size code") + } + return decodeDictCap(c), nil +} + +// EncodeDictCap encodes a dictionary capacity. The function returns the +// code for the capacity that is greater or equal n. If n exceeds the +// maximum support dictionary capacity, the maximum value is returned. +func EncodeDictCap(n int64) byte { + a, b := byte(0), byte(40) + for a < b { + c := a + (b-a)>>1 + m := decodeDictCap(c) + if n <= m { + if n == m { + return c + } + b = c + } else { + a = c + 1 + } + } + return a +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go new file mode 100644 index 000000000..e51773092 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/lengthcodec.go @@ -0,0 +1,129 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "errors" + +// maxPosBits defines the number of bits of the position value that are used to +// to compute the posState value. The value is used to select the tree codec +// for length encoding and decoding. +const maxPosBits = 4 + +// minMatchLen and maxMatchLen give the minimum and maximum values for +// encoding and decoding length values. minMatchLen is also used as base +// for the encoded length values. +const ( + minMatchLen = 2 + maxMatchLen = minMatchLen + 16 + 256 - 1 +) + +// lengthCodec support the encoding of the length value. +type lengthCodec struct { + choice [2]prob + low [1 << maxPosBits]treeCodec + mid [1 << maxPosBits]treeCodec + high treeCodec +} + +// deepcopy initializes the lc value as deep copy of the source value. +func (lc *lengthCodec) deepcopy(src *lengthCodec) { + if lc == src { + return + } + lc.choice = src.choice + for i := range lc.low { + lc.low[i].deepcopy(&src.low[i]) + } + for i := range lc.mid { + lc.mid[i].deepcopy(&src.mid[i]) + } + lc.high.deepcopy(&src.high) +} + +// init initializes a new length codec. +func (lc *lengthCodec) init() { + for i := range lc.choice { + lc.choice[i] = probInit + } + for i := range lc.low { + lc.low[i] = makeTreeCodec(3) + } + for i := range lc.mid { + lc.mid[i] = makeTreeCodec(3) + } + lc.high = makeTreeCodec(8) +} + +// lBits gives the number of bits used for the encoding of the l value +// provided to the range encoder. +func lBits(l uint32) int { + switch { + case l < 8: + return 4 + case l < 16: + return 5 + default: + return 10 + } +} + +// Encode encodes the length offset. The length offset l can be compute by +// subtracting minMatchLen (2) from the actual length. +// +// l = length - minMatchLen +// +func (lc *lengthCodec) Encode(e *rangeEncoder, l uint32, posState uint32, +) (err error) { + if l > maxMatchLen-minMatchLen { + return errors.New("lengthCodec.Encode: l out of range") + } + if l < 8 { + if err = lc.choice[0].Encode(e, 0); err != nil { + return + } + return lc.low[posState].Encode(e, l) + } + if err = lc.choice[0].Encode(e, 1); err != nil { + return + } + if l < 16 { + if err = lc.choice[1].Encode(e, 0); err != nil { + return + } + return lc.mid[posState].Encode(e, l-8) + } + if err = lc.choice[1].Encode(e, 1); err != nil { + return + } + if err = lc.high.Encode(e, l-16); err != nil { + return + } + return nil +} + +// Decode reads the length offset. Add minMatchLen to compute the actual length +// to the length offset l. +func (lc *lengthCodec) Decode(d *rangeDecoder, posState uint32, +) (l uint32, err error) { + var b uint32 + if b, err = lc.choice[0].Decode(d); err != nil { + return + } + if b == 0 { + l, err = lc.low[posState].Decode(d) + return + } + if b, err = lc.choice[1].Decode(d); err != nil { + return + } + if b == 0 { + l, err = lc.mid[posState].Decode(d) + l += 8 + return + } + l, err = lc.high.Decode(d) + l += 16 + return +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go new file mode 100644 index 000000000..c949d6ebd --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/literalcodec.go @@ -0,0 +1,132 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// literalCodec supports the encoding of literal. It provides 768 probability +// values per literal state. The upper 512 probabilities are used with the +// context of a match bit. +type literalCodec struct { + probs []prob +} + +// deepcopy initializes literal codec c as a deep copy of the source. +func (c *literalCodec) deepcopy(src *literalCodec) { + if c == src { + return + } + c.probs = make([]prob, len(src.probs)) + copy(c.probs, src.probs) +} + +// init initializes the literal codec. +func (c *literalCodec) init(lc, lp int) { + switch { + case !(minLC <= lc && lc <= maxLC): + panic("lc out of range") + case !(minLP <= lp && lp <= maxLP): + panic("lp out of range") + } + c.probs = make([]prob, 0x300<<uint(lc+lp)) + for i := range c.probs { + c.probs[i] = probInit + } +} + +// Encode encodes the byte s using a range encoder as well as the current LZMA +// encoder state, a match byte and the literal state. +func (c *literalCodec) Encode(e *rangeEncoder, s byte, + state uint32, match byte, litState uint32, +) (err error) { + k := litState * 0x300 + probs := c.probs[k : k+0x300] + symbol := uint32(1) + r := uint32(s) + if state >= 7 { + m := uint32(match) + for { + matchBit := (m >> 7) & 1 + m <<= 1 + bit := (r >> 7) & 1 + r <<= 1 + i := ((1 + matchBit) << 8) | symbol + if err = probs[i].Encode(e, bit); err != nil { + return + } + symbol = (symbol << 1) | bit + if matchBit != bit { + break + } + if symbol >= 0x100 { + break + } + } + } + for symbol < 0x100 { + bit := (r >> 7) & 1 + r <<= 1 + if err = probs[symbol].Encode(e, bit); err != nil { + return + } + symbol = (symbol << 1) | bit + } + return nil +} + +// Decode decodes a literal byte using the range decoder as well as the LZMA +// state, a match byte, and the literal state. +func (c *literalCodec) Decode(d *rangeDecoder, + state uint32, match byte, litState uint32, +) (s byte, err error) { + k := litState * 0x300 + probs := c.probs[k : k+0x300] + symbol := uint32(1) + if state >= 7 { + m := uint32(match) + for { + matchBit := (m >> 7) & 1 + m <<= 1 + i := ((1 + matchBit) << 8) | symbol + bit, err := d.DecodeBit(&probs[i]) + if err != nil { + return 0, err + } + symbol = (symbol << 1) | bit + if matchBit != bit { + break + } + if symbol >= 0x100 { + break + } + } + } + for symbol < 0x100 { + bit, err := d.DecodeBit(&probs[symbol]) + if err != nil { + return 0, err + } + symbol = (symbol << 1) | bit + } + s = byte(symbol - 0x100) + return s, nil +} + +// minLC and maxLC define the range for LC values. +const ( + minLC = 0 + maxLC = 8 +) + +// minLC and maxLC define the range for LP values. +const ( + minLP = 0 + maxLP = 4 +) + +// minState and maxState define a range for the state values stored in +// the State values. +const ( + minState = 0 + maxState = 11 +) diff --git a/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go new file mode 100644 index 000000000..4a244eb1a --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/matchalgorithm.go @@ -0,0 +1,52 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import "errors" + +// MatchAlgorithm identifies an algorithm to find matches in the +// dictionary. +type MatchAlgorithm byte + +// Supported matcher algorithms. +const ( + HashTable4 MatchAlgorithm = iota + BinaryTree +) + +// maStrings are used by the String method. +var maStrings = map[MatchAlgorithm]string{ + HashTable4: "HashTable4", + BinaryTree: "BinaryTree", +} + +// String returns a string representation of the Matcher. +func (a MatchAlgorithm) String() string { + if s, ok := maStrings[a]; ok { + return s + } + return "unknown" +} + +var errUnsupportedMatchAlgorithm = errors.New( + "lzma: unsupported match algorithm value") + +// verify checks whether the matcher value is supported. +func (a MatchAlgorithm) verify() error { + if _, ok := maStrings[a]; !ok { + return errUnsupportedMatchAlgorithm + } + return nil +} + +func (a MatchAlgorithm) new(dictCap int) (m matcher, err error) { + switch a { + case HashTable4: + return newHashTable(dictCap, 4) + case BinaryTree: + return newBinTree(dictCap) + } + return nil, errUnsupportedMatchAlgorithm +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/operation.go b/vendor/github.com/ulikunitz/xz/lzma/operation.go new file mode 100644 index 000000000..733bb99da --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/operation.go @@ -0,0 +1,80 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" + "unicode" +) + +// operation represents an operation on the dictionary during encoding or +// decoding. +type operation interface { + Len() int +} + +// rep represents a repetition at the given distance and the given length +type match struct { + // supports all possible distance values, including the eos marker + distance int64 + // length + n int +} + +// verify checks whether the match is valid. If that is not the case an +// error is returned. +func (m match) verify() error { + if !(minDistance <= m.distance && m.distance <= maxDistance) { + return errors.New("distance out of range") + } + if !(1 <= m.n && m.n <= maxMatchLen) { + return errors.New("length out of range") + } + return nil +} + +// l return the l-value for the match, which is the difference of length +// n and 2. +func (m match) l() uint32 { + return uint32(m.n - minMatchLen) +} + +// dist returns the dist value for the match, which is one less of the +// distance stored in the match. +func (m match) dist() uint32 { + return uint32(m.distance - minDistance) +} + +// Len returns the number of bytes matched. +func (m match) Len() int { + return m.n +} + +// String returns a string representation for the repetition. +func (m match) String() string { + return fmt.Sprintf("M{%d,%d}", m.distance, m.n) +} + +// lit represents a single byte literal. +type lit struct { + b byte +} + +// Len returns 1 for the single byte literal. +func (l lit) Len() int { + return 1 +} + +// String returns a string representation for the literal. +func (l lit) String() string { + var c byte + if unicode.IsPrint(rune(l.b)) { + c = l.b + } else { + c = '.' + } + return fmt.Sprintf("L{%c/%02x}", c, l.b) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/prob.go b/vendor/github.com/ulikunitz/xz/lzma/prob.go new file mode 100644 index 000000000..24d50ec68 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/prob.go @@ -0,0 +1,53 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// movebits defines the number of bits used for the updates of probability +// values. +const movebits = 5 + +// probbits defines the number of bits of a probability value. +const probbits = 11 + +// probInit defines 0.5 as initial value for prob values. +const probInit prob = 1 << (probbits - 1) + +// Type prob represents probabilities. The type can also be used to encode and +// decode single bits. +type prob uint16 + +// Dec decreases the probability. The decrease is proportional to the +// probability value. +func (p *prob) dec() { + *p -= *p >> movebits +} + +// Inc increases the probability. The Increase is proportional to the +// difference of 1 and the probability value. +func (p *prob) inc() { + *p += ((1 << probbits) - *p) >> movebits +} + +// Computes the new bound for a given range using the probability value. +func (p prob) bound(r uint32) uint32 { + return (r >> probbits) * uint32(p) +} + +// Bits returns 1. One is the number of bits that can be encoded or decoded +// with a single prob value. +func (p prob) Bits() int { + return 1 +} + +// Encode encodes the least-significant bit of v. Note that the p value will be +// changed. +func (p *prob) Encode(e *rangeEncoder, v uint32) error { + return e.EncodeBit(v, p) +} + +// Decode decodes a single bit. Note that the p value will change. +func (p *prob) Decode(d *rangeDecoder) (v uint32, err error) { + return d.DecodeBit(p) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/properties.go b/vendor/github.com/ulikunitz/xz/lzma/properties.go new file mode 100644 index 000000000..23418e25d --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/properties.go @@ -0,0 +1,69 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "fmt" +) + +// maximum and minimum values for the LZMA properties. +const ( + minPB = 0 + maxPB = 4 +) + +// maxPropertyCode is the possible maximum of a properties code byte. +const maxPropertyCode = (maxPB+1)*(maxLP+1)*(maxLC+1) - 1 + +// Properties contains the parameters LC, LP and PB. The parameter LC +// defines the number of literal context bits; parameter LP the number +// of literal position bits and PB the number of position bits. +type Properties struct { + LC int + LP int + PB int +} + +// String returns the properties in a string representation. +func (p *Properties) String() string { + return fmt.Sprintf("LC %d LP %d PB %d", p.LC, p.LP, p.PB) +} + +// PropertiesForCode converts a properties code byte into a Properties value. +func PropertiesForCode(code byte) (p Properties, err error) { + if code > maxPropertyCode { + return p, errors.New("lzma: invalid properties code") + } + p.LC = int(code % 9) + code /= 9 + p.LP = int(code % 5) + code /= 5 + p.PB = int(code % 5) + return p, err +} + +// verify checks the properties for correctness. +func (p *Properties) verify() error { + if p == nil { + return errors.New("lzma: properties are nil") + } + if !(minLC <= p.LC && p.LC <= maxLC) { + return errors.New("lzma: lc out of range") + } + if !(minLP <= p.LP && p.LP <= maxLP) { + return errors.New("lzma: lp out of range") + } + if !(minPB <= p.PB && p.PB <= maxPB) { + return errors.New("lzma: pb out of range") + } + return nil +} + +// Code converts the properties to a byte. The function assumes that +// the properties components are all in range. +func (p Properties) Code() byte { + return byte((p.PB*5+p.LP)*9 + p.LC) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go new file mode 100644 index 000000000..6361c5e7c --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/rangecodec.go @@ -0,0 +1,248 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" +) + +// rangeEncoder implements range encoding of single bits. The low value can +// overflow therefore we need uint64. The cache value is used to handle +// overflows. +type rangeEncoder struct { + lbw *LimitedByteWriter + nrange uint32 + low uint64 + cacheLen int64 + cache byte +} + +// maxInt64 provides the maximal value of the int64 type +const maxInt64 = 1<<63 - 1 + +// newRangeEncoder creates a new range encoder. +func newRangeEncoder(bw io.ByteWriter) (re *rangeEncoder, err error) { + lbw, ok := bw.(*LimitedByteWriter) + if !ok { + lbw = &LimitedByteWriter{BW: bw, N: maxInt64} + } + return &rangeEncoder{ + lbw: lbw, + nrange: 0xffffffff, + cacheLen: 1}, nil +} + +// Available returns the number of bytes that still can be written. The +// method takes the bytes that will be currently written by Close into +// account. +func (e *rangeEncoder) Available() int64 { + return e.lbw.N - (e.cacheLen + 4) +} + +// writeByte writes a single byte to the underlying writer. An error is +// returned if the limit is reached. The written byte will be counted if +// the underlying writer doesn't return an error. +func (e *rangeEncoder) writeByte(c byte) error { + if e.Available() < 1 { + return ErrLimit + } + return e.lbw.WriteByte(c) +} + +// DirectEncodeBit encodes the least-significant bit of b with probability 1/2. +func (e *rangeEncoder) DirectEncodeBit(b uint32) error { + e.nrange >>= 1 + e.low += uint64(e.nrange) & (0 - (uint64(b) & 1)) + + // normalize + const top = 1 << 24 + if e.nrange >= top { + return nil + } + e.nrange <<= 8 + return e.shiftLow() +} + +// EncodeBit encodes the least significant bit of b. The p value will be +// updated by the function depending on the bit encoded. +func (e *rangeEncoder) EncodeBit(b uint32, p *prob) error { + bound := p.bound(e.nrange) + if b&1 == 0 { + e.nrange = bound + p.inc() + } else { + e.low += uint64(bound) + e.nrange -= bound + p.dec() + } + + // normalize + const top = 1 << 24 + if e.nrange >= top { + return nil + } + e.nrange <<= 8 + return e.shiftLow() +} + +// Close writes a complete copy of the low value. +func (e *rangeEncoder) Close() error { + for i := 0; i < 5; i++ { + if err := e.shiftLow(); err != nil { + return err + } + } + return nil +} + +// shiftLow shifts the low value for 8 bit. The shifted byte is written into +// the byte writer. The cache value is used to handle overflows. +func (e *rangeEncoder) shiftLow() error { + if uint32(e.low) < 0xff000000 || (e.low>>32) != 0 { + tmp := e.cache + for { + err := e.writeByte(tmp + byte(e.low>>32)) + if err != nil { + return err + } + tmp = 0xff + e.cacheLen-- + if e.cacheLen <= 0 { + if e.cacheLen < 0 { + panic("negative cacheLen") + } + break + } + } + e.cache = byte(uint32(e.low) >> 24) + } + e.cacheLen++ + e.low = uint64(uint32(e.low) << 8) + return nil +} + +// rangeDecoder decodes single bits of the range encoding stream. +type rangeDecoder struct { + br io.ByteReader + nrange uint32 + code uint32 +} + +// init initializes the range decoder, by reading from the byte reader. +func (d *rangeDecoder) init() error { + d.nrange = 0xffffffff + d.code = 0 + + b, err := d.br.ReadByte() + if err != nil { + return err + } + if b != 0 { + return errors.New("newRangeDecoder: first byte not zero") + } + + for i := 0; i < 4; i++ { + if err = d.updateCode(); err != nil { + return err + } + } + + if d.code >= d.nrange { + return errors.New("newRangeDecoder: d.code >= d.nrange") + } + + return nil +} + +// newRangeDecoder initializes a range decoder. It reads five bytes from the +// reader and therefore may return an error. +func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) { + d = &rangeDecoder{br: br, nrange: 0xffffffff} + + b, err := d.br.ReadByte() + if err != nil { + return nil, err + } + if b != 0 { + return nil, errors.New("newRangeDecoder: first byte not zero") + } + + for i := 0; i < 4; i++ { + if err = d.updateCode(); err != nil { + return nil, err + } + } + + if d.code >= d.nrange { + return nil, errors.New("newRangeDecoder: d.code >= d.nrange") + } + + return d, nil +} + +// possiblyAtEnd checks whether the decoder may be at the end of the stream. +func (d *rangeDecoder) possiblyAtEnd() bool { + return d.code == 0 +} + +// DirectDecodeBit decodes a bit with probability 1/2. The return value b will +// contain the bit at the least-significant position. All other bits will be +// zero. +func (d *rangeDecoder) DirectDecodeBit() (b uint32, err error) { + d.nrange >>= 1 + d.code -= d.nrange + t := 0 - (d.code >> 31) + d.code += d.nrange & t + b = (t + 1) & 1 + + // d.code will stay less then d.nrange + + // normalize + // assume d.code < d.nrange + const top = 1 << 24 + if d.nrange >= top { + return b, nil + } + d.nrange <<= 8 + // d.code < d.nrange will be maintained + return b, d.updateCode() +} + +// decodeBit decodes a single bit. The bit will be returned at the +// least-significant position. All other bits will be zero. The probability +// value will be updated. +func (d *rangeDecoder) DecodeBit(p *prob) (b uint32, err error) { + bound := p.bound(d.nrange) + if d.code < bound { + d.nrange = bound + p.inc() + b = 0 + } else { + d.code -= bound + d.nrange -= bound + p.dec() + b = 1 + } + // normalize + // assume d.code < d.nrange + const top = 1 << 24 + if d.nrange >= top { + return b, nil + } + d.nrange <<= 8 + // d.code < d.nrange will be maintained + return b, d.updateCode() +} + +// updateCode reads a new byte into the code. +func (d *rangeDecoder) updateCode() error { + b, err := d.br.ReadByte() + if err != nil { + return err + } + d.code = (d.code << 8) | uint32(b) + return nil +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader.go b/vendor/github.com/ulikunitz/xz/lzma/reader.go new file mode 100644 index 000000000..2ef3dcaaa --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/reader.go @@ -0,0 +1,100 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lzma supports the decoding and encoding of LZMA streams. +// Reader and Writer support the classic LZMA format. Reader2 and +// Writer2 support the decoding and encoding of LZMA2 streams. +// +// The package is written completely in Go and doesn't rely on any external +// library. +package lzma + +import ( + "errors" + "io" +) + +// ReaderConfig stores the parameters for the reader of the classic LZMA +// format. +type ReaderConfig struct { + DictCap int +} + +// fill converts the zero values of the configuration to the default values. +func (c *ReaderConfig) fill() { + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } +} + +// Verify checks the reader configuration for errors. Zero values will +// be replaced by default values. +func (c *ReaderConfig) Verify() error { + c.fill() + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + return nil +} + +// Reader provides a reader for LZMA files or streams. +type Reader struct { + lzma io.Reader + h header + d *decoder +} + +// NewReader creates a new reader for an LZMA stream using the classic +// format. NewReader reads and checks the header of the LZMA stream. +func NewReader(lzma io.Reader) (r *Reader, err error) { + return ReaderConfig{}.NewReader(lzma) +} + +// NewReader creates a new reader for an LZMA stream in the classic +// format. The function reads and verifies the the header of the LZMA +// stream. +func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + data := make([]byte, HeaderLen) + if _, err := io.ReadFull(lzma, data); err != nil { + if err == io.EOF { + return nil, errors.New("lzma: unexpected EOF") + } + return nil, err + } + r = &Reader{lzma: lzma} + if err = r.h.unmarshalBinary(data); err != nil { + return nil, err + } + if r.h.dictCap < MinDictCap { + return nil, errors.New("lzma: dictionary capacity too small") + } + dictCap := r.h.dictCap + if c.DictCap > dictCap { + dictCap = c.DictCap + } + + state := newState(r.h.properties) + dict, err := newDecoderDict(dictCap) + if err != nil { + return nil, err + } + r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size) + if err != nil { + return nil, err + } + return r, nil +} + +// EOSMarker indicates that an EOS marker has been encountered. +func (r *Reader) EOSMarker() bool { + return r.d.eosMarker +} + +// Read returns uncompressed data. +func (r *Reader) Read(p []byte) (n int, err error) { + return r.d.Read(p) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/reader2.go b/vendor/github.com/ulikunitz/xz/lzma/reader2.go new file mode 100644 index 000000000..a55cfaa4e --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/reader2.go @@ -0,0 +1,232 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "errors" + "io" + + "github.com/ulikunitz/xz/internal/xlog" +) + +// Reader2Config stores the parameters for the LZMA2 reader. +// format. +type Reader2Config struct { + DictCap int +} + +// fill converts the zero values of the configuration to the default values. +func (c *Reader2Config) fill() { + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } +} + +// Verify checks the reader configuration for errors. Zero configuration values +// will be replaced by default values. +func (c *Reader2Config) Verify() error { + c.fill() + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + return nil +} + +// Reader2 supports the reading of LZMA2 chunk sequences. Note that the +// first chunk should have a dictionary reset and the first compressed +// chunk a properties reset. The chunk sequence may not be terminated by +// an end-of-stream chunk. +type Reader2 struct { + r io.Reader + err error + + dict *decoderDict + ur *uncompressedReader + decoder *decoder + chunkReader io.Reader + + cstate chunkState + ctype chunkType +} + +// NewReader2 creates a reader for an LZMA2 chunk sequence. +func NewReader2(lzma2 io.Reader) (r *Reader2, err error) { + return Reader2Config{}.NewReader2(lzma2) +} + +// NewReader2 creates an LZMA2 reader using the given configuration. +func (c Reader2Config) NewReader2(lzma2 io.Reader) (r *Reader2, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + r = &Reader2{r: lzma2, cstate: start} + r.dict, err = newDecoderDict(c.DictCap) + if err != nil { + return nil, err + } + if err = r.startChunk(); err != nil { + r.err = err + } + return r, nil +} + +// uncompressed tests whether the chunk type specifies an uncompressed +// chunk. +func uncompressed(ctype chunkType) bool { + return ctype == cU || ctype == cUD +} + +// startChunk parses a new chunk. +func (r *Reader2) startChunk() error { + r.chunkReader = nil + header, err := readChunkHeader(r.r) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + xlog.Debugf("chunk header %v", header) + if err = r.cstate.next(header.ctype); err != nil { + return err + } + if r.cstate == stop { + return io.EOF + } + if header.ctype == cUD || header.ctype == cLRND { + r.dict.Reset() + } + size := int64(header.uncompressed) + 1 + if uncompressed(header.ctype) { + if r.ur != nil { + r.ur.Reopen(r.r, size) + } else { + r.ur = newUncompressedReader(r.r, r.dict, size) + } + r.chunkReader = r.ur + return nil + } + br := ByteReader(io.LimitReader(r.r, int64(header.compressed)+1)) + if r.decoder == nil { + state := newState(header.props) + r.decoder, err = newDecoder(br, state, r.dict, size) + if err != nil { + return err + } + r.chunkReader = r.decoder + return nil + } + switch header.ctype { + case cLR: + r.decoder.State.Reset() + case cLRN, cLRND: + r.decoder.State = newState(header.props) + } + err = r.decoder.Reopen(br, size) + if err != nil { + return err + } + r.chunkReader = r.decoder + return nil +} + +// Read reads data from the LZMA2 chunk sequence. +func (r *Reader2) Read(p []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + for n < len(p) { + var k int + k, err = r.chunkReader.Read(p[n:]) + n += k + if err != nil { + if err == io.EOF { + err = r.startChunk() + if err == nil { + continue + } + } + r.err = err + return n, err + } + if k == 0 { + r.err = errors.New("lzma: Reader2 doesn't get data") + return n, r.err + } + } + return n, nil +} + +// EOS returns whether the LZMA2 stream has been terminated by an +// end-of-stream chunk. +func (r *Reader2) EOS() bool { + return r.cstate == stop +} + +// uncompressedReader is used to read uncompressed chunks. +type uncompressedReader struct { + lr io.LimitedReader + Dict *decoderDict + eof bool + err error +} + +// newUncompressedReader initializes a new uncompressedReader. +func newUncompressedReader(r io.Reader, dict *decoderDict, size int64) *uncompressedReader { + ur := &uncompressedReader{ + lr: io.LimitedReader{R: r, N: size}, + Dict: dict, + } + return ur +} + +// Reopen reinitializes an uncompressed reader. +func (ur *uncompressedReader) Reopen(r io.Reader, size int64) { + ur.err = nil + ur.eof = false + ur.lr = io.LimitedReader{R: r, N: size} +} + +// fill reads uncompressed data into the dictionary. +func (ur *uncompressedReader) fill() error { + if !ur.eof { + n, err := io.CopyN(ur.Dict, &ur.lr, int64(ur.Dict.Available())) + if err != io.EOF { + return err + } + ur.eof = true + if n > 0 { + return nil + } + } + if ur.lr.N != 0 { + return io.ErrUnexpectedEOF + } + return io.EOF +} + +// Read reads uncompressed data from the limited reader. +func (ur *uncompressedReader) Read(p []byte) (n int, err error) { + if ur.err != nil { + return 0, ur.err + } + for { + var k int + k, err = ur.Dict.Read(p[n:]) + n += k + if n >= len(p) { + return n, nil + } + if err != nil { + break + } + err = ur.fill() + if err != nil { + break + } + } + ur.err = err + return n, err +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/state.go b/vendor/github.com/ulikunitz/xz/lzma/state.go new file mode 100644 index 000000000..502351052 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/state.go @@ -0,0 +1,151 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// states defines the overall state count +const states = 12 + +// State maintains the full state of the operation encoding or decoding +// process. +type state struct { + rep [4]uint32 + isMatch [states << maxPosBits]prob + isRepG0Long [states << maxPosBits]prob + isRep [states]prob + isRepG0 [states]prob + isRepG1 [states]prob + isRepG2 [states]prob + litCodec literalCodec + lenCodec lengthCodec + repLenCodec lengthCodec + distCodec distCodec + state uint32 + posBitMask uint32 + Properties Properties +} + +// initProbSlice initializes a slice of probabilities. +func initProbSlice(p []prob) { + for i := range p { + p[i] = probInit + } +} + +// Reset sets all state information to the original values. +func (s *state) Reset() { + p := s.Properties + *s = state{ + Properties: p, + // dict: s.dict, + posBitMask: (uint32(1) << uint(p.PB)) - 1, + } + initProbSlice(s.isMatch[:]) + initProbSlice(s.isRep[:]) + initProbSlice(s.isRepG0[:]) + initProbSlice(s.isRepG1[:]) + initProbSlice(s.isRepG2[:]) + initProbSlice(s.isRepG0Long[:]) + s.litCodec.init(p.LC, p.LP) + s.lenCodec.init() + s.repLenCodec.init() + s.distCodec.init() +} + +// initState initializes the state. +func initState(s *state, p Properties) { + *s = state{Properties: p} + s.Reset() +} + +// newState creates a new state from the give Properties. +func newState(p Properties) *state { + s := &state{Properties: p} + s.Reset() + return s +} + +// deepcopy initializes s as a deep copy of the source. +func (s *state) deepcopy(src *state) { + if s == src { + return + } + s.rep = src.rep + s.isMatch = src.isMatch + s.isRepG0Long = src.isRepG0Long + s.isRep = src.isRep + s.isRepG0 = src.isRepG0 + s.isRepG1 = src.isRepG1 + s.isRepG2 = src.isRepG2 + s.litCodec.deepcopy(&src.litCodec) + s.lenCodec.deepcopy(&src.lenCodec) + s.repLenCodec.deepcopy(&src.repLenCodec) + s.distCodec.deepcopy(&src.distCodec) + s.state = src.state + s.posBitMask = src.posBitMask + s.Properties = src.Properties +} + +// cloneState creates a new clone of the give state. +func cloneState(src *state) *state { + s := new(state) + s.deepcopy(src) + return s +} + +// updateStateLiteral updates the state for a literal. +func (s *state) updateStateLiteral() { + switch { + case s.state < 4: + s.state = 0 + return + case s.state < 10: + s.state -= 3 + return + } + s.state -= 6 +} + +// updateStateMatch updates the state for a match. +func (s *state) updateStateMatch() { + if s.state < 7 { + s.state = 7 + } else { + s.state = 10 + } +} + +// updateStateRep updates the state for a repetition. +func (s *state) updateStateRep() { + if s.state < 7 { + s.state = 8 + } else { + s.state = 11 + } +} + +// updateStateShortRep updates the state for a short repetition. +func (s *state) updateStateShortRep() { + if s.state < 7 { + s.state = 9 + } else { + s.state = 11 + } +} + +// states computes the states of the operation codec. +func (s *state) states(dictHead int64) (state1, state2, posState uint32) { + state1 = s.state + posState = uint32(dictHead) & s.posBitMask + state2 = (s.state << maxPosBits) | posState + return +} + +// litState computes the literal state. +func (s *state) litState(prev byte, dictHead int64) uint32 { + lp, lc := uint(s.Properties.LP), uint(s.Properties.LC) + litState := ((uint32(dictHead) & ((1 << lp) - 1)) << lc) | + (uint32(prev) >> (8 - lc)) + return litState +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go new file mode 100644 index 000000000..504b3d78e --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/treecodecs.go @@ -0,0 +1,133 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +// treeCodec encodes or decodes values with a fixed bit size. It is using a +// tree of probability value. The root of the tree is the most-significant bit. +type treeCodec struct { + probTree +} + +// makeTreeCodec makes a tree codec. The bits value must be inside the range +// [1,32]. +func makeTreeCodec(bits int) treeCodec { + return treeCodec{makeProbTree(bits)} +} + +// deepcopy initializes tc as a deep copy of the source. +func (tc *treeCodec) deepcopy(src *treeCodec) { + tc.probTree.deepcopy(&src.probTree) +} + +// Encode uses the range encoder to encode a fixed-bit-size value. +func (tc *treeCodec) Encode(e *rangeEncoder, v uint32) (err error) { + m := uint32(1) + for i := int(tc.bits) - 1; i >= 0; i-- { + b := (v >> uint(i)) & 1 + if err := e.EncodeBit(b, &tc.probs[m]); err != nil { + return err + } + m = (m << 1) | b + } + return nil +} + +// Decodes uses the range decoder to decode a fixed-bit-size value. Errors may +// be caused by the range decoder. +func (tc *treeCodec) Decode(d *rangeDecoder) (v uint32, err error) { + m := uint32(1) + for j := 0; j < int(tc.bits); j++ { + b, err := d.DecodeBit(&tc.probs[m]) + if err != nil { + return 0, err + } + m = (m << 1) | b + } + return m - (1 << uint(tc.bits)), nil +} + +// treeReverseCodec is another tree codec, where the least-significant bit is +// the start of the probability tree. +type treeReverseCodec struct { + probTree +} + +// deepcopy initializes the treeReverseCodec as a deep copy of the +// source. +func (tc *treeReverseCodec) deepcopy(src *treeReverseCodec) { + tc.probTree.deepcopy(&src.probTree) +} + +// makeTreeReverseCodec creates treeReverseCodec value. The bits argument must +// be in the range [1,32]. +func makeTreeReverseCodec(bits int) treeReverseCodec { + return treeReverseCodec{makeProbTree(bits)} +} + +// Encode uses range encoder to encode a fixed-bit-size value. The range +// encoder may cause errors. +func (tc *treeReverseCodec) Encode(v uint32, e *rangeEncoder) (err error) { + m := uint32(1) + for i := uint(0); i < uint(tc.bits); i++ { + b := (v >> i) & 1 + if err := e.EncodeBit(b, &tc.probs[m]); err != nil { + return err + } + m = (m << 1) | b + } + return nil +} + +// Decodes uses the range decoder to decode a fixed-bit-size value. Errors +// returned by the range decoder will be returned. +func (tc *treeReverseCodec) Decode(d *rangeDecoder) (v uint32, err error) { + m := uint32(1) + for j := uint(0); j < uint(tc.bits); j++ { + b, err := d.DecodeBit(&tc.probs[m]) + if err != nil { + return 0, err + } + m = (m << 1) | b + v |= b << j + } + return v, nil +} + +// probTree stores enough probability values to be used by the treeEncode and +// treeDecode methods of the range coder types. +type probTree struct { + probs []prob + bits byte +} + +// deepcopy initializes the probTree value as a deep copy of the source. +func (t *probTree) deepcopy(src *probTree) { + if t == src { + return + } + t.probs = make([]prob, len(src.probs)) + copy(t.probs, src.probs) + t.bits = src.bits +} + +// makeProbTree initializes a probTree structure. +func makeProbTree(bits int) probTree { + if !(1 <= bits && bits <= 32) { + panic("bits outside of range [1,32]") + } + t := probTree{ + bits: byte(bits), + probs: make([]prob, 1<<uint(bits)), + } + for i := range t.probs { + t.probs[i] = probInit + } + return t +} + +// Bits provides the number of bits for the values to de- or encode. +func (t *probTree) Bits() int { + return int(t.bits) +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/writer.go b/vendor/github.com/ulikunitz/xz/lzma/writer.go new file mode 100644 index 000000000..efe34fb6b --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/writer.go @@ -0,0 +1,209 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "bufio" + "errors" + "io" +) + +// MinDictCap and MaxDictCap provide the range of supported dictionary +// capacities. +const ( + MinDictCap = 1 << 12 + MaxDictCap = 1<<32 - 1 +) + +// WriterConfig defines the configuration parameter for a writer. +type WriterConfig struct { + // Properties for the encoding. If the it is nil the value + // {LC: 3, LP: 0, PB: 2} will be chosen. + Properties *Properties + // The capacity of the dictionary. If DictCap is zero, the value + // 8 MiB will be chosen. + DictCap int + // Size of the lookahead buffer; value 0 indicates default size + // 4096 + BufSize int + // Match algorithm + Matcher MatchAlgorithm + // SizeInHeader indicates that the header will contain an + // explicit size. + SizeInHeader bool + // Size of the data to be encoded. A positive value will imply + // than an explicit size will be set in the header. + Size int64 + // EOSMarker requests whether the EOSMarker needs to be written. + // If no explicit size is been given the EOSMarker will be + // set automatically. + EOSMarker bool +} + +// fill converts zero-value fields to their explicit default values. +func (c *WriterConfig) fill() { + if c.Properties == nil { + c.Properties = &Properties{LC: 3, LP: 0, PB: 2} + } + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } + if c.BufSize == 0 { + c.BufSize = 4096 + } + if c.Size > 0 { + c.SizeInHeader = true + } + if !c.SizeInHeader { + c.EOSMarker = true + } +} + +// Verify checks WriterConfig for errors. Verify will replace zero +// values with default values. +func (c *WriterConfig) Verify() error { + c.fill() + var err error + if c == nil { + return errors.New("lzma: WriterConfig is nil") + } + if c.Properties == nil { + return errors.New("lzma: WriterConfig has no Properties set") + } + if err = c.Properties.verify(); err != nil { + return err + } + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + if !(maxMatchLen <= c.BufSize) { + return errors.New("lzma: lookahead buffer size too small") + } + if c.SizeInHeader { + if c.Size < 0 { + return errors.New("lzma: negative size not supported") + } + } else if !c.EOSMarker { + return errors.New("lzma: EOS marker is required") + } + if err = c.Matcher.verify(); err != nil { + return err + } + + return nil +} + +// header returns the header structure for this configuration. +func (c *WriterConfig) header() header { + h := header{ + properties: *c.Properties, + dictCap: c.DictCap, + size: -1, + } + if c.SizeInHeader { + h.size = c.Size + } + return h +} + +// Writer writes an LZMA stream in the classic format. +type Writer struct { + h header + bw io.ByteWriter + buf *bufio.Writer + e *encoder +} + +// NewWriter creates a new LZMA writer for the classic format. The +// method will write the header to the underlying stream. +func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + w = &Writer{h: c.header()} + + var ok bool + w.bw, ok = lzma.(io.ByteWriter) + if !ok { + w.buf = bufio.NewWriter(lzma) + w.bw = w.buf + } + state := newState(w.h.properties) + m, err := c.Matcher.new(w.h.dictCap) + if err != nil { + return nil, err + } + dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m) + if err != nil { + return nil, err + } + var flags encoderFlags + if c.EOSMarker { + flags = eosMarker + } + if w.e, err = newEncoder(w.bw, state, dict, flags); err != nil { + return nil, err + } + + if err = w.writeHeader(); err != nil { + return nil, err + } + return w, nil +} + +// NewWriter creates a new LZMA writer using the classic format. The +// function writes the header to the underlying stream. +func NewWriter(lzma io.Writer) (w *Writer, err error) { + return WriterConfig{}.NewWriter(lzma) +} + +// writeHeader writes the LZMA header into the stream. +func (w *Writer) writeHeader() error { + data, err := w.h.marshalBinary() + if err != nil { + return err + } + _, err = w.bw.(io.Writer).Write(data) + return err +} + +// Write puts data into the Writer. +func (w *Writer) Write(p []byte) (n int, err error) { + if w.h.size >= 0 { + m := w.h.size + m -= w.e.Compressed() + int64(w.e.dict.Buffered()) + if m < 0 { + m = 0 + } + if m < int64(len(p)) { + p = p[:m] + err = ErrNoSpace + } + } + var werr error + if n, werr = w.e.Write(p); werr != nil { + err = werr + } + return n, err +} + +// Close closes the writer stream. It ensures that all data from the +// buffer will be compressed and the LZMA stream will be finished. +func (w *Writer) Close() error { + if w.h.size >= 0 { + n := w.e.Compressed() + int64(w.e.dict.Buffered()) + if n != w.h.size { + return errSize + } + } + err := w.e.Close() + if w.buf != nil { + ferr := w.buf.Flush() + if err == nil { + err = ferr + } + } + return err +} diff --git a/vendor/github.com/ulikunitz/xz/lzma/writer2.go b/vendor/github.com/ulikunitz/xz/lzma/writer2.go new file mode 100644 index 000000000..7c1afe157 --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/lzma/writer2.go @@ -0,0 +1,305 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzma + +import ( + "bytes" + "errors" + "io" +) + +// Writer2Config is used to create a Writer2 using parameters. +type Writer2Config struct { + // The properties for the encoding. If the it is nil the value + // {LC: 3, LP: 0, PB: 2} will be chosen. + Properties *Properties + // The capacity of the dictionary. If DictCap is zero, the value + // 8 MiB will be chosen. + DictCap int + // Size of the lookahead buffer; value 0 indicates default size + // 4096 + BufSize int + // Match algorithm + Matcher MatchAlgorithm +} + +// fill replaces zero values with default values. +func (c *Writer2Config) fill() { + if c.Properties == nil { + c.Properties = &Properties{LC: 3, LP: 0, PB: 2} + } + if c.DictCap == 0 { + c.DictCap = 8 * 1024 * 1024 + } + if c.BufSize == 0 { + c.BufSize = 4096 + } +} + +// Verify checks the Writer2Config for correctness. Zero values will be +// replaced by default values. +func (c *Writer2Config) Verify() error { + c.fill() + var err error + if c == nil { + return errors.New("lzma: WriterConfig is nil") + } + if c.Properties == nil { + return errors.New("lzma: WriterConfig has no Properties set") + } + if err = c.Properties.verify(); err != nil { + return err + } + if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) { + return errors.New("lzma: dictionary capacity is out of range") + } + if !(maxMatchLen <= c.BufSize) { + return errors.New("lzma: lookahead buffer size too small") + } + if c.Properties.LC+c.Properties.LP > 4 { + return errors.New("lzma: sum of lc and lp exceeds 4") + } + if err = c.Matcher.verify(); err != nil { + return err + } + return nil +} + +// Writer2 supports the creation of an LZMA2 stream. But note that +// written data is buffered, so call Flush or Close to write data to the +// underlying writer. The Close method writes the end-of-stream marker +// to the stream. So you may be able to concatenate the output of two +// writers as long the output of the first writer has only been flushed +// but not closed. +// +// Any change to the fields Properties, DictCap must be done before the +// first call to Write, Flush or Close. +type Writer2 struct { + w io.Writer + + start *state + encoder *encoder + + cstate chunkState + ctype chunkType + + buf bytes.Buffer + lbw LimitedByteWriter +} + +// NewWriter2 creates an LZMA2 chunk sequence writer with the default +// parameters and options. +func NewWriter2(lzma2 io.Writer) (w *Writer2, err error) { + return Writer2Config{}.NewWriter2(lzma2) +} + +// NewWriter2 creates a new LZMA2 writer using the given configuration. +func (c Writer2Config) NewWriter2(lzma2 io.Writer) (w *Writer2, err error) { + if err = c.Verify(); err != nil { + return nil, err + } + w = &Writer2{ + w: lzma2, + start: newState(*c.Properties), + cstate: start, + ctype: start.defaultChunkType(), + } + w.buf.Grow(maxCompressed) + w.lbw = LimitedByteWriter{BW: &w.buf, N: maxCompressed} + m, err := c.Matcher.new(c.DictCap) + if err != nil { + return nil, err + } + d, err := newEncoderDict(c.DictCap, c.BufSize, m) + if err != nil { + return nil, err + } + w.encoder, err = newEncoder(&w.lbw, cloneState(w.start), d, 0) + if err != nil { + return nil, err + } + return w, nil +} + +// written returns the number of bytes written to the current chunk +func (w *Writer2) written() int { + if w.encoder == nil { + return 0 + } + return int(w.encoder.Compressed()) + w.encoder.dict.Buffered() +} + +// errClosed indicates that the writer is closed. +var errClosed = errors.New("lzma: writer closed") + +// Writes data to LZMA2 stream. Note that written data will be buffered. +// Use Flush or Close to ensure that data is written to the underlying +// writer. +func (w *Writer2) Write(p []byte) (n int, err error) { + if w.cstate == stop { + return 0, errClosed + } + for n < len(p) { + m := maxUncompressed - w.written() + if m <= 0 { + panic("lzma: maxUncompressed reached") + } + var q []byte + if n+m < len(p) { + q = p[n : n+m] + } else { + q = p[n:] + } + k, err := w.encoder.Write(q) + n += k + if err != nil && err != ErrLimit { + return n, err + } + if err == ErrLimit || k == m { + if err = w.flushChunk(); err != nil { + return n, err + } + } + } + return n, nil +} + +// writeUncompressedChunk writes an uncompressed chunk to the LZMA2 +// stream. +func (w *Writer2) writeUncompressedChunk() error { + u := w.encoder.Compressed() + if u <= 0 { + return errors.New("lzma: can't write empty uncompressed chunk") + } + if u > maxUncompressed { + panic("overrun of uncompressed data limit") + } + switch w.ctype { + case cLRND: + w.ctype = cUD + default: + w.ctype = cU + } + w.encoder.state = w.start + + header := chunkHeader{ + ctype: w.ctype, + uncompressed: uint32(u - 1), + } + hdata, err := header.MarshalBinary() + if err != nil { + return err + } + if _, err = w.w.Write(hdata); err != nil { + return err + } + _, err = w.encoder.dict.CopyN(w.w, int(u)) + return err +} + +// writeCompressedChunk writes a compressed chunk to the underlying +// writer. +func (w *Writer2) writeCompressedChunk() error { + if w.ctype == cU || w.ctype == cUD { + panic("chunk type uncompressed") + } + + u := w.encoder.Compressed() + if u <= 0 { + return errors.New("writeCompressedChunk: empty chunk") + } + if u > maxUncompressed { + panic("overrun of uncompressed data limit") + } + c := w.buf.Len() + if c <= 0 { + panic("no compressed data") + } + if c > maxCompressed { + panic("overrun of compressed data limit") + } + header := chunkHeader{ + ctype: w.ctype, + uncompressed: uint32(u - 1), + compressed: uint16(c - 1), + props: w.encoder.state.Properties, + } + hdata, err := header.MarshalBinary() + if err != nil { + return err + } + if _, err = w.w.Write(hdata); err != nil { + return err + } + _, err = io.Copy(w.w, &w.buf) + return err +} + +// writes a single chunk to the underlying writer. +func (w *Writer2) writeChunk() error { + u := int(uncompressedHeaderLen + w.encoder.Compressed()) + c := headerLen(w.ctype) + w.buf.Len() + if u < c { + return w.writeUncompressedChunk() + } + return w.writeCompressedChunk() +} + +// flushChunk terminates the current chunk. The encoder will be reset +// to support the next chunk. +func (w *Writer2) flushChunk() error { + if w.written() == 0 { + return nil + } + var err error + if err = w.encoder.Close(); err != nil { + return err + } + if err = w.writeChunk(); err != nil { + return err + } + w.buf.Reset() + w.lbw.N = maxCompressed + if err = w.encoder.Reopen(&w.lbw); err != nil { + return err + } + if err = w.cstate.next(w.ctype); err != nil { + return err + } + w.ctype = w.cstate.defaultChunkType() + w.start = cloneState(w.encoder.state) + return nil +} + +// Flush writes all buffered data out to the underlying stream. This +// could result in multiple chunks to be created. +func (w *Writer2) Flush() error { + if w.cstate == stop { + return errClosed + } + for w.written() > 0 { + if err := w.flushChunk(); err != nil { + return err + } + } + return nil +} + +// Close terminates the LZMA2 stream with an EOS chunk. +func (w *Writer2) Close() error { + if w.cstate == stop { + return errClosed + } + if err := w.Flush(); err != nil { + return nil + } + // write zero byte EOS chunk + _, err := w.w.Write([]byte{0}) + if err != nil { + return err + } + w.cstate = stop + return nil +} |