diff options
Diffstat (limited to 'vendor/github.com/ulikunitz/xz/format.go')
-rw-r--r-- | vendor/github.com/ulikunitz/xz/format.go | 728 |
1 files changed, 728 insertions, 0 deletions
diff --git a/vendor/github.com/ulikunitz/xz/format.go b/vendor/github.com/ulikunitz/xz/format.go new file mode 100644 index 000000000..798159c6c --- /dev/null +++ b/vendor/github.com/ulikunitz/xz/format.go @@ -0,0 +1,728 @@ +// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xz + +import ( + "bytes" + "crypto/sha256" + "errors" + "fmt" + "hash" + "hash/crc32" + "io" + + "github.com/ulikunitz/xz/lzma" +) + +// allZeros checks whether a given byte slice has only zeros. +func allZeros(p []byte) bool { + for _, c := range p { + if c != 0 { + return false + } + } + return true +} + +// padLen returns the length of the padding required for the given +// argument. +func padLen(n int64) int { + k := int(n % 4) + if k > 0 { + k = 4 - k + } + return k +} + +/*** Header ***/ + +// headerMagic stores the magic bytes for the header +var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00} + +// HeaderLen provides the length of the xz file header. +const HeaderLen = 12 + +// Constants for the checksum methods supported by xz. +const ( + CRC32 byte = 0x1 + CRC64 = 0x4 + SHA256 = 0xa +) + +// errInvalidFlags indicates that flags are invalid. +var errInvalidFlags = errors.New("xz: invalid flags") + +// verifyFlags returns the error errInvalidFlags if the value is +// invalid. +func verifyFlags(flags byte) error { + switch flags { + case CRC32, CRC64, SHA256: + return nil + default: + return errInvalidFlags + } +} + +// flagstrings maps flag values to strings. +var flagstrings = map[byte]string{ + CRC32: "CRC-32", + CRC64: "CRC-64", + SHA256: "SHA-256", +} + +// flagString returns the string representation for the given flags. +func flagString(flags byte) string { + s, ok := flagstrings[flags] + if !ok { + return "invalid" + } + return s +} + +// newHashFunc returns a function that creates hash instances for the +// hash method encoded in flags. +func newHashFunc(flags byte) (newHash func() hash.Hash, err error) { + switch flags { + case CRC32: + newHash = newCRC32 + case CRC64: + newHash = newCRC64 + case SHA256: + newHash = sha256.New + default: + err = errInvalidFlags + } + return +} + +// header provides the actual content of the xz file header: the flags. +type header struct { + flags byte +} + +// Errors returned by readHeader. +var errHeaderMagic = errors.New("xz: invalid header magic bytes") + +// ValidHeader checks whether data is a correct xz file header. The +// length of data must be HeaderLen. +func ValidHeader(data []byte) bool { + var h header + err := h.UnmarshalBinary(data) + return err == nil +} + +// String returns a string representation of the flags. +func (h header) String() string { + return flagString(h.flags) +} + +// UnmarshalBinary reads header from the provided data slice. +func (h *header) UnmarshalBinary(data []byte) error { + // header length + if len(data) != HeaderLen { + return errors.New("xz: wrong file header length") + } + + // magic header + if !bytes.Equal(headerMagic, data[:6]) { + return errHeaderMagic + } + + // checksum + crc := crc32.NewIEEE() + crc.Write(data[6:8]) + if uint32LE(data[8:]) != crc.Sum32() { + return errors.New("xz: invalid checksum for file header") + } + + // stream flags + if data[6] != 0 { + return errInvalidFlags + } + flags := data[7] + if err := verifyFlags(flags); err != nil { + return err + } + + h.flags = flags + return nil +} + +// MarshalBinary generates the xz file header. +func (h *header) MarshalBinary() (data []byte, err error) { + if err = verifyFlags(h.flags); err != nil { + return nil, err + } + + data = make([]byte, 12) + copy(data, headerMagic) + data[7] = h.flags + + crc := crc32.NewIEEE() + crc.Write(data[6:8]) + putUint32LE(data[8:], crc.Sum32()) + + return data, nil +} + +/*** Footer ***/ + +// footerLen defines the length of the footer. +const footerLen = 12 + +// footerMagic contains the footer magic bytes. +var footerMagic = []byte{'Y', 'Z'} + +// footer represents the content of the xz file footer. +type footer struct { + indexSize int64 + flags byte +} + +// String prints a string representation of the footer structure. +func (f footer) String() string { + return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize) +} + +// Minimum and maximum for the size of the index (backward size). +const ( + minIndexSize = 4 + maxIndexSize = (1 << 32) * 4 +) + +// MarshalBinary converts footer values into an xz file footer. Note +// that the footer value is checked for correctness. +func (f *footer) MarshalBinary() (data []byte, err error) { + if err = verifyFlags(f.flags); err != nil { + return nil, err + } + if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) { + return nil, errors.New("xz: index size out of range") + } + if f.indexSize%4 != 0 { + return nil, errors.New( + "xz: index size not aligned to four bytes") + } + + data = make([]byte, footerLen) + + // backward size (index size) + s := (f.indexSize / 4) - 1 + putUint32LE(data[4:], uint32(s)) + // flags + data[9] = f.flags + // footer magic + copy(data[10:], footerMagic) + + // CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[4:10]) + putUint32LE(data, crc.Sum32()) + + return data, nil +} + +// UnmarshalBinary sets the footer value by unmarshalling an xz file +// footer. +func (f *footer) UnmarshalBinary(data []byte) error { + if len(data) != footerLen { + return errors.New("xz: wrong footer length") + } + + // magic bytes + if !bytes.Equal(data[10:], footerMagic) { + return errors.New("xz: footer magic invalid") + } + + // CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[4:10]) + if uint32LE(data) != crc.Sum32() { + return errors.New("xz: footer checksum error") + } + + var g footer + // backward size (index size) + g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4 + + // flags + if data[8] != 0 { + return errInvalidFlags + } + g.flags = data[9] + if err := verifyFlags(g.flags); err != nil { + return err + } + + *f = g + return nil +} + +/*** Block Header ***/ + +// blockHeader represents the content of an xz block header. +type blockHeader struct { + compressedSize int64 + uncompressedSize int64 + filters []filter +} + +// String converts the block header into a string. +func (h blockHeader) String() string { + var buf bytes.Buffer + first := true + if h.compressedSize >= 0 { + fmt.Fprintf(&buf, "compressed size %d", h.compressedSize) + first = false + } + if h.uncompressedSize >= 0 { + if !first { + buf.WriteString(" ") + } + fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize) + first = false + } + for _, f := range h.filters { + if !first { + buf.WriteString(" ") + } + fmt.Fprintf(&buf, "filter %s", f) + first = false + } + return buf.String() +} + +// Masks for the block flags. +const ( + filterCountMask = 0x03 + compressedSizePresent = 0x40 + uncompressedSizePresent = 0x80 + reservedBlockFlags = 0x3C +) + +// errIndexIndicator signals that an index indicator (0x00) has been found +// instead of an expected block header indicator. +var errIndexIndicator = errors.New("xz: found index indicator") + +// readBlockHeader reads the block header. +func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) { + var buf bytes.Buffer + buf.Grow(20) + + // block header size + z, err := io.CopyN(&buf, r, 1) + n = int(z) + if err != nil { + return nil, n, err + } + s := buf.Bytes()[0] + if s == 0 { + return nil, n, errIndexIndicator + } + + // read complete header + headerLen := (int(s) + 1) * 4 + buf.Grow(headerLen - 1) + z, err = io.CopyN(&buf, r, int64(headerLen-1)) + n += int(z) + if err != nil { + return nil, n, err + } + + // unmarshal block header + h = new(blockHeader) + if err = h.UnmarshalBinary(buf.Bytes()); err != nil { + return nil, n, err + } + + return h, n, nil +} + +// readSizeInBlockHeader reads the uncompressed or compressed size +// fields in the block header. The present value informs the function +// whether the respective field is actually present in the header. +func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) { + if !present { + return -1, nil + } + x, _, err := readUvarint(r) + if err != nil { + return 0, err + } + if x >= 1<<63 { + return 0, errors.New("xz: size overflow in block header") + } + return int64(x), nil +} + +// UnmarshalBinary unmarshals the block header. +func (h *blockHeader) UnmarshalBinary(data []byte) error { + // Check header length + s := data[0] + if data[0] == 0 { + return errIndexIndicator + } + headerLen := (int(s) + 1) * 4 + if len(data) != headerLen { + return fmt.Errorf("xz: data length %d; want %d", len(data), + headerLen) + } + n := headerLen - 4 + + // Check CRC-32 + crc := crc32.NewIEEE() + crc.Write(data[:n]) + if crc.Sum32() != uint32LE(data[n:]) { + return errors.New("xz: checksum error for block header") + } + + // Block header flags + flags := data[1] + if flags&reservedBlockFlags != 0 { + return errors.New("xz: reserved block header flags set") + } + + r := bytes.NewReader(data[2:n]) + + // Compressed size + var err error + h.compressedSize, err = readSizeInBlockHeader( + r, flags&compressedSizePresent != 0) + if err != nil { + return err + } + + // Uncompressed size + h.uncompressedSize, err = readSizeInBlockHeader( + r, flags&uncompressedSizePresent != 0) + if err != nil { + return err + } + + h.filters, err = readFilters(r, int(flags&filterCountMask)+1) + if err != nil { + return err + } + + // Check padding + // Since headerLen is a multiple of 4 we don't need to check + // alignment. + k := r.Len() + // The standard spec says that the padding should have not more + // than 3 bytes. However we found paddings of 4 or 5 in the + // wild. See https://github.com/ulikunitz/xz/pull/11 and + // https://github.com/ulikunitz/xz/issues/15 + // + // The only reasonable approach seems to be to ignore the + // padding size. We still check that all padding bytes are zero. + if !allZeros(data[n-k : n]) { + return errPadding + } + return nil +} + +// MarshalBinary marshals the binary header. +func (h *blockHeader) MarshalBinary() (data []byte, err error) { + if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) { + return nil, errors.New("xz: filter count wrong") + } + for i, f := range h.filters { + if i < len(h.filters)-1 { + if f.id() == lzmaFilterID { + return nil, errors.New( + "xz: LZMA2 filter is not the last") + } + } else { + // last filter + if f.id() != lzmaFilterID { + return nil, errors.New("xz: " + + "last filter must be the LZMA2 filter") + } + } + } + + var buf bytes.Buffer + // header size must set at the end + buf.WriteByte(0) + + // flags + flags := byte(len(h.filters) - 1) + if h.compressedSize >= 0 { + flags |= compressedSizePresent + } + if h.uncompressedSize >= 0 { + flags |= uncompressedSizePresent + } + buf.WriteByte(flags) + + p := make([]byte, 10) + if h.compressedSize >= 0 { + k := putUvarint(p, uint64(h.compressedSize)) + buf.Write(p[:k]) + } + if h.uncompressedSize >= 0 { + k := putUvarint(p, uint64(h.uncompressedSize)) + buf.Write(p[:k]) + } + + for _, f := range h.filters { + fp, err := f.MarshalBinary() + if err != nil { + return nil, err + } + buf.Write(fp) + } + + // padding + for i := padLen(int64(buf.Len())); i > 0; i-- { + buf.WriteByte(0) + } + + // crc place holder + buf.Write(p[:4]) + + data = buf.Bytes() + if len(data)%4 != 0 { + panic("data length not aligned") + } + s := len(data)/4 - 1 + if !(1 < s && s <= 255) { + panic("wrong block header size") + } + data[0] = byte(s) + + crc := crc32.NewIEEE() + crc.Write(data[:len(data)-4]) + putUint32LE(data[len(data)-4:], crc.Sum32()) + + return data, nil +} + +// Constants used for marshalling and unmarshalling filters in the xz +// block header. +const ( + minFilters = 1 + maxFilters = 4 + minReservedID = 1 << 62 +) + +// filter represents a filter in the block header. +type filter interface { + id() uint64 + UnmarshalBinary(data []byte) error + MarshalBinary() (data []byte, err error) + reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error) + writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error) + // filter must be last filter + last() bool +} + +// readFilter reads a block filter from the block header. At this point +// in time only the LZMA2 filter is supported. +func readFilter(r io.Reader) (f filter, err error) { + br := lzma.ByteReader(r) + + // index + id, _, err := readUvarint(br) + if err != nil { + return nil, err + } + + var data []byte + switch id { + case lzmaFilterID: + data = make([]byte, lzmaFilterLen) + data[0] = lzmaFilterID + if _, err = io.ReadFull(r, data[1:]); err != nil { + return nil, err + } + f = new(lzmaFilter) + default: + if id >= minReservedID { + return nil, errors.New( + "xz: reserved filter id in block stream header") + } + return nil, errors.New("xz: invalid filter id") + } + if err = f.UnmarshalBinary(data); err != nil { + return nil, err + } + return f, err +} + +// readFilters reads count filters. At this point in time only the count +// 1 is supported. +func readFilters(r io.Reader, count int) (filters []filter, err error) { + if count != 1 { + return nil, errors.New("xz: unsupported filter count") + } + f, err := readFilter(r) + if err != nil { + return nil, err + } + return []filter{f}, err +} + +// writeFilters writes the filters. +func writeFilters(w io.Writer, filters []filter) (n int, err error) { + for _, f := range filters { + p, err := f.MarshalBinary() + if err != nil { + return n, err + } + k, err := w.Write(p) + n += k + if err != nil { + return n, err + } + } + return n, nil +} + +/*** Index ***/ + +// record describes a block in the xz file index. +type record struct { + unpaddedSize int64 + uncompressedSize int64 +} + +// readRecord reads an index record. +func readRecord(r io.ByteReader) (rec record, n int, err error) { + u, k, err := readUvarint(r) + n += k + if err != nil { + return rec, n, err + } + rec.unpaddedSize = int64(u) + if rec.unpaddedSize < 0 { + return rec, n, errors.New("xz: unpadded size negative") + } + + u, k, err = readUvarint(r) + n += k + if err != nil { + return rec, n, err + } + rec.uncompressedSize = int64(u) + if rec.uncompressedSize < 0 { + return rec, n, errors.New("xz: uncompressed size negative") + } + + return rec, n, nil +} + +// MarshalBinary converts an index record in its binary encoding. +func (rec *record) MarshalBinary() (data []byte, err error) { + // maximum length of a uvarint is 10 + p := make([]byte, 20) + n := putUvarint(p, uint64(rec.unpaddedSize)) + n += putUvarint(p[n:], uint64(rec.uncompressedSize)) + return p[:n], nil +} + +// writeIndex writes the index, a sequence of records. +func writeIndex(w io.Writer, index []record) (n int64, err error) { + crc := crc32.NewIEEE() + mw := io.MultiWriter(w, crc) + + // index indicator + k, err := mw.Write([]byte{0}) + n += int64(k) + if err != nil { + return n, err + } + + // number of records + p := make([]byte, 10) + k = putUvarint(p, uint64(len(index))) + k, err = mw.Write(p[:k]) + n += int64(k) + if err != nil { + return n, err + } + + // list of records + for _, rec := range index { + p, err := rec.MarshalBinary() + if err != nil { + return n, err + } + k, err = mw.Write(p) + n += int64(k) + if err != nil { + return n, err + } + } + + // index padding + k, err = mw.Write(make([]byte, padLen(int64(n)))) + n += int64(k) + if err != nil { + return n, err + } + + // crc32 checksum + putUint32LE(p, crc.Sum32()) + k, err = w.Write(p[:4]) + n += int64(k) + + return n, err +} + +// readIndexBody reads the index from the reader. It assumes that the +// index indicator has already been read. +func readIndexBody(r io.Reader) (records []record, n int64, err error) { + crc := crc32.NewIEEE() + // index indicator + crc.Write([]byte{0}) + + br := lzma.ByteReader(io.TeeReader(r, crc)) + + // number of records + u, k, err := readUvarint(br) + n += int64(k) + if err != nil { + return nil, n, err + } + recLen := int(u) + if recLen < 0 || uint64(recLen) != u { + return nil, n, errors.New("xz: record number overflow") + } + + // list of records + records = make([]record, recLen) + for i := range records { + records[i], k, err = readRecord(br) + n += int64(k) + if err != nil { + return nil, n, err + } + } + + p := make([]byte, padLen(int64(n+1)), 4) + k, err = io.ReadFull(br.(io.Reader), p) + n += int64(k) + if err != nil { + return nil, n, err + } + if !allZeros(p) { + return nil, n, errors.New("xz: non-zero byte in index padding") + } + + // crc32 + s := crc.Sum32() + p = p[:4] + k, err = io.ReadFull(br.(io.Reader), p) + n += int64(k) + if err != nil { + return records, n, err + } + if uint32LE(p) != s { + return nil, n, errors.New("xz: wrong checksum for index") + } + + return records, n, nil +} |