summary refs log tree commit diff
path: root/vendor/github.com/vbatts/tar-split
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/vbatts/tar-split')
-rw-r--r-- vendor/github.com/vbatts/tar-split/README.md 3
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/common.go 689
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/format.go 303
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/reader.go 1377
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/stat_actime1.go (renamed from vendor/github.com/vbatts/tar-split/archive/tar/stat_atim.go) 0
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/stat_actime2.go (renamed from vendor/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go) 0
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go 72
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/strconv.go 326
-rw-r--r-- vendor/github.com/vbatts/tar-split/archive/tar/writer.go 871
9 files changed, 2406 insertions, 1235 deletions
diff --git a/vendor/github.com/vbatts/tar-split/README.md b/vendor/github.com/vbatts/tar-split/README.md
index 03e3ec430..fe997f69b 100644
--- a/vendor/github.com/vbatts/tar-split/README.md
+++ b/vendor/github.com/vbatts/tar-split/README.md
@@ -67,7 +67,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre
## Std Version
-The version of golang stdlib `archive/tar` is from go1.6
+The version of golang stdlib `archive/tar` is from go1.11
It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream.
@@ -135,4 +135,3 @@ bytes-per-file rate for the storage implications.
## License
See [LICENSE](LICENSE)
-
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/common.go b/vendor/github.com/vbatts/tar-split/archive/tar/common.go
index 36f4e2398..dee9e47e4 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/common.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/common.go
@@ -3,70 +3,528 @@
// license that can be found in the LICENSE file.
// Package tar implements access to tar archives.
-// It aims to cover most of the variations, including those produced
-// by GNU and BSD tars.
//
-// References:
-// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
-// http://www.gnu.org/software/tar/manual/html_node/Standard.html
-// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
+// Tape archives (tar) are a file format for storing a sequence of files that
+// can be read and written in a streaming manner.
+// This package aims to cover most variations of the format,
+// including those produced by GNU and BSD tar tools.
package tar
import (
- "bytes"
"errors"
"fmt"
+ "math"
"os"
"path"
+ "reflect"
+ "strconv"
+ "strings"
"time"
)
+// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
+// architectures. If a large value is encountered when decoding, the result
+// stored in Header will be the truncated version.
+
+var (
+ ErrHeader = errors.New("archive/tar: invalid tar header")
+ ErrWriteTooLong = errors.New("archive/tar: write too long")
+ ErrFieldTooLong = errors.New("archive/tar: header field too long")
+ ErrWriteAfterClose = errors.New("archive/tar: write after close")
+ errMissData = errors.New("archive/tar: sparse file references non-existent data")
+ errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data")
+ errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole")
+)
+
+type headerError []string
+
+func (he headerError) Error() string {
+ const prefix = "archive/tar: cannot encode header"
+ var ss []string
+ for _, s := range he {
+ if s != "" {
+ ss = append(ss, s)
+ }
+ }
+ if len(ss) == 0 {
+ return prefix
+ }
+ return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and "))
+}
+
+// Type flags for Header.Typeflag.
const (
- blockSize = 512
-
- // Types
- TypeReg = '0' // regular file
- TypeRegA = '\x00' // regular file
- TypeLink = '1' // hard link
- TypeSymlink = '2' // symbolic link
- TypeChar = '3' // character device node
- TypeBlock = '4' // block device node
- TypeDir = '5' // directory
- TypeFifo = '6' // fifo node
- TypeCont = '7' // reserved
- TypeXHeader = 'x' // extended header
- TypeXGlobalHeader = 'g' // global extended header
- TypeGNULongName = 'L' // Next file has a long name
- TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
- TypeGNUSparse = 'S' // sparse file
+ // Type '0' indicates a regular file.
+ TypeReg = '0'
+ TypeRegA = '\x00' // Deprecated: Use TypeReg instead.
+
+ // Type '1' to '6' are header-only flags and may not have a data body.
+ TypeLink = '1' // Hard link
+ TypeSymlink = '2' // Symbolic link
+ TypeChar = '3' // Character device node
+ TypeBlock = '4' // Block device node
+ TypeDir = '5' // Directory
+ TypeFifo = '6' // FIFO node
+
+ // Type '7' is reserved.
+ TypeCont = '7'
+
+ // Type 'x' is used by the PAX format to store key-value records that
+ // are only relevant to the next file.
+ // This package transparently handles these types.
+ TypeXHeader = 'x'
+
+ // Type 'g' is used by the PAX format to store key-value records that
+ // are relevant to all subsequent files.
+ // This package only supports parsing and composing such headers,
+ // but does not currently support persisting the global state across files.
+ TypeXGlobalHeader = 'g'
+
+ // Type 'S' indicates a sparse file in the GNU format.
+ TypeGNUSparse = 'S'
+
+ // Types 'L' and 'K' are used by the GNU format for a meta file
+ // used to store the path or link name for the next file.
+ // This package transparently handles these types.
+ TypeGNULongName = 'L'
+ TypeGNULongLink = 'K'
)
+// Keywords for PAX extended header records.
+const (
+ paxNone = "" // Indicates that no PAX key is suitable
+ paxPath = "path"
+ paxLinkpath = "linkpath"
+ paxSize = "size"
+ paxUid = "uid"
+ paxGid = "gid"
+ paxUname = "uname"
+ paxGname = "gname"
+ paxMtime = "mtime"
+ paxAtime = "atime"
+ paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid
+ paxCharset = "charset" // Currently unused
+ paxComment = "comment" // Currently unused
+
+ paxSchilyXattr = "SCHILY.xattr."
+
+ // Keywords for GNU sparse files in a PAX extended header.
+ paxGNUSparse = "GNU.sparse."
+ paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
+ paxGNUSparseOffset = "GNU.sparse.offset"
+ paxGNUSparseNumBytes = "GNU.sparse.numbytes"
+ paxGNUSparseMap = "GNU.sparse.map"
+ paxGNUSparseName = "GNU.sparse.name"
+ paxGNUSparseMajor = "GNU.sparse.major"
+ paxGNUSparseMinor = "GNU.sparse.minor"
+ paxGNUSparseSize = "GNU.sparse.size"
+ paxGNUSparseRealSize = "GNU.sparse.realsize"
+)
+
+// basicKeys is a set of the PAX keys for which we have built-in support.
+// This does not contain "charset" or "comment", which are both PAX-specific,
+// so adding them as first-class features of Header is unlikely.
+// Users can use the PAXRecords field to set it themselves.
+var basicKeys = map[string]bool{
+ paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true,
+ paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true,
+}
+
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
+//
+// For forward compatibility, users that retrieve a Header from Reader.Next,
+// mutate it in some ways, and then pass it back to Writer.WriteHeader
+// should do so by creating a new Header and copying the fields
+// that they are interested in preserving.
type Header struct {
- Name string // name of header file entry
- Mode int64 // permission and mode bits
- Uid int // user id of owner
- Gid int // group id of owner
- Size int64 // length in bytes
- ModTime time.Time // modified time
- Typeflag byte // type of header entry
- Linkname string // target name of link
- Uname string // user name of owner
- Gname string // group name of owner
- Devmajor int64 // major number of character or block device
- Devminor int64 // minor number of character or block device
- AccessTime time.Time // access time
- ChangeTime time.Time // status change time
- Xattrs map[string]string
+ // Typeflag is the type of header entry.
+ // The zero value is automatically promoted to either TypeReg or TypeDir
+ // depending on the presence of a trailing slash in Name.
+ Typeflag byte
+
+ Name string // Name of file entry
+ Linkname string // Target name of link (valid for TypeLink or TypeSymlink)
+
+ Size int64 // Logical file size in bytes
+ Mode int64 // Permission and mode bits
+ Uid int // User ID of owner
+ Gid int // Group ID of owner
+ Uname string // User name of owner
+ Gname string // Group name of owner
+
+ // If the Format is unspecified, then Writer.WriteHeader rounds ModTime
+ // to the nearest second and ignores the AccessTime and ChangeTime fields.
+ //
+ // To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
+ // To use sub-second resolution, specify the Format as PAX.
+ ModTime time.Time // Modification time
+ AccessTime time.Time // Access time (requires either PAX or GNU support)
+ ChangeTime time.Time // Change time (requires either PAX or GNU support)
+
+ Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
+ Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)
+
+ // Xattrs stores extended attributes as PAX records under the
+ // "SCHILY.xattr." namespace.
+ //
+ // The following are semantically equivalent:
+ // h.Xattrs[key] = value
+ // h.PAXRecords["SCHILY.xattr."+key] = value
+ //
+ // When Writer.WriteHeader is called, the contents of Xattrs will take
+ // precedence over those in PAXRecords.
+ //
+ // Deprecated: Use PAXRecords instead.
+ Xattrs map[string]string
+
+ // PAXRecords is a map of PAX extended header records.
+ //
+ // User-defined records should have keys of the following form:
+ // VENDOR.keyword
+ // Where VENDOR is some namespace in all uppercase, and keyword may
+ // not contain the '=' character (e.g., "GOLANG.pkg.version").
+ // The key and value should be non-empty UTF-8 strings.
+ //
+ // When Writer.WriteHeader is called, PAX records derived from the
+ // other fields in Header take precedence over PAXRecords.
+ PAXRecords map[string]string
+
+ // Format specifies the format of the tar header.
+ //
+ // This is set by Reader.Next as a best-effort guess at the format.
+ // Since the Reader liberally reads some non-compliant files,
+ // it is possible for this to be FormatUnknown.
+ //
+ // If the format is unspecified when Writer.WriteHeader is called,
+ // then it uses the first format (in the order of USTAR, PAX, GNU)
+ // capable of encoding this Header (see Format).
+ Format Format
}
-// File name constants from the tar spec.
-const (
- fileNameSize = 100 // Maximum number of bytes in a standard tar name.
- fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
+// sparseEntry represents a Length-sized fragment at Offset in the file.
+type sparseEntry struct{ Offset, Length int64 }
+
+func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
+
+// A sparse file can be represented as either a sparseDatas or a sparseHoles.
+// As long as the total size is known, they are equivalent and one can be
+// converted to the other form and back. The various tar formats with sparse
+// file support represent sparse files in the sparseDatas form. That is, they
+// specify the fragments in the file that have data, and treat everything else as
+// having zero bytes. As such, the encoding and decoding logic in this package
+// deals with sparseDatas.
+//
+// However, the external API uses sparseHoles instead of sparseDatas because the
+// zero value of sparseHoles logically represents a normal file (i.e., there are
+// no holes in it). On the other hand, the zero value of sparseDatas implies
+// that the file has no data in it, which is rather odd.
+//
+// As an example, if the underlying raw file contains the 8-byte data:
+// var compactFile = "abcdefgh"
+//
+// And the sparse map has the following entries:
+// var spd sparseDatas = []sparseEntry{
+// {Offset: 2, Length: 5}, // Data fragment for 2..6
+// {Offset: 18, Length: 3}, // Data fragment for 18..20
+// }
+// var sph sparseHoles = []sparseEntry{
+// {Offset: 0, Length: 2}, // Hole fragment for 0..1
+// {Offset: 7, Length: 11}, // Hole fragment for 7..17
+// {Offset: 21, Length: 4}, // Hole fragment for 21..24
+// }
+//
+// Then the content of the resulting sparse file with a Header.Size of 25 is:
+// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
+type (
+ sparseDatas []sparseEntry
+ sparseHoles []sparseEntry
)
+// validateSparseEntries reports whether sp is a valid sparse map.
+// It does not matter whether sp represents data fragments or hole fragments.
+func validateSparseEntries(sp []sparseEntry, size int64) bool {
+ // Validate all sparse entries. These are the same checks as performed by
+ // the BSD tar utility.
+ if size < 0 {
+ return false
+ }
+ var pre sparseEntry
+ for _, cur := range sp {
+ switch {
+ case cur.Offset < 0 || cur.Length < 0:
+ return false // Negative values are never okay
+ case cur.Offset > math.MaxInt64-cur.Length:
+ return false // Integer overflow with large length
+ case cur.endOffset() > size:
+ return false // Region extends beyond the actual size
+ case pre.endOffset() > cur.Offset:
+ return false // Regions cannot overlap and must be in order
+ }
+ pre = cur
+ }
+ return true
+}
+
+// alignSparseEntries mutates src and returns dst where each fragment's
+// starting offset is aligned up to the nearest block edge, and each
+// ending offset is aligned down to the nearest block edge.
+//
+// Even though the Go tar Reader and the BSD tar utility can handle entries
+// with arbitrary offsets and lengths, the GNU tar utility can only handle
+// offsets and lengths that are multiples of blockSize.
+func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry {
+ dst := src[:0]
+ for _, s := range src {
+ pos, end := s.Offset, s.endOffset()
+ pos += blockPadding(+pos) // Round-up to nearest blockSize
+ if end != size {
+ end -= blockPadding(-end) // Round-down to nearest blockSize
+ }
+ if pos < end {
+ dst = append(dst, sparseEntry{Offset: pos, Length: end - pos})
+ }
+ }
+ return dst
+}
+
+// invertSparseEntries converts a sparse map from one form to the other.
+// If the input is sparseHoles, then it will output sparseDatas and vice-versa.
+// The input must have been already validated.
+//
+// This function mutates src and returns a normalized map where:
+// * adjacent fragments are coalesced together
+// * only the last fragment may be empty
+// * the endOffset of the last fragment is the total size
+func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
+ dst := src[:0]
+ var pre sparseEntry
+ for _, cur := range src {
+ if cur.Length == 0 {
+ continue // Skip empty fragments
+ }
+ pre.Length = cur.Offset - pre.Offset
+ if pre.Length > 0 {
+ dst = append(dst, pre) // Only add non-empty fragments
+ }
+ pre.Offset = cur.endOffset()
+ }
+ pre.Length = size - pre.Offset // Possibly the only empty fragment
+ return append(dst, pre)
+}
+
+// fileState tracks the number of logical (includes sparse holes) and physical
+// (actual in tar archive) bytes remaining for the current file.
+//
+// Invariant: LogicalRemaining >= PhysicalRemaining
+type fileState interface {
+ LogicalRemaining() int64
+ PhysicalRemaining() int64
+}
+
+// allowedFormats determines which formats can be used.
+// The value returned is the logical OR of multiple possible formats.
+// If the value is FormatUnknown, then the input Header cannot be encoded
+// and an error is returned explaining why.
+//
+// As a by-product of checking the fields, this function returns paxHdrs, which
+// contain all fields that could not be directly encoded.
+// A value receiver ensures that this method does not mutate the source Header.
+func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) {
+ format = FormatUSTAR | FormatPAX | FormatGNU
+ paxHdrs = make(map[string]string)
+
+ var whyNoUSTAR, whyNoPAX, whyNoGNU string
+ var preferPAX bool // Prefer PAX over USTAR
+ verifyString := func(s string, size int, name, paxKey string) {
+ // NUL-terminator is optional for path and linkpath.
+ // Technically, it is required for uname and gname,
+ // but neither GNU nor BSD tar checks for it.
+ tooLong := len(s) > size
+ allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath
+ if hasNUL(s) || (tooLong && !allowLongGNU) {
+ whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s)
+ format.mustNotBe(FormatGNU)
+ }
+ if !isASCII(s) || tooLong {
+ canSplitUSTAR := paxKey == paxPath
+ if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok {
+ whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s)
+ format.mustNotBe(FormatUSTAR)
+ }
+ if paxKey == paxNone {
+ whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s)
+ format.mustNotBe(FormatPAX)
+ } else {
+ paxHdrs[paxKey] = s
+ }
+ }
+ if v, ok := h.PAXRecords[paxKey]; ok && v == s {
+ paxHdrs[paxKey] = v
+ }
+ }
+ verifyNumeric := func(n int64, size int, name, paxKey string) {
+ if !fitsInBase256(size, n) {
+ whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n)
+ format.mustNotBe(FormatGNU)
+ }
+ if !fitsInOctal(size, n) {
+ whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n)
+ format.mustNotBe(FormatUSTAR)
+ if paxKey == paxNone {
+ whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n)
+ format.mustNotBe(FormatPAX)
+ } else {
+ paxHdrs[paxKey] = strconv.FormatInt(n, 10)
+ }
+ }
+ if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) {
+ paxHdrs[paxKey] = v
+ }
+ }
+ verifyTime := func(ts time.Time, size int, name, paxKey string) {
+ if ts.IsZero() {
+ return // Always okay
+ }
+ if !fitsInBase256(size, ts.Unix()) {
+ whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts)
+ format.mustNotBe(FormatGNU)
+ }
+ isMtime := paxKey == paxMtime
+ fitsOctal := fitsInOctal(size, ts.Unix())
+ if (isMtime && !fitsOctal) || !isMtime {
+ whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts)
+ format.mustNotBe(FormatUSTAR)
+ }
+ needsNano := ts.Nanosecond() != 0
+ if !isMtime || !fitsOctal || needsNano {
+ preferPAX = true // USTAR may truncate sub-second measurements
+ if paxKey == paxNone {
+ whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts)
+ format.mustNotBe(FormatPAX)
+ } else {
+ paxHdrs[paxKey] = formatPAXTime(ts)
+ }
+ }
+ if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) {
+ paxHdrs[paxKey] = v
+ }
+ }
+
+ // Check basic fields.
+ var blk block
+ v7 := blk.V7()
+ ustar := blk.USTAR()
+ gnu := blk.GNU()
+ verifyString(h.Name, len(v7.Name()), "Name", paxPath)
+ verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath)
+ verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname)
+ verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname)
+ verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone)
+ verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid)
+ verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid)
+ verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize)
+ verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone)
+ verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone)
+ verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime)
+ verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime)
+ verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime)
+
+ // Check for header-only types.
+ var whyOnlyPAX, whyOnlyGNU string
+ switch h.Typeflag {
+ case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse:
+ // Exclude TypeLink and TypeSymlink, since they may reference directories.
+ if strings.HasSuffix(h.Name, "/") {
+ return FormatUnknown, nil, headerError{"filename may not have trailing slash"}
+ }
+ case TypeXHeader, TypeGNULongName, TypeGNULongLink:
+ return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
+ case TypeXGlobalHeader:
+ h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}
+ if !reflect.DeepEqual(h, h2) {
+ return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"}
+ }
+ whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
+ format.mayOnlyBe(FormatPAX)
+ }
+ if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
+ return FormatUnknown, nil, headerError{"negative size on header-only type"}
+ }
+
+ // Check PAX records.
+ if len(h.Xattrs) > 0 {
+ for k, v := range h.Xattrs {
+ paxHdrs[paxSchilyXattr+k] = v
+ }
+ whyOnlyPAX = "only PAX supports Xattrs"
+ format.mayOnlyBe(FormatPAX)
+ }
+ if len(h.PAXRecords) > 0 {
+ for k, v := range h.PAXRecords {
+ switch _, exists := paxHdrs[k]; {
+ case exists:
+ continue // Do not overwrite existing records
+ case h.Typeflag == TypeXGlobalHeader:
+ paxHdrs[k] = v // Copy all records
+ case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
+ paxHdrs[k] = v // Ignore local records that may conflict
+ }
+ }
+ whyOnlyPAX = "only PAX supports PAXRecords"
+ format.mayOnlyBe(FormatPAX)
+ }
+ for k, v := range paxHdrs {
+ if !validPAXRecord(k, v) {
+ return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
+ }
+ }
+
+ // TODO(dsnet): Re-enable this when adding sparse support.
+ // See https://golang.org/issue/22735
+ /*
+ // Check sparse files.
+ if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
+ if isHeaderOnlyType(h.Typeflag) {
+ return FormatUnknown, nil, headerError{"header-only type cannot be sparse"}
+ }
+ if !validateSparseEntries(h.SparseHoles, h.Size) {
+ return FormatUnknown, nil, headerError{"invalid sparse holes"}
+ }
+ if h.Typeflag == TypeGNUSparse {
+ whyOnlyGNU = "only GNU supports TypeGNUSparse"
+ format.mayOnlyBe(FormatGNU)
+ } else {
+ whyNoGNU = "GNU supports sparse files only with TypeGNUSparse"
+ format.mustNotBe(FormatGNU)
+ }
+ whyNoUSTAR = "USTAR does not support sparse files"
+ format.mustNotBe(FormatUSTAR)
+ }
+ */
+
+ // Check desired format.
+ if wantFormat := h.Format; wantFormat != FormatUnknown {
+ if wantFormat.has(FormatPAX) && !preferPAX {
+ wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too
+ }
+ format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted
+ }
+ if format == FormatUnknown {
+ switch h.Format {
+ case FormatUSTAR:
+ err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU}
+ case FormatPAX:
+ err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU}
+ case FormatGNU:
+ err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX}
+ default:
+ err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU}
+ }
+ }
+ return format, paxHdrs, err
+}
+
// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
@@ -97,63 +555,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) {
// Set setuid, setgid and sticky bits.
if fi.h.Mode&c_ISUID != 0 {
- // setuid
mode |= os.ModeSetuid
}
if fi.h.Mode&c_ISGID != 0 {
- // setgid
mode |= os.ModeSetgid
}
if fi.h.Mode&c_ISVTX != 0 {
- // sticky
mode |= os.ModeSticky
}
- // Set file mode bits.
- // clear perm, setuid, setgid and sticky bits.
- m := os.FileMode(fi.h.Mode) &^ 07777
- if m == c_ISDIR {
- // directory
+ // Set file mode bits; clear perm, setuid, setgid, and sticky bits.
+ switch m := os.FileMode(fi.h.Mode) &^ 07777; m {
+ case c_ISDIR:
mode |= os.ModeDir
- }
- if m == c_ISFIFO {
- // named pipe (FIFO)
+ case c_ISFIFO:
mode |= os.ModeNamedPipe
- }
- if m == c_ISLNK {
- // symbolic link
+ case c_ISLNK:
mode |= os.ModeSymlink
- }
- if m == c_ISBLK {
- // device file
+ case c_ISBLK:
mode |= os.ModeDevice
- }
- if m == c_ISCHR {
- // Unix character device
+ case c_ISCHR:
mode |= os.ModeDevice
mode |= os.ModeCharDevice
- }
- if m == c_ISSOCK {
- // Unix domain socket
+ case c_ISSOCK:
mode |= os.ModeSocket
}
switch fi.h.Typeflag {
case TypeSymlink:
- // symbolic link
mode |= os.ModeSymlink
case TypeChar:
- // character device node
mode |= os.ModeDevice
mode |= os.ModeCharDevice
case TypeBlock:
- // block device node
mode |= os.ModeDevice
case TypeDir:
- // directory
mode |= os.ModeDir
case TypeFifo:
- // fifo node
mode |= os.ModeNamedPipe
}
@@ -163,11 +601,15 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) {
// sysStat, if non-nil, populates h from system-dependent fields of fi.
var sysStat func(fi os.FileInfo, h *Header) error
-// Mode constants from the tar spec.
const (
- c_ISUID = 04000 // Set uid
- c_ISGID = 02000 // Set gid
- c_ISVTX = 01000 // Save text (sticky bit)
+ // Mode constants from the USTAR spec:
+ // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
+ c_ISUID = 04000 // Set uid
+ c_ISGID = 02000 // Set gid
+ c_ISVTX = 01000 // Save text (sticky bit)
+
+ // Common Unix mode constants; these are not defined in any common tar standard.
+ // Header.FileInfo understands these, but FileInfoHeader will never produce these.
c_ISDIR = 040000 // Directory
c_ISFIFO = 010000 // FIFO
c_ISREG = 0100000 // Regular file
@@ -177,33 +619,16 @@ const (
c_ISSOCK = 0140000 // Socket
)
-// Keywords for the PAX Extended Header
-const (
- paxAtime = "atime"
- paxCharset = "charset"
- paxComment = "comment"
- paxCtime = "ctime" // please note that ctime is not a valid pax header.
- paxGid = "gid"
- paxGname = "gname"
- paxLinkpath = "linkpath"
- paxMtime = "mtime"
- paxPath = "path"
- paxSize = "size"
- paxUid = "uid"
- paxUname = "uname"
- paxXattr = "SCHILY.xattr."
- paxNone = ""
-)
-
// FileInfoHeader creates a partially-populated Header from fi.
// If fi describes a symlink, FileInfoHeader records link as the link target.
// If fi describes a directory, a slash is appended to the name.
-// Because os.FileInfo's Name method returns only the base name of
-// the file it describes, it may be necessary to modify the Name field
-// of the returned header to provide the full path name of the file.
+//
+// Since os.FileInfo's Name method only returns the base name of
+// the file it describes, it may be necessary to modify Header.Name
+// to provide the full path name of the file.
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
if fi == nil {
- return nil, errors.New("tar: FileInfo is nil")
+ return nil, errors.New("archive/tar: FileInfo is nil")
}
fm := fi.Mode()
h := &Header{
@@ -213,30 +638,24 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
}
switch {
case fm.IsRegular():
- h.Mode |= c_ISREG
h.Typeflag = TypeReg
h.Size = fi.Size()
case fi.IsDir():
h.Typeflag = TypeDir
- h.Mode |= c_ISDIR
h.Name += "/"
case fm&os.ModeSymlink != 0:
h.Typeflag = TypeSymlink
- h.Mode |= c_ISLNK
h.Linkname = link
case fm&os.ModeDevice != 0:
if fm&os.ModeCharDevice != 0 {
- h.Mode |= c_ISCHR
h.Typeflag = TypeChar
} else {
- h.Mode |= c_ISBLK
h.Typeflag = TypeBlock
}
case fm&os.ModeNamedPipe != 0:
h.Typeflag = TypeFifo
- h.Mode |= c_ISFIFO
case fm&os.ModeSocket != 0:
- h.Mode |= c_ISSOCK
+ return nil, fmt.Errorf("archive/tar: sockets not supported")
default:
return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
}
@@ -272,6 +691,12 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
h.Size = 0
h.Linkname = sys.Linkname
}
+ if sys.PAXRecords != nil {
+ h.PAXRecords = make(map[string]string)
+ for k, v := range sys.PAXRecords {
+ h.PAXRecords[k] = v
+ }
+ }
}
if sysStat != nil {
return h, sysStat(fi, h)
@@ -279,55 +704,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
return h, nil
}
-var zeroBlock = make([]byte, blockSize)
-
-// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
-// We compute and return both.
-func checksum(header []byte) (unsigned int64, signed int64) {
- for i := 0; i < len(header); i++ {
- if i == 148 {
- // The chksum field (header[148:156]) is special: it should be treated as space bytes.
- unsigned += ' ' * 8
- signed += ' ' * 8
- i += 7
- continue
- }
- unsigned += int64(header[i])
- signed += int64(int8(header[i]))
- }
- return
-}
-
-type slicer []byte
-
-func (sp *slicer) next(n int) (b []byte) {
- s := *sp
- b, *sp = s[0:n], s[n:]
- return
-}
-
-func isASCII(s string) bool {
- for _, c := range s {
- if c >= 0x80 {
- return false
- }
- }
- return true
-}
-
-func toASCII(s string) string {
- if isASCII(s) {
- return s
- }
- var buf bytes.Buffer
- for _, c := range s {
- if c < 0x80 {
- buf.WriteByte(byte(c))
- }
- }
- return buf.String()
-}
-
// isHeaderOnlyType checks if the given type flag is of the type that has no
// data section even if a size is specified.
func isHeaderOnlyType(flag byte) bool {
@@ -338,3 +714,10 @@ func isHeaderOnlyType(flag byte) bool {
return false
}
}
+
+func min(a, b int64) int64 {
+ if a < b {
+ return a
+ }
+ return b
+}
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/format.go b/vendor/github.com/vbatts/tar-split/archive/tar/format.go
new file mode 100644
index 000000000..1f89d0c59
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/format.go
@@ -0,0 +1,303 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tar
+
+import "strings"
+
+// Format represents the tar archive format.
+//
+// The original tar format was introduced in Unix V7.
+// Since then, there have been multiple competing formats attempting to
+// standardize or extend the V7 format to overcome its limitations.
+// The most common formats are the USTAR, PAX, and GNU formats,
+// each with their own advantages and limitations.
+//
+// The following table captures the capabilities of each format:
+//
+// | USTAR | PAX | GNU
+// ------------------+--------+-----------+----------
+// Name | 256B | unlimited | unlimited
+// Linkname | 100B | unlimited | unlimited
+// Size | uint33 | unlimited | uint89
+// Mode | uint21 | uint21 | uint57
+// Uid/Gid | uint21 | unlimited | uint57
+// Uname/Gname | 32B | unlimited | 32B
+// ModTime | uint33 | unlimited | int89
+// AccessTime | n/a | unlimited | int89
+// ChangeTime | n/a | unlimited | int89
+// Devmajor/Devminor | uint21 | uint21 | uint57
+// ------------------+--------+-----------+----------
+// string encoding | ASCII | UTF-8 | binary
+// sub-second times | no | yes | no
+// sparse files | no | yes | yes
+//
+// The table's upper portion shows the Header fields, where each format reports
+// the maximum number of bytes allowed for each string field and
+// the integer type used to store each numeric field
+// (where timestamps are stored as the number of seconds since the Unix epoch).
+//
+// The table's lower portion shows specialized features of each format,
+// such as supported string encodings, support for sub-second timestamps,
+// or support for sparse files.
+//
+// The Writer currently provides no support for sparse files.
+type Format int
+
+// Constants to identify various tar formats.
+const (
+ // Deliberately hide the meaning of constants from public API.
+ _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...
+
+ // FormatUnknown indicates that the format is unknown.
+ FormatUnknown
+
+ // The format of the original Unix V7 tar tool prior to standardization.
+ formatV7
+
+ // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
+ //
+ // While this format is compatible with most tar readers,
+ // the format has several limitations making it unsuitable for some usages.
+ // Most notably, it cannot support sparse files, files larger than 8GiB,
+ // filenames longer than 256 characters, and non-ASCII filenames.
+ //
+ // Reference:
+ // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
+ FormatUSTAR
+
+ // FormatPAX represents the PAX header format defined in POSIX.1-2001.
+ //
+ // PAX extends USTAR by writing a special file with Typeflag TypeXHeader
+ // preceding the original header. This file contains a set of key-value
+ // records, which are used to overcome USTAR's shortcomings, in addition to
+ // providing the ability to have sub-second resolution for timestamps.
+ //
+ // Some newer formats add their own extensions to PAX by defining their
+ // own keys and assigning certain semantic meaning to the associated values.
+ // For example, sparse file support in PAX is implemented using keys
+ // defined by the GNU manual (e.g., "GNU.sparse.map").
+ //
+ // Reference:
+ // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
+ FormatPAX
+
+ // FormatGNU represents the GNU header format.
+ //
+ // The GNU header format is older than the USTAR and PAX standards and
+ // is not compatible with them. The GNU format supports
+ // arbitrary file sizes, filenames of arbitrary encoding and length,
+ // sparse files, and other features.
+ //
+ // It is recommended that PAX be chosen over GNU unless the target
+ // application can only parse GNU formatted archives.
+ //
+ // Reference:
+ // https://www.gnu.org/software/tar/manual/html_node/Standard.html
+ FormatGNU
+
+ // Schily's tar format, which is incompatible with USTAR.
+ // This does not cover STAR extensions to the PAX format; these fall under
+ // the PAX format.
+ formatSTAR
+
+ formatMax
+)
+
+func (f Format) has(f2 Format) bool { return f&f2 != 0 }
+func (f *Format) mayBe(f2 Format) { *f |= f2 }
+func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
+func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
+
+var formatNames = map[Format]string{
+ formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
+}
+
+func (f Format) String() string {
+ var ss []string
+ for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
+ if f.has(f2) {
+ ss = append(ss, formatNames[f2])
+ }
+ }
+ switch len(ss) {
+ case 0:
+ return "<unknown>"
+ case 1:
+ return ss[0]
+ default:
+ return "(" + strings.Join(ss, " | ") + ")"
+ }
+}
+
+// Magics used to identify various formats.
+const (
+ magicGNU, versionGNU = "ustar ", " \x00"
+ magicUSTAR, versionUSTAR = "ustar\x00", "00"
+ trailerSTAR = "tar\x00"
+)
+
+// Size constants from various tar specifications.
+const (
+ blockSize = 512 // Size of each block in a tar stream
+ nameSize = 100 // Max length of the name field in USTAR format
+ prefixSize = 155 // Max length of the prefix field in USTAR format
+)
+
+// blockPadding computes the number of bytes needed to pad offset up to the
+// nearest block edge where 0 <= n < blockSize.
+func blockPadding(offset int64) (n int64) {
+ return -offset & (blockSize - 1)
+}
+
+var zeroBlock block
+
+type block [blockSize]byte
+
+// Convert block to any number of formats.
+func (b *block) V7() *headerV7 { return (*headerV7)(b) }
+func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
+func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
+func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
+func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
+
+// GetFormat checks that the block is a valid tar header based on the checksum.
+// It then attempts to guess the specific format based on magic values.
+// If the checksum fails, then FormatUnknown is returned.
+func (b *block) GetFormat() Format {
+ // Verify checksum.
+ var p parser
+ value := p.parseOctal(b.V7().Chksum())
+ chksum1, chksum2 := b.ComputeChecksum()
+ if p.err != nil || (value != chksum1 && value != chksum2) {
+ return FormatUnknown
+ }
+
+ // Guess the magic values.
+ magic := string(b.USTAR().Magic())
+ version := string(b.USTAR().Version())
+ trailer := string(b.STAR().Trailer())
+ switch {
+ case magic == magicUSTAR && trailer == trailerSTAR:
+ return formatSTAR
+ case magic == magicUSTAR:
+ return FormatUSTAR | FormatPAX
+ case magic == magicGNU && version == versionGNU:
+ return FormatGNU
+ default:
+ return formatV7
+ }
+}
+
+// SetFormat writes the magic values necessary for specified format
+// and then updates the checksum accordingly.
+func (b *block) SetFormat(format Format) {
+ // Set the magic values.
+ switch {
+ case format.has(formatV7):
+ // Do nothing.
+ case format.has(FormatGNU):
+ copy(b.GNU().Magic(), magicGNU)
+ copy(b.GNU().Version(), versionGNU)
+ case format.has(formatSTAR):
+ copy(b.STAR().Magic(), magicUSTAR)
+ copy(b.STAR().Version(), versionUSTAR)
+ copy(b.STAR().Trailer(), trailerSTAR)
+ case format.has(FormatUSTAR | FormatPAX):
+ copy(b.USTAR().Magic(), magicUSTAR)
+ copy(b.USTAR().Version(), versionUSTAR)
+ default:
+ panic("invalid format")
+ }
+
+ // Update checksum.
+ // This field is special in that it is terminated by a NUL then space.
+ var f formatter
+ field := b.V7().Chksum()
+ chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
+ f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
+ field[7] = ' '
+}
+
+// ComputeChecksum computes the checksum for the header block.
+// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
+// signed byte values.
+// We compute and return both.
+func (b *block) ComputeChecksum() (unsigned, signed int64) {
+ for i, c := range b {
+ if 148 <= i && i < 156 {
+ c = ' ' // Treat the checksum field itself as all spaces.
+ }
+ unsigned += int64(c)
+ signed += int64(int8(c))
+ }
+ return unsigned, signed
+}
+
+// Reset clears the block with all zeros.
+func (b *block) Reset() {
+ *b = block{}
+}
+
+type headerV7 [blockSize]byte
+
+func (h *headerV7) Name() []byte { return h[000:][:100] }
+func (h *headerV7) Mode() []byte { return h[100:][:8] }
+func (h *headerV7) UID() []byte { return h[108:][:8] }
+func (h *headerV7) GID() []byte { return h[116:][:8] }
+func (h *headerV7) Size() []byte { return h[124:][:12] }
+func (h *headerV7) ModTime() []byte { return h[136:][:12] }
+func (h *headerV7) Chksum() []byte { return h[148:][:8] }
+func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
+func (h *headerV7) LinkName() []byte { return h[157:][:100] }
+
+type headerGNU [blockSize]byte
+
+func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
+func (h *headerGNU) Magic() []byte { return h[257:][:6] }
+func (h *headerGNU) Version() []byte { return h[263:][:2] }
+func (h *headerGNU) UserName() []byte { return h[265:][:32] }
+func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
+func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
+func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
+func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
+func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
+func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
+func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
+
+type headerSTAR [blockSize]byte
+
+func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
+func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
+func (h *headerSTAR) Version() []byte { return h[263:][:2] }
+func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
+func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
+func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
+func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
+func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
+func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
+func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
+func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
+
+type headerUSTAR [blockSize]byte
+
+func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
+func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
+func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
+func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
+func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
+func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
+func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
+func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
+
+type sparseArray []byte
+
+func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
+func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
+func (s sparseArray) MaxEntries() int { return len(s) / 24 }
+
+type sparseElem []byte
+
+func (s sparseElem) Offset() []byte { return s[00:][:12] }
+func (s sparseElem) Length() []byte { return s[12:][:12] }
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/reader.go b/vendor/github.com/vbatts/tar-split/archive/tar/reader.go
index adf32122e..ea64a3820 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/reader.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/reader.go
@@ -4,44 +4,38 @@
package tar
-// TODO(dsymonds):
-// - pax extensions
-
import (
"bytes"
- "errors"
"io"
"io/ioutil"
- "math"
- "os"
"strconv"
"strings"
"time"
)
-var (
- ErrHeader = errors.New("archive/tar: invalid tar header")
-)
-
-const maxNanoSecondIntSize = 9
-
-// A Reader provides sequential access to the contents of a tar archive.
-// A tar archive consists of a sequence of files.
-// The Next method advances to the next file in the archive (including the first),
-// and then it can be treated as an io.Reader to access the file's data.
+// Reader provides sequential access to the contents of a tar archive.
+// Reader.Next advances to the next file in the archive (including the first),
+// and then Reader can be treated as an io.Reader to access the file's data.
type Reader struct {
- r io.Reader
- err error
- pad int64 // amount of padding (ignored) after current file entry
- curr numBytesReader // reader for current file entry
- hdrBuff [blockSize]byte // buffer to use in readHeader
+ r io.Reader
+ pad int64 // Amount of padding (ignored) after current file entry
+ curr fileReader // Reader for current file entry
+ blk block // Buffer to use as temporary local storage
+
+ // err is a persistent error.
+ // Only the exported methods of Reader are responsible for
+ // ensuring that this error is sticky.
+ err error
RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
rawBytes *bytes.Buffer // last raw bits
}
-type parser struct {
- err error // Last error seen
+type fileReader interface {
+ io.Reader
+ fileState
+
+ WriteTo(io.Writer) (int64, error)
}
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
@@ -57,87 +51,35 @@ func (tr *Reader) RawBytes() []byte {
if tr.rawBytes == nil {
tr.rawBytes = bytes.NewBuffer(nil)
}
- // if we've read them, then flush them.
- defer tr.rawBytes.Reset()
- return tr.rawBytes.Bytes()
-}
+ defer tr.rawBytes.Reset() // if we've read them, then flush them.
-// A numBytesReader is an io.Reader with a numBytes method, returning the number
-// of bytes remaining in the underlying encoded data.
-type numBytesReader interface {
- io.Reader
- numBytes() int64
-}
-
-// A regFileReader is a numBytesReader for reading file data from a tar archive.
-type regFileReader struct {
- r io.Reader // underlying reader
- nb int64 // number of unread bytes for current file entry
-}
-
-// A sparseFileReader is a numBytesReader for reading sparse file data from a
-// tar archive.
-type sparseFileReader struct {
- rfr numBytesReader // Reads the sparse-encoded file data
- sp []sparseEntry // The sparse map for the file
- pos int64 // Keeps track of file position
- total int64 // Total size of the file
-}
+ return tr.rawBytes.Bytes()
-// A sparseEntry holds a single entry in a sparse file's sparse map.
-//
-// Sparse files are represented using a series of sparseEntrys.
-// Despite the name, a sparseEntry represents an actual data fragment that
-// references data found in the underlying archive stream. All regions not
-// covered by a sparseEntry are logically filled with zeros.
-//
-// For example, if the underlying raw file contains the 10-byte data:
-// var compactData = "abcdefgh"
-//
-// And the sparse map has the following entries:
-// var sp = []sparseEntry{
-// {offset: 2, numBytes: 5} // Data fragment for [2..7]
-// {offset: 18, numBytes: 3} // Data fragment for [18..21]
-// }
-//
-// Then the content of the resulting sparse file with a "real" size of 25 is:
-// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
-type sparseEntry struct {
- offset int64 // Starting position of the fragment
- numBytes int64 // Length of the fragment
}
-// Keywords for GNU sparse files in a PAX extended header
-const (
- paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
- paxGNUSparseOffset = "GNU.sparse.offset"
- paxGNUSparseNumBytes = "GNU.sparse.numbytes"
- paxGNUSparseMap = "GNU.sparse.map"
- paxGNUSparseName = "GNU.sparse.name"
- paxGNUSparseMajor = "GNU.sparse.major"
- paxGNUSparseMinor = "GNU.sparse.minor"
- paxGNUSparseSize = "GNU.sparse.size"
- paxGNUSparseRealSize = "GNU.sparse.realsize"
-)
-
-// Keywords for old GNU sparse headers
-const (
- oldGNUSparseMainHeaderOffset = 386
- oldGNUSparseMainHeaderIsExtendedOffset = 482
- oldGNUSparseMainHeaderNumEntries = 4
- oldGNUSparseExtendedHeaderIsExtendedOffset = 504
- oldGNUSparseExtendedHeaderNumEntries = 21
- oldGNUSparseOffsetSize = 12
- oldGNUSparseNumBytesSize = 12
-)
-
// NewReader creates a new Reader reading from r.
-func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
+func NewReader(r io.Reader) *Reader {
+ return &Reader{r: r, curr: &regFileReader{r, 0}}
+}
// Next advances to the next entry in the tar archive.
+// The Header.Size determines how many bytes can be read for the next file.
+// Any remaining data in the current file is automatically discarded.
//
// io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) {
+ if tr.err != nil {
+ return nil, tr.err
+ }
+ hdr, err := tr.next()
+ tr.err = err
+ return hdr, err
+}
+
+func (tr *Reader) next() (*Header, error) {
+ var paxHdrs map[string]string
+ var gnuLongName, gnuLongLink string
+
if tr.RawAccounting {
if tr.rawBytes == nil {
tr.rawBytes = bytes.NewBuffer(nil)
@@ -146,248 +88,245 @@ func (tr *Reader) Next() (*Header, error) {
}
}
- if tr.err != nil {
- return nil, tr.err
- }
-
- var hdr *Header
- var extHdrs map[string]string
-
// Externally, Next iterates through the tar archive as if it is a series of
// files. Internally, the tar format often uses fake "files" to add meta
// data that describes the next file. These meta data "files" should not
// normally be visible to the outside. As such, this loop iterates through
// one or more "header files" until it finds a "normal file".
-loop:
+ format := FormatUSTAR | FormatPAX | FormatGNU
for {
- tr.err = tr.skipUnread()
- if tr.err != nil {
- return nil, tr.err
+ // Discard the remainder of the file and any padding.
+ if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil {
+ return nil, err
+ }
+ n, err := tryReadFull(tr.r, tr.blk[:tr.pad])
+ if err != nil {
+ return nil, err
}
+ if tr.RawAccounting {
+ tr.rawBytes.Write(tr.blk[:n])
+ }
+ tr.pad = 0
- hdr = tr.readHeader()
- if tr.err != nil {
- return nil, tr.err
+ hdr, rawHdr, err := tr.readHeader()
+ if err != nil {
+ return nil, err
+ }
+ if err := tr.handleRegularFile(hdr); err != nil {
+ return nil, err
}
+ format.mayOnlyBe(hdr.Format)
+
// Check for PAX/GNU special headers and files.
switch hdr.Typeflag {
- case TypeXHeader:
- extHdrs, tr.err = parsePAX(tr)
- if tr.err != nil {
- return nil, tr.err
+ case TypeXHeader, TypeXGlobalHeader:
+ format.mayOnlyBe(FormatPAX)
+ paxHdrs, err = parsePAX(tr)
+ if err != nil {
+ return nil, err
}
- continue loop // This is a meta header affecting the next header
+ if hdr.Typeflag == TypeXGlobalHeader {
+ mergePAX(hdr, paxHdrs)
+ return &Header{
+ Name: hdr.Name,
+ Typeflag: hdr.Typeflag,
+ Xattrs: hdr.Xattrs,
+ PAXRecords: hdr.PAXRecords,
+ Format: format,
+ }, nil
+ }
+ continue // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink:
- var realname []byte
- realname, tr.err = ioutil.ReadAll(tr)
- if tr.err != nil {
- return nil, tr.err
+ format.mayOnlyBe(FormatGNU)
+ realname, err := ioutil.ReadAll(tr)
+ if err != nil {
+ return nil, err
}
if tr.RawAccounting {
- if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
- return nil, tr.err
- }
+ tr.rawBytes.Write(realname)
}
- // Convert GNU extensions to use PAX headers.
- if extHdrs == nil {
- extHdrs = make(map[string]string)
- }
var p parser
switch hdr.Typeflag {
case TypeGNULongName:
- extHdrs[paxPath] = p.parseString(realname)
+ gnuLongName = p.parseString(realname)
case TypeGNULongLink:
- extHdrs[paxLinkpath] = p.parseString(realname)
- }
- if p.err != nil {
- tr.err = p.err
- return nil, tr.err
+ gnuLongLink = p.parseString(realname)
}
- continue loop // This is a meta header affecting the next header
+ continue // This is a meta header affecting the next header
default:
- mergePAX(hdr, extHdrs)
+ // The old GNU sparse format is handled here since it is technically
+ // just a regular file with additional attributes.
- // Check for a PAX format sparse file
- sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
- if err != nil {
- tr.err = err
+ if err := mergePAX(hdr, paxHdrs); err != nil {
return nil, err
}
- if sp != nil {
- // Current file is a PAX format GNU sparse file.
- // Set the current file reader to a sparse file reader.
- tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
- if tr.err != nil {
- return nil, tr.err
+ if gnuLongName != "" {
+ hdr.Name = gnuLongName
+ }
+ if gnuLongLink != "" {
+ hdr.Linkname = gnuLongLink
+ }
+ if hdr.Typeflag == TypeRegA {
+ if strings.HasSuffix(hdr.Name, "/") {
+ hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
+ } else {
+ hdr.Typeflag = TypeReg
}
}
- break loop // This is a file, so stop
+
+ // The extended headers may have updated the size.
+ // Thus, set up the regFileReader again after merging PAX headers.
+ if err := tr.handleRegularFile(hdr); err != nil {
+ return nil, err
+ }
+
+ // Sparse formats rely on being able to read from the logical data
+ // section; there must be a preceding call to handleRegularFile.
+ if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
+ return nil, err
+ }
+
+ // Set the final guess at the format.
+ if format.has(FormatUSTAR) && format.has(FormatPAX) {
+ format.mayOnlyBe(FormatUSTAR)
+ }
+ hdr.Format = format
+ return hdr, nil // This is a file, so stop
}
}
- return hdr, nil
}
-// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
-// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
-// be treated as a regular file.
-func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
- var sparseFormat string
-
- // Check for sparse format indicators
- major, majorOk := headers[paxGNUSparseMajor]
- minor, minorOk := headers[paxGNUSparseMinor]
- sparseName, sparseNameOk := headers[paxGNUSparseName]
- _, sparseMapOk := headers[paxGNUSparseMap]
- sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
- sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
-
- // Identify which, if any, sparse format applies from which PAX headers are set
- if majorOk && minorOk {
- sparseFormat = major + "." + minor
- } else if sparseNameOk && sparseMapOk {
- sparseFormat = "0.1"
- } else if sparseSizeOk {
- sparseFormat = "0.0"
- } else {
- // Not a PAX format GNU sparse file.
- return nil, nil
+// handleRegularFile sets up the current file reader and padding such that it
+// can only read the following logical data section. It will properly handle
+// special headers that contain no data section.
+func (tr *Reader) handleRegularFile(hdr *Header) error {
+ nb := hdr.Size
+ if isHeaderOnlyType(hdr.Typeflag) {
+ nb = 0
}
-
- // Check for unknown sparse format
- if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
- return nil, nil
+ if nb < 0 {
+ return ErrHeader
}
- // Update hdr from GNU sparse PAX headers
- if sparseNameOk {
- hdr.Name = sparseName
+ tr.pad = blockPadding(nb)
+ tr.curr = &regFileReader{r: tr.r, nb: nb}
+ return nil
+}
+
+// handleSparseFile checks if the current file is a sparse format of any type
+// and sets the curr reader appropriately.
+func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
+ var spd sparseDatas
+ var err error
+ if hdr.Typeflag == TypeGNUSparse {
+ spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
+ } else {
+ spd, err = tr.readGNUSparsePAXHeaders(hdr)
}
- if sparseSizeOk {
- realSize, err := strconv.ParseInt(sparseSize, 10, 0)
- if err != nil {
- return nil, ErrHeader
+
+ // If spd is non-nil, then this is a sparse file.
+ // Note that it is possible for len(spd) == 0.
+ if err == nil && spd != nil {
+ if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
+ return ErrHeader
}
- hdr.Size = realSize
- } else if sparseRealSizeOk {
- realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
+ sph := invertSparseEntries(spd, hdr.Size)
+ tr.curr = &sparseFileReader{tr.curr, sph, 0}
+ }
+ return err
+}
+
+// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
+// If they are found, then this function reads the sparse map and returns it.
+// This assumes that 0.0 headers have already been converted to 0.1 headers
+// by the PAX header parsing logic.
+func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
+ // Identify the version of GNU headers.
+ var is1x0 bool
+ major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
+ switch {
+ case major == "0" && (minor == "0" || minor == "1"):
+ is1x0 = false
+ case major == "1" && minor == "0":
+ is1x0 = true
+ case major != "" || minor != "":
+ return nil, nil // Unknown GNU sparse PAX version
+ case hdr.PAXRecords[paxGNUSparseMap] != "":
+ is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
+ default:
+ return nil, nil // Not a PAX format GNU sparse file.
+ }
+ hdr.Format.mayOnlyBe(FormatPAX)
+
+ // Update hdr from GNU sparse PAX headers.
+ if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
+ hdr.Name = name
+ }
+ size := hdr.PAXRecords[paxGNUSparseSize]
+ if size == "" {
+ size = hdr.PAXRecords[paxGNUSparseRealSize]
+ }
+ if size != "" {
+ n, err := strconv.ParseInt(size, 10, 64)
if err != nil {
return nil, ErrHeader
}
- hdr.Size = realSize
+ hdr.Size = n
}
- // Set up the sparse map, according to the particular sparse format in use
- var sp []sparseEntry
- var err error
- switch sparseFormat {
- case "0.0", "0.1":
- sp, err = readGNUSparseMap0x1(headers)
- case "1.0":
- sp, err = readGNUSparseMap1x0(tr.curr)
+ // Read the sparse map according to the appropriate format.
+ if is1x0 {
+ return readGNUSparseMap1x0(tr.curr)
}
- return sp, err
+ return readGNUSparseMap0x1(hdr.PAXRecords)
}
-// mergePAX merges well known headers according to PAX standard.
-// In general headers with the same name as those found
-// in the header struct overwrite those found in the header
-// struct with higher precision or longer values. Esp. useful
-// for name and linkname fields.
-func mergePAX(hdr *Header, headers map[string]string) error {
- for k, v := range headers {
+// mergePAX merges paxHdrs into hdr for all relevant fields of Header.
+func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
+ for k, v := range paxHdrs {
+ if v == "" {
+ continue // Keep the original USTAR value
+ }
+ var id64 int64
switch k {
case paxPath:
hdr.Name = v
case paxLinkpath:
hdr.Linkname = v
- case paxGname:
- hdr.Gname = v
case paxUname:
hdr.Uname = v
+ case paxGname:
+ hdr.Gname = v
case paxUid:
- uid, err := strconv.ParseInt(v, 10, 0)
- if err != nil {
- return err
- }
- hdr.Uid = int(uid)
+ id64, err = strconv.ParseInt(v, 10, 64)
+ hdr.Uid = int(id64) // Integer overflow possible
case paxGid:
- gid, err := strconv.ParseInt(v, 10, 0)
- if err != nil {
- return err
- }
- hdr.Gid = int(gid)
+ id64, err = strconv.ParseInt(v, 10, 64)
+ hdr.Gid = int(id64) // Integer overflow possible
case paxAtime:
- t, err := parsePAXTime(v)
- if err != nil {
- return err
- }
- hdr.AccessTime = t
+ hdr.AccessTime, err = parsePAXTime(v)
case paxMtime:
- t, err := parsePAXTime(v)
- if err != nil {
- return err
- }
- hdr.ModTime = t
+ hdr.ModTime, err = parsePAXTime(v)
case paxCtime:
- t, err := parsePAXTime(v)
- if err != nil {
- return err
- }
- hdr.ChangeTime = t
+ hdr.ChangeTime, err = parsePAXTime(v)
case paxSize:
- size, err := strconv.ParseInt(v, 10, 0)
- if err != nil {
- return err
- }
- hdr.Size = int64(size)
+ hdr.Size, err = strconv.ParseInt(v, 10, 64)
default:
- if strings.HasPrefix(k, paxXattr) {
+ if strings.HasPrefix(k, paxSchilyXattr) {
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
}
- hdr.Xattrs[k[len(paxXattr):]] = v
+ hdr.Xattrs[k[len(paxSchilyXattr):]] = v
}
}
- }
- return nil
-}
-
-// parsePAXTime takes a string of the form %d.%d as described in
-// the PAX specification.
-func parsePAXTime(t string) (time.Time, error) {
- buf := []byte(t)
- pos := bytes.IndexByte(buf, '.')
- var seconds, nanoseconds int64
- var err error
- if pos == -1 {
- seconds, err = strconv.ParseInt(t, 10, 0)
- if err != nil {
- return time.Time{}, err
- }
- } else {
- seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
if err != nil {
- return time.Time{}, err
- }
- nano_buf := string(buf[pos+1:])
- // Pad as needed before converting to a decimal.
- // For example .030 -> .030000000 -> 30000000 nanoseconds
- if len(nano_buf) < maxNanoSecondIntSize {
- // Right pad
- nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
- } else if len(nano_buf) > maxNanoSecondIntSize {
- // Right truncate
- nano_buf = nano_buf[:maxNanoSecondIntSize]
- }
- nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
- if err != nil {
- return time.Time{}, err
+ return ErrHeader
}
}
- ts := time.Unix(seconds, nanoseconds)
- return ts, nil
+ hdr.PAXRecords = paxHdrs
+ return nil
}
// parsePAX parses PAX headers.
@@ -406,12 +345,11 @@ func parsePAX(r io.Reader) (map[string]string, error) {
sbuf := string(buf)
// For GNU PAX sparse format 0.0 support.
- // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
- var sparseMap bytes.Buffer
+ // This function transforms the sparse format 0.0 headers into format 0.1
+ // headers since 0.0 headers were not PAX compliant.
+ var sparseMap []string
- headers := make(map[string]string)
- // Each record is constructed as
- // "%d %s=%s\n", length, keyword, value
+ paxHdrs := make(map[string]string)
for len(sbuf) > 0 {
key, value, residual, err := parsePAXRecord(sbuf)
if err != nil {
@@ -419,422 +357,234 @@ func parsePAX(r io.Reader) (map[string]string, error) {
}
sbuf = residual
- keyStr := string(key)
- if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
- // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
- sparseMap.WriteString(value)
- sparseMap.Write([]byte{','})
- } else {
- // Normal key. Set the value in the headers map.
- headers[keyStr] = string(value)
- }
- }
- if sparseMap.Len() != 0 {
- // Add sparse info to headers, chopping off the extra comma
- sparseMap.Truncate(sparseMap.Len() - 1)
- headers[paxGNUSparseMap] = sparseMap.String()
- }
- return headers, nil
-}
-
-// parsePAXRecord parses the input PAX record string into a key-value pair.
-// If parsing is successful, it will slice off the currently read record and
-// return the remainder as r.
-//
-// A PAX record is of the following form:
-// "%d %s=%s\n" % (size, key, value)
-func parsePAXRecord(s string) (k, v, r string, err error) {
- // The size field ends at the first space.
- sp := strings.IndexByte(s, ' ')
- if sp == -1 {
- return "", "", s, ErrHeader
- }
-
- // Parse the first token as a decimal integer.
- n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
- if perr != nil || n < 5 || int64(len(s)) < n {
- return "", "", s, ErrHeader
- }
-
- // Extract everything between the space and the final newline.
- rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
- if nl != "\n" {
- return "", "", s, ErrHeader
- }
-
- // The first equals separates the key from the value.
- eq := strings.IndexByte(rec, '=')
- if eq == -1 {
- return "", "", s, ErrHeader
- }
- return rec[:eq], rec[eq+1:], rem, nil
-}
-
-// parseString parses bytes as a NUL-terminated C-style string.
-// If a NUL byte is not found then the whole slice is returned as a string.
-func (*parser) parseString(b []byte) string {
- n := 0
- for n < len(b) && b[n] != 0 {
- n++
- }
- return string(b[0:n])
-}
-
-// parseNumeric parses the input as being encoded in either base-256 or octal.
-// This function may return negative numbers.
-// If parsing fails or an integer overflow occurs, err will be set.
-func (p *parser) parseNumeric(b []byte) int64 {
- // Check for base-256 (binary) format first.
- // If the first bit is set, then all following bits constitute a two's
- // complement encoded number in big-endian byte order.
- if len(b) > 0 && b[0]&0x80 != 0 {
- // Handling negative numbers relies on the following identity:
- // -a-1 == ^a
- //
- // If the number is negative, we use an inversion mask to invert the
- // data bytes and treat the value as an unsigned number.
- var inv byte // 0x00 if positive or zero, 0xff if negative
- if b[0]&0x40 != 0 {
- inv = 0xff
- }
-
- var x uint64
- for i, c := range b {
- c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
- if i == 0 {
- c &= 0x7f // Ignore signal bit in first byte
- }
- if (x >> 56) > 0 {
- p.err = ErrHeader // Integer overflow
- return 0
+ switch key {
+ case paxGNUSparseOffset, paxGNUSparseNumBytes:
+ // Validate sparse header order and value.
+ if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
+ (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
+ strings.Contains(value, ",") {
+ return nil, ErrHeader
}
- x = x<<8 | uint64(c)
- }
- if (x >> 63) > 0 {
- p.err = ErrHeader // Integer overflow
- return 0
- }
- if inv == 0xff {
- return ^int64(x)
- }
- return int64(x)
- }
-
- // Normal case is base-8 (octal) format.
- return p.parseOctal(b)
-}
-
-func (p *parser) parseOctal(b []byte) int64 {
- // Because unused fields are filled with NULs, we need
- // to skip leading NULs. Fields may also be padded with
- // spaces or NULs.
- // So we remove leading and trailing NULs and spaces to
- // be sure.
- b = bytes.Trim(b, " \x00")
-
- if len(b) == 0 {
- return 0
- }
- x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
- if perr != nil {
- p.err = ErrHeader
- }
- return int64(x)
-}
-
-// skipUnread skips any unread bytes in the existing file entry, as well as any
-// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
-// encountered in the data portion; it is okay to hit io.EOF in the padding.
-//
-// Note that this function still works properly even when sparse files are being
-// used since numBytes returns the bytes remaining in the underlying io.Reader.
-func (tr *Reader) skipUnread() error {
- dataSkip := tr.numBytes() // Number of data bytes to skip
- totalSkip := dataSkip + tr.pad // Total number of bytes to skip
- tr.curr, tr.pad = nil, 0
- if tr.RawAccounting {
- _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
- return tr.err
- }
- // If possible, Seek to the last byte before the end of the data section.
- // Do this because Seek is often lazy about reporting errors; this will mask
- // the fact that the tar stream may be truncated. We can rely on the
- // io.CopyN done shortly afterwards to trigger any IO errors.
- var seekSkipped int64 // Number of bytes skipped via Seek
- if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
- // Not all io.Seeker can actually Seek. For example, os.Stdin implements
- // io.Seeker, but calling Seek always returns an error and performs
- // no action. Thus, we try an innocent seek to the current position
- // to see if Seek is really supported.
- pos1, err := sr.Seek(0, os.SEEK_CUR)
- if err == nil {
- // Seek seems supported, so perform the real Seek.
- pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
- if err != nil {
- tr.err = err
- return tr.err
- }
- seekSkipped = pos2 - pos1
+ sparseMap = append(sparseMap, value)
+ default:
+ paxHdrs[key] = value
}
}
-
- var copySkipped int64 // Number of bytes skipped via CopyN
- copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
- if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
- tr.err = io.ErrUnexpectedEOF
+ if len(sparseMap) > 0 {
+ paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
}
- return tr.err
-}
-
-func (tr *Reader) verifyChecksum(header []byte) bool {
- if tr.err != nil {
- return false
- }
-
- var p parser
- given := p.parseOctal(header[148:156])
- unsigned, signed := checksum(header)
- return p.err == nil && (given == unsigned || given == signed)
+ return paxHdrs, nil
}
// readHeader reads the next block header and assumes that the underlying reader
-// is already aligned to a block boundary.
+// is already aligned to a block boundary. It returns the raw block of the
+// header in case further processing is required.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
-func (tr *Reader) readHeader() *Header {
- header := tr.hdrBuff[:]
- copy(header, zeroBlock)
-
- if n, err := io.ReadFull(tr.r, header); err != nil {
- tr.err = err
- // because it could read some of the block, but reach EOF first
- if tr.err == io.EOF && tr.RawAccounting {
- if _, err := tr.rawBytes.Write(header[:n]); err != nil {
- tr.err = err
- }
- }
- return nil // io.EOF is okay here
+func (tr *Reader) readHeader() (*Header, *block, error) {
+ // Two blocks of zero bytes marks the end of the archive.
+ n, err := io.ReadFull(tr.r, tr.blk[:])
+ if tr.RawAccounting && (err == nil || err == io.EOF) {
+ tr.rawBytes.Write(tr.blk[:n])
}
- if tr.RawAccounting {
- if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
- return nil
- }
+ if err != nil {
+ return nil, nil, err // EOF is okay here; exactly 0 bytes read
}
- // Two blocks of zero bytes marks the end of the archive.
- if bytes.Equal(header, zeroBlock[0:blockSize]) {
- if n, err := io.ReadFull(tr.r, header); err != nil {
- tr.err = err
- // because it could read some of the block, but reach EOF first
- if tr.err == io.EOF && tr.RawAccounting {
- if _, err := tr.rawBytes.Write(header[:n]); err != nil {
- tr.err = err
- }
- }
- return nil // io.EOF is okay here
+ if bytes.Equal(tr.blk[:], zeroBlock[:]) {
+ n, err = io.ReadFull(tr.r, tr.blk[:])
+ if tr.RawAccounting && (err == nil || err == io.EOF) {
+ tr.rawBytes.Write(tr.blk[:n])
}
- if tr.RawAccounting {
- if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
- return nil
- }
+ if err != nil {
+ return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
}
- if bytes.Equal(header, zeroBlock[0:blockSize]) {
- tr.err = io.EOF
- } else {
- tr.err = ErrHeader // zero block and then non-zero block
+ if bytes.Equal(tr.blk[:], zeroBlock[:]) {
+ return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
}
- return nil
+ return nil, nil, ErrHeader // Zero block and then non-zero block
}
- if !tr.verifyChecksum(header) {
- tr.err = ErrHeader
- return nil
+ // Verify the header matches a known format.
+ format := tr.blk.GetFormat()
+ if format == FormatUnknown {
+ return nil, nil, ErrHeader
}
- // Unpack
var p parser
hdr := new(Header)
- s := slicer(header)
-
- hdr.Name = p.parseString(s.next(100))
- hdr.Mode = p.parseNumeric(s.next(8))
- hdr.Uid = int(p.parseNumeric(s.next(8)))
- hdr.Gid = int(p.parseNumeric(s.next(8)))
- hdr.Size = p.parseNumeric(s.next(12))
- hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
- s.next(8) // chksum
- hdr.Typeflag = s.next(1)[0]
- hdr.Linkname = p.parseString(s.next(100))
-
- // The remainder of the header depends on the value of magic.
- // The original (v7) version of tar had no explicit magic field,
- // so its magic bytes, like the rest of the block, are NULs.
- magic := string(s.next(8)) // contains version field as well.
- var format string
- switch {
- case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
- if string(header[508:512]) == "tar\x00" {
- format = "star"
- } else {
- format = "posix"
- }
- case magic == "ustar \x00": // old GNU tar
- format = "gnu"
- }
- switch format {
- case "posix", "gnu", "star":
- hdr.Uname = p.parseString(s.next(32))
- hdr.Gname = p.parseString(s.next(32))
- devmajor := s.next(8)
- devminor := s.next(8)
- if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
- hdr.Devmajor = p.parseNumeric(devmajor)
- hdr.Devminor = p.parseNumeric(devminor)
- }
+ // Unpack the V7 header.
+ v7 := tr.blk.V7()
+ hdr.Typeflag = v7.TypeFlag()[0]
+ hdr.Name = p.parseString(v7.Name())
+ hdr.Linkname = p.parseString(v7.LinkName())
+ hdr.Size = p.parseNumeric(v7.Size())
+ hdr.Mode = p.parseNumeric(v7.Mode())
+ hdr.Uid = int(p.parseNumeric(v7.UID()))
+ hdr.Gid = int(p.parseNumeric(v7.GID()))
+ hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
+
+ // Unpack format specific fields.
+ if format > formatV7 {
+ ustar := tr.blk.USTAR()
+ hdr.Uname = p.parseString(ustar.UserName())
+ hdr.Gname = p.parseString(ustar.GroupName())
+ hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
+ hdr.Devminor = p.parseNumeric(ustar.DevMinor())
+
var prefix string
- switch format {
- case "posix", "gnu":
- prefix = p.parseString(s.next(155))
- case "star":
- prefix = p.parseString(s.next(131))
- hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
- hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
+ switch {
+ case format.has(FormatUSTAR | FormatPAX):
+ hdr.Format = format
+ ustar := tr.blk.USTAR()
+ prefix = p.parseString(ustar.Prefix())
+
+ // For Format detection, check if block is properly formatted since
+ // the parser is more liberal than what USTAR actually permits.
+ notASCII := func(r rune) bool { return r >= 0x80 }
+ if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
+ hdr.Format = FormatUnknown // Non-ASCII characters in block.
+ }
+ nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
+ if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
+ nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
+ hdr.Format = FormatUnknown // Numeric fields must end in NUL
+ }
+ case format.has(formatSTAR):
+ star := tr.blk.STAR()
+ prefix = p.parseString(star.Prefix())
+ hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
+ hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
+ case format.has(FormatGNU):
+ hdr.Format = format
+ var p2 parser
+ gnu := tr.blk.GNU()
+ if b := gnu.AccessTime(); b[0] != 0 {
+ hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
+ }
+ if b := gnu.ChangeTime(); b[0] != 0 {
+ hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
+ }
+
+ // Prior to Go1.8, the Writer had a bug where it would output
+ // an invalid tar file in certain rare situations because the logic
+ // incorrectly believed that the old GNU format had a prefix field.
+ // This is wrong and leads to an output file that mangles the
+ // atime and ctime fields, which are often left unused.
+ //
+ // In order to continue reading tar files created by former, buggy
+ // versions of Go, we skeptically parse the atime and ctime fields.
+ // If we are unable to parse them and the prefix field looks like
+ // an ASCII string, then we fall back on the pre-Go1.8 behavior
+ // of treating these fields as the USTAR prefix field.
+ //
+ // Note that this will not use the fallback logic for all possible
+ // files generated by a pre-Go1.8 toolchain. If the generated file
+ // happened to have a prefix field that parses as valid
+ // atime and ctime fields (e.g., when they are valid octal strings),
+ // then it is impossible to distinguish between a valid GNU file
+ // and an invalid pre-Go1.8 file.
+ //
+ // See https://golang.org/issues/12594
+ // See https://golang.org/issues/21005
+ if p2.err != nil {
+ hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
+ ustar := tr.blk.USTAR()
+ if s := p.parseString(ustar.Prefix()); isASCII(s) {
+ prefix = s
+ }
+ hdr.Format = FormatUnknown // Buggy file is not GNU
+ }
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name
}
}
+ return hdr, &tr.blk, p.err
+}
- if p.err != nil {
- tr.err = p.err
- return nil
- }
-
- nb := hdr.Size
- if isHeaderOnlyType(hdr.Typeflag) {
- nb = 0
- }
- if nb < 0 {
- tr.err = ErrHeader
- return nil
- }
-
- // Set the current file reader.
- tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
- tr.curr = &regFileReader{r: tr.r, nb: nb}
-
- // Check for old GNU sparse format entry.
- if hdr.Typeflag == TypeGNUSparse {
- // Get the real size of the file.
- hdr.Size = p.parseNumeric(header[483:495])
- if p.err != nil {
- tr.err = p.err
- return nil
- }
-
- // Read the sparse map.
- sp := tr.readOldGNUSparseMap(header)
- if tr.err != nil {
- return nil
- }
-
- // Current file is a GNU sparse file. Update the current file reader.
- tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
- if tr.err != nil {
- return nil
- }
+// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
+// The sparse map is stored in the tar header if it's small enough.
+// If it's larger than four entries, then one or more extension headers are used
+// to store the rest of the sparse map.
+//
+// The Header.Size does not reflect the size of any extended headers used.
+// Thus, this function will read from the raw io.Reader to fetch extra headers.
+// This method mutates blk in the process.
+func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
+ // Make sure that the input format is GNU.
+ // Unfortunately, the STAR format also has a sparse header format that uses
+ // the same type flag but has a completely different layout.
+ if blk.GetFormat() != FormatGNU {
+ return nil, ErrHeader
}
+ hdr.Format.mayOnlyBe(FormatGNU)
- return hdr
-}
-
-// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
-// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
-// then one or more extension headers are used to store the rest of the sparse map.
-func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
var p parser
- isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
- spCap := oldGNUSparseMainHeaderNumEntries
- if isExtended {
- spCap += oldGNUSparseExtendedHeaderNumEntries
- }
- sp := make([]sparseEntry, 0, spCap)
- s := slicer(header[oldGNUSparseMainHeaderOffset:])
-
- // Read the four entries from the main tar header
- for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
- offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
- numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
- if p.err != nil {
- tr.err = p.err
- return nil
- }
- if offset == 0 && numBytes == 0 {
- break
- }
- sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
+ hdr.Size = p.parseNumeric(blk.GNU().RealSize())
+ if p.err != nil {
+ return nil, p.err
}
-
- for isExtended {
- // There are more entries. Read an extension header and parse its entries.
- sparseHeader := make([]byte, blockSize)
- if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
- return nil
- }
- if tr.RawAccounting {
- if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
- return nil
+ s := blk.GNU().Sparse()
+ spd := make(sparseDatas, 0, s.MaxEntries())
+ for {
+ for i := 0; i < s.MaxEntries(); i++ {
+ // This termination condition is identical to GNU and BSD tar.
+ if s.Entry(i).Offset()[0] == 0x00 {
+ break // Don't return, need to process extended headers (even if empty)
+ }
+ offset := p.parseNumeric(s.Entry(i).Offset())
+ length := p.parseNumeric(s.Entry(i).Length())
+ if p.err != nil {
+ return nil, p.err
}
+ spd = append(spd, sparseEntry{Offset: offset, Length: length})
}
- isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
- s = slicer(sparseHeader)
- for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
- offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
- numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
- if p.err != nil {
- tr.err = p.err
- return nil
+ if s.IsExtended()[0] > 0 {
+ // There are more entries. Read an extension header and parse its entries.
+ if _, err := mustReadFull(tr.r, blk[:]); err != nil {
+ return nil, err
}
- if offset == 0 && numBytes == 0 {
- break
+ if tr.RawAccounting {
+ tr.rawBytes.Write(blk[:])
}
- sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
+ s = blk.Sparse()
+ continue
}
+ return spd, nil // Done
}
- return sp
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
-// fields (offset, numBytes). This function must stop reading at the end
+// fields (offset, length). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
-func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
- var cntNewline int64
- var buf bytes.Buffer
- var blk = make([]byte, blockSize)
-
- // feedTokens copies data in numBlock chunks from r into buf until there are
+func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
+ var (
+ cntNewline int64
+ buf bytes.Buffer
+ blk block
+ )
+
+ // feedTokens copies data in blocks from r into buf until there are
// at least cnt newlines in buf. It will not read more blocks than needed.
- var feedTokens = func(cnt int64) error {
- for cntNewline < cnt {
- if _, err := io.ReadFull(r, blk); err != nil {
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
+ feedTokens := func(n int64) error {
+ for cntNewline < n {
+ if _, err := mustReadFull(r, blk[:]); err != nil {
return err
}
- buf.Write(blk)
+ buf.Write(blk[:])
for _, c := range blk {
if c == '\n' {
cntNewline++
@@ -846,10 +596,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
// nextToken gets the next token delimited by a newline. This assumes that
// at least one newline exists in the buffer.
- var nextToken = func() string {
+ nextToken := func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
- return tok[:len(tok)-1] // Cut off newline
+ return strings.TrimRight(tok, "\n")
}
// Parse for the number of entries.
@@ -868,197 +618,306 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
if err := feedTokens(2 * numEntries); err != nil {
return nil, err
}
- sp := make([]sparseEntry, 0, numEntries)
+ spd := make(sparseDatas, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
- offset, err := strconv.ParseInt(nextToken(), 10, 64)
- if err != nil {
- return nil, ErrHeader
- }
- numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
- if err != nil {
+ offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
+ length, err2 := strconv.ParseInt(nextToken(), 10, 64)
+ if err1 != nil || err2 != nil {
return nil, ErrHeader
}
- sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
+ spd = append(spd, sparseEntry{Offset: offset, Length: length})
}
- return sp, nil
+ return spd, nil
}
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
-func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
+func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
// Get number of entries.
// Use integer overflow resistant math to check this.
- numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
+ numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
return nil, ErrHeader
}
// There should be two numbers in sparseMap for each entry.
- sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
+ sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
+ if len(sparseMap) == 1 && sparseMap[0] == "" {
+ sparseMap = sparseMap[:0]
+ }
if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader
}
// Loop through the entries in the sparse map.
// numEntries is trusted now.
- sp := make([]sparseEntry, 0, numEntries)
- for i := int64(0); i < numEntries; i++ {
- offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
- if err != nil {
+ spd := make(sparseDatas, 0, numEntries)
+ for len(sparseMap) >= 2 {
+ offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
+ length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
+ if err1 != nil || err2 != nil {
return nil, ErrHeader
}
- numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
- if err != nil {
- return nil, ErrHeader
- }
- sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
+ spd = append(spd, sparseEntry{Offset: offset, Length: length})
+ sparseMap = sparseMap[2:]
}
- return sp, nil
+ return spd, nil
}
-// numBytes returns the number of bytes left to read in the current file's entry
-// in the tar archive, or 0 if there is no current file.
-func (tr *Reader) numBytes() int64 {
- if tr.curr == nil {
- // No current file, so no bytes
- return 0
- }
- return tr.curr.numBytes()
-}
-
-// Read reads from the current entry in the tar archive.
-// It returns 0, io.EOF when it reaches the end of that entry,
-// until Next is called to advance to the next entry.
+// Read reads from the current file in the tar archive.
+// It returns (0, io.EOF) when it reaches the end of that file,
+// until Next is called to advance to the next file.
+//
+// If the current file is sparse, then the regions marked as a hole
+// are read back as NUL-bytes.
//
-// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
-// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
+// Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
+// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
// the Header.Size claims.
-func (tr *Reader) Read(b []byte) (n int, err error) {
+func (tr *Reader) Read(b []byte) (int, error) {
if tr.err != nil {
return 0, tr.err
}
- if tr.curr == nil {
- return 0, io.EOF
- }
-
- n, err = tr.curr.Read(b)
+ n, err := tr.curr.Read(b)
if err != nil && err != io.EOF {
tr.err = err
}
- return
+ return n, err
}
-func (rfr *regFileReader) Read(b []byte) (n int, err error) {
- if rfr.nb == 0 {
- // file consumed
- return 0, io.EOF
+// writeTo writes the content of the current file to w.
+// The number of bytes written matches the number of remaining bytes in the current file.
+//
+// If the current file is sparse and w is an io.WriteSeeker,
+// then writeTo uses Seek to skip past holes defined in Header.SparseHoles,
+// assuming that skipped regions are filled with NULs.
+// This always writes the last byte to ensure w is the right size.
+//
+// TODO(dsnet): Re-export this when adding sparse file support.
+// See https://golang.org/issue/22735
+func (tr *Reader) writeTo(w io.Writer) (int64, error) {
+ if tr.err != nil {
+ return 0, tr.err
}
- if int64(len(b)) > rfr.nb {
- b = b[0:rfr.nb]
+ n, err := tr.curr.WriteTo(w)
+ if err != nil {
+ tr.err = err
}
- n, err = rfr.r.Read(b)
- rfr.nb -= int64(n)
+ return n, err
+}
- if err == io.EOF && rfr.nb > 0 {
- err = io.ErrUnexpectedEOF
+// regFileReader is a fileReader for reading data from a regular file entry.
+type regFileReader struct {
+ r io.Reader // Underlying Reader
+ nb int64 // Number of remaining bytes to read
+}
+
+func (fr *regFileReader) Read(b []byte) (n int, err error) {
+ if int64(len(b)) > fr.nb {
+ b = b[:fr.nb]
+ }
+ if len(b) > 0 {
+ n, err = fr.r.Read(b)
+ fr.nb -= int64(n)
+ }
+ switch {
+ case err == io.EOF && fr.nb > 0:
+ return n, io.ErrUnexpectedEOF
+ case err == nil && fr.nb == 0:
+ return n, io.EOF
+ default:
+ return n, err
}
- return
}
-// numBytes returns the number of bytes left to read in the file's data in the tar archive.
-func (rfr *regFileReader) numBytes() int64 {
- return rfr.nb
+func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
+ return io.Copy(w, struct{ io.Reader }{fr})
}
-// newSparseFileReader creates a new sparseFileReader, but validates all of the
-// sparse entries before doing so.
-func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
- if total < 0 {
- return nil, ErrHeader // Total size cannot be negative
+func (fr regFileReader) LogicalRemaining() int64 {
+ return fr.nb
+}
+
+func (fr regFileReader) PhysicalRemaining() int64 {
+ return fr.nb
+}
+
+// sparseFileReader is a fileReader for reading data from a sparse file entry.
+type sparseFileReader struct {
+ fr fileReader // Underlying fileReader
+ sp sparseHoles // Normalized list of sparse holes
+ pos int64 // Current position in sparse file
+}
+
+func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
+ finished := int64(len(b)) >= sr.LogicalRemaining()
+ if finished {
+ b = b[:sr.LogicalRemaining()]
}
- // Validate all sparse entries. These are the same checks as performed by
- // the BSD tar utility.
- for i, s := range sp {
- switch {
- case s.offset < 0 || s.numBytes < 0:
- return nil, ErrHeader // Negative values are never okay
- case s.offset > math.MaxInt64-s.numBytes:
- return nil, ErrHeader // Integer overflow with large length
- case s.offset+s.numBytes > total:
- return nil, ErrHeader // Region extends beyond the "real" size
- case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
- return nil, ErrHeader // Regions can't overlap and must be in order
+ b0 := b
+ endPos := sr.pos + int64(len(b))
+ for endPos > sr.pos && err == nil {
+ var nf int // Bytes read in fragment
+ holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
+ if sr.pos < holeStart { // In a data fragment
+ bf := b[:min(int64(len(b)), holeStart-sr.pos)]
+ nf, err = tryReadFull(sr.fr, bf)
+ } else { // In a hole fragment
+ bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
+ nf, err = tryReadFull(zeroReader{}, bf)
+ }
+ b = b[nf:]
+ sr.pos += int64(nf)
+ if sr.pos >= holeEnd && len(sr.sp) > 1 {
+ sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
}
- return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
-}
-// readHole reads a sparse hole ending at endOffset.
-func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
- n64 := endOffset - sfr.pos
- if n64 > int64(len(b)) {
- n64 = int64(len(b))
- }
- n := int(n64)
- for i := 0; i < n; i++ {
- b[i] = 0
+ n = len(b0) - len(b)
+ switch {
+ case err == io.EOF:
+ return n, errMissData // Less data in dense file than sparse file
+ case err != nil:
+ return n, err
+ case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
+ return n, errUnrefData // More data in dense file than sparse file
+ case finished:
+ return n, io.EOF
+ default:
+ return n, nil
}
- sfr.pos += n64
- return n
}
-// Read reads the sparse file data in expanded form.
-func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
- // Skip past all empty fragments.
- for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
- sfr.sp = sfr.sp[1:]
+func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
+ ws, ok := w.(io.WriteSeeker)
+ if ok {
+ if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
+ ok = false // Not all io.Seeker can really seek
+ }
+ }
+ if !ok {
+ return io.Copy(w, struct{ io.Reader }{sr})
+ }
+
+ var writeLastByte bool
+ pos0 := sr.pos
+ for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
+ var nf int64 // Size of fragment
+ holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
+ if sr.pos < holeStart { // In a data fragment
+ nf = holeStart - sr.pos
+ nf, err = io.CopyN(ws, sr.fr, nf)
+ } else { // In a hole fragment
+ nf = holeEnd - sr.pos
+ if sr.PhysicalRemaining() == 0 {
+ writeLastByte = true
+ nf--
+ }
+ _, err = ws.Seek(nf, io.SeekCurrent)
+ }
+ sr.pos += nf
+ if sr.pos >= holeEnd && len(sr.sp) > 1 {
+ sr.sp = sr.sp[1:] // Ensure last fragment always remains
+ }
}
- // If there are no more fragments, then it is possible that there
- // is one last sparse hole.
- if len(sfr.sp) == 0 {
- // This behavior matches the BSD tar utility.
- // However, GNU tar stops returning data even if sfr.total is unmet.
- if sfr.pos < sfr.total {
- return sfr.readHole(b, sfr.total), nil
- }
- return 0, io.EOF
+ // If the last fragment is a hole, then seek to 1-byte before EOF, and
+ // write a single byte to ensure the file is the right size.
+ if writeLastByte && err == nil {
+ _, err = ws.Write([]byte{0})
+ sr.pos++
}
- // In front of a data fragment, so read a hole.
- if sfr.pos < sfr.sp[0].offset {
- return sfr.readHole(b, sfr.sp[0].offset), nil
+ n = sr.pos - pos0
+ switch {
+ case err == io.EOF:
+ return n, errMissData // Less data in dense file than sparse file
+ case err != nil:
+ return n, err
+ case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
+ return n, errUnrefData // More data in dense file than sparse file
+ default:
+ return n, nil
}
+}
- // In a data fragment, so read from it.
- // This math is overflow free since we verify that offset and numBytes can
- // be safely added when creating the sparseFileReader.
- endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
- bytesLeft := endPos - sfr.pos // Bytes left in fragment
- if int64(len(b)) > bytesLeft {
- b = b[:bytesLeft]
+func (sr sparseFileReader) LogicalRemaining() int64 {
+ return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
+}
+func (sr sparseFileReader) PhysicalRemaining() int64 {
+ return sr.fr.PhysicalRemaining()
+}
+
+type zeroReader struct{}
+
+func (zeroReader) Read(b []byte) (int, error) {
+ for i := range b {
+ b[i] = 0
}
+ return len(b), nil
+}
- n, err = sfr.rfr.Read(b)
- sfr.pos += int64(n)
+// mustReadFull is like io.ReadFull except it returns
+// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
+func mustReadFull(r io.Reader, b []byte) (int, error) {
+ n, err := tryReadFull(r, b)
if err == io.EOF {
- if sfr.pos < endPos {
- err = io.ErrUnexpectedEOF // There was supposed to be more data
- } else if sfr.pos < sfr.total {
- err = nil // There is still an implicit sparse hole at the end
- }
+ err = io.ErrUnexpectedEOF
}
+ return n, err
+}
- if sfr.pos == endPos {
- sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
+// tryReadFull is like io.ReadFull except it returns
+// io.EOF when it is hit before len(b) bytes are read.
+func tryReadFull(r io.Reader, b []byte) (n int, err error) {
+ for len(b) > n && err == nil {
+ var nn int
+ nn, err = r.Read(b[n:])
+ n += nn
+ }
+ if len(b) == n && err == io.EOF {
+ err = nil
}
return n, err
}
-// numBytes returns the number of bytes left to read in the sparse file's
-// sparse-encoded data in the tar archive.
-func (sfr *sparseFileReader) numBytes() int64 {
- return sfr.rfr.numBytes()
+// discard skips n bytes in r, reporting an error if unable to do so.
+func discard(tr *Reader, n int64) error {
+ var seekSkipped, copySkipped int64
+ var err error
+ r := tr.r
+ if tr.RawAccounting {
+
+ copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
+ goto out
+ }
+
+ // If possible, Seek to the last byte before the end of the data section.
+ // Do this because Seek is often lazy about reporting errors; this will mask
+ // the fact that the stream may be truncated. We can rely on the
+ // io.CopyN done shortly afterwards to trigger any IO errors.
+ if sr, ok := r.(io.Seeker); ok && n > 1 {
+ // Not all io.Seeker can actually Seek. For example, os.Stdin implements
+ // io.Seeker, but calling Seek always returns an error and performs
+ // no action. Thus, we try an innocent seek to the current position
+ // to see if Seek is really supported.
+ pos1, err := sr.Seek(0, io.SeekCurrent)
+ if pos1 >= 0 && err == nil {
+ // Seek seems supported, so perform the real Seek.
+ pos2, err := sr.Seek(n-1, io.SeekCurrent)
+ if pos2 < 0 || err != nil {
+ return err
+ }
+ seekSkipped = pos2 - pos1
+ }
+ }
+
+ copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped)
+out:
+ if err == io.EOF && seekSkipped+copySkipped < n {
+ err = io.ErrUnexpectedEOF
+ }
+ return err
}
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/stat_atim.go b/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime1.go
index cf9cc79c5..cf9cc79c5 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/stat_atim.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime1.go
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go b/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime2.go
index 6f17dbe30..6f17dbe30 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime2.go
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go b/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go
index cb843db4c..868105f33 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go
@@ -8,6 +8,10 @@ package tar
import (
"os"
+ "os/user"
+ "runtime"
+ "strconv"
+ "sync"
"syscall"
)
@@ -15,6 +19,10 @@ func init() {
sysStat = statUnix
}
+// userMap and groupMap cache UID and GID lookups for performance reasons.
+// The downside is that renaming uname or gname by the OS never takes effect.
+var userMap, groupMap sync.Map // map[int]string
+
func statUnix(fi os.FileInfo, h *Header) error {
sys, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
@@ -22,11 +30,67 @@ func statUnix(fi os.FileInfo, h *Header) error {
}
h.Uid = int(sys.Uid)
h.Gid = int(sys.Gid)
- // TODO(bradfitz): populate username & group. os/user
- // doesn't cache LookupId lookups, and lacks group
- // lookup functions.
+
+ // Best effort at populating Uname and Gname.
+ // The os/user functions may fail for any number of reasons
+ // (not implemented on that platform, cgo not enabled, etc).
+ if u, ok := userMap.Load(h.Uid); ok {
+ h.Uname = u.(string)
+ } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil {
+ h.Uname = u.Username
+ userMap.Store(h.Uid, h.Uname)
+ }
+ if g, ok := groupMap.Load(h.Gid); ok {
+ h.Gname = g.(string)
+ } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil {
+ h.Gname = g.Name
+ groupMap.Store(h.Gid, h.Gname)
+ }
+
h.AccessTime = statAtime(sys)
h.ChangeTime = statCtime(sys)
- // TODO(bradfitz): major/minor device numbers?
+
+ // Best effort at populating Devmajor and Devminor.
+ if h.Typeflag == TypeChar || h.Typeflag == TypeBlock {
+ dev := uint64(sys.Rdev) // May be int32 or uint32
+ switch runtime.GOOS {
+ case "linux":
+ // Copied from golang.org/x/sys/unix/dev_linux.go.
+ major := uint32((dev & 0x00000000000fff00) >> 8)
+ major |= uint32((dev & 0xfffff00000000000) >> 32)
+ minor := uint32((dev & 0x00000000000000ff) >> 0)
+ minor |= uint32((dev & 0x00000ffffff00000) >> 12)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ case "darwin":
+ // Copied from golang.org/x/sys/unix/dev_darwin.go.
+ major := uint32((dev >> 24) & 0xff)
+ minor := uint32(dev & 0xffffff)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ case "dragonfly":
+ // Copied from golang.org/x/sys/unix/dev_dragonfly.go.
+ major := uint32((dev >> 8) & 0xff)
+ minor := uint32(dev & 0xffff00ff)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ case "freebsd":
+ // Copied from golang.org/x/sys/unix/dev_freebsd.go.
+ major := uint32((dev >> 8) & 0xff)
+ minor := uint32(dev & 0xffff00ff)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ case "netbsd":
+ // Copied from golang.org/x/sys/unix/dev_netbsd.go.
+ major := uint32((dev & 0x000fff00) >> 8)
+ minor := uint32((dev & 0x000000ff) >> 0)
+ minor |= uint32((dev & 0xfff00000) >> 12)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ case "openbsd":
+ // Copied from golang.org/x/sys/unix/dev_openbsd.go.
+ major := uint32((dev & 0x0000ff00) >> 8)
+ minor := uint32((dev & 0x000000ff) >> 0)
+ minor |= uint32((dev & 0xffff0000) >> 8)
+ h.Devmajor, h.Devminor = int64(major), int64(minor)
+ default:
+ // TODO: Implement solaris (see https://golang.org/issue/8106)
+ }
+ }
return nil
}
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go b/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go
new file mode 100644
index 000000000..d144485a4
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go
@@ -0,0 +1,326 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tar
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// hasNUL reports whether the NUL character exists within s.
+func hasNUL(s string) bool {
+ return strings.IndexByte(s, 0) >= 0
+}
+
+// isASCII reports whether the input is an ASCII C-style string.
+func isASCII(s string) bool {
+ for _, c := range s {
+ if c >= 0x80 || c == 0x00 {
+ return false
+ }
+ }
+ return true
+}
+
+// toASCII converts the input to an ASCII C-style string.
+// This is a best effort conversion, so invalid characters are dropped.
+func toASCII(s string) string {
+ if isASCII(s) {
+ return s
+ }
+ b := make([]byte, 0, len(s))
+ for _, c := range s {
+ if c < 0x80 && c != 0x00 {
+ b = append(b, byte(c))
+ }
+ }
+ return string(b)
+}
+
+type parser struct {
+ err error // Last error seen
+}
+
+type formatter struct {
+ err error // Last error seen
+}
+
+// parseString parses bytes as a NUL-terminated C-style string.
+// If a NUL byte is not found then the whole slice is returned as a string.
+func (*parser) parseString(b []byte) string {
+ if i := bytes.IndexByte(b, 0); i >= 0 {
+ return string(b[:i])
+ }
+ return string(b)
+}
+
+// formatString copies s into b, NUL-terminating if possible.
+func (f *formatter) formatString(b []byte, s string) {
+ if len(s) > len(b) {
+ f.err = ErrFieldTooLong
+ }
+ copy(b, s)
+ if len(s) < len(b) {
+ b[len(s)] = 0
+ }
+
+ // Some buggy readers treat regular files with a trailing slash
+ // in the V7 path field as a directory even though the full path
+ // recorded elsewhere (e.g., via PAX record) contains no trailing slash.
+ if len(s) > len(b) && b[len(b)-1] == '/' {
+ n := len(strings.TrimRight(s[:len(b)], "/"))
+ b[n] = 0 // Replace trailing slash with NUL terminator
+ }
+}
+
+// fitsInBase256 reports whether x can be encoded into n bytes using base-256
+// encoding. Unlike octal encoding, base-256 encoding does not require that the
+// string ends with a NUL character. Thus, all n bytes are available for output.
+//
+// If operating in binary mode, this assumes strict GNU binary mode; which means
+// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
+// equivalent to the sign bit in two's complement form.
+func fitsInBase256(n int, x int64) bool {
+ binBits := uint(n-1) * 8
+ return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
+}
+
+// parseNumeric parses the input as being encoded in either base-256 or octal.
+// This function may return negative numbers.
+// If parsing fails or an integer overflow occurs, err will be set.
+func (p *parser) parseNumeric(b []byte) int64 {
+ // Check for base-256 (binary) format first.
+ // If the first bit is set, then all following bits constitute a two's
+ // complement encoded number in big-endian byte order.
+ if len(b) > 0 && b[0]&0x80 != 0 {
+ // Handling negative numbers relies on the following identity:
+ // -a-1 == ^a
+ //
+ // If the number is negative, we use an inversion mask to invert the
+ // data bytes and treat the value as an unsigned number.
+ var inv byte // 0x00 if positive or zero, 0xff if negative
+ if b[0]&0x40 != 0 {
+ inv = 0xff
+ }
+
+ var x uint64
+ for i, c := range b {
+ c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
+ if i == 0 {
+ c &= 0x7f // Ignore signal bit in first byte
+ }
+ if (x >> 56) > 0 {
+ p.err = ErrHeader // Integer overflow
+ return 0
+ }
+ x = x<<8 | uint64(c)
+ }
+ if (x >> 63) > 0 {
+ p.err = ErrHeader // Integer overflow
+ return 0
+ }
+ if inv == 0xff {
+ return ^int64(x)
+ }
+ return int64(x)
+ }
+
+ // Normal case is base-8 (octal) format.
+ return p.parseOctal(b)
+}
+
+// formatNumeric encodes x into b using base-8 (octal) encoding if possible.
+// Otherwise it will attempt to use base-256 (binary) encoding.
+func (f *formatter) formatNumeric(b []byte, x int64) {
+ if fitsInOctal(len(b), x) {
+ f.formatOctal(b, x)
+ return
+ }
+
+ if fitsInBase256(len(b), x) {
+ for i := len(b) - 1; i >= 0; i-- {
+ b[i] = byte(x)
+ x >>= 8
+ }
+ b[0] |= 0x80 // Highest bit indicates binary format
+ return
+ }
+
+ f.formatOctal(b, 0) // Last resort, just write zero
+ f.err = ErrFieldTooLong
+}
+
+func (p *parser) parseOctal(b []byte) int64 {
+ // Because unused fields are filled with NULs, we need
+ // to skip leading NULs. Fields may also be padded with
+ // spaces or NULs.
+ // So we remove leading and trailing NULs and spaces to
+ // be sure.
+ b = bytes.Trim(b, " \x00")
+
+ if len(b) == 0 {
+ return 0
+ }
+ x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
+ if perr != nil {
+ p.err = ErrHeader
+ }
+ return int64(x)
+}
+
+func (f *formatter) formatOctal(b []byte, x int64) {
+ if !fitsInOctal(len(b), x) {
+ x = 0 // Last resort, just write zero
+ f.err = ErrFieldTooLong
+ }
+
+ s := strconv.FormatInt(x, 8)
+ // Add leading zeros, but leave room for a NUL.
+ if n := len(b) - len(s) - 1; n > 0 {
+ s = strings.Repeat("0", n) + s
+ }
+ f.formatString(b, s)
+}
+
+// fitsInOctal reports whether the integer x fits in a field n-bytes long
+// using octal encoding with the appropriate NUL terminator.
+func fitsInOctal(n int, x int64) bool {
+ octBits := uint(n-1) * 3
+ return x >= 0 && (n >= 22 || x < 1<<octBits)
+}
+
+// parsePAXTime takes a string of the form %d.%d as described in the PAX
+// specification. Note that this implementation allows for negative timestamps,
+// which are allowed by the PAX specification, but are not always portable.
+func parsePAXTime(s string) (time.Time, error) {
+ const maxNanoSecondDigits = 9
+
+ // Split string into seconds and sub-seconds parts.
+ ss, sn := s, ""
+ if pos := strings.IndexByte(s, '.'); pos >= 0 {
+ ss, sn = s[:pos], s[pos+1:]
+ }
+
+ // Parse the seconds.
+ secs, err := strconv.ParseInt(ss, 10, 64)
+ if err != nil {
+ return time.Time{}, ErrHeader
+ }
+ if len(sn) == 0 {
+ return time.Unix(secs, 0), nil // No sub-second values
+ }
+
+ // Parse the nanoseconds.
+ if strings.Trim(sn, "0123456789") != "" {
+ return time.Time{}, ErrHeader
+ }
+ if len(sn) < maxNanoSecondDigits {
+ sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
+ } else {
+ sn = sn[:maxNanoSecondDigits] // Right truncate
+ }
+ nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
+ if len(ss) > 0 && ss[0] == '-' {
+ return time.Unix(secs, -1*nsecs), nil // Negative correction
+ }
+ return time.Unix(secs, nsecs), nil
+}
+
+// formatPAXTime converts ts into a time of the form %d.%d as described in the
+// PAX specification. This function is capable of negative timestamps.
+func formatPAXTime(ts time.Time) (s string) {
+ secs, nsecs := ts.Unix(), ts.Nanosecond()
+ if nsecs == 0 {
+ return strconv.FormatInt(secs, 10)
+ }
+
+ // If seconds is negative, then perform correction.
+ sign := ""
+ if secs < 0 {
+ sign = "-" // Remember sign
+ secs = -(secs + 1) // Add a second to secs
+ nsecs = -(nsecs - 1E9) // Take that second away from nsecs
+ }
+ return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
+}
+
+// parsePAXRecord parses the input PAX record string into a key-value pair.
+// If parsing is successful, it will slice off the currently read record and
+// return the remainder as r.
+func parsePAXRecord(s string) (k, v, r string, err error) {
+ // The size field ends at the first space.
+ sp := strings.IndexByte(s, ' ')
+ if sp == -1 {
+ return "", "", s, ErrHeader
+ }
+
+ // Parse the first token as a decimal integer.
+ n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
+ if perr != nil || n < 5 || int64(len(s)) < n {
+ return "", "", s, ErrHeader
+ }
+
+ // Extract everything between the space and the final newline.
+ rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
+ if nl != "\n" {
+ return "", "", s, ErrHeader
+ }
+
+ // The first equals separates the key from the value.
+ eq := strings.IndexByte(rec, '=')
+ if eq == -1 {
+ return "", "", s, ErrHeader
+ }
+ k, v = rec[:eq], rec[eq+1:]
+
+ if !validPAXRecord(k, v) {
+ return "", "", s, ErrHeader
+ }
+ return k, v, rem, nil
+}
+
+// formatPAXRecord formats a single PAX record, prefixing it with the
+// appropriate length.
+func formatPAXRecord(k, v string) (string, error) {
+ if !validPAXRecord(k, v) {
+ return "", ErrHeader
+ }
+
+ const padding = 3 // Extra padding for ' ', '=', and '\n'
+ size := len(k) + len(v) + padding
+ size += len(strconv.Itoa(size))
+ record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
+
+ // Final adjustment if adding size field increased the record size.
+ if len(record) != size {
+ size = len(record)
+ record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
+ }
+ return record, nil
+}
+
+// validPAXRecord reports whether the key-value pair is valid where each
+// record is formatted as:
+// "%d %s=%s\n" % (size, key, value)
+//
+// Keys and values should be UTF-8, but the number of bad writers out there
+// forces us to be more liberal.
+// Thus, we only reject all keys with NUL, and only reject NULs in values
+// for the PAX version of the USTAR string fields.
+// The key must not contain an '=' character.
+func validPAXRecord(k, v string) bool {
+ if k == "" || strings.IndexByte(k, '=') >= 0 {
+ return false
+ }
+ switch k {
+ case paxPath, paxLinkpath, paxUname, paxGname:
+ return !hasNUL(v)
+ default:
+ return !hasNUL(k)
+ }
+}
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/writer.go b/vendor/github.com/vbatts/tar-split/archive/tar/writer.go
index 042638175..e80498d03 100644
--- a/vendor/github.com/vbatts/tar-split/archive/tar/writer.go
+++ b/vendor/github.com/vbatts/tar-split/archive/tar/writer.go
@@ -4,295 +4,410 @@
package tar
-// TODO(dsymonds):
-// - catch more errors (no first header, etc.)
-
import (
- "bytes"
- "errors"
"fmt"
"io"
"path"
"sort"
- "strconv"
"strings"
"time"
)
-var (
- ErrWriteTooLong = errors.New("archive/tar: write too long")
- ErrFieldTooLong = errors.New("archive/tar: header field too long")
- ErrWriteAfterClose = errors.New("archive/tar: write after close")
- errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
-)
-
-// A Writer provides sequential writing of a tar archive in POSIX.1 format.
-// A tar archive consists of a sequence of files.
-// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
-// writing at most hdr.Size bytes in total.
+// Writer provides sequential writing of a tar archive.
+// Writer.WriteHeader begins a new file with the provided Header,
+// and then Writer can be treated as an io.Writer to supply that file's data.
type Writer struct {
- w io.Writer
- err error
- nb int64 // number of unwritten bytes for current file entry
- pad int64 // amount of padding to write after current file entry
- closed bool
- usedBinary bool // whether the binary numeric field extension was used
- preferPax bool // use pax header instead of binary numeric header
- hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
- paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
+ w io.Writer
+ pad int64 // Amount of padding to write after current file entry
+ curr fileWriter // Writer for current file entry
+ hdr Header // Shallow copy of Header that is safe for mutations
+ blk block // Buffer to use as temporary local storage
+
+ // err is a persistent error.
+ // It is only the responsibility of every exported method of Writer to
+ // ensure that this error is sticky.
+ err error
}
-type formatter struct {
- err error // Last error seen
+// NewWriter creates a new Writer writing to w.
+func NewWriter(w io.Writer) *Writer {
+ return &Writer{w: w, curr: &regFileWriter{w, 0}}
}
-// NewWriter creates a new Writer writing to w.
-func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
+type fileWriter interface {
+ io.Writer
+ fileState
-// Flush finishes writing the current file (optional).
+ ReadFrom(io.Reader) (int64, error)
+}
+
+// Flush finishes writing the current file's block padding.
+// The current file must be fully written before Flush can be called.
+//
+// This is unnecessary as the next call to WriteHeader or Close
+// will implicitly flush out the file's padding.
func (tw *Writer) Flush() error {
- if tw.nb > 0 {
- tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
+ if tw.err != nil {
return tw.err
}
-
- n := tw.nb + tw.pad
- for n > 0 && tw.err == nil {
- nr := n
- if nr > blockSize {
- nr = blockSize
- }
- var nw int
- nw, tw.err = tw.w.Write(zeroBlock[0:nr])
- n -= int64(nw)
+ if nb := tw.curr.LogicalRemaining(); nb > 0 {
+ return fmt.Errorf("archive/tar: missed writing %d bytes", nb)
+ }
+ if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
+ return tw.err
}
- tw.nb = 0
tw.pad = 0
- return tw.err
+ return nil
}
-// Write s into b, terminating it with a NUL if there is room.
-func (f *formatter) formatString(b []byte, s string) {
- if len(s) > len(b) {
- f.err = ErrFieldTooLong
- return
+// WriteHeader writes hdr and prepares to accept the file's contents.
+// The Header.Size determines how many bytes can be written for the next file.
+// If the current file is not fully written, then this returns an error.
+// This implicitly flushes any padding necessary before writing the header.
+func (tw *Writer) WriteHeader(hdr *Header) error {
+ if err := tw.Flush(); err != nil {
+ return err
}
- ascii := toASCII(s)
- copy(b, ascii)
- if len(ascii) < len(b) {
- b[len(ascii)] = 0
+ tw.hdr = *hdr // Shallow copy of Header
+
+ // Avoid usage of the legacy TypeRegA flag, and automatically promote
+ // it to use TypeReg or TypeDir.
+ if tw.hdr.Typeflag == TypeRegA {
+ if strings.HasSuffix(tw.hdr.Name, "/") {
+ tw.hdr.Typeflag = TypeDir
+ } else {
+ tw.hdr.Typeflag = TypeReg
+ }
}
-}
-// Encode x as an octal ASCII string and write it into b with leading zeros.
-func (f *formatter) formatOctal(b []byte, x int64) {
- s := strconv.FormatInt(x, 8)
- // leading zeros, but leave room for a NUL.
- for len(s)+1 < len(b) {
- s = "0" + s
+ // Round ModTime and ignore AccessTime and ChangeTime unless
+ // the format is explicitly chosen.
+ // This ensures nominal usage of WriteHeader (without specifying the format)
+ // does not always result in the PAX format being chosen, which
+ // causes a 1KiB increase to every header.
+ if tw.hdr.Format == FormatUnknown {
+ tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second)
+ tw.hdr.AccessTime = time.Time{}
+ tw.hdr.ChangeTime = time.Time{}
}
- f.formatString(b, s)
-}
-// fitsInBase256 reports whether x can be encoded into n bytes using base-256
-// encoding. Unlike octal encoding, base-256 encoding does not require that the
-// string ends with a NUL character. Thus, all n bytes are available for output.
-//
-// If operating in binary mode, this assumes strict GNU binary mode; which means
-// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
-// equivalent to the sign bit in two's complement form.
-func fitsInBase256(n int, x int64) bool {
- var binBits = uint(n-1) * 8
- return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
+ allowedFormats, paxHdrs, err := tw.hdr.allowedFormats()
+ switch {
+ case allowedFormats.has(FormatUSTAR):
+ tw.err = tw.writeUSTARHeader(&tw.hdr)
+ return tw.err
+ case allowedFormats.has(FormatPAX):
+ tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs)
+ return tw.err
+ case allowedFormats.has(FormatGNU):
+ tw.err = tw.writeGNUHeader(&tw.hdr)
+ return tw.err
+ default:
+ return err // Non-fatal error
+ }
}
-// Write x into b, as binary (GNUtar/star extension).
-func (f *formatter) formatNumeric(b []byte, x int64) {
- if fitsInBase256(len(b), x) {
- for i := len(b) - 1; i >= 0; i-- {
- b[i] = byte(x)
- x >>= 8
- }
- b[0] |= 0x80 // Highest bit indicates binary format
- return
+func (tw *Writer) writeUSTARHeader(hdr *Header) error {
+ // Check if we can use USTAR prefix/suffix splitting.
+ var namePrefix string
+ if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok {
+ namePrefix, hdr.Name = prefix, suffix
}
- f.formatOctal(b, 0) // Last resort, just write zero
- f.err = ErrFieldTooLong
+ // Pack the main header.
+ var f formatter
+ blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal)
+ f.formatString(blk.USTAR().Prefix(), namePrefix)
+ blk.SetFormat(FormatUSTAR)
+ if f.err != nil {
+ return f.err // Should never happen since header is validated
+ }
+ return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
}
-var (
- minTime = time.Unix(0, 0)
- // There is room for 11 octal digits (33 bits) of mtime.
- maxTime = minTime.Add((1<<33 - 1) * time.Second)
-)
-
-// WriteHeader writes hdr and prepares to accept the file's contents.
-// WriteHeader calls Flush if it is not the first header.
-// Calling after a Close will return ErrWriteAfterClose.
-func (tw *Writer) WriteHeader(hdr *Header) error {
- return tw.writeHeader(hdr, true)
-}
+func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
+ realName, realSize := hdr.Name, hdr.Size
+
+ // TODO(dsnet): Re-enable this when adding sparse support.
+ // See https://golang.org/issue/22735
+ /*
+ // Handle sparse files.
+ var spd sparseDatas
+ var spb []byte
+ if len(hdr.SparseHoles) > 0 {
+ sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
+ sph = alignSparseEntries(sph, hdr.Size)
+ spd = invertSparseEntries(sph, hdr.Size)
+
+ // Format the sparse map.
+ hdr.Size = 0 // Replace with encoded size
+ spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n')
+ for _, s := range spd {
+ hdr.Size += s.Length
+ spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n')
+ spb = append(strconv.AppendInt(spb, s.Length, 10), '\n')
+ }
+ pad := blockPadding(int64(len(spb)))
+ spb = append(spb, zeroBlock[:pad]...)
+ hdr.Size += int64(len(spb)) // Accounts for encoded sparse map
+
+ // Add and modify appropriate PAX records.
+ dir, file := path.Split(realName)
+ hdr.Name = path.Join(dir, "GNUSparseFile.0", file)
+ paxHdrs[paxGNUSparseMajor] = "1"
+ paxHdrs[paxGNUSparseMinor] = "0"
+ paxHdrs[paxGNUSparseName] = realName
+ paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10)
+ paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10)
+ delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName
+ }
+ */
+ _ = realSize
+
+ // Write PAX records to the output.
+ isGlobal := hdr.Typeflag == TypeXGlobalHeader
+ if len(paxHdrs) > 0 || isGlobal {
+ // Sort keys for deterministic ordering.
+ var keys []string
+ for k := range paxHdrs {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ // Write each record to a buffer.
+ var buf strings.Builder
+ for _, k := range keys {
+ rec, err := formatPAXRecord(k, paxHdrs[k])
+ if err != nil {
+ return err
+ }
+ buf.WriteString(rec)
+ }
-// WriteHeader writes hdr and prepares to accept the file's contents.
-// WriteHeader calls Flush if it is not the first header.
-// Calling after a Close will return ErrWriteAfterClose.
-// As this method is called internally by writePax header to allow it to
-// suppress writing the pax header.
-func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
- if tw.closed {
- return ErrWriteAfterClose
- }
- if tw.err == nil {
- tw.Flush()
- }
- if tw.err != nil {
- return tw.err
+ // Write the extended header file.
+ var name string
+ var flag byte
+ if isGlobal {
+ name = realName
+ if name == "" {
+ name = "GlobalHead.0.0"
+ }
+ flag = TypeXGlobalHeader
+ } else {
+ dir, file := path.Split(realName)
+ name = path.Join(dir, "PaxHeaders.0", file)
+ flag = TypeXHeader
+ }
+ data := buf.String()
+ if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal {
+ return err // Global headers return here
+ }
}
- // a map to hold pax header records, if any are needed
- paxHeaders := make(map[string]string)
+ // Pack the main header.
+ var f formatter // Ignore errors since they are expected
+ fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) }
+ blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal)
+ blk.SetFormat(FormatPAX)
+ if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
+ return err
+ }
- // TODO(shanemhansen): we might want to use PAX headers for
- // subsecond time resolution, but for now let's just capture
- // too long fields or non ascii characters
+ // TODO(dsnet): Re-enable this when adding sparse support.
+ // See https://golang.org/issue/22735
+ /*
+ // Write the sparse map and setup the sparse writer if necessary.
+ if len(spd) > 0 {
+ // Use tw.curr since the sparse map is accounted for in hdr.Size.
+ if _, err := tw.curr.Write(spb); err != nil {
+ return err
+ }
+ tw.curr = &sparseFileWriter{tw.curr, spd, 0}
+ }
+ */
+ return nil
+}
- var f formatter
- var header []byte
-
- // We need to select which scratch buffer to use carefully,
- // since this method is called recursively to write PAX headers.
- // If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
- // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
- // already being used by the non-recursive call, so we must use paxHdrBuff.
- header = tw.hdrBuff[:]
- if !allowPax {
- header = tw.paxHdrBuff[:]
- }
- copy(header, zeroBlock)
- s := slicer(header)
-
- // Wrappers around formatter that automatically sets paxHeaders if the
- // argument extends beyond the capacity of the input byte slice.
- var formatString = func(b []byte, s string, paxKeyword string) {
- needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
- if needsPaxHeader {
- paxHeaders[paxKeyword] = s
- return
+func (tw *Writer) writeGNUHeader(hdr *Header) error {
+ // Use long-link files if Name or Linkname exceeds the field size.
+ const longName = "././@LongLink"
+ if len(hdr.Name) > nameSize {
+ data := hdr.Name + "\x00"
+ if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil {
+ return err
}
- f.formatString(b, s)
- }
- var formatNumeric = func(b []byte, x int64, paxKeyword string) {
- // Try octal first.
- s := strconv.FormatInt(x, 8)
- if len(s) < len(b) {
- f.formatOctal(b, x)
- return
+ }
+ if len(hdr.Linkname) > nameSize {
+ data := hdr.Linkname + "\x00"
+ if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil {
+ return err
}
+ }
- // If it is too long for octal, and PAX is preferred, use a PAX header.
- if paxKeyword != paxNone && tw.preferPax {
- f.formatOctal(b, 0)
- s := strconv.FormatInt(x, 10)
- paxHeaders[paxKeyword] = s
- return
+ // Pack the main header.
+ var f formatter // Ignore errors since they are expected
+ var spd sparseDatas
+ var spb []byte
+ blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric)
+ if !hdr.AccessTime.IsZero() {
+ f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix())
+ }
+ if !hdr.ChangeTime.IsZero() {
+ f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix())
+ }
+ // TODO(dsnet): Re-enable this when adding sparse support.
+ // See https://golang.org/issue/22735
+ /*
+ if hdr.Typeflag == TypeGNUSparse {
+ sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
+ sph = alignSparseEntries(sph, hdr.Size)
+ spd = invertSparseEntries(sph, hdr.Size)
+
+ // Format the sparse map.
+ formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas {
+ for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ {
+ f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset)
+ f.formatNumeric(sa.Entry(i).Length(), sp[0].Length)
+ sp = sp[1:]
+ }
+ if len(sp) > 0 {
+ sa.IsExtended()[0] = 1
+ }
+ return sp
+ }
+ sp2 := formatSPD(spd, blk.GNU().Sparse())
+ for len(sp2) > 0 {
+ var spHdr block
+ sp2 = formatSPD(sp2, spHdr.Sparse())
+ spb = append(spb, spHdr[:]...)
+ }
+
+ // Update size fields in the header block.
+ realSize := hdr.Size
+ hdr.Size = 0 // Encoded size; does not account for encoded sparse map
+ for _, s := range spd {
+ hdr.Size += s.Length
+ }
+ copy(blk.V7().Size(), zeroBlock[:]) // Reset field
+ f.formatNumeric(blk.V7().Size(), hdr.Size)
+ f.formatNumeric(blk.GNU().RealSize(), realSize)
}
-
- tw.usedBinary = true
- f.formatNumeric(b, x)
+ */
+ blk.SetFormat(FormatGNU)
+ if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
+ return err
}
- // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
- pathHeaderBytes := s.next(fileNameSize)
-
- formatString(pathHeaderBytes, hdr.Name, paxPath)
-
- // Handle out of range ModTime carefully.
- var modTime int64
- if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
- modTime = hdr.ModTime.Unix()
+ // Write the extended sparse map and setup the sparse writer if necessary.
+ if len(spd) > 0 {
+ // Use tw.w since the sparse map is not accounted for in hdr.Size.
+ if _, err := tw.w.Write(spb); err != nil {
+ return err
+ }
+ tw.curr = &sparseFileWriter{tw.curr, spd, 0}
}
+ return nil
+}
- f.formatOctal(s.next(8), hdr.Mode) // 100:108
- formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116
- formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124
- formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136
- formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity
- s.next(8) // chksum (148:156)
- s.next(1)[0] = hdr.Typeflag // 156:157
-
- formatString(s.next(100), hdr.Linkname, paxLinkpath)
-
- copy(s.next(8), []byte("ustar\x0000")) // 257:265
- formatString(s.next(32), hdr.Uname, paxUname) // 265:297
- formatString(s.next(32), hdr.Gname, paxGname) // 297:329
- formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337
- formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345
-
- // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
- prefixHeaderBytes := s.next(155)
- formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix
+type (
+ stringFormatter func([]byte, string)
+ numberFormatter func([]byte, int64)
+)
- // Use the GNU magic instead of POSIX magic if we used any GNU extensions.
- if tw.usedBinary {
- copy(header[257:265], []byte("ustar \x00"))
- }
+// templateV7Plus fills out the V7 fields of a block using values from hdr.
+// It also fills out fields (uname, gname, devmajor, devminor) that are
+// shared in the USTAR, PAX, and GNU formats using the provided formatters.
+//
+// The block returned is only valid until the next call to
+// templateV7Plus or writeRawFile.
+func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block {
+ tw.blk.Reset()
+
+ modTime := hdr.ModTime
+ if modTime.IsZero() {
+ modTime = time.Unix(0, 0)
+ }
+
+ v7 := tw.blk.V7()
+ v7.TypeFlag()[0] = hdr.Typeflag
+ fmtStr(v7.Name(), hdr.Name)
+ fmtStr(v7.LinkName(), hdr.Linkname)
+ fmtNum(v7.Mode(), hdr.Mode)
+ fmtNum(v7.UID(), int64(hdr.Uid))
+ fmtNum(v7.GID(), int64(hdr.Gid))
+ fmtNum(v7.Size(), hdr.Size)
+ fmtNum(v7.ModTime(), modTime.Unix())
+
+ ustar := tw.blk.USTAR()
+ fmtStr(ustar.UserName(), hdr.Uname)
+ fmtStr(ustar.GroupName(), hdr.Gname)
+ fmtNum(ustar.DevMajor(), hdr.Devmajor)
+ fmtNum(ustar.DevMinor(), hdr.Devminor)
+
+ return &tw.blk
+}
- _, paxPathUsed := paxHeaders[paxPath]
- // try to use a ustar header when only the name is too long
- if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
- prefix, suffix, ok := splitUSTARPath(hdr.Name)
- if ok {
- // Since we can encode in USTAR format, disable PAX header.
- delete(paxHeaders, paxPath)
+// writeRawFile writes a minimal file with the given name and flag type.
+// It uses format to encode the header format and will write data as the body.
+// It uses default values for all of the other fields (as BSD and GNU tar do).
+func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error {
+ tw.blk.Reset()
- // Update the path fields
- formatString(pathHeaderBytes, suffix, paxNone)
- formatString(prefixHeaderBytes, prefix, paxNone)
- }
+ // Best effort for the filename.
+ name = toASCII(name)
+ if len(name) > nameSize {
+ name = name[:nameSize]
}
+ name = strings.TrimRight(name, "/")
- // The chksum field is terminated by a NUL and a space.
- // This is different from the other octal fields.
- chksum, _ := checksum(header)
- f.formatOctal(header[148:155], chksum) // Never fails
- header[155] = ' '
-
- // Check if there were any formatting errors.
+ var f formatter
+ v7 := tw.blk.V7()
+ v7.TypeFlag()[0] = flag
+ f.formatString(v7.Name(), name)
+ f.formatOctal(v7.Mode(), 0)
+ f.formatOctal(v7.UID(), 0)
+ f.formatOctal(v7.GID(), 0)
+ f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB
+ f.formatOctal(v7.ModTime(), 0)
+ tw.blk.SetFormat(format)
if f.err != nil {
- tw.err = f.err
- return tw.err
+ return f.err // Only occurs if size condition is violated
}
- if allowPax {
- for k, v := range hdr.Xattrs {
- paxHeaders[paxXattr+k] = v
- }
+ // Write the header and data.
+ if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil {
+ return err
}
+ _, err := io.WriteString(tw, data)
+ return err
+}
- if len(paxHeaders) > 0 {
- if !allowPax {
- return errInvalidHeader
- }
- if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
- return err
- }
+// writeRawHeader writes the value of blk, regardless of its value.
+// It sets up the Writer such that it can accept a file of the given size.
+// If the flag is a special header-only flag, then the size is treated as zero.
+func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
+ if err := tw.Flush(); err != nil {
+ return err
}
- tw.nb = int64(hdr.Size)
- tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
-
- _, tw.err = tw.w.Write(header)
- return tw.err
+ if _, err := tw.w.Write(blk[:]); err != nil {
+ return err
+ }
+ if isHeaderOnlyType(flag) {
+ size = 0
+ }
+ tw.curr = &regFileWriter{tw.w, size}
+ tw.pad = blockPadding(size)
+ return nil
}
// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
// If the path is not splittable, then it will return ("", "", false).
func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
length := len(name)
- if length <= fileNameSize || !isASCII(name) {
+ if length <= nameSize || !isASCII(name) { // Names of at most nameSize bytes, or failing isASCII, are never split
return "", "", false
- } else if length > fileNamePrefixSize+1 {
- length = fileNamePrefixSize + 1
+ } else if length > prefixSize+1 {
+ length = prefixSize + 1 // Limit the separator search below to the first prefixSize+1 bytes
} else if name[length-1] == '/' {
length--
}
@@ -300,117 +415,239 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
i := strings.LastIndex(name[:length], "/")
nlen := len(name) - i - 1 // nlen is length of suffix
plen := i // plen is length of prefix
- if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
+ if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { // No usable "/" split point, or a part is empty or over its size limit
return "", "", false
}
return name[:i], name[i+1:], true
}
-// writePaxHeader writes an extended pax header to the
-// archive.
-func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
- // Prepare extended header
- ext := new(Header)
- ext.Typeflag = TypeXHeader
- // Setting ModTime is required for reader parsing to
- // succeed, and seems harmless enough.
- ext.ModTime = hdr.ModTime
- // The spec asks that we namespace our pseudo files
- // with the current pid. However, this results in differing outputs
- // for identical inputs. As such, the constant 0 is now used instead.
- // golang.org/issue/12358
- dir, file := path.Split(hdr.Name)
- fullName := path.Join(dir, "PaxHeaders.0", file)
-
- ascii := toASCII(fullName)
- if len(ascii) > 100 {
- ascii = ascii[:100]
- }
- ext.Name = ascii
- // Construct the body
- var buf bytes.Buffer
-
- // Keys are sorted before writing to body to allow deterministic output.
- var keys []string
- for k := range paxHeaders {
- keys = append(keys, k)
- }
- sort.Strings(keys)
-
- for _, k := range keys {
- fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
- }
-
- ext.Size = int64(len(buf.Bytes()))
- if err := tw.writeHeader(ext, false); err != nil {
- return err
+// Write writes to the current file in the tar archive.
+// Write returns the error ErrWriteTooLong if more than
+// Header.Size bytes are written after WriteHeader.
+//
+// Calling Write on special types like TypeLink, TypeSymlink, TypeChar,
+// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless
+// of what the Header.Size claims.
+func (tw *Writer) Write(b []byte) (int, error) {
+ if tw.err != nil { // A previously recorded error is sticky: nothing is written
+ return 0, tw.err
}
- if _, err := tw.Write(buf.Bytes()); err != nil {
- return err
+ n, err := tw.curr.Write(b)
+ if err != nil && err != ErrWriteTooLong { // ErrWriteTooLong is returned to the caller but not recorded in tw.err
+ tw.err = err
}
- if err := tw.Flush(); err != nil {
- return err
+ return n, err
+}
+
+// readFrom populates the content of the current file by reading from r.
+// The bytes read must match the number of remaining bytes in the current file.
+//
+// If the current file is sparse and r is an io.ReadSeeker,
+// then readFrom uses Seek to skip past holes defined in Header.SparseHoles,
+// assuming that skipped regions are all NULs.
+// This always reads the last byte to ensure r is the right size.
+//
+// TODO(dsnet): Re-export this when adding sparse file support.
+// See https://golang.org/issue/22735
+func (tw *Writer) readFrom(r io.Reader) (int64, error) {
+ if tw.err != nil { // A previously recorded error is sticky: nothing is read
+ return 0, tw.err
}
- return nil
+ n, err := tw.curr.ReadFrom(r)
+ if err != nil && err != ErrWriteTooLong { // ErrWriteTooLong is returned to the caller but not recorded in tw.err
+ tw.err = err
+ }
+ return n, err
}
-// formatPAXRecord formats a single PAX record, prefixing it with the
-// appropriate length.
-func formatPAXRecord(k, v string) string {
- const padding = 3 // Extra padding for ' ', '=', and '\n'
- size := len(k) + len(v) + padding
- size += len(strconv.Itoa(size))
- record := fmt.Sprintf("%d %s=%s\n", size, k, v)
+// Close closes the tar archive by flushing the padding, and writing the footer.
+// If the current file (from a prior call to WriteHeader) is not fully written,
+// then this returns an error.
+func (tw *Writer) Close() error {
+ if tw.err == ErrWriteAfterClose { // Already closed: closing again is a no-op
+ return nil
+ }
+ if tw.err != nil { // Any other recorded error is returned as is, without writing the trailer
+ return tw.err
+ }
- // Final adjustment if adding size field increased the record size.
- if len(record) != size {
- size = len(record)
- record = fmt.Sprintf("%d %s=%s\n", size, k, v)
+ // Trailer: two zero blocks.
+ err := tw.Flush()
+ for i := 0; i < 2 && err == nil; i++ { // Stops at the first failure, including one from Flush
+ _, err = tw.w.Write(zeroBlock[:])
}
- return record
+
+ // Ensure all future actions are invalid.
+ tw.err = ErrWriteAfterClose // Set even when Flush or the trailer write failed
+ return err // Report IO errors
}
-// Write writes to the current entry in the tar archive.
-// Write returns the error ErrWriteTooLong if more than
-// hdr.Size bytes are written after WriteHeader.
-func (tw *Writer) Write(b []byte) (n int, err error) {
- if tw.closed {
- err = ErrWriteAfterClose
- return
- }
- overwrite := false
- if int64(len(b)) > tw.nb {
- b = b[0:tw.nb]
- overwrite = true
- }
- n, err = tw.w.Write(b)
- tw.nb -= int64(n)
- if err == nil && overwrite {
- err = ErrWriteTooLong
- return
- }
- tw.err = err
- return
+// regFileWriter is a fileWriter for writing data to a regular file entry.
+type regFileWriter struct {
+ w io.Writer // Underlying Writer
+ nb int64 // Number of remaining bytes to write; Write subtracts what w accepted
}
-// Close closes the tar archive, flushing any unwritten
-// data to the underlying writer.
-func (tw *Writer) Close() error {
- if tw.err != nil || tw.closed {
- return tw.err
+func (fw *regFileWriter) Write(b []byte) (n int, err error) { // Write forwards at most fw.nb bytes of b to fw.w
+ overwrite := int64(len(b)) > fw.nb
+ if overwrite {
+ b = b[:fw.nb] // Drop the excess; ErrWriteTooLong is reported below
}
- tw.Flush()
- tw.closed = true
- if tw.err != nil {
- return tw.err
+ if len(b) > 0 { // Nothing is sent to fw.w when no bytes remain to be written
+ n, err = fw.w.Write(b)
+ fw.nb -= int64(n)
+ }
+ switch {
+ case err != nil: // An error from fw.w takes precedence over ErrWriteTooLong
+ return n, err
+ case overwrite:
+ return n, ErrWriteTooLong
+ default:
+ return n, nil
}
+}
+
+func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) {
+ return io.Copy(struct{ io.Writer }{fw}, r) // The wrapper exposes only Write, so io.Copy cannot call back into this ReadFrom
+}
- // trailer: two zero blocks
- for i := 0; i < 2; i++ {
- _, tw.err = tw.w.Write(zeroBlock)
- if tw.err != nil {
- break
+func (fw regFileWriter) LogicalRemaining() int64 {
+ return fw.nb // Same counter that PhysicalRemaining reports for this type
+}
+func (fw regFileWriter) PhysicalRemaining() int64 {
+ return fw.nb // Same counter that LogicalRemaining reports for this type
+}
+
+// sparseFileWriter is a fileWriter for writing data to a sparse file entry.
+type sparseFileWriter struct {
+ fw fileWriter // Underlying fileWriter; receives only the bytes of data fragments
+ sp sparseDatas // Normalized list of data fragments; consumed from the front, the last one is kept
+ pos int64 // Current position in sparse file; advanced by Write and ReadFrom
+}
+
+func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
+ overwrite := int64(len(b)) > sw.LogicalRemaining()
+ if overwrite {
+ b = b[:sw.LogicalRemaining()] // Drop the excess; ErrWriteTooLong is reported below
+ }
+
+ b0 := b // Keep the truncated input so n can be computed after the loop
+ endPos := sw.pos + int64(len(b))
+ for endPos > sw.pos && err == nil {
+ var nf int // Bytes written in fragment
+ dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
+ if sw.pos < dataStart { // In a hole fragment
+ bf := b[:min(int64(len(b)), dataStart-sw.pos)]
+ nf, err = zeroWriter{}.Write(bf) // Hole bytes are not forwarded to sw.fw; they are only checked to be NUL
+ } else { // In a data fragment
+ bf := b[:min(int64(len(b)), dataEnd-sw.pos)]
+ nf, err = sw.fw.Write(bf)
+ }
+ b = b[nf:]
+ sw.pos += int64(nf)
+ if sw.pos >= dataEnd && len(sw.sp) > 1 {
+ sw.sp = sw.sp[1:] // Ensure last fragment always remains
}
}
- return tw.err
+
+ n = len(b0) - len(b) // Bytes consumed from the input, hole bytes included
+ switch {
+ case err == ErrWriteTooLong:
+ return n, errMissData // Not possible; implies bug in validation logic
+ case err != nil:
+ return n, err
+ case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
+ return n, errUnrefData // Not possible; implies bug in validation logic
+ case overwrite:
+ return n, ErrWriteTooLong
+ default:
+ return n, nil
+ }
+}
+
+func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) {
+ rs, ok := r.(io.ReadSeeker)
+ if ok {
+ if _, err := rs.Seek(0, io.SeekCurrent); err != nil {
+ ok = false // Not all io.Seeker can really seek
+ }
+ }
+ if !ok {
+ return io.Copy(struct{ io.Writer }{sw}, r) // Fallback: stream through Write; the wrapper hides this ReadFrom from io.Copy
+ }
+
+ var readLastByte bool
+ pos0 := sw.pos // Starting position, used to compute n at the end
+ for sw.LogicalRemaining() > 0 && !readLastByte && err == nil {
+ var nf int64 // Size of fragment
+ dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
+ if sw.pos < dataStart { // In a hole fragment
+ nf = dataStart - sw.pos
+ if sw.PhysicalRemaining() == 0 { // No data left for sw.fw, so this hole is the final fragment
+ readLastByte = true
+ nf-- // Stop one byte short; that byte is read after the loop
+ }
+ _, err = rs.Seek(nf, io.SeekCurrent)
+ } else { // In a data fragment
+ nf = dataEnd - sw.pos
+ nf, err = io.CopyN(sw.fw, rs, nf) // nf becomes the number of bytes actually copied
+ }
+ sw.pos += nf
+ if sw.pos >= dataEnd && len(sw.sp) > 1 {
+ sw.sp = sw.sp[1:] // Ensure last fragment always remains
+ }
+ }
+
+ // If the last fragment is a hole, then seek to 1-byte before EOF, and
+ // read a single byte to ensure the file is the right size.
+ if readLastByte && err == nil {
+ _, err = mustReadFull(rs, []byte{0})
+ sw.pos++ // Advanced whether or not the one-byte read succeeded
+ }
+
+ n = sw.pos - pos0
+ switch {
+ case err == io.EOF:
+ return n, io.ErrUnexpectedEOF
+ case err == ErrWriteTooLong:
+ return n, errMissData // Not possible; implies bug in validation logic
+ case err != nil:
+ return n, err
+ case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
+ return n, errUnrefData // Not possible; implies bug in validation logic
+ default:
+ return n, ensureEOF(rs) // Success additionally requires that rs has no bytes left
+ }
+}
+
+func (sw sparseFileWriter) LogicalRemaining() int64 {
+ return sw.sp[len(sw.sp)-1].endOffset() - sw.pos // End offset of the last fragment minus the current position
+}
+func (sw sparseFileWriter) PhysicalRemaining() int64 {
+ return sw.fw.PhysicalRemaining() // Delegates to the underlying fileWriter
+}
+
+// zeroWriter may only be written with NULs, otherwise it returns errWriteHole.
+type zeroWriter struct{}
+
+func (zeroWriter) Write(b []byte) (int, error) {
+ for i, c := range b {
+ if c != 0 {
+ return i, errWriteHole // i is the number of NUL bytes before the first non-NUL byte
+ }
+ }
+ return len(b), nil // All bytes were NUL; nothing is stored anywhere
+}
+
+// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so.
+func ensureEOF(r io.Reader) error {
+ n, err := tryReadFull(r, []byte{0}) // Probe r with a one-byte read
+ switch {
+ case n > 0: // A byte was available, so r is not at EOF
+ return ErrWriteTooLong
+ case err == io.EOF:
+ return nil
+ default: // No byte read and err is not io.EOF: pass err through unchanged
+ return err
+ }
}