diff options
Diffstat (limited to 'vendor/github.com/vbatts/tar-split/archive/tar/writer.go')
-rw-r--r-- | vendor/github.com/vbatts/tar-split/archive/tar/writer.go | 871 |
1 files changed, 554 insertions, 317 deletions
diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/writer.go b/vendor/github.com/vbatts/tar-split/archive/tar/writer.go index 042638175..e80498d03 100644 --- a/vendor/github.com/vbatts/tar-split/archive/tar/writer.go +++ b/vendor/github.com/vbatts/tar-split/archive/tar/writer.go @@ -4,295 +4,410 @@ package tar -// TODO(dsymonds): -// - catch more errors (no first header, etc.) - import ( - "bytes" - "errors" "fmt" "io" "path" "sort" - "strconv" "strings" "time" ) -var ( - ErrWriteTooLong = errors.New("archive/tar: write too long") - ErrFieldTooLong = errors.New("archive/tar: header field too long") - ErrWriteAfterClose = errors.New("archive/tar: write after close") - errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") -) - -// A Writer provides sequential writing of a tar archive in POSIX.1 format. -// A tar archive consists of a sequence of files. -// Call WriteHeader to begin a new file, and then call Write to supply that file's data, -// writing at most hdr.Size bytes in total. +// Writer provides sequential writing of a tar archive. +// Write.WriteHeader begins a new file with the provided Header, +// and then Writer can be treated as an io.Writer to supply that file's data. type Writer struct { - w io.Writer - err error - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - closed bool - usedBinary bool // whether the binary numeric field extension was used - preferPax bool // use pax header instead of binary numeric header - hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header - paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Writer to + // ensure that this error is sticky. + err error } -type formatter struct { - err error // Last error seen +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: ®FileWriter{w, 0}} } -// NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } +type fileWriter interface { + io.Writer + fileState -// Flush finishes writing the current file (optional). + ReadFrom(io.Reader) (int64, error) +} + +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. +// +// This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. func (tw *Writer) Flush() error { - if tw.nb > 0 { - tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) + if tw.err != nil { return tw.err } - - n := tw.nb + tw.pad - for n > 0 && tw.err == nil { - nr := n - if nr > blockSize { - nr = blockSize - } - var nw int - nw, tw.err = tw.w.Write(zeroBlock[0:nr]) - n -= int64(nw) + if nb := tw.curr.LogicalRemaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) + } + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { + return tw.err } - tw.nb = 0 tw.pad = 0 - return tw.err + return nil } -// Write s into b, terminating it with a NUL if there is room. -func (f *formatter) formatString(b []byte, s string) { - if len(s) > len(b) { - f.err = ErrFieldTooLong - return +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 + tw.hdr = *hdr // Shallow copy of Header + + // Avoid usage of the legacy TypeRegA flag, and automatically promote + // it to use TypeReg or TypeDir. + if tw.hdr.Typeflag == TypeRegA { + if strings.HasSuffix(tw.hdr.Name, "/") { + tw.hdr.Typeflag = TypeDir + } else { + tw.hdr.Typeflag = TypeReg + } } -} -// Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { - s := strconv.FormatInt(x, 8) - // leading zeros, but leave room for a NUL. - for len(s)+1 < len(b) { - s = "0" + s + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. + // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} } - f.formatString(b, s) -} -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. -// -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } } -// Write x into b, as binary (GNUtar/star extension). -func (f *formatter) formatNumeric(b []byte, x int64) { - if fitsInBase256(len(b), x) { - for i := len(b) - 1; i >= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format - return +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. + var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix } - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.USTAR().Prefix(), namePrefix) + blk.SetFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) } -var ( - minTime = time.Unix(0, 0) - // There is room for 11 octal digits (33 bits) of mtime. - maxTime = minTime.Add((1<<33 - 1) * time.Second) -) - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -func (tw *Writer) WriteHeader(hdr *Header) error { - return tw.writeHeader(hdr, true) -} +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + */ + _ = realSize + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. + var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf strings.Builder + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -// As this method is called internally by writePax header to allow it to -// suppress writing the pax header. -func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { - if tw.closed { - return ErrWriteAfterClose - } - if tw.err == nil { - tw.Flush() - } - if tw.err != nil { - return tw.err + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = realName + if name == "" { + name = "GlobalHead.0.0" + } + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here + } } - // a map to hold pax header records, if any are needed - paxHeaders := make(map[string]string) + // Pack the main header. + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.SetFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } - // TODO(shanemhansen): we might want to use PAX headers for - // subsecond time resolution, but for now let's just capture - // too long fields or non ascii characters + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + */ + return nil +} - var f formatter - var header []byte - - // We need to select which scratch buffer to use carefully, - // since this method is called recursively to write PAX headers. - // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. - // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is - // already being used by the non-recursive call, so we must use paxHdrBuff. - header = tw.hdrBuff[:] - if !allowPax { - header = tw.paxHdrBuff[:] - } - copy(header, zeroBlock) - s := slicer(header) - - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { + return err } + } - // If it is too long for octal, and PAX is preferred, use a PAX header. - if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) + } + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + if hdr.Typeflag == TypeGNUSparse { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) } - - tw.usedBinary = true - f.formatNumeric(b, x) + */ + blk.SetFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err } - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - pathHeaderBytes := s.next(fileNameSize) - - formatString(pathHeaderBytes, hdr.Name, paxPath) - - // Handle out of range ModTime carefully. - var modTime int64 - if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { - modTime = hdr.ModTime.Unix() + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} } + return nil +} - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 - - formatString(s.next(100), hdr.Linkname, paxLinkpath) - - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 - - // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) - // Use the GNU magic instead of POSIX magic if we used any GNU extensions. - if tw.usedBinary { - copy(header[257:265], []byte("ustar \x00")) - } +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. +func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.Reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.V7() + v7.TypeFlag()[0] = hdr.Typeflag + fmtStr(v7.Name(), hdr.Name) + fmtStr(v7.LinkName(), hdr.Linkname) + fmtNum(v7.Mode(), hdr.Mode) + fmtNum(v7.UID(), int64(hdr.Uid)) + fmtNum(v7.GID(), int64(hdr.Gid)) + fmtNum(v7.Size(), hdr.Size) + fmtNum(v7.ModTime(), modTime.Unix()) + + ustar := tw.blk.USTAR() + fmtStr(ustar.UserName(), hdr.Uname) + fmtStr(ustar.GroupName(), hdr.Gname) + fmtNum(ustar.DevMajor(), hdr.Devmajor) + fmtNum(ustar.DevMinor(), hdr.Devminor) + + return &tw.blk +} - _, paxPathUsed := paxHeaders[paxPath] - // try to use a ustar header when only the name is too long - if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. - delete(paxHeaders, paxPath) +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. +// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.Reset() - // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) - } + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] } + name = strings.TrimRight(name, "/") - // The chksum field is terminated by a NUL and a space. - // This is different from the other octal fields. - chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails - header[155] = ' ' - - // Check if there were any formatting errors. + var f formatter + v7 := tw.blk.V7() + v7.TypeFlag()[0] = flag + f.formatString(v7.Name(), name) + f.formatOctal(v7.Mode(), 0) + f.formatOctal(v7.UID(), 0) + f.formatOctal(v7.GID(), 0) + f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.ModTime(), 0) + tw.blk.SetFormat(format) if f.err != nil { - tw.err = f.err - return tw.err + return f.err // Only occurs if size condition is violated } - if allowPax { - for k, v := range hdr.Xattrs { - paxHeaders[paxXattr+k] = v - } + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err } + _, err := io.WriteString(tw, data) + return err +} - if len(paxHeaders) > 0 { - if !allowPax { - return errInvalidHeader - } - if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { - return err - } +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. +// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err } - tw.nb = int64(hdr.Size) - tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - - _, tw.err = tw.w.Write(header) - return tw.err + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = ®FileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil } // splitUSTARPath splits a path according to USTAR prefix and suffix rules. // If the path is not splittable, then it will return ("", "", false). func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length <= fileNameSize || !isASCII(name) { + if length <= nameSize || !isASCII(name) { return "", "", false - } else if length > fileNamePrefixSize+1 { - length = fileNamePrefixSize + 1 + } else if length > prefixSize+1 { + length = prefixSize + 1 } else if name[length-1] == '/' { length-- } @@ -300,117 +415,239 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { i := strings.LastIndex(name[:length], "/") nlen := len(name) - i - 1 // nlen is length of suffix plen := i // plen is length of prefix - if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { return "", "", false } return name[:i], name[i+1:], true } -// writePaxHeader writes an extended pax header to the -// archive. -func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { - // Prepare extended header - ext := new(Header) - ext.Typeflag = TypeXHeader - // Setting ModTime is required for reader parsing to - // succeed, and seems harmless enough. - ext.ModTime = hdr.ModTime - // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead. - // golang.org/issue/12358 - dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) - - ascii := toASCII(fullName) - if len(ascii) > 100 { - ascii = ascii[:100] - } - ext.Name = ascii - // Construct the body - var buf bytes.Buffer - - // Keys are sorted before writing to body to allow deterministic output. - var keys []string - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) - } - - ext.Size = int64(len(buf.Bytes())) - if err := tw.writeHeader(ext, false); err != nil { - return err +// Write writes to the current file in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// Header.Size bytes are written after WriteHeader. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. +func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err } - if _, err := tw.Write(buf.Bytes()); err != nil { - return err + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - if err := tw.Flush(); err != nil { - return err + return n, err +} + +// readFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then readFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tw *Writer) readFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err } - return nil + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err } -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length. -func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding - size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil + } + if tw.err != nil { + return tw.err + } - // Final adjustment if adding size field increased the record size. - if len(record) != size { - size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) } - return record + + // Ensure all future actions are invalid. + tw.err = ErrWriteAfterClose + return err // Report IO errors } -// Write writes to the current entry in the tar archive. -// Write returns the error ErrWriteTooLong if more than -// hdr.Size bytes are written after WriteHeader. -func (tw *Writer) Write(b []byte) (n int, err error) { - if tw.closed { - err = ErrWriteAfterClose - return - } - overwrite := false - if int64(len(b)) > tw.nb { - b = b[0:tw.nb] - overwrite = true - } - n, err = tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - err = ErrWriteTooLong - return - } - tw.err = err - return +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write } -// Close closes the tar archive, flushing any unwritten -// data to the underlying writer. -func (tw *Writer) Close() error { - if tw.err != nil || tw.closed { - return tw.err +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] } - tw.Flush() - tw.closed = true - if tw.err != nil { - return tw.err + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} - // trailer: two zero blocks - for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock) - if tw.err != nil { - break +func (fw regFileWriter) LogicalRemaining() int64 { + return fw.nb +} +func (fw regFileWriter) PhysicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.LogicalRemaining() + if overwrite { + b = b[:sw.LogicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains } } - return tw.err + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.PhysicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. + if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) LogicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) PhysicalRemaining() int64 { + return sw.fw.PhysicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. +func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } } |