diff options
author | dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> | 2021-09-13 12:29:39 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-13 12:29:39 +0000 |
commit | eec59cea2c20cf5e515bdcbc8c35eba3e07da607 (patch) | |
tree | 71ec92fd12f16d15f25facd599ef097c2650c5f9 /vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go | |
parent | 0f3d3bd21da0b67542c44c832f0e1642c5f639cf (diff) | |
download | podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.tar.gz podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.tar.bz2 podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.zip |
Bump github.com/containers/storage from 1.35.0 to 1.36.0
Bumps [github.com/containers/storage](https://github.com/containers/storage) from 1.35.0 to 1.36.0.
- [Release notes](https://github.com/containers/storage/releases)
- [Changelog](https://github.com/containers/storage/blob/main/docs/containers-storage-changes.md)
- [Commits](https://github.com/containers/storage/compare/v1.35.0...v1.36.0)
---
updated-dependencies:
- dependency-name: github.com/containers/storage
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
Diffstat (limited to 'vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go')
-rw-r--r-- | vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go | 933 |
1 files changed, 933 insertions, 0 deletions
diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go new file mode 100644 index 000000000..e997d9cce --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go @@ -0,0 +1,933 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "bufio" + "bytes" + "compress/gzip" + "crypto/sha256" + "fmt" + "hash" + "io" + "io/ioutil" + "os" + "path" + "sort" + "strings" + "sync" + "time" + + "github.com/containerd/stargz-snapshotter/estargz/errorutil" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +// A Reader permits random access reads from a stargz file. +type Reader struct { + sr *io.SectionReader + toc *JTOC + tocDigest digest.Digest + + // m stores all non-chunk entries, keyed by name. + m map[string]*TOCEntry + + // chunks stores all TOCEntry values for regular files that + // are split up. For a file with a single chunk, it's only + // stored in m. + chunks map[string][]*TOCEntry + + decompressor Decompressor +} + +type openOpts struct { + tocOffset int64 + decompressors []Decompressor + telemetry *Telemetry +} + +// OpenOption is an option used during opening the layer +type OpenOption func(o *openOpts) error + +// WithTOCOffset option specifies the offset of TOC +func WithTOCOffset(tocOffset int64) OpenOption { + return func(o *openOpts) error { + o.tocOffset = tocOffset + return nil + } +} + +// WithDecompressors option specifies decompressors to use. +// Default is gzip-based decompressor. +func WithDecompressors(decompressors ...Decompressor) OpenOption { + return func(o *openOpts) error { + o.decompressors = decompressors + return nil + } +} + +// WithTelemetry option specifies the telemetry hooks +func WithTelemetry(telemetry *Telemetry) OpenOption { + return func(o *openOpts) error { + o.telemetry = telemetry + return nil + } +} + +// A func which takes start time and records the diff +type MeasureLatencyHook func(time.Time) + +// A struct which defines telemetry hooks. By implementing these hooks you should be able to record +// the latency metrics of the respective steps of estargz open operation. To be used with estargz.OpenWithTelemetry(...) +type Telemetry struct { + GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds) + GetTocLatency MeasureLatencyHook // measure time to GET TOC JSON (in milliseconds) + DeserializeTocLatency MeasureLatencyHook // measure time to deserialize TOC JSON (in milliseconds) +} + +// Open opens a stargz file for reading. +// The behaviour is configurable using options. +// +// Note that each entry name is normalized as the path that is relative to root. +func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) { + var opts openOpts + for _, o := range opt { + if err := o(&opts); err != nil { + return nil, err + } + } + + gzipCompressors := []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} + decompressors := append(gzipCompressors, opts.decompressors...) + + // Determine the size to fetch. Try to fetch as many bytes as possible. + fetchSize := maxFooterSize(sr.Size(), decompressors...) + if maybeTocOffset := opts.tocOffset; maybeTocOffset > fetchSize { + if maybeTocOffset > sr.Size() { + return nil, fmt.Errorf("blob size %d is smaller than the toc offset", sr.Size()) + } + fetchSize = sr.Size() - maybeTocOffset + } + + start := time.Now() // before getting layer footer + footer := make([]byte, fetchSize) + if _, err := sr.ReadAt(footer, sr.Size()-fetchSize); err != nil { + return nil, fmt.Errorf("error reading footer: %v", err) + } + if opts.telemetry != nil && opts.telemetry.GetFooterLatency != nil { + opts.telemetry.GetFooterLatency(start) + } + + var allErr []error + var found bool + var r *Reader + for _, d := range decompressors { + fSize := d.FooterSize() + fOffset := positive(int64(len(footer)) - fSize) + maybeTocBytes := footer[:fOffset] + tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:]) + if err != nil { + allErr = append(allErr, err) + continue + } + if tocSize <= 0 { + tocSize = sr.Size() - tocOffset - fSize + } + if tocSize < int64(len(maybeTocBytes)) { + maybeTocBytes = maybeTocBytes[:tocSize] + } + r, err = parseTOC(d, sr, tocOffset, tocSize, maybeTocBytes, opts) + if err == nil { + found = true + break + } + allErr = append(allErr, err) + } + if !found { + return nil, errorutil.Aggregate(allErr) + } + if err := r.initFields(); err != nil { + return nil, fmt.Errorf("failed to initialize fields of entries: %v", err) + } + return r, nil +} + +// OpenFooter extracts and parses footer from the given blob. +// only supports gzip-based eStargz. +func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error) { + if sr.Size() < FooterSize && sr.Size() < legacyFooterSize { + return 0, 0, fmt.Errorf("blob size %d is smaller than the footer size", sr.Size()) + } + var footer [FooterSize]byte + if _, err := sr.ReadAt(footer[:], sr.Size()-FooterSize); err != nil { + return 0, 0, fmt.Errorf("error reading footer: %v", err) + } + var allErr []error + for _, d := range []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} { + fSize := d.FooterSize() + fOffset := positive(int64(len(footer)) - fSize) + tocOffset, _, err := d.ParseFooter(footer[fOffset:]) + if err == nil { + return tocOffset, fSize, err + } + allErr = append(allErr, err) + } + return 0, 0, errorutil.Aggregate(allErr) +} + +// initFields populates the Reader from r.toc after decoding it from +// JSON. +// +// Unexported fields are populated and TOCEntry fields that were +// implicit in the JSON are populated. +func (r *Reader) initFields() error { + r.m = make(map[string]*TOCEntry, len(r.toc.Entries)) + r.chunks = make(map[string][]*TOCEntry) + var lastPath string + uname := map[int]string{} + gname := map[int]string{} + var lastRegEnt *TOCEntry + for _, ent := range r.toc.Entries { + ent.Name = cleanEntryName(ent.Name) + if ent.Type == "reg" { + lastRegEnt = ent + } + if ent.Type == "chunk" { + ent.Name = lastPath + r.chunks[ent.Name] = append(r.chunks[ent.Name], ent) + if ent.ChunkSize == 0 && lastRegEnt != nil { + ent.ChunkSize = lastRegEnt.Size - ent.ChunkOffset + } + } else { + lastPath = ent.Name + + if ent.Uname != "" { + uname[ent.UID] = ent.Uname + } else { + ent.Uname = uname[ent.UID] + } + if ent.Gname != "" { + gname[ent.GID] = ent.Gname + } else { + ent.Gname = uname[ent.GID] + } + + ent.modTime, _ = time.Parse(time.RFC3339, ent.ModTime3339) + + if ent.Type == "dir" { + ent.NumLink++ // Parent dir links to this directory + } + r.m[ent.Name] = ent + } + if ent.Type == "reg" && ent.ChunkSize > 0 && ent.ChunkSize < ent.Size { + r.chunks[ent.Name] = make([]*TOCEntry, 0, ent.Size/ent.ChunkSize+1) + r.chunks[ent.Name] = append(r.chunks[ent.Name], ent) + } + if ent.ChunkSize == 0 && ent.Size != 0 { + ent.ChunkSize = ent.Size + } + } + + // Populate children, add implicit directories: + for _, ent := range r.toc.Entries { + if ent.Type == "chunk" { + continue + } + // add "foo/": + // add "foo" child to "" (creating "" if necessary) + // + // add "foo/bar/": + // add "bar" child to "foo" (creating "foo" if necessary) + // + // add "foo/bar.txt": + // add "bar.txt" child to "foo" (creating "foo" if necessary) + // + // add "a/b/c/d/e/f.txt": + // create "a/b/c/d/e" node + // add "f.txt" child to "e" + + name := ent.Name + pdirName := parentDir(name) + if name == pdirName { + // This entry and its parent are the same. + // Ignore this for avoiding infinite loop of the reference. + // The example case where this can occur is when tar contains the root + // directory itself (e.g. "./", "/"). + continue + } + pdir := r.getOrCreateDir(pdirName) + ent.NumLink++ // at least one name(ent.Name) references this entry. + if ent.Type == "hardlink" { + if org, ok := r.m[cleanEntryName(ent.LinkName)]; ok { + org.NumLink++ // original entry is referenced by this ent.Name. + ent = org + } else { + return fmt.Errorf("%q is a hardlink but the linkname %q isn't found", ent.Name, ent.LinkName) + } + } + pdir.addChild(path.Base(name), ent) + } + + lastOffset := r.sr.Size() + for i := len(r.toc.Entries) - 1; i >= 0; i-- { + e := r.toc.Entries[i] + if e.isDataType() { + e.nextOffset = lastOffset + } + if e.Offset != 0 { + lastOffset = e.Offset + } + } + + return nil +} + +func parentDir(p string) string { + dir, _ := path.Split(p) + return strings.TrimSuffix(dir, "/") +} + +func (r *Reader) getOrCreateDir(d string) *TOCEntry { + e, ok := r.m[d] + if !ok { + e = &TOCEntry{ + Name: d, + Type: "dir", + Mode: 0755, + NumLink: 2, // The directory itself(.) and the parent link to this directory. + } + r.m[d] = e + if d != "" { + pdir := r.getOrCreateDir(parentDir(d)) + pdir.addChild(path.Base(d), e) + } + } + return e +} + +// VerifyTOC checks that the TOC JSON in the passed blob matches the +// passed digests and that the TOC JSON contains digests for all chunks +// contained in the blob. If the verification succceeds, this function +// returns TOCEntryVerifier which holds all chunk digests in the stargz blob. +func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error) { + // Verify the digest of TOC JSON + if r.tocDigest != tocDigest { + return nil, fmt.Errorf("invalid TOC JSON %q; want %q", r.tocDigest, tocDigest) + } + + chunkDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the chunk digest + regDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the reg file digest + var chunkDigestMapIncomplete bool + var regDigestMapIncomplete bool + var containsChunk bool + for _, e := range r.toc.Entries { + if e.Type != "reg" && e.Type != "chunk" { + continue + } + + // offset must be unique in stargz blob + _, dOK := chunkDigestMap[e.Offset] + _, rOK := regDigestMap[e.Offset] + if dOK || rOK { + return nil, fmt.Errorf("offset %d found twice", e.Offset) + } + + if e.Type == "reg" { + if e.Size == 0 { + continue // ignores empty file + } + + // record the digest of regular file payload + if e.Digest != "" { + d, err := digest.Parse(e.Digest) + if err != nil { + return nil, errors.Wrapf(err, + "failed to parse regular file digest %q", e.Digest) + } + regDigestMap[e.Offset] = d + } else { + regDigestMapIncomplete = true + } + } else { + containsChunk = true // this layer contains "chunk" entries. + } + + // "reg" also can contain ChunkDigest (e.g. when "reg" is the first entry of + // chunked file) + if e.ChunkDigest != "" { + d, err := digest.Parse(e.ChunkDigest) + if err != nil { + return nil, errors.Wrapf(err, + "failed to parse chunk digest %q", e.ChunkDigest) + } + chunkDigestMap[e.Offset] = d + } else { + chunkDigestMapIncomplete = true + } + } + + if chunkDigestMapIncomplete { + // Though some chunk digests are not found, if this layer doesn't contain + // "chunk"s and all digest of "reg" files are recorded, we can use them instead. + if !containsChunk && !regDigestMapIncomplete { + return &verifier{digestMap: regDigestMap}, nil + } + return nil, fmt.Errorf("some ChunkDigest not found in TOC JSON") + } + + return &verifier{digestMap: chunkDigestMap}, nil +} + +// verifier is an implementation of TOCEntryVerifier which holds verifiers keyed by +// offset of the chunk. +type verifier struct { + digestMap map[int64]digest.Digest + digestMapMu sync.Mutex +} + +// Verifier returns a content verifier specified by TOCEntry. +func (v *verifier) Verifier(ce *TOCEntry) (digest.Verifier, error) { + v.digestMapMu.Lock() + defer v.digestMapMu.Unlock() + d, ok := v.digestMap[ce.Offset] + if !ok { + return nil, fmt.Errorf("verifier for offset=%d,size=%d hasn't been registered", + ce.Offset, ce.ChunkSize) + } + return d.Verifier(), nil +} + +// ChunkEntryForOffset returns the TOCEntry containing the byte of the +// named file at the given offset within the file. +// Name must be absolute path or one that is relative to root. +func (r *Reader) ChunkEntryForOffset(name string, offset int64) (e *TOCEntry, ok bool) { + name = cleanEntryName(name) + e, ok = r.Lookup(name) + if !ok || !e.isDataType() { + return nil, false + } + ents := r.chunks[name] + if len(ents) < 2 { + if offset >= e.ChunkSize { + return nil, false + } + return e, true + } + i := sort.Search(len(ents), func(i int) bool { + e := ents[i] + return e.ChunkOffset >= offset || (offset > e.ChunkOffset && offset < e.ChunkOffset+e.ChunkSize) + }) + if i == len(ents) { + return nil, false + } + return ents[i], true +} + +// Lookup returns the Table of Contents entry for the given path. +// +// To get the root directory, use the empty string. +// Path must be absolute path or one that is relative to root. +func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool) { + path = cleanEntryName(path) + if r == nil { + return + } + e, ok = r.m[path] + if ok && e.Type == "hardlink" { + e, ok = r.m[e.LinkName] + } + return +} + +// OpenFile returns the reader of the specified file payload. +// +// Name must be absolute path or one that is relative to root. +func (r *Reader) OpenFile(name string) (*io.SectionReader, error) { + name = cleanEntryName(name) + ent, ok := r.Lookup(name) + if !ok { + // TODO: come up with some error plan. This is lazy: + return nil, &os.PathError{ + Path: name, + Op: "OpenFile", + Err: os.ErrNotExist, + } + } + if ent.Type != "reg" { + return nil, &os.PathError{ + Path: name, + Op: "OpenFile", + Err: errors.New("not a regular file"), + } + } + fr := &fileReader{ + r: r, + size: ent.Size, + ents: r.getChunks(ent), + } + return io.NewSectionReader(fr, 0, fr.size), nil +} + +func (r *Reader) getChunks(ent *TOCEntry) []*TOCEntry { + if ents, ok := r.chunks[ent.Name]; ok { + return ents + } + return []*TOCEntry{ent} +} + +type fileReader struct { + r *Reader + size int64 + ents []*TOCEntry // 1 or more reg/chunk entries +} + +func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) { + if off >= fr.size { + return 0, io.EOF + } + if off < 0 { + return 0, errors.New("invalid offset") + } + var i int + if len(fr.ents) > 1 { + i = sort.Search(len(fr.ents), func(i int) bool { + return fr.ents[i].ChunkOffset >= off + }) + if i == len(fr.ents) { + i = len(fr.ents) - 1 + } + } + ent := fr.ents[i] + if ent.ChunkOffset > off { + if i == 0 { + return 0, errors.New("internal error; first chunk offset is non-zero") + } + ent = fr.ents[i-1] + } + + // If ent is a chunk of a large file, adjust the ReadAt + // offset by the chunk's offset. + off -= ent.ChunkOffset + + finalEnt := fr.ents[len(fr.ents)-1] + compressedOff := ent.Offset + // compressedBytesRemain is the number of compressed bytes in this + // file remaining, over 1+ chunks. + compressedBytesRemain := finalEnt.NextOffset() - compressedOff + + sr := io.NewSectionReader(fr.r.sr, compressedOff, compressedBytesRemain) + + const maxRead = 2 << 20 + var bufSize = maxRead + if compressedBytesRemain < maxRead { + bufSize = int(compressedBytesRemain) + } + + br := bufio.NewReaderSize(sr, bufSize) + if _, err := br.Peek(bufSize); err != nil { + return 0, fmt.Errorf("fileReader.ReadAt.peek: %v", err) + } + + dr, err := fr.r.decompressor.Reader(br) + if err != nil { + return 0, fmt.Errorf("fileReader.ReadAt.decompressor.Reader: %v", err) + } + defer dr.Close() + if n, err := io.CopyN(ioutil.Discard, dr, off); n != off || err != nil { + return 0, fmt.Errorf("discard of %d bytes = %v, %v", off, n, err) + } + return io.ReadFull(dr, p) +} + +// A Writer writes stargz files. +// +// Use NewWriter to create a new Writer. +type Writer struct { + bw *bufio.Writer + cw *countWriter + toc *JTOC + diffHash hash.Hash // SHA-256 of uncompressed tar + + closed bool + gz io.WriteCloser + lastUsername map[int]string + lastGroupname map[int]string + compressor Compressor + + // ChunkSize optionally controls the maximum number of bytes + // of data of a regular file that can be written in one gzip + // stream before a new gzip stream is started. + // Zero means to use a default, currently 4 MiB. + ChunkSize int +} + +// currentCompressionWriter writes to the current w.gz field, which can +// change throughout writing a tar entry. +// +// Additionally, it updates w's SHA-256 of the uncompressed bytes +// of the tar file. +type currentCompressionWriter struct{ w *Writer } + +func (ccw currentCompressionWriter) Write(p []byte) (int, error) { + ccw.w.diffHash.Write(p) + return ccw.w.gz.Write(p) +} + +func (w *Writer) chunkSize() int { + if w.ChunkSize <= 0 { + return 4 << 20 + } + return w.ChunkSize +} + +// NewWriter returns a new stargz writer (gzip-based) writing to w. +// +// The writer must be closed to write its trailing table of contents. +func NewWriter(w io.Writer) *Writer { + return NewWriterLevel(w, gzip.BestCompression) +} + +// NewWriterLevel returns a new stargz writer (gzip-based) writing to w. +// The compression level is configurable. +// +// The writer must be closed to write its trailing table of contents. +func NewWriterLevel(w io.Writer, compressionLevel int) *Writer { + return NewWriterWithCompressor(w, NewGzipCompressorWithLevel(compressionLevel)) +} + +// NewWriterLevel returns a new stargz writer writing to w. +// The compression method is configurable. +// +// The writer must be closed to write its trailing table of contents. +func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer { + bw := bufio.NewWriter(w) + cw := &countWriter{w: bw} + return &Writer{ + bw: bw, + cw: cw, + toc: &JTOC{Version: 1}, + diffHash: sha256.New(), + compressor: c, + } +} + +// Close writes the stargz's table of contents and flushes all the +// buffers, returning any error. +func (w *Writer) Close() (digest.Digest, error) { + if w.closed { + return "", nil + } + defer func() { w.closed = true }() + + if err := w.closeGz(); err != nil { + return "", err + } + + // Write the TOC index and footer. + tocDigest, err := w.compressor.WriteTOCAndFooter(w.cw, w.cw.n, w.toc, w.diffHash) + if err != nil { + return "", err + } + if err := w.bw.Flush(); err != nil { + return "", err + } + + return tocDigest, nil +} + +func (w *Writer) closeGz() error { + if w.closed { + return errors.New("write on closed Writer") + } + if w.gz != nil { + if err := w.gz.Close(); err != nil { + return err + } + w.gz = nil + } + return nil +} + +// nameIfChanged returns name, unless it was the already the value of (*mp)[id], +// in which case it returns the empty string. +func (w *Writer) nameIfChanged(mp *map[int]string, id int, name string) string { + if name == "" { + return "" + } + if *mp == nil { + *mp = make(map[int]string) + } + if (*mp)[id] == name { + return "" + } + (*mp)[id] = name + return name +} + +func (w *Writer) condOpenGz() (err error) { + if w.gz == nil { + w.gz, err = w.compressor.Writer(w.cw) + } + return +} + +// AppendTar reads the tar or tar.gz file from r and appends +// each of its contents to w. +// +// The input r can optionally be gzip compressed but the output will +// always be gzip compressed. +func (w *Writer) AppendTar(r io.Reader) error { + br := bufio.NewReader(r) + var tr *tar.Reader + if isGzip(br) { + // NewReader can't fail if isGzip returned true. + zr, _ := gzip.NewReader(br) + tr = tar.NewReader(zr) + } else { + tr = tar.NewReader(br) + } + for { + h, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("error reading from source tar: tar.Reader.Next: %v", err) + } + if h.Name == TOCTarName { + // It is possible for a layer to be "stargzified" twice during the + // distribution lifecycle. So we reserve "TOCTarName" here to avoid + // duplicated entries in the resulting layer. + continue + } + + xattrs := make(map[string][]byte) + const xattrPAXRecordsPrefix = "SCHILY.xattr." + if h.PAXRecords != nil { + for k, v := range h.PAXRecords { + if strings.HasPrefix(k, xattrPAXRecordsPrefix) { + xattrs[k[len(xattrPAXRecordsPrefix):]] = []byte(v) + } + } + } + ent := &TOCEntry{ + Name: h.Name, + Mode: h.Mode, + UID: h.Uid, + GID: h.Gid, + Uname: w.nameIfChanged(&w.lastUsername, h.Uid, h.Uname), + Gname: w.nameIfChanged(&w.lastGroupname, h.Gid, h.Gname), + ModTime3339: formatModtime(h.ModTime), + Xattrs: xattrs, + } + if err := w.condOpenGz(); err != nil { + return err + } + tw := tar.NewWriter(currentCompressionWriter{w}) + if err := tw.WriteHeader(h); err != nil { + return err + } + switch h.Typeflag { + case tar.TypeLink: + ent.Type = "hardlink" + ent.LinkName = h.Linkname + case tar.TypeSymlink: + ent.Type = "symlink" + ent.LinkName = h.Linkname + case tar.TypeDir: + ent.Type = "dir" + case tar.TypeReg: + ent.Type = "reg" + ent.Size = h.Size + case tar.TypeChar: + ent.Type = "char" + ent.DevMajor = int(h.Devmajor) + ent.DevMinor = int(h.Devminor) + case tar.TypeBlock: + ent.Type = "block" + ent.DevMajor = int(h.Devmajor) + ent.DevMinor = int(h.Devminor) + case tar.TypeFifo: + ent.Type = "fifo" + default: + return fmt.Errorf("unsupported input tar entry %q", h.Typeflag) + } + + // We need to keep a reference to the TOC entry for regular files, so that we + // can fill the digest later. + var regFileEntry *TOCEntry + var payloadDigest digest.Digester + if h.Typeflag == tar.TypeReg { + regFileEntry = ent + payloadDigest = digest.Canonical.Digester() + } + + if h.Typeflag == tar.TypeReg && ent.Size > 0 { + var written int64 + totalSize := ent.Size // save it before we destroy ent + tee := io.TeeReader(tr, payloadDigest.Hash()) + for written < totalSize { + if err := w.closeGz(); err != nil { + return err + } + + chunkSize := int64(w.chunkSize()) + remain := totalSize - written + if remain < chunkSize { + chunkSize = remain + } else { + ent.ChunkSize = chunkSize + } + ent.Offset = w.cw.n + ent.ChunkOffset = written + chunkDigest := digest.Canonical.Digester() + + if err := w.condOpenGz(); err != nil { + return err + } + + teeChunk := io.TeeReader(tee, chunkDigest.Hash()) + if _, err := io.CopyN(tw, teeChunk, chunkSize); err != nil { + return fmt.Errorf("error copying %q: %v", h.Name, err) + } + ent.ChunkDigest = chunkDigest.Digest().String() + w.toc.Entries = append(w.toc.Entries, ent) + written += chunkSize + ent = &TOCEntry{ + Name: h.Name, + Type: "chunk", + } + } + } else { + w.toc.Entries = append(w.toc.Entries, ent) + } + if payloadDigest != nil { + regFileEntry.Digest = payloadDigest.Digest().String() + } + if err := tw.Flush(); err != nil { + return err + } + } + return nil +} + +// DiffID returns the SHA-256 of the uncompressed tar bytes. +// It is only valid to call DiffID after Close. +func (w *Writer) DiffID() string { + return fmt.Sprintf("sha256:%x", w.diffHash.Sum(nil)) +} + +func maxFooterSize(blobSize int64, decompressors ...Decompressor) (res int64) { + for _, d := range decompressors { + if s := d.FooterSize(); res < s && s <= blobSize { + res = s + } + } + return +} + +func parseTOC(d Decompressor, sr *io.SectionReader, tocOff, tocSize int64, tocBytes []byte, opts openOpts) (*Reader, error) { + if len(tocBytes) > 0 { + start := time.Now() + toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes)) + if err == nil { + if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil { + opts.telemetry.DeserializeTocLatency(start) + } + return &Reader{ + sr: sr, + toc: toc, + tocDigest: tocDgst, + decompressor: d, + }, nil + } + } + + start := time.Now() + tocBytes = make([]byte, tocSize) + if _, err := sr.ReadAt(tocBytes, tocOff); err != nil { + return nil, fmt.Errorf("error reading %d byte TOC targz: %v", len(tocBytes), err) + } + if opts.telemetry != nil && opts.telemetry.GetTocLatency != nil { + opts.telemetry.GetTocLatency(start) + } + start = time.Now() + toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes)) + if err != nil { + return nil, err + } + if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil { + opts.telemetry.DeserializeTocLatency(start) + } + return &Reader{ + sr: sr, + toc: toc, + tocDigest: tocDgst, + decompressor: d, + }, nil +} + +func formatModtime(t time.Time) string { + if t.IsZero() || t.Unix() == 0 { + return "" + } + return t.UTC().Round(time.Second).Format(time.RFC3339) +} + +func cleanEntryName(name string) string { + // Use path.Clean to consistently deal with path separators across platforms. + return strings.TrimPrefix(path.Clean("/"+name), "/") +} + +// countWriter counts how many bytes have been written to its wrapped +// io.Writer. +type countWriter struct { + w io.Writer + n int64 +} + +func (cw *countWriter) Write(p []byte) (n int, err error) { + n, err = cw.w.Write(p) + cw.n += int64(n) + return +} + +// isGzip reports whether br is positioned right before an upcoming gzip stream. +// It does not consume any bytes from br. +func isGzip(br *bufio.Reader) bool { + const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + ) + peek, _ := br.Peek(3) + return len(peek) >= 3 && peek[0] == gzipID1 && peek[1] == gzipID2 && peek[2] == gzipDeflate +} + +func positive(n int64) int64 { + if n < 0 { + return 0 + } + return n +} |