From b0cbcd1d0943a0b34d54c58c475c8239a32e0a59 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Mon, 13 Sep 2021 18:48:48 +0200 Subject: bump c/common to v0.44.0 Signed-off-by: Paul Holzinger --- .../containerd/stargz-snapshotter/estargz/LICENSE | 202 ++ .../containerd/stargz-snapshotter/estargz/build.go | 628 +++++++ .../stargz-snapshotter/estargz/errorutil/errors.go | 40 + .../stargz-snapshotter/estargz/estargz.go | 933 ++++++++++ .../containerd/stargz-snapshotter/estargz/go.mod | 10 + .../containerd/stargz-snapshotter/estargz/go.sum | 8 + .../containerd/stargz-snapshotter/estargz/gzip.go | 216 +++ .../stargz-snapshotter/estargz/testutil.go | 1933 ++++++++++++++++++++ .../containerd/stargz-snapshotter/estargz/types.go | 314 ++++ 9 files changed, 4284 insertions(+) create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/LICENSE create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/build.go create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/errorutil/errors.go create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go create mode 100644 vendor/github.com/containerd/stargz-snapshotter/estargz/types.go (limited to 'vendor/github.com/containerd/stargz-snapshotter/estargz') diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/LICENSE b/vendor/github.com/containerd/stargz-snapshotter/estargz/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go new file mode 100644 index 000000000..708b26689 --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/build.go @@ -0,0 +1,628 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "runtime" + "strings" + "sync" + + "github.com/containerd/stargz-snapshotter/estargz/errorutil" + "github.com/klauspost/compress/zstd" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" +) + +type options struct { + chunkSize int + compressionLevel int + prioritizedFiles []string + missedPrioritizedFiles *[]string + compression Compression +} + +type Option func(o *options) error + +// WithChunkSize option specifies the chunk size of eStargz blob to build. +func WithChunkSize(chunkSize int) Option { + return func(o *options) error { + o.chunkSize = chunkSize + return nil + } +} + +// WithCompressionLevel option specifies the gzip compression level. +// The default is gzip.BestCompression. +// See also: https://godoc.org/compress/gzip#pkg-constants +func WithCompressionLevel(level int) Option { + return func(o *options) error { + o.compressionLevel = level + return nil + } +} + +// WithPrioritizedFiles option specifies the list of prioritized files. +// These files must be complete paths that are absolute or relative to "/" +// For example, all of "foo/bar", "/foo/bar", "./foo/bar" and "../foo/bar" +// are treated as "/foo/bar". +func WithPrioritizedFiles(files []string) Option { + return func(o *options) error { + o.prioritizedFiles = files + return nil + } +} + +// WithAllowPrioritizeNotFound makes Build continue the execution even if some +// of prioritized files specified by WithPrioritizedFiles option aren't found +// in the input tar. Instead, this records all missed file names to the passed +// slice. +func WithAllowPrioritizeNotFound(missedFiles *[]string) Option { + return func(o *options) error { + if missedFiles == nil { + return fmt.Errorf("WithAllowPrioritizeNotFound: slice must be passed") + } + o.missedPrioritizedFiles = missedFiles + return nil + } +} + +// WithCompression specifies compression algorithm to be used. +// Default is gzip. +func WithCompression(compression Compression) Option { + return func(o *options) error { + o.compression = compression + return nil + } +} + +// Blob is an eStargz blob. +type Blob struct { + io.ReadCloser + diffID digest.Digester + tocDigest digest.Digest +} + +// DiffID returns the digest of uncompressed blob. +// It is only valid to call DiffID after Close. +func (b *Blob) DiffID() digest.Digest { + return b.diffID.Digest() +} + +// TOCDigest returns the digest of uncompressed TOC JSON. +func (b *Blob) TOCDigest() digest.Digest { + return b.tocDigest +} + +// Build builds an eStargz blob which is an extended version of stargz, from a blob (gzip, zstd +// or plain tar) passed through the argument. If there are some prioritized files are listed in +// the option, these files are grouped as "prioritized" and can be used for runtime optimization +// (e.g. prefetch). This function builds a blob in parallel, with dividing that blob into several +// (at least the number of runtime.GOMAXPROCS(0)) sub-blobs. +func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) { + var opts options + opts.compressionLevel = gzip.BestCompression // BestCompression by default + for _, o := range opt { + if err := o(&opts); err != nil { + return nil, err + } + } + if opts.compression == nil { + opts.compression = newGzipCompressionWithLevel(opts.compressionLevel) + } + layerFiles := newTempFiles() + defer func() { + if rErr != nil { + if err := layerFiles.CleanupAll(); err != nil { + rErr = errors.Wrapf(rErr, "failed to cleanup tmp files: %v", err) + } + } + }() + tarBlob, err := decompressBlob(tarBlob, layerFiles) + if err != nil { + return nil, err + } + entries, err := sortEntries(tarBlob, opts.prioritizedFiles, opts.missedPrioritizedFiles) + if err != nil { + return nil, err + } + tarParts := divideEntries(entries, runtime.GOMAXPROCS(0)) + writers := make([]*Writer, len(tarParts)) + payloads := make([]*os.File, len(tarParts)) + var mu sync.Mutex + var eg errgroup.Group + for i, parts := range tarParts { + i, parts := i, parts + // builds verifiable stargz sub-blobs + eg.Go(func() error { + esgzFile, err := layerFiles.TempFile("", "esgzdata") + if err != nil { + return err + } + sw := NewWriterWithCompressor(esgzFile, opts.compression) + sw.ChunkSize = opts.chunkSize + if err := sw.AppendTar(readerFromEntries(parts...)); err != nil { + return err + } + mu.Lock() + writers[i] = sw + payloads[i] = esgzFile + mu.Unlock() + return nil + }) + } + if err := eg.Wait(); err != nil { + rErr = err + return nil, err + } + tocAndFooter, tocDgst, err := closeWithCombine(opts.compressionLevel, writers...) + if err != nil { + rErr = err + return nil, err + } + var rs []io.Reader + for _, p := range payloads { + fs, err := fileSectionReader(p) + if err != nil { + return nil, err + } + rs = append(rs, fs) + } + diffID := digest.Canonical.Digester() + pr, pw := io.Pipe() + go func() { + r, err := opts.compression.Reader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw)) + if err != nil { + pw.CloseWithError(err) + return + } + defer r.Close() + if _, err := io.Copy(diffID.Hash(), r); err != nil { + pw.CloseWithError(err) + return + } + pw.Close() + }() + return &Blob{ + ReadCloser: readCloser{ + Reader: pr, + closeFunc: layerFiles.CleanupAll, + }, + tocDigest: tocDgst, + diffID: diffID, + }, nil +} + +// closeWithCombine takes unclosed Writers and close them. This also returns the +// toc that combined all Writers into. +// Writers doesn't write TOC and footer to the underlying writers so they can be +// combined into a single eStargz and tocAndFooter returned by this function can +// be appended at the tail of that combined blob. +func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Digest, err error) { + if len(ws) == 0 { + return nil, "", fmt.Errorf("at least one writer must be passed") + } + for _, w := range ws { + if w.closed { + return nil, "", fmt.Errorf("writer must be unclosed") + } + defer func(w *Writer) { w.closed = true }(w) + if err := w.closeGz(); err != nil { + return nil, "", err + } + if err := w.bw.Flush(); err != nil { + return nil, "", err + } + } + var ( + mtoc = new(JTOC) + currentOffset int64 + ) + mtoc.Version = ws[0].toc.Version + for _, w := range ws { + for _, e := range w.toc.Entries { + // Recalculate Offset of non-empty files/chunks + if (e.Type == "reg" && e.Size > 0) || e.Type == "chunk" { + e.Offset += currentOffset + } + mtoc.Entries = append(mtoc.Entries, e) + } + if w.toc.Version > mtoc.Version { + mtoc.Version = w.toc.Version + } + currentOffset += w.cw.n + } + + return tocAndFooter(ws[0].compressor, mtoc, currentOffset) +} + +func tocAndFooter(compressor Compressor, toc *JTOC, offset int64) (io.Reader, digest.Digest, error) { + buf := new(bytes.Buffer) + tocDigest, err := compressor.WriteTOCAndFooter(buf, offset, toc, nil) + if err != nil { + return nil, "", err + } + return buf, tocDigest, nil +} + +// divideEntries divides passed entries to the parts at least the number specified by the +// argument. +func divideEntries(entries []*entry, minPartsNum int) (set [][]*entry) { + var estimatedSize int64 + for _, e := range entries { + estimatedSize += e.header.Size + } + unitSize := estimatedSize / int64(minPartsNum) + var ( + nextEnd = unitSize + offset int64 + ) + set = append(set, []*entry{}) + for _, e := range entries { + set[len(set)-1] = append(set[len(set)-1], e) + offset += e.header.Size + if offset > nextEnd { + set = append(set, []*entry{}) + nextEnd += unitSize + } + } + return +} + +var errNotFound = errors.New("not found") + +// sortEntries reads the specified tar blob and returns a list of tar entries. +// If some of prioritized files are specified, the list starts from these +// files with keeping the order specified by the argument. +func sortEntries(in io.ReaderAt, prioritized []string, missedPrioritized *[]string) ([]*entry, error) { + + // Import tar file. + intar, err := importTar(in) + if err != nil { + return nil, errors.Wrap(err, "failed to sort") + } + + // Sort the tar file respecting to the prioritized files list. + sorted := &tarFile{} + for _, l := range prioritized { + if err := moveRec(l, intar, sorted); err != nil { + if errors.Is(err, errNotFound) && missedPrioritized != nil { + *missedPrioritized = append(*missedPrioritized, l) + continue // allow not found + } + return nil, errors.Wrap(err, "failed to sort tar entries") + } + } + if len(prioritized) == 0 { + sorted.add(&entry{ + header: &tar.Header{ + Name: NoPrefetchLandmark, + Typeflag: tar.TypeReg, + Size: int64(len([]byte{landmarkContents})), + }, + payload: bytes.NewReader([]byte{landmarkContents}), + }) + } else { + sorted.add(&entry{ + header: &tar.Header{ + Name: PrefetchLandmark, + Typeflag: tar.TypeReg, + Size: int64(len([]byte{landmarkContents})), + }, + payload: bytes.NewReader([]byte{landmarkContents}), + }) + } + + // Dump all entry and concatinate them. + return append(sorted.dump(), intar.dump()...), nil +} + +// readerFromEntries returns a reader of tar archive that contains entries passed +// through the arguments. +func readerFromEntries(entries ...*entry) io.Reader { + pr, pw := io.Pipe() + go func() { + tw := tar.NewWriter(pw) + defer tw.Close() + for _, entry := range entries { + if err := tw.WriteHeader(entry.header); err != nil { + pw.CloseWithError(fmt.Errorf("Failed to write tar header: %v", err)) + return + } + if _, err := io.Copy(tw, entry.payload); err != nil { + pw.CloseWithError(fmt.Errorf("Failed to write tar payload: %v", err)) + return + } + } + pw.Close() + }() + return pr +} + +func importTar(in io.ReaderAt) (*tarFile, error) { + tf := &tarFile{} + pw, err := newCountReader(in) + if err != nil { + return nil, errors.Wrap(err, "failed to make position watcher") + } + tr := tar.NewReader(pw) + + // Walk through all nodes. + for { + // Fetch and parse next header. + h, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } else { + return nil, errors.Wrap(err, "failed to parse tar file") + } + } + switch cleanEntryName(h.Name) { + case PrefetchLandmark, NoPrefetchLandmark: + // Ignore existing landmark + continue + } + + // Add entry. If it already exists, replace it. + if _, ok := tf.get(h.Name); ok { + tf.remove(h.Name) + } + tf.add(&entry{ + header: h, + payload: io.NewSectionReader(in, pw.currentPos(), h.Size), + }) + } + + return tf, nil +} + +func moveRec(name string, in *tarFile, out *tarFile) error { + name = cleanEntryName(name) + if name == "" { // root directory. stop recursion. + if e, ok := in.get(name); ok { + // entry of the root directory exists. we should move it as well. + // this case will occur if tar entries are prefixed with "./", "/", etc. + out.add(e) + in.remove(name) + } + return nil + } + + _, okIn := in.get(name) + _, okOut := out.get(name) + if !okIn && !okOut { + return errors.Wrapf(errNotFound, "file: %q", name) + } + + parent, _ := path.Split(strings.TrimSuffix(name, "/")) + if err := moveRec(parent, in, out); err != nil { + return err + } + if e, ok := in.get(name); ok && e.header.Typeflag == tar.TypeLink { + if err := moveRec(e.header.Linkname, in, out); err != nil { + return err + } + } + if e, ok := in.get(name); ok { + out.add(e) + in.remove(name) + } + return nil +} + +type entry struct { + header *tar.Header + payload io.ReadSeeker +} + +type tarFile struct { + index map[string]*entry + stream []*entry +} + +func (f *tarFile) add(e *entry) { + if f.index == nil { + f.index = make(map[string]*entry) + } + f.index[cleanEntryName(e.header.Name)] = e + f.stream = append(f.stream, e) +} + +func (f *tarFile) remove(name string) { + name = cleanEntryName(name) + if f.index != nil { + delete(f.index, name) + } + var filtered []*entry + for _, e := range f.stream { + if cleanEntryName(e.header.Name) == name { + continue + } + filtered = append(filtered, e) + } + f.stream = filtered +} + +func (f *tarFile) get(name string) (e *entry, ok bool) { + if f.index == nil { + return nil, false + } + e, ok = f.index[cleanEntryName(name)] + return +} + +func (f *tarFile) dump() []*entry { + return f.stream +} + +type readCloser struct { + io.Reader + closeFunc func() error +} + +func (rc readCloser) Close() error { + return rc.closeFunc() +} + +func fileSectionReader(file *os.File) (*io.SectionReader, error) { + info, err := file.Stat() + if err != nil { + return nil, err + } + return io.NewSectionReader(file, 0, info.Size()), nil +} + +func newTempFiles() *tempFiles { + return &tempFiles{} +} + +type tempFiles struct { + files []*os.File + filesMu sync.Mutex +} + +func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) { + f, err := ioutil.TempFile(dir, pattern) + if err != nil { + return nil, err + } + tf.filesMu.Lock() + tf.files = append(tf.files, f) + tf.filesMu.Unlock() + return f, nil +} + +func (tf *tempFiles) CleanupAll() error { + tf.filesMu.Lock() + defer tf.filesMu.Unlock() + var allErr []error + for _, f := range tf.files { + if err := f.Close(); err != nil { + allErr = append(allErr, err) + } + if err := os.Remove(f.Name()); err != nil { + allErr = append(allErr, err) + } + } + tf.files = nil + return errorutil.Aggregate(allErr) +} + +func newCountReader(r io.ReaderAt) (*countReader, error) { + pos := int64(0) + return &countReader{r: r, cPos: &pos}, nil +} + +type countReader struct { + r io.ReaderAt + cPos *int64 + + mu sync.Mutex +} + +func (cr *countReader) Read(p []byte) (int, error) { + cr.mu.Lock() + defer cr.mu.Unlock() + + n, err := cr.r.ReadAt(p, *cr.cPos) + if err == nil { + *cr.cPos += int64(n) + } + return n, err +} + +func (cr *countReader) Seek(offset int64, whence int) (int64, error) { + cr.mu.Lock() + defer cr.mu.Unlock() + + switch whence { + default: + return 0, fmt.Errorf("Unknown whence: %v", whence) + case io.SeekStart: + case io.SeekCurrent: + offset += *cr.cPos + case io.SeekEnd: + return 0, fmt.Errorf("Unsupported whence: %v", whence) + } + + if offset < 0 { + return 0, fmt.Errorf("invalid offset") + } + *cr.cPos = offset + return offset, nil +} + +func (cr *countReader) currentPos() int64 { + cr.mu.Lock() + defer cr.mu.Unlock() + + return *cr.cPos +} + +func decompressBlob(org *io.SectionReader, tmp *tempFiles) (*io.SectionReader, error) { + if org.Size() < 4 { + return org, nil + } + src := make([]byte, 4) + if _, err := org.Read(src); err != nil && err != io.EOF { + return nil, err + } + var dR io.Reader + if bytes.Equal([]byte{0x1F, 0x8B, 0x08}, src[:3]) { + // gzip + dgR, err := gzip.NewReader(io.NewSectionReader(org, 0, org.Size())) + if err != nil { + return nil, err + } + defer dgR.Close() + dR = io.Reader(dgR) + } else if bytes.Equal([]byte{0x28, 0xb5, 0x2f, 0xfd}, src[:4]) { + // zstd + dzR, err := zstd.NewReader(io.NewSectionReader(org, 0, org.Size())) + if err != nil { + return nil, err + } + defer dzR.Close() + dR = io.Reader(dzR) + } else { + // uncompressed + return io.NewSectionReader(org, 0, org.Size()), nil + } + b, err := tmp.TempFile("", "uncompresseddata") + if err != nil { + return nil, err + } + if _, err := io.Copy(b, dR); err != nil { + return nil, err + } + return fileSectionReader(b) +} diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/errorutil/errors.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/errorutil/errors.go new file mode 100644 index 000000000..6de78b02d --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/errorutil/errors.go @@ -0,0 +1,40 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package errorutil + +import ( + "errors" + "fmt" + "strings" +) + +// Aggregate combines a list of errors into a single new error. +func Aggregate(errs []error) error { + switch len(errs) { + case 0: + return nil + case 1: + return errs[0] + default: + points := make([]string, len(errs)+1) + points[0] = fmt.Sprintf("%d error(s) occurred:", len(errs)) + for i, err := range errs { + points[i+1] = fmt.Sprintf("* %s", err) + } + return errors.New(strings.Join(points, "\n\t")) + } +} diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go new file mode 100644 index 000000000..e997d9cce --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/estargz.go @@ -0,0 +1,933 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "bufio" + "bytes" + "compress/gzip" + "crypto/sha256" + "fmt" + "hash" + "io" + "io/ioutil" + "os" + "path" + "sort" + "strings" + "sync" + "time" + + "github.com/containerd/stargz-snapshotter/estargz/errorutil" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +// A Reader permits random access reads from a stargz file. +type Reader struct { + sr *io.SectionReader + toc *JTOC + tocDigest digest.Digest + + // m stores all non-chunk entries, keyed by name. + m map[string]*TOCEntry + + // chunks stores all TOCEntry values for regular files that + // are split up. For a file with a single chunk, it's only + // stored in m. + chunks map[string][]*TOCEntry + + decompressor Decompressor +} + +type openOpts struct { + tocOffset int64 + decompressors []Decompressor + telemetry *Telemetry +} + +// OpenOption is an option used during opening the layer +type OpenOption func(o *openOpts) error + +// WithTOCOffset option specifies the offset of TOC +func WithTOCOffset(tocOffset int64) OpenOption { + return func(o *openOpts) error { + o.tocOffset = tocOffset + return nil + } +} + +// WithDecompressors option specifies decompressors to use. +// Default is gzip-based decompressor. +func WithDecompressors(decompressors ...Decompressor) OpenOption { + return func(o *openOpts) error { + o.decompressors = decompressors + return nil + } +} + +// WithTelemetry option specifies the telemetry hooks +func WithTelemetry(telemetry *Telemetry) OpenOption { + return func(o *openOpts) error { + o.telemetry = telemetry + return nil + } +} + +// A func which takes start time and records the diff +type MeasureLatencyHook func(time.Time) + +// A struct which defines telemetry hooks. By implementing these hooks you should be able to record +// the latency metrics of the respective steps of estargz open operation. To be used with estargz.OpenWithTelemetry(...) +type Telemetry struct { + GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds) + GetTocLatency MeasureLatencyHook // measure time to GET TOC JSON (in milliseconds) + DeserializeTocLatency MeasureLatencyHook // measure time to deserialize TOC JSON (in milliseconds) +} + +// Open opens a stargz file for reading. +// The behaviour is configurable using options. +// +// Note that each entry name is normalized as the path that is relative to root. +func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) { + var opts openOpts + for _, o := range opt { + if err := o(&opts); err != nil { + return nil, err + } + } + + gzipCompressors := []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} + decompressors := append(gzipCompressors, opts.decompressors...) + + // Determine the size to fetch. Try to fetch as many bytes as possible. + fetchSize := maxFooterSize(sr.Size(), decompressors...) + if maybeTocOffset := opts.tocOffset; maybeTocOffset > fetchSize { + if maybeTocOffset > sr.Size() { + return nil, fmt.Errorf("blob size %d is smaller than the toc offset", sr.Size()) + } + fetchSize = sr.Size() - maybeTocOffset + } + + start := time.Now() // before getting layer footer + footer := make([]byte, fetchSize) + if _, err := sr.ReadAt(footer, sr.Size()-fetchSize); err != nil { + return nil, fmt.Errorf("error reading footer: %v", err) + } + if opts.telemetry != nil && opts.telemetry.GetFooterLatency != nil { + opts.telemetry.GetFooterLatency(start) + } + + var allErr []error + var found bool + var r *Reader + for _, d := range decompressors { + fSize := d.FooterSize() + fOffset := positive(int64(len(footer)) - fSize) + maybeTocBytes := footer[:fOffset] + tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:]) + if err != nil { + allErr = append(allErr, err) + continue + } + if tocSize <= 0 { + tocSize = sr.Size() - tocOffset - fSize + } + if tocSize < int64(len(maybeTocBytes)) { + maybeTocBytes = maybeTocBytes[:tocSize] + } + r, err = parseTOC(d, sr, tocOffset, tocSize, maybeTocBytes, opts) + if err == nil { + found = true + break + } + allErr = append(allErr, err) + } + if !found { + return nil, errorutil.Aggregate(allErr) + } + if err := r.initFields(); err != nil { + return nil, fmt.Errorf("failed to initialize fields of entries: %v", err) + } + return r, nil +} + +// OpenFooter extracts and parses footer from the given blob. +// only supports gzip-based eStargz. +func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error) { + if sr.Size() < FooterSize && sr.Size() < legacyFooterSize { + return 0, 0, fmt.Errorf("blob size %d is smaller than the footer size", sr.Size()) + } + var footer [FooterSize]byte + if _, err := sr.ReadAt(footer[:], sr.Size()-FooterSize); err != nil { + return 0, 0, fmt.Errorf("error reading footer: %v", err) + } + var allErr []error + for _, d := range []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} { + fSize := d.FooterSize() + fOffset := positive(int64(len(footer)) - fSize) + tocOffset, _, err := d.ParseFooter(footer[fOffset:]) + if err == nil { + return tocOffset, fSize, err + } + allErr = append(allErr, err) + } + return 0, 0, errorutil.Aggregate(allErr) +} + +// initFields populates the Reader from r.toc after decoding it from +// JSON. +// +// Unexported fields are populated and TOCEntry fields that were +// implicit in the JSON are populated. +func (r *Reader) initFields() error { + r.m = make(map[string]*TOCEntry, len(r.toc.Entries)) + r.chunks = make(map[string][]*TOCEntry) + var lastPath string + uname := map[int]string{} + gname := map[int]string{} + var lastRegEnt *TOCEntry + for _, ent := range r.toc.Entries { + ent.Name = cleanEntryName(ent.Name) + if ent.Type == "reg" { + lastRegEnt = ent + } + if ent.Type == "chunk" { + ent.Name = lastPath + r.chunks[ent.Name] = append(r.chunks[ent.Name], ent) + if ent.ChunkSize == 0 && lastRegEnt != nil { + ent.ChunkSize = lastRegEnt.Size - ent.ChunkOffset + } + } else { + lastPath = ent.Name + + if ent.Uname != "" { + uname[ent.UID] = ent.Uname + } else { + ent.Uname = uname[ent.UID] + } + if ent.Gname != "" { + gname[ent.GID] = ent.Gname + } else { + ent.Gname = uname[ent.GID] + } + + ent.modTime, _ = time.Parse(time.RFC3339, ent.ModTime3339) + + if ent.Type == "dir" { + ent.NumLink++ // Parent dir links to this directory + } + r.m[ent.Name] = ent + } + if ent.Type == "reg" && ent.ChunkSize > 0 && ent.ChunkSize < ent.Size { + r.chunks[ent.Name] = make([]*TOCEntry, 0, ent.Size/ent.ChunkSize+1) + r.chunks[ent.Name] = append(r.chunks[ent.Name], ent) + } + if ent.ChunkSize == 0 && ent.Size != 0 { + ent.ChunkSize = ent.Size + } + } + + // Populate children, add implicit directories: + for _, ent := range r.toc.Entries { + if ent.Type == "chunk" { + continue + } + // add "foo/": + // add "foo" child to "" (creating "" if necessary) + // + // add "foo/bar/": + // add "bar" child to "foo" (creating "foo" if necessary) + // + // add "foo/bar.txt": + // add "bar.txt" child to "foo" (creating "foo" if necessary) + // + // add "a/b/c/d/e/f.txt": + // create "a/b/c/d/e" node + // add "f.txt" child to "e" + + name := ent.Name + pdirName := parentDir(name) + if name == pdirName { + // This entry and its parent are the same. + // Ignore this for avoiding infinite loop of the reference. + // The example case where this can occur is when tar contains the root + // directory itself (e.g. "./", "/"). + continue + } + pdir := r.getOrCreateDir(pdirName) + ent.NumLink++ // at least one name(ent.Name) references this entry. + if ent.Type == "hardlink" { + if org, ok := r.m[cleanEntryName(ent.LinkName)]; ok { + org.NumLink++ // original entry is referenced by this ent.Name. + ent = org + } else { + return fmt.Errorf("%q is a hardlink but the linkname %q isn't found", ent.Name, ent.LinkName) + } + } + pdir.addChild(path.Base(name), ent) + } + + lastOffset := r.sr.Size() + for i := len(r.toc.Entries) - 1; i >= 0; i-- { + e := r.toc.Entries[i] + if e.isDataType() { + e.nextOffset = lastOffset + } + if e.Offset != 0 { + lastOffset = e.Offset + } + } + + return nil +} + +func parentDir(p string) string { + dir, _ := path.Split(p) + return strings.TrimSuffix(dir, "/") +} + +func (r *Reader) getOrCreateDir(d string) *TOCEntry { + e, ok := r.m[d] + if !ok { + e = &TOCEntry{ + Name: d, + Type: "dir", + Mode: 0755, + NumLink: 2, // The directory itself(.) and the parent link to this directory. + } + r.m[d] = e + if d != "" { + pdir := r.getOrCreateDir(parentDir(d)) + pdir.addChild(path.Base(d), e) + } + } + return e +} + +// VerifyTOC checks that the TOC JSON in the passed blob matches the +// passed digests and that the TOC JSON contains digests for all chunks +// contained in the blob. If the verification succceeds, this function +// returns TOCEntryVerifier which holds all chunk digests in the stargz blob. +func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error) { + // Verify the digest of TOC JSON + if r.tocDigest != tocDigest { + return nil, fmt.Errorf("invalid TOC JSON %q; want %q", r.tocDigest, tocDigest) + } + + chunkDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the chunk digest + regDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the reg file digest + var chunkDigestMapIncomplete bool + var regDigestMapIncomplete bool + var containsChunk bool + for _, e := range r.toc.Entries { + if e.Type != "reg" && e.Type != "chunk" { + continue + } + + // offset must be unique in stargz blob + _, dOK := chunkDigestMap[e.Offset] + _, rOK := regDigestMap[e.Offset] + if dOK || rOK { + return nil, fmt.Errorf("offset %d found twice", e.Offset) + } + + if e.Type == "reg" { + if e.Size == 0 { + continue // ignores empty file + } + + // record the digest of regular file payload + if e.Digest != "" { + d, err := digest.Parse(e.Digest) + if err != nil { + return nil, errors.Wrapf(err, + "failed to parse regular file digest %q", e.Digest) + } + regDigestMap[e.Offset] = d + } else { + regDigestMapIncomplete = true + } + } else { + containsChunk = true // this layer contains "chunk" entries. + } + + // "reg" also can contain ChunkDigest (e.g. when "reg" is the first entry of + // chunked file) + if e.ChunkDigest != "" { + d, err := digest.Parse(e.ChunkDigest) + if err != nil { + return nil, errors.Wrapf(err, + "failed to parse chunk digest %q", e.ChunkDigest) + } + chunkDigestMap[e.Offset] = d + } else { + chunkDigestMapIncomplete = true + } + } + + if chunkDigestMapIncomplete { + // Though some chunk digests are not found, if this layer doesn't contain + // "chunk"s and all digest of "reg" files are recorded, we can use them instead. + if !containsChunk && !regDigestMapIncomplete { + return &verifier{digestMap: regDigestMap}, nil + } + return nil, fmt.Errorf("some ChunkDigest not found in TOC JSON") + } + + return &verifier{digestMap: chunkDigestMap}, nil +} + +// verifier is an implementation of TOCEntryVerifier which holds verifiers keyed by +// offset of the chunk. +type verifier struct { + digestMap map[int64]digest.Digest + digestMapMu sync.Mutex +} + +// Verifier returns a content verifier specified by TOCEntry. +func (v *verifier) Verifier(ce *TOCEntry) (digest.Verifier, error) { + v.digestMapMu.Lock() + defer v.digestMapMu.Unlock() + d, ok := v.digestMap[ce.Offset] + if !ok { + return nil, fmt.Errorf("verifier for offset=%d,size=%d hasn't been registered", + ce.Offset, ce.ChunkSize) + } + return d.Verifier(), nil +} + +// ChunkEntryForOffset returns the TOCEntry containing the byte of the +// named file at the given offset within the file. +// Name must be absolute path or one that is relative to root. +func (r *Reader) ChunkEntryForOffset(name string, offset int64) (e *TOCEntry, ok bool) { + name = cleanEntryName(name) + e, ok = r.Lookup(name) + if !ok || !e.isDataType() { + return nil, false + } + ents := r.chunks[name] + if len(ents) < 2 { + if offset >= e.ChunkSize { + return nil, false + } + return e, true + } + i := sort.Search(len(ents), func(i int) bool { + e := ents[i] + return e.ChunkOffset >= offset || (offset > e.ChunkOffset && offset < e.ChunkOffset+e.ChunkSize) + }) + if i == len(ents) { + return nil, false + } + return ents[i], true +} + +// Lookup returns the Table of Contents entry for the given path. +// +// To get the root directory, use the empty string. +// Path must be absolute path or one that is relative to root. +func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool) { + path = cleanEntryName(path) + if r == nil { + return + } + e, ok = r.m[path] + if ok && e.Type == "hardlink" { + e, ok = r.m[e.LinkName] + } + return +} + +// OpenFile returns the reader of the specified file payload. +// +// Name must be absolute path or one that is relative to root. +func (r *Reader) OpenFile(name string) (*io.SectionReader, error) { + name = cleanEntryName(name) + ent, ok := r.Lookup(name) + if !ok { + // TODO: come up with some error plan. This is lazy: + return nil, &os.PathError{ + Path: name, + Op: "OpenFile", + Err: os.ErrNotExist, + } + } + if ent.Type != "reg" { + return nil, &os.PathError{ + Path: name, + Op: "OpenFile", + Err: errors.New("not a regular file"), + } + } + fr := &fileReader{ + r: r, + size: ent.Size, + ents: r.getChunks(ent), + } + return io.NewSectionReader(fr, 0, fr.size), nil +} + +func (r *Reader) getChunks(ent *TOCEntry) []*TOCEntry { + if ents, ok := r.chunks[ent.Name]; ok { + return ents + } + return []*TOCEntry{ent} +} + +type fileReader struct { + r *Reader + size int64 + ents []*TOCEntry // 1 or more reg/chunk entries +} + +func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) { + if off >= fr.size { + return 0, io.EOF + } + if off < 0 { + return 0, errors.New("invalid offset") + } + var i int + if len(fr.ents) > 1 { + i = sort.Search(len(fr.ents), func(i int) bool { + return fr.ents[i].ChunkOffset >= off + }) + if i == len(fr.ents) { + i = len(fr.ents) - 1 + } + } + ent := fr.ents[i] + if ent.ChunkOffset > off { + if i == 0 { + return 0, errors.New("internal error; first chunk offset is non-zero") + } + ent = fr.ents[i-1] + } + + // If ent is a chunk of a large file, adjust the ReadAt + // offset by the chunk's offset. + off -= ent.ChunkOffset + + finalEnt := fr.ents[len(fr.ents)-1] + compressedOff := ent.Offset + // compressedBytesRemain is the number of compressed bytes in this + // file remaining, over 1+ chunks. + compressedBytesRemain := finalEnt.NextOffset() - compressedOff + + sr := io.NewSectionReader(fr.r.sr, compressedOff, compressedBytesRemain) + + const maxRead = 2 << 20 + var bufSize = maxRead + if compressedBytesRemain < maxRead { + bufSize = int(compressedBytesRemain) + } + + br := bufio.NewReaderSize(sr, bufSize) + if _, err := br.Peek(bufSize); err != nil { + return 0, fmt.Errorf("fileReader.ReadAt.peek: %v", err) + } + + dr, err := fr.r.decompressor.Reader(br) + if err != nil { + return 0, fmt.Errorf("fileReader.ReadAt.decompressor.Reader: %v", err) + } + defer dr.Close() + if n, err := io.CopyN(ioutil.Discard, dr, off); n != off || err != nil { + return 0, fmt.Errorf("discard of %d bytes = %v, %v", off, n, err) + } + return io.ReadFull(dr, p) +} + +// A Writer writes stargz files. +// +// Use NewWriter to create a new Writer. +type Writer struct { + bw *bufio.Writer + cw *countWriter + toc *JTOC + diffHash hash.Hash // SHA-256 of uncompressed tar + + closed bool + gz io.WriteCloser + lastUsername map[int]string + lastGroupname map[int]string + compressor Compressor + + // ChunkSize optionally controls the maximum number of bytes + // of data of a regular file that can be written in one gzip + // stream before a new gzip stream is started. + // Zero means to use a default, currently 4 MiB. + ChunkSize int +} + +// currentCompressionWriter writes to the current w.gz field, which can +// change throughout writing a tar entry. +// +// Additionally, it updates w's SHA-256 of the uncompressed bytes +// of the tar file. +type currentCompressionWriter struct{ w *Writer } + +func (ccw currentCompressionWriter) Write(p []byte) (int, error) { + ccw.w.diffHash.Write(p) + return ccw.w.gz.Write(p) +} + +func (w *Writer) chunkSize() int { + if w.ChunkSize <= 0 { + return 4 << 20 + } + return w.ChunkSize +} + +// NewWriter returns a new stargz writer (gzip-based) writing to w. +// +// The writer must be closed to write its trailing table of contents. +func NewWriter(w io.Writer) *Writer { + return NewWriterLevel(w, gzip.BestCompression) +} + +// NewWriterLevel returns a new stargz writer (gzip-based) writing to w. +// The compression level is configurable. +// +// The writer must be closed to write its trailing table of contents. +func NewWriterLevel(w io.Writer, compressionLevel int) *Writer { + return NewWriterWithCompressor(w, NewGzipCompressorWithLevel(compressionLevel)) +} + +// NewWriterLevel returns a new stargz writer writing to w. +// The compression method is configurable. +// +// The writer must be closed to write its trailing table of contents. +func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer { + bw := bufio.NewWriter(w) + cw := &countWriter{w: bw} + return &Writer{ + bw: bw, + cw: cw, + toc: &JTOC{Version: 1}, + diffHash: sha256.New(), + compressor: c, + } +} + +// Close writes the stargz's table of contents and flushes all the +// buffers, returning any error. +func (w *Writer) Close() (digest.Digest, error) { + if w.closed { + return "", nil + } + defer func() { w.closed = true }() + + if err := w.closeGz(); err != nil { + return "", err + } + + // Write the TOC index and footer. + tocDigest, err := w.compressor.WriteTOCAndFooter(w.cw, w.cw.n, w.toc, w.diffHash) + if err != nil { + return "", err + } + if err := w.bw.Flush(); err != nil { + return "", err + } + + return tocDigest, nil +} + +func (w *Writer) closeGz() error { + if w.closed { + return errors.New("write on closed Writer") + } + if w.gz != nil { + if err := w.gz.Close(); err != nil { + return err + } + w.gz = nil + } + return nil +} + +// nameIfChanged returns name, unless it was the already the value of (*mp)[id], +// in which case it returns the empty string. +func (w *Writer) nameIfChanged(mp *map[int]string, id int, name string) string { + if name == "" { + return "" + } + if *mp == nil { + *mp = make(map[int]string) + } + if (*mp)[id] == name { + return "" + } + (*mp)[id] = name + return name +} + +func (w *Writer) condOpenGz() (err error) { + if w.gz == nil { + w.gz, err = w.compressor.Writer(w.cw) + } + return +} + +// AppendTar reads the tar or tar.gz file from r and appends +// each of its contents to w. +// +// The input r can optionally be gzip compressed but the output will +// always be gzip compressed. +func (w *Writer) AppendTar(r io.Reader) error { + br := bufio.NewReader(r) + var tr *tar.Reader + if isGzip(br) { + // NewReader can't fail if isGzip returned true. + zr, _ := gzip.NewReader(br) + tr = tar.NewReader(zr) + } else { + tr = tar.NewReader(br) + } + for { + h, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("error reading from source tar: tar.Reader.Next: %v", err) + } + if h.Name == TOCTarName { + // It is possible for a layer to be "stargzified" twice during the + // distribution lifecycle. So we reserve "TOCTarName" here to avoid + // duplicated entries in the resulting layer. + continue + } + + xattrs := make(map[string][]byte) + const xattrPAXRecordsPrefix = "SCHILY.xattr." + if h.PAXRecords != nil { + for k, v := range h.PAXRecords { + if strings.HasPrefix(k, xattrPAXRecordsPrefix) { + xattrs[k[len(xattrPAXRecordsPrefix):]] = []byte(v) + } + } + } + ent := &TOCEntry{ + Name: h.Name, + Mode: h.Mode, + UID: h.Uid, + GID: h.Gid, + Uname: w.nameIfChanged(&w.lastUsername, h.Uid, h.Uname), + Gname: w.nameIfChanged(&w.lastGroupname, h.Gid, h.Gname), + ModTime3339: formatModtime(h.ModTime), + Xattrs: xattrs, + } + if err := w.condOpenGz(); err != nil { + return err + } + tw := tar.NewWriter(currentCompressionWriter{w}) + if err := tw.WriteHeader(h); err != nil { + return err + } + switch h.Typeflag { + case tar.TypeLink: + ent.Type = "hardlink" + ent.LinkName = h.Linkname + case tar.TypeSymlink: + ent.Type = "symlink" + ent.LinkName = h.Linkname + case tar.TypeDir: + ent.Type = "dir" + case tar.TypeReg: + ent.Type = "reg" + ent.Size = h.Size + case tar.TypeChar: + ent.Type = "char" + ent.DevMajor = int(h.Devmajor) + ent.DevMinor = int(h.Devminor) + case tar.TypeBlock: + ent.Type = "block" + ent.DevMajor = int(h.Devmajor) + ent.DevMinor = int(h.Devminor) + case tar.TypeFifo: + ent.Type = "fifo" + default: + return fmt.Errorf("unsupported input tar entry %q", h.Typeflag) + } + + // We need to keep a reference to the TOC entry for regular files, so that we + // can fill the digest later. + var regFileEntry *TOCEntry + var payloadDigest digest.Digester + if h.Typeflag == tar.TypeReg { + regFileEntry = ent + payloadDigest = digest.Canonical.Digester() + } + + if h.Typeflag == tar.TypeReg && ent.Size > 0 { + var written int64 + totalSize := ent.Size // save it before we destroy ent + tee := io.TeeReader(tr, payloadDigest.Hash()) + for written < totalSize { + if err := w.closeGz(); err != nil { + return err + } + + chunkSize := int64(w.chunkSize()) + remain := totalSize - written + if remain < chunkSize { + chunkSize = remain + } else { + ent.ChunkSize = chunkSize + } + ent.Offset = w.cw.n + ent.ChunkOffset = written + chunkDigest := digest.Canonical.Digester() + + if err := w.condOpenGz(); err != nil { + return err + } + + teeChunk := io.TeeReader(tee, chunkDigest.Hash()) + if _, err := io.CopyN(tw, teeChunk, chunkSize); err != nil { + return fmt.Errorf("error copying %q: %v", h.Name, err) + } + ent.ChunkDigest = chunkDigest.Digest().String() + w.toc.Entries = append(w.toc.Entries, ent) + written += chunkSize + ent = &TOCEntry{ + Name: h.Name, + Type: "chunk", + } + } + } else { + w.toc.Entries = append(w.toc.Entries, ent) + } + if payloadDigest != nil { + regFileEntry.Digest = payloadDigest.Digest().String() + } + if err := tw.Flush(); err != nil { + return err + } + } + return nil +} + +// DiffID returns the SHA-256 of the uncompressed tar bytes. +// It is only valid to call DiffID after Close. +func (w *Writer) DiffID() string { + return fmt.Sprintf("sha256:%x", w.diffHash.Sum(nil)) +} + +func maxFooterSize(blobSize int64, decompressors ...Decompressor) (res int64) { + for _, d := range decompressors { + if s := d.FooterSize(); res < s && s <= blobSize { + res = s + } + } + return +} + +func parseTOC(d Decompressor, sr *io.SectionReader, tocOff, tocSize int64, tocBytes []byte, opts openOpts) (*Reader, error) { + if len(tocBytes) > 0 { + start := time.Now() + toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes)) + if err == nil { + if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil { + opts.telemetry.DeserializeTocLatency(start) + } + return &Reader{ + sr: sr, + toc: toc, + tocDigest: tocDgst, + decompressor: d, + }, nil + } + } + + start := time.Now() + tocBytes = make([]byte, tocSize) + if _, err := sr.ReadAt(tocBytes, tocOff); err != nil { + return nil, fmt.Errorf("error reading %d byte TOC targz: %v", len(tocBytes), err) + } + if opts.telemetry != nil && opts.telemetry.GetTocLatency != nil { + opts.telemetry.GetTocLatency(start) + } + start = time.Now() + toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes)) + if err != nil { + return nil, err + } + if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil { + opts.telemetry.DeserializeTocLatency(start) + } + return &Reader{ + sr: sr, + toc: toc, + tocDigest: tocDgst, + decompressor: d, + }, nil +} + +func formatModtime(t time.Time) string { + if t.IsZero() || t.Unix() == 0 { + return "" + } + return t.UTC().Round(time.Second).Format(time.RFC3339) +} + +func cleanEntryName(name string) string { + // Use path.Clean to consistently deal with path separators across platforms. + return strings.TrimPrefix(path.Clean("/"+name), "/") +} + +// countWriter counts how many bytes have been written to its wrapped +// io.Writer. +type countWriter struct { + w io.Writer + n int64 +} + +func (cw *countWriter) Write(p []byte) (n int, err error) { + n, err = cw.w.Write(p) + cw.n += int64(n) + return +} + +// isGzip reports whether br is positioned right before an upcoming gzip stream. +// It does not consume any bytes from br. +func isGzip(br *bufio.Reader) bool { + const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + ) + peek, _ := br.Peek(3) + return len(peek) >= 3 && peek[0] == gzipID1 && peek[1] == gzipID2 && peek[2] == gzipDeflate +} + +func positive(n int64) int64 { + if n < 0 { + return 0 + } + return n +} diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod new file mode 100644 index 000000000..ee6b2e17f --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.mod @@ -0,0 +1,10 @@ +module github.com/containerd/stargz-snapshotter/estargz + +go 1.16 + +require ( + github.com/klauspost/compress v1.13.5 + github.com/opencontainers/go-digest v1.0.0 + github.com/pkg/errors v0.9.1 + golang.org/x/sync v0.0.0-20201207232520-09787c993a3a +) diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum new file mode 100644 index 000000000..66cd2d69c --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/go.sum @@ -0,0 +1,8 @@ +github.com/klauspost/compress v1.13.5 h1:9O69jUPDcsT9fEm74W92rZL9FQY7rCdaXVneq+yyzl4= +github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a h1:DcqTD9SDLc+1P/r1EmRBwnVsrOwW+kk2vWf9n+1sGhs= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go new file mode 100644 index 000000000..efc435e09 --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/gzip.go @@ -0,0 +1,216 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "encoding/binary" + "encoding/json" + "fmt" + "hash" + "io" + "strconv" + + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +type gzipCompression struct { + *gzipCompressor + *GzipDecompressor +} + +func newGzipCompressionWithLevel(level int) Compression { + return &gzipCompression{ + &gzipCompressor{level}, + &GzipDecompressor{}, + } +} + +func NewGzipCompressorWithLevel(level int) Compressor { + return &gzipCompressor{level} +} + +type gzipCompressor struct { + compressionLevel int +} + +func (gc *gzipCompressor) Writer(w io.Writer) (io.WriteCloser, error) { + return gzip.NewWriterLevel(w, gc.compressionLevel) +} + +func (gc *gzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error) { + tocJSON, err := json.MarshalIndent(toc, "", "\t") + if err != nil { + return "", err + } + gz, _ := gzip.NewWriterLevel(w, gc.compressionLevel) + gw := io.Writer(gz) + if diffHash != nil { + gw = io.MultiWriter(gz, diffHash) + } + tw := tar.NewWriter(gw) + if err := tw.WriteHeader(&tar.Header{ + Typeflag: tar.TypeReg, + Name: TOCTarName, + Size: int64(len(tocJSON)), + }); err != nil { + return "", err + } + if _, err := tw.Write(tocJSON); err != nil { + return "", err + } + + if err := tw.Close(); err != nil { + return "", err + } + if err := gz.Close(); err != nil { + return "", err + } + if _, err := w.Write(gzipFooterBytes(off)); err != nil { + return "", err + } + return digest.FromBytes(tocJSON), nil +} + +// gzipFooterBytes returns the 51 bytes footer. +func gzipFooterBytes(tocOff int64) []byte { + buf := bytes.NewBuffer(make([]byte, 0, FooterSize)) + gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep 51 bytes + + // Extra header indicating the offset of TOCJSON + // https://tools.ietf.org/html/rfc1952#section-2.3.1.1 + header := make([]byte, 4) + header[0], header[1] = 'S', 'G' + subfield := fmt.Sprintf("%016xSTARGZ", tocOff) + binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952 + gz.Header.Extra = append(header, []byte(subfield)...) + gz.Close() + if buf.Len() != FooterSize { + panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize)) + } + return buf.Bytes() +} + +type GzipDecompressor struct{} + +func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) { + return gzip.NewReader(r) +} + +func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) { + return parseTOCEStargz(r) +} + +func (gz *GzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) { + if len(p) != FooterSize { + return 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p)) + } + zr, err := gzip.NewReader(bytes.NewReader(p)) + if err != nil { + return 0, 0, err + } + defer zr.Close() + extra := zr.Header.Extra + si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:] + if si1 != 'S' || si2 != 'G' { + return 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2) + } + if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(16+len("STARGZ")) { + return 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ")) + } + if string(subfield[16:]) != "STARGZ" { + return 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield") + } + tocOffset, err = strconv.ParseInt(string(subfield[:16]), 16, 64) + if err != nil { + return 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset") + } + return tocOffset, 0, nil +} + +func (gz *GzipDecompressor) FooterSize() int64 { + return FooterSize +} + +type legacyGzipDecompressor struct{} + +func (gz *legacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) { + return gzip.NewReader(r) +} + +func (gz *legacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) { + return parseTOCEStargz(r) +} + +func (gz *legacyGzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) { + if len(p) != legacyFooterSize { + return 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p)) + } + zr, err := gzip.NewReader(bytes.NewReader(p)) + if err != nil { + return 0, 0, errors.Wrapf(err, "legacy: failed to get footer gzip reader") + } + defer zr.Close() + extra := zr.Header.Extra + if len(extra) != 16+len("STARGZ") { + return 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size") + } + if string(extra[16:]) != "STARGZ" { + return 0, 0, fmt.Errorf("legacy: magic string STARGZ not found") + } + tocOffset, err = strconv.ParseInt(string(extra[:16]), 16, 64) + if err != nil { + return 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset") + } + return tocOffset, 0, nil +} + +func (gz *legacyGzipDecompressor) FooterSize() int64 { + return legacyFooterSize +} + +func parseTOCEStargz(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) { + zr, err := gzip.NewReader(r) + if err != nil { + return nil, "", fmt.Errorf("malformed TOC gzip header: %v", err) + } + defer zr.Close() + zr.Multistream(false) + tr := tar.NewReader(zr) + h, err := tr.Next() + if err != nil { + return nil, "", fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err) + } + if h.Name != TOCTarName { + return nil, "", fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, TOCTarName) + } + dgstr := digest.Canonical.Digester() + toc = new(JTOC) + if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil { + return nil, "", fmt.Errorf("error decoding TOC JSON: %v", err) + } + return toc, dgstr.Digest(), nil +} diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go new file mode 100644 index 000000000..020729b7e --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/testutil.go @@ -0,0 +1,1933 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "reflect" + "sort" + "strings" + "testing" + "time" + + "github.com/containerd/stargz-snapshotter/estargz/errorutil" + "github.com/klauspost/compress/zstd" + digest "github.com/opencontainers/go-digest" + "github.com/pkg/errors" +) + +// TestingController is Compression with some helper methods necessary for testing. +type TestingController interface { + Compression + CountStreams(*testing.T, []byte) int + DiffIDOf(*testing.T, []byte) string + String() string +} + +// CompressionTestSuite tests this pkg with controllers can build valid eStargz blobs and parse them. +func CompressionTestSuite(t *testing.T, controllers ...TestingController) { + t.Run("testBuild", func(t *testing.T) { t.Parallel(); testBuild(t, controllers...) }) + t.Run("testDigestAndVerify", func(t *testing.T) { t.Parallel(); testDigestAndVerify(t, controllers...) }) + t.Run("testWriteAndOpen", func(t *testing.T) { t.Parallel(); testWriteAndOpen(t, controllers...) }) +} + +const ( + uncompressedType int = iota + gzipType + zstdType +) + +var srcCompressions = []int{ + uncompressedType, + gzipType, + zstdType, +} + +var allowedPrefix = [4]string{"", "./", "/", "../"} + +// testBuild tests the resulting stargz blob built by this pkg has the same +// contents as the normal stargz blob. +func testBuild(t *testing.T, controllers ...TestingController) { + tests := []struct { + name string + chunkSize int + in []tarEntry + }{ + { + name: "regfiles and directories", + chunkSize: 4, + in: tarOf( + file("foo", "test1"), + dir("foo2/"), + file("foo2/bar", "test2", xAttr(map[string]string{"test": "sample"})), + ), + }, + { + name: "empty files", + chunkSize: 4, + in: tarOf( + file("foo", "tttttt"), + file("foo_empty", ""), + file("foo2", "tttttt"), + file("foo_empty2", ""), + file("foo3", "tttttt"), + file("foo_empty3", ""), + file("foo4", "tttttt"), + file("foo_empty4", ""), + file("foo5", "tttttt"), + file("foo_empty5", ""), + file("foo6", "tttttt"), + ), + }, + { + name: "various files", + chunkSize: 4, + in: tarOf( + file("baz.txt", "bazbazbazbazbazbazbaz"), + file("foo.txt", "a"), + symlink("barlink", "test/bar.txt"), + dir("test/"), + dir("dev/"), + blockdev("dev/testblock", 3, 4), + fifo("dev/testfifo"), + chardev("dev/testchar1", 5, 6), + file("test/bar.txt", "testbartestbar", xAttr(map[string]string{"test2": "sample2"})), + dir("test2/"), + link("test2/bazlink", "baz.txt"), + chardev("dev/testchar2", 1, 2), + ), + }, + { + name: "no contents", + chunkSize: 4, + in: tarOf( + file("baz.txt", ""), + symlink("barlink", "test/bar.txt"), + dir("test/"), + dir("dev/"), + blockdev("dev/testblock", 3, 4), + fifo("dev/testfifo"), + chardev("dev/testchar1", 5, 6), + file("test/bar.txt", "", xAttr(map[string]string{"test2": "sample2"})), + dir("test2/"), + link("test2/bazlink", "baz.txt"), + chardev("dev/testchar2", 1, 2), + ), + }, + } + for _, tt := range tests { + for _, srcCompression := range srcCompressions { + srcCompression := srcCompression + for _, cl := range controllers { + cl := cl + for _, prefix := range allowedPrefix { + prefix := prefix + t.Run(tt.name+"-"+fmt.Sprintf("compression=%v-prefix=%q-src=%d", cl, prefix, srcCompression), func(t *testing.T) { + tarBlob := buildTarStatic(t, tt.in, prefix) + // Test divideEntries() + entries, err := sortEntries(tarBlob, nil, nil) // identical order + if err != nil { + t.Fatalf("faield to parse tar: %v", err) + } + var merged []*entry + for _, part := range divideEntries(entries, 4) { + merged = append(merged, part...) + } + if !reflect.DeepEqual(entries, merged) { + for _, e := range entries { + t.Logf("Original: %v", e.header) + } + for _, e := range merged { + t.Logf("Merged: %v", e.header) + } + t.Errorf("divided entries couldn't be merged") + return + } + + // Prepare sample data + wantBuf := new(bytes.Buffer) + sw := NewWriterWithCompressor(wantBuf, cl) + sw.ChunkSize = tt.chunkSize + if err := sw.AppendTar(tarBlob); err != nil { + t.Fatalf("faield to append tar to want stargz: %v", err) + } + if _, err := sw.Close(); err != nil { + t.Fatalf("faield to prepare want stargz: %v", err) + } + wantData := wantBuf.Bytes() + want, err := Open(io.NewSectionReader( + bytes.NewReader(wantData), 0, int64(len(wantData))), + WithDecompressors(cl), + ) + if err != nil { + t.Fatalf("failed to parse the want stargz: %v", err) + } + + // Prepare testing data + rc, err := Build(compressBlob(t, tarBlob, srcCompression), + WithChunkSize(tt.chunkSize), WithCompression(cl)) + if err != nil { + t.Fatalf("faield to build stargz: %v", err) + } + defer rc.Close() + gotBuf := new(bytes.Buffer) + if _, err := io.Copy(gotBuf, rc); err != nil { + t.Fatalf("failed to copy built stargz blob: %v", err) + } + gotData := gotBuf.Bytes() + got, err := Open(io.NewSectionReader( + bytes.NewReader(gotBuf.Bytes()), 0, int64(len(gotData))), + WithDecompressors(cl), + ) + if err != nil { + t.Fatalf("failed to parse the got stargz: %v", err) + } + + // Check DiffID is properly calculated + rc.Close() + diffID := rc.DiffID() + wantDiffID := cl.DiffIDOf(t, gotData) + if diffID.String() != wantDiffID { + t.Errorf("DiffID = %q; want %q", diffID, wantDiffID) + } + + // Compare as stargz + if !isSameVersion(t, cl, wantData, gotData) { + t.Errorf("built stargz hasn't same json") + return + } + if !isSameEntries(t, want, got) { + t.Errorf("built stargz isn't same as the original") + return + } + + // Compare as tar.gz + if !isSameTarGz(t, cl, wantData, gotData) { + t.Errorf("built stargz isn't same tar.gz") + return + } + }) + } + } + } + } +} + +func isSameTarGz(t *testing.T, controller TestingController, a, b []byte) bool { + aGz, err := controller.Reader(bytes.NewReader(a)) + if err != nil { + t.Fatalf("failed to read A") + } + defer aGz.Close() + bGz, err := controller.Reader(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to read B") + } + defer bGz.Close() + + // Same as tar's Next() method but ignores landmarks and TOCJSON file + next := func(r *tar.Reader) (h *tar.Header, err error) { + for { + if h, err = r.Next(); err != nil { + return + } + if h.Name != PrefetchLandmark && + h.Name != NoPrefetchLandmark && + h.Name != TOCTarName { + return + } + } + } + + aTar := tar.NewReader(aGz) + bTar := tar.NewReader(bGz) + for { + // Fetch and parse next header. + aH, aErr := next(aTar) + bH, bErr := next(bTar) + if aErr != nil || bErr != nil { + if aErr == io.EOF && bErr == io.EOF { + break + } + t.Fatalf("Failed to parse tar file: A: %v, B: %v", aErr, bErr) + } + if !reflect.DeepEqual(aH, bH) { + t.Logf("different header (A = %v; B = %v)", aH, bH) + return false + + } + aFile, err := ioutil.ReadAll(aTar) + if err != nil { + t.Fatal("failed to read tar payload of A") + } + bFile, err := ioutil.ReadAll(bTar) + if err != nil { + t.Fatal("failed to read tar payload of B") + } + if !bytes.Equal(aFile, bFile) { + t.Logf("different tar payload (A = %q; B = %q)", string(a), string(b)) + return false + } + } + + return true +} + +func isSameVersion(t *testing.T, controller TestingController, a, b []byte) bool { + aJTOC, _, err := parseStargz(io.NewSectionReader(bytes.NewReader(a), 0, int64(len(a))), controller) + if err != nil { + t.Fatalf("failed to parse A: %v", err) + } + bJTOC, _, err := parseStargz(io.NewSectionReader(bytes.NewReader(b), 0, int64(len(b))), controller) + if err != nil { + t.Fatalf("failed to parse B: %v", err) + } + t.Logf("A: TOCJSON: %v", dumpTOCJSON(t, aJTOC)) + t.Logf("B: TOCJSON: %v", dumpTOCJSON(t, bJTOC)) + return aJTOC.Version == bJTOC.Version +} + +func isSameEntries(t *testing.T, a, b *Reader) bool { + aroot, ok := a.Lookup("") + if !ok { + t.Fatalf("failed to get root of A") + } + broot, ok := b.Lookup("") + if !ok { + t.Fatalf("failed to get root of B") + } + aEntry := stargzEntry{aroot, a} + bEntry := stargzEntry{broot, b} + return contains(t, aEntry, bEntry) && contains(t, bEntry, aEntry) +} + +func compressBlob(t *testing.T, src *io.SectionReader, srcCompression int) *io.SectionReader { + buf := new(bytes.Buffer) + var w io.WriteCloser + var err error + if srcCompression == gzipType { + w = gzip.NewWriter(buf) + } else if srcCompression == zstdType { + w, err = zstd.NewWriter(buf) + if err != nil { + t.Fatalf("failed to init zstd writer: %v", err) + } + } else { + return src + } + src.Seek(0, io.SeekStart) + if _, err := io.Copy(w, src); err != nil { + t.Fatalf("failed to compress source") + } + if err := w.Close(); err != nil { + t.Fatalf("failed to finalize compress source") + } + data := buf.Bytes() + return io.NewSectionReader(bytes.NewReader(data), 0, int64(len(data))) + +} + +type stargzEntry struct { + e *TOCEntry + r *Reader +} + +// contains checks if all child entries in "b" are also contained in "a". +// This function also checks if the files/chunks contain the same contents among "a" and "b". +func contains(t *testing.T, a, b stargzEntry) bool { + ae, ar := a.e, a.r + be, br := b.e, b.r + t.Logf("Comparing: %q vs %q", ae.Name, be.Name) + if !equalEntry(ae, be) { + t.Logf("%q != %q: entry: a: %v, b: %v", ae.Name, be.Name, ae, be) + return false + } + if ae.Type == "dir" { + t.Logf("Directory: %q vs %q: %v vs %v", ae.Name, be.Name, + allChildrenName(ae), allChildrenName(be)) + iscontain := true + ae.ForeachChild(func(aBaseName string, aChild *TOCEntry) bool { + // Walk through all files on this stargz file. + + if aChild.Name == PrefetchLandmark || + aChild.Name == NoPrefetchLandmark { + return true // Ignore landmarks + } + + // Ignore a TOCEntry of "./" (formated as "" by stargz lib) on root directory + // because this points to the root directory itself. + if aChild.Name == "" && ae.Name == "" { + return true + } + + bChild, ok := be.LookupChild(aBaseName) + if !ok { + t.Logf("%q (base: %q): not found in b: %v", + ae.Name, aBaseName, allChildrenName(be)) + iscontain = false + return false + } + + childcontain := contains(t, stargzEntry{aChild, a.r}, stargzEntry{bChild, b.r}) + if !childcontain { + t.Logf("%q != %q: non-equal dir", ae.Name, be.Name) + iscontain = false + return false + } + return true + }) + return iscontain + } else if ae.Type == "reg" { + af, err := ar.OpenFile(ae.Name) + if err != nil { + t.Fatalf("failed to open file %q on A: %v", ae.Name, err) + } + bf, err := br.OpenFile(be.Name) + if err != nil { + t.Fatalf("failed to open file %q on B: %v", be.Name, err) + } + + var nr int64 + for nr < ae.Size { + abytes, anext, aok := readOffset(t, af, nr, a) + bbytes, bnext, bok := readOffset(t, bf, nr, b) + if !aok && !bok { + break + } else if !(aok && bok) || anext != bnext { + t.Logf("%q != %q (offset=%d): chunk existence a=%v vs b=%v, anext=%v vs bnext=%v", + ae.Name, be.Name, nr, aok, bok, anext, bnext) + return false + } + nr = anext + if !bytes.Equal(abytes, bbytes) { + t.Logf("%q != %q: different contents %v vs %v", + ae.Name, be.Name, string(abytes), string(bbytes)) + return false + } + } + return true + } + + return true +} + +func allChildrenName(e *TOCEntry) (children []string) { + e.ForeachChild(func(baseName string, _ *TOCEntry) bool { + children = append(children, baseName) + return true + }) + return +} + +func equalEntry(a, b *TOCEntry) bool { + // Here, we selectively compare fileds that we are interested in. + return a.Name == b.Name && + a.Type == b.Type && + a.Size == b.Size && + a.ModTime3339 == b.ModTime3339 && + a.Stat().ModTime().Equal(b.Stat().ModTime()) && // modTime time.Time + a.LinkName == b.LinkName && + a.Mode == b.Mode && + a.UID == b.UID && + a.GID == b.GID && + a.Uname == b.Uname && + a.Gname == b.Gname && + (a.Offset > 0) == (b.Offset > 0) && + (a.NextOffset() > 0) == (b.NextOffset() > 0) && + a.DevMajor == b.DevMajor && + a.DevMinor == b.DevMinor && + a.NumLink == b.NumLink && + reflect.DeepEqual(a.Xattrs, b.Xattrs) && + // chunk-related infomations aren't compared in this function. + // ChunkOffset int64 `json:"chunkOffset,omitempty"` + // ChunkSize int64 `json:"chunkSize,omitempty"` + // children map[string]*TOCEntry + a.Digest == b.Digest +} + +func readOffset(t *testing.T, r *io.SectionReader, offset int64, e stargzEntry) ([]byte, int64, bool) { + ce, ok := e.r.ChunkEntryForOffset(e.e.Name, offset) + if !ok { + return nil, 0, false + } + data := make([]byte, ce.ChunkSize) + t.Logf("Offset: %v, NextOffset: %v", ce.Offset, ce.NextOffset()) + n, err := r.ReadAt(data, ce.ChunkOffset) + if err != nil { + t.Fatalf("failed to read file payload of %q (offset:%d,size:%d): %v", + e.e.Name, ce.ChunkOffset, ce.ChunkSize, err) + } + if int64(n) != ce.ChunkSize { + t.Fatalf("unexpected copied data size %d; want %d", + n, ce.ChunkSize) + } + return data[:n], offset + ce.ChunkSize, true +} + +func dumpTOCJSON(t *testing.T, tocJSON *JTOC) string { + jtocData, err := json.Marshal(*tocJSON) + if err != nil { + t.Fatalf("failed to marshal TOC JSON: %v", err) + } + buf := new(bytes.Buffer) + if _, err := io.Copy(buf, bytes.NewReader(jtocData)); err != nil { + t.Fatalf("failed to read toc json blob: %v", err) + } + return buf.String() +} + +const chunkSize = 3 + +// type check func(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, compressionLevel int) +type check func(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) + +// testDigestAndVerify runs specified checks against sample stargz blobs. +func testDigestAndVerify(t *testing.T, controllers ...TestingController) { + tests := []struct { + name string + tarInit func(t *testing.T, dgstMap map[string]digest.Digest) (blob []tarEntry) + checks []check + }{ + { + name: "no-regfile", + tarInit: func(t *testing.T, dgstMap map[string]digest.Digest) (blob []tarEntry) { + return tarOf( + dir("test/"), + ) + }, + checks: []check{ + checkStargzTOC, + checkVerifyTOC, + checkVerifyInvalidStargzFail(buildTarStatic(t, tarOf( + dir("test2/"), // modified + ), allowedPrefix[0])), + }, + }, + { + name: "small-files", + tarInit: func(t *testing.T, dgstMap map[string]digest.Digest) (blob []tarEntry) { + return tarOf( + regDigest(t, "baz.txt", "", dgstMap), + regDigest(t, "foo.txt", "a", dgstMap), + dir("test/"), + regDigest(t, "test/bar.txt", "bbb", dgstMap), + ) + }, + checks: []check{ + checkStargzTOC, + checkVerifyTOC, + checkVerifyInvalidStargzFail(buildTarStatic(t, tarOf( + file("baz.txt", ""), + file("foo.txt", "M"), // modified + dir("test/"), + file("test/bar.txt", "bbb"), + ), allowedPrefix[0])), + // checkVerifyInvalidTOCEntryFail("foo.txt"), // TODO + checkVerifyBrokenContentFail("foo.txt"), + }, + }, + { + name: "big-files", + tarInit: func(t *testing.T, dgstMap map[string]digest.Digest) (blob []tarEntry) { + return tarOf( + regDigest(t, "baz.txt", "bazbazbazbazbazbazbaz", dgstMap), + regDigest(t, "foo.txt", "a", dgstMap), + dir("test/"), + regDigest(t, "test/bar.txt", "testbartestbar", dgstMap), + ) + }, + checks: []check{ + checkStargzTOC, + checkVerifyTOC, + checkVerifyInvalidStargzFail(buildTarStatic(t, tarOf( + file("baz.txt", "bazbazbazMMMbazbazbaz"), // modified + file("foo.txt", "a"), + dir("test/"), + file("test/bar.txt", "testbartestbar"), + ), allowedPrefix[0])), + checkVerifyInvalidTOCEntryFail("test/bar.txt"), + checkVerifyBrokenContentFail("test/bar.txt"), + }, + }, + { + name: "with-non-regfiles", + tarInit: func(t *testing.T, dgstMap map[string]digest.Digest) (blob []tarEntry) { + return tarOf( + regDigest(t, "baz.txt", "bazbazbazbazbazbazbaz", dgstMap), + regDigest(t, "foo.txt", "a", dgstMap), + symlink("barlink", "test/bar.txt"), + dir("test/"), + regDigest(t, "test/bar.txt", "testbartestbar", dgstMap), + dir("test2/"), + link("test2/bazlink", "baz.txt"), + ) + }, + checks: []check{ + checkStargzTOC, + checkVerifyTOC, + checkVerifyInvalidStargzFail(buildTarStatic(t, tarOf( + file("baz.txt", "bazbazbazbazbazbazbaz"), + file("foo.txt", "a"), + symlink("barlink", "test/bar.txt"), + dir("test/"), + file("test/bar.txt", "testbartestbar"), + dir("test2/"), + link("test2/bazlink", "foo.txt"), // modified + ), allowedPrefix[0])), + checkVerifyInvalidTOCEntryFail("test/bar.txt"), + checkVerifyBrokenContentFail("test/bar.txt"), + }, + }, + } + + for _, tt := range tests { + for _, srcCompression := range srcCompressions { + srcCompression := srcCompression + for _, cl := range controllers { + cl := cl + for _, prefix := range allowedPrefix { + prefix := prefix + t.Run(tt.name+"-"+fmt.Sprintf("compression=%v-prefix=%q", cl, prefix), func(t *testing.T) { + // Get original tar file and chunk digests + dgstMap := make(map[string]digest.Digest) + tarBlob := buildTarStatic(t, tt.tarInit(t, dgstMap), prefix) + + rc, err := Build(compressBlob(t, tarBlob, srcCompression), + WithChunkSize(chunkSize), WithCompression(cl)) + if err != nil { + t.Fatalf("failed to convert stargz: %v", err) + } + tocDigest := rc.TOCDigest() + defer rc.Close() + buf := new(bytes.Buffer) + if _, err := io.Copy(buf, rc); err != nil { + t.Fatalf("failed to copy built stargz blob: %v", err) + } + newStargz := buf.Bytes() + // NoPrefetchLandmark is added during `Bulid`, which is expected behaviour. + dgstMap[chunkID(NoPrefetchLandmark, 0, int64(len([]byte{landmarkContents})))] = digest.FromBytes([]byte{landmarkContents}) + + for _, check := range tt.checks { + check(t, newStargz, tocDigest, dgstMap, cl) + } + }) + } + } + } + } +} + +// checkStargzTOC checks the TOC JSON of the passed stargz has the expected +// digest and contains valid chunks. It walks all entries in the stargz and +// checks all chunk digests stored to the TOC JSON match the actual contents. +func checkStargzTOC(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) { + sgz, err := Open( + io.NewSectionReader(bytes.NewReader(sgzData), 0, int64(len(sgzData))), + WithDecompressors(controller), + ) + if err != nil { + t.Errorf("failed to parse converted stargz: %v", err) + return + } + digestMapTOC, err := listDigests(io.NewSectionReader( + bytes.NewReader(sgzData), 0, int64(len(sgzData))), + controller, + ) + if err != nil { + t.Fatalf("failed to list digest: %v", err) + } + found := make(map[string]bool) + for id := range dgstMap { + found[id] = false + } + zr, err := controller.Reader(bytes.NewReader(sgzData)) + if err != nil { + t.Fatalf("failed to decompress converted stargz: %v", err) + } + defer zr.Close() + tr := tar.NewReader(zr) + for { + h, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Errorf("failed to read tar entry: %v", err) + return + } + break + } + if h.Name == TOCTarName { + // Check the digest of TOC JSON based on the actual contents + // It's sure that TOC JSON exists in this archive because + // Open succeeded. + dgstr := digest.Canonical.Digester() + if _, err := io.Copy(dgstr.Hash(), tr); err != nil { + t.Fatalf("failed to calculate digest of TOC JSON: %v", + err) + } + if dgstr.Digest() != tocDigest { + t.Errorf("invalid TOC JSON %q; want %q", tocDigest, dgstr.Digest()) + } + continue + } + if _, ok := sgz.Lookup(h.Name); !ok { + t.Errorf("lost stargz entry %q in the converted TOC", h.Name) + return + } + var n int64 + for n < h.Size { + ce, ok := sgz.ChunkEntryForOffset(h.Name, n) + if !ok { + t.Errorf("lost chunk %q(offset=%d) in the converted TOC", + h.Name, n) + return + } + + // Get the original digest to make sure the file contents are kept unchanged + // from the original tar, during the whole conversion steps. + id := chunkID(h.Name, n, ce.ChunkSize) + want, ok := dgstMap[id] + if !ok { + t.Errorf("Unexpected chunk %q(offset=%d,size=%d): %v", + h.Name, n, ce.ChunkSize, dgstMap) + return + } + found[id] = true + + // Check the file contents + dgstr := digest.Canonical.Digester() + if _, err := io.CopyN(dgstr.Hash(), tr, ce.ChunkSize); err != nil { + t.Fatalf("failed to calculate digest of %q (offset=%d,size=%d)", + h.Name, n, ce.ChunkSize) + } + if want != dgstr.Digest() { + t.Errorf("Invalid contents in converted stargz %q: %q; want %q", + h.Name, dgstr.Digest(), want) + return + } + + // Check the digest stored in TOC JSON + dgstTOC, ok := digestMapTOC[ce.Offset] + if !ok { + t.Errorf("digest of %q(offset=%d,size=%d,chunkOffset=%d) isn't registered", + h.Name, ce.Offset, ce.ChunkSize, ce.ChunkOffset) + } + if want != dgstTOC { + t.Errorf("Invalid digest in TOCEntry %q: %q; want %q", + h.Name, dgstTOC, want) + return + } + + n += ce.ChunkSize + } + } + + for id, ok := range found { + if !ok { + t.Errorf("required chunk %q not found in the converted stargz: %v", id, found) + } + } +} + +// checkVerifyTOC checks the verification works for the TOC JSON of the passed +// stargz. It walks all entries in the stargz and checks the verifications for +// all chunks work. +func checkVerifyTOC(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) { + sgz, err := Open( + io.NewSectionReader(bytes.NewReader(sgzData), 0, int64(len(sgzData))), + WithDecompressors(controller), + ) + if err != nil { + t.Errorf("failed to parse converted stargz: %v", err) + return + } + ev, err := sgz.VerifyTOC(tocDigest) + if err != nil { + t.Errorf("failed to verify stargz: %v", err) + return + } + + found := make(map[string]bool) + for id := range dgstMap { + found[id] = false + } + zr, err := controller.Reader(bytes.NewReader(sgzData)) + if err != nil { + t.Fatalf("failed to decompress converted stargz: %v", err) + } + defer zr.Close() + tr := tar.NewReader(zr) + for { + h, err := tr.Next() + if err != nil { + if err != io.EOF { + t.Errorf("failed to read tar entry: %v", err) + return + } + break + } + if h.Name == TOCTarName { + continue + } + if _, ok := sgz.Lookup(h.Name); !ok { + t.Errorf("lost stargz entry %q in the converted TOC", h.Name) + return + } + var n int64 + for n < h.Size { + ce, ok := sgz.ChunkEntryForOffset(h.Name, n) + if !ok { + t.Errorf("lost chunk %q(offset=%d) in the converted TOC", + h.Name, n) + return + } + + v, err := ev.Verifier(ce) + if err != nil { + t.Errorf("failed to get verifier for %q(offset=%d)", h.Name, n) + } + + found[chunkID(h.Name, n, ce.ChunkSize)] = true + + // Check the file contents + if _, err := io.CopyN(v, tr, ce.ChunkSize); err != nil { + t.Fatalf("failed to get chunk of %q (offset=%d,size=%d)", + h.Name, n, ce.ChunkSize) + } + if !v.Verified() { + t.Errorf("Invalid contents in converted stargz %q (should be succeeded)", + h.Name) + return + } + n += ce.ChunkSize + } + } + + for id, ok := range found { + if !ok { + t.Errorf("required chunk %q not found in the converted stargz: %v", id, found) + } + } +} + +// checkVerifyInvalidTOCEntryFail checks if misconfigured TOC JSON can be +// detected during the verification and the verification returns an error. +func checkVerifyInvalidTOCEntryFail(filename string) check { + return func(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) { + funcs := map[string]rewriteFunc{ + "lost digest in a entry": func(t *testing.T, toc *JTOC, sgz *io.SectionReader) { + var found bool + for _, e := range toc.Entries { + if cleanEntryName(e.Name) == filename { + if e.Type != "reg" && e.Type != "chunk" { + t.Fatalf("entry %q to break must be regfile or chunk", filename) + } + if e.ChunkDigest == "" { + t.Fatalf("entry %q is already invalid", filename) + } + e.ChunkDigest = "" + found = true + } + } + if !found { + t.Fatalf("rewrite target not found") + } + }, + "duplicated entry offset": func(t *testing.T, toc *JTOC, sgz *io.SectionReader) { + var ( + sampleEntry *TOCEntry + targetEntry *TOCEntry + ) + for _, e := range toc.Entries { + if e.Type == "reg" || e.Type == "chunk" { + if cleanEntryName(e.Name) == filename { + targetEntry = e + } else { + sampleEntry = e + } + } + } + if sampleEntry == nil { + t.Fatalf("TOC must contain at least one regfile or chunk entry other than the rewrite target") + } + if targetEntry == nil { + t.Fatalf("rewrite target not found") + } + targetEntry.Offset = sampleEntry.Offset + }, + } + + for name, rFunc := range funcs { + t.Run(name, func(t *testing.T) { + newSgz, newTocDigest := rewriteTOCJSON(t, io.NewSectionReader(bytes.NewReader(sgzData), 0, int64(len(sgzData))), rFunc, controller) + buf := new(bytes.Buffer) + if _, err := io.Copy(buf, newSgz); err != nil { + t.Fatalf("failed to get converted stargz") + } + isgz := buf.Bytes() + + sgz, err := Open( + io.NewSectionReader(bytes.NewReader(isgz), 0, int64(len(isgz))), + WithDecompressors(controller), + ) + if err != nil { + t.Fatalf("failed to parse converted stargz: %v", err) + return + } + _, err = sgz.VerifyTOC(newTocDigest) + if err == nil { + t.Errorf("must fail for invalid TOC") + return + } + }) + } + } +} + +// checkVerifyInvalidStargzFail checks if the verification detects that the +// given stargz file doesn't match to the expected digest and returns error. +func checkVerifyInvalidStargzFail(invalid *io.SectionReader) check { + return func(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) { + rc, err := Build(invalid, WithChunkSize(chunkSize), WithCompression(controller)) + if err != nil { + t.Fatalf("failed to convert stargz: %v", err) + } + defer rc.Close() + buf := new(bytes.Buffer) + if _, err := io.Copy(buf, rc); err != nil { + t.Fatalf("failed to copy built stargz blob: %v", err) + } + mStargz := buf.Bytes() + + sgz, err := Open( + io.NewSectionReader(bytes.NewReader(mStargz), 0, int64(len(mStargz))), + WithDecompressors(controller), + ) + if err != nil { + t.Fatalf("failed to parse converted stargz: %v", err) + return + } + _, err = sgz.VerifyTOC(tocDigest) + if err == nil { + t.Errorf("must fail for invalid TOC") + return + } + } +} + +// checkVerifyBrokenContentFail checks if the verifier detects broken contents +// that doesn't match to the expected digest and returns error. +func checkVerifyBrokenContentFail(filename string) check { + return func(t *testing.T, sgzData []byte, tocDigest digest.Digest, dgstMap map[string]digest.Digest, controller TestingController) { + // Parse stargz file + sgz, err := Open( + io.NewSectionReader(bytes.NewReader(sgzData), 0, int64(len(sgzData))), + WithDecompressors(controller), + ) + if err != nil { + t.Fatalf("failed to parse converted stargz: %v", err) + return + } + ev, err := sgz.VerifyTOC(tocDigest) + if err != nil { + t.Fatalf("failed to verify stargz: %v", err) + return + } + + // Open the target file + sr, err := sgz.OpenFile(filename) + if err != nil { + t.Fatalf("failed to open file %q", filename) + } + ce, ok := sgz.ChunkEntryForOffset(filename, 0) + if !ok { + t.Fatalf("lost chunk %q(offset=%d) in the converted TOC", filename, 0) + return + } + if ce.ChunkSize == 0 { + t.Fatalf("file mustn't be empty") + return + } + data := make([]byte, ce.ChunkSize) + if _, err := sr.ReadAt(data, ce.ChunkOffset); err != nil { + t.Errorf("failed to get data of a chunk of %q(offset=%q)", + filename, ce.ChunkOffset) + } + + // Check the broken chunk (must fail) + v, err := ev.Verifier(ce) + if err != nil { + t.Fatalf("failed to get verifier for %q", filename) + } + broken := append([]byte{^data[0]}, data[1:]...) + if _, err := io.CopyN(v, bytes.NewReader(broken), ce.ChunkSize); err != nil { + t.Fatalf("failed to get chunk of %q (offset=%d,size=%d)", + filename, ce.ChunkOffset, ce.ChunkSize) + } + if v.Verified() { + t.Errorf("verification must fail for broken file chunk %q(org:%q,broken:%q)", + filename, data, broken) + } + } +} + +func chunkID(name string, offset, size int64) string { + return fmt.Sprintf("%s-%d-%d", cleanEntryName(name), offset, size) +} + +type rewriteFunc func(t *testing.T, toc *JTOC, sgz *io.SectionReader) + +func rewriteTOCJSON(t *testing.T, sgz *io.SectionReader, rewrite rewriteFunc, controller TestingController) (newSgz io.Reader, tocDigest digest.Digest) { + decodedJTOC, jtocOffset, err := parseStargz(sgz, controller) + if err != nil { + t.Fatalf("failed to extract TOC JSON: %v", err) + } + + rewrite(t, decodedJTOC, sgz) + + tocFooter, tocDigest, err := tocAndFooter(controller, decodedJTOC, jtocOffset) + if err != nil { + t.Fatalf("failed to create toc and footer: %v", err) + } + + // Reconstruct stargz file with the modified TOC JSON + if _, err := sgz.Seek(0, io.SeekStart); err != nil { + t.Fatalf("failed to reset the seek position of stargz: %v", err) + } + return io.MultiReader( + io.LimitReader(sgz, jtocOffset), // Original stargz (before TOC JSON) + tocFooter, // Rewritten TOC and footer + ), tocDigest +} + +func listDigests(sgz *io.SectionReader, controller TestingController) (map[int64]digest.Digest, error) { + decodedJTOC, _, err := parseStargz(sgz, controller) + if err != nil { + return nil, err + } + digestMap := make(map[int64]digest.Digest) + for _, e := range decodedJTOC.Entries { + if e.Type == "reg" || e.Type == "chunk" { + if e.Type == "reg" && e.Size == 0 { + continue // ignores empty file + } + if e.ChunkDigest == "" { + return nil, fmt.Errorf("ChunkDigest of %q(off=%d) not found in TOC JSON", + e.Name, e.Offset) + } + d, err := digest.Parse(e.ChunkDigest) + if err != nil { + return nil, err + } + digestMap[e.Offset] = d + } + } + return digestMap, nil +} + +func parseStargz(sgz *io.SectionReader, controller TestingController) (decodedJTOC *JTOC, jtocOffset int64, err error) { + fSize := controller.FooterSize() + footer := make([]byte, fSize) + if _, err := sgz.ReadAt(footer, sgz.Size()-fSize); err != nil { + return nil, 0, errors.Wrap(err, "error reading footer") + } + tocOffset, _, err := controller.ParseFooter(footer[positive(int64(len(footer))-fSize):]) + if err != nil { + return nil, 0, errors.Wrapf(err, "failed to parse footer") + } + + // Decode the TOC JSON + tocReader := io.NewSectionReader(sgz, tocOffset, sgz.Size()-tocOffset-fSize) + decodedJTOC, _, err = controller.ParseTOC(tocReader) + if err != nil { + return nil, 0, errors.Wrap(err, "failed to parse TOC") + } + return decodedJTOC, tocOffset, nil +} + +func testWriteAndOpen(t *testing.T, controllers ...TestingController) { + const content = "Some contents" + invalidUtf8 := "\xff\xfe\xfd" + + xAttrFile := xAttr{"foo": "bar", "invalid-utf8": invalidUtf8} + sampleOwner := owner{uid: 50, gid: 100} + + tests := []struct { + name string + chunkSize int + in []tarEntry + want []stargzCheck + wantNumGz int // expected number of streams + }{ + { + name: "empty", + in: tarOf(), + wantNumGz: 2, // TOC + footer + want: checks( + numTOCEntries(0), + ), + }, + { + name: "1dir_1empty_file", + in: tarOf( + dir("foo/"), + file("foo/bar.txt", ""), + ), + wantNumGz: 3, // dir, TOC, footer + want: checks( + numTOCEntries(2), + hasDir("foo/"), + hasFileLen("foo/bar.txt", 0), + entryHasChildren("foo", "bar.txt"), + hasFileDigest("foo/bar.txt", digestFor("")), + ), + }, + { + name: "1dir_1file", + in: tarOf( + dir("foo/"), + file("foo/bar.txt", content, xAttrFile), + ), + wantNumGz: 4, // var dir, foo.txt alone, TOC, footer + want: checks( + numTOCEntries(2), + hasDir("foo/"), + hasFileLen("foo/bar.txt", len(content)), + hasFileDigest("foo/bar.txt", digestFor(content)), + hasFileContentsRange("foo/bar.txt", 0, content), + hasFileContentsRange("foo/bar.txt", 1, content[1:]), + entryHasChildren("", "foo"), + entryHasChildren("foo", "bar.txt"), + hasFileXattrs("foo/bar.txt", "foo", "bar"), + hasFileXattrs("foo/bar.txt", "invalid-utf8", invalidUtf8), + ), + }, + { + name: "2meta_2file", + in: tarOf( + dir("bar/", sampleOwner), + dir("foo/", sampleOwner), + file("foo/bar.txt", content, sampleOwner), + ), + wantNumGz: 4, // both dirs, foo.txt alone, TOC, footer + want: checks( + numTOCEntries(3), + hasDir("bar/"), + hasDir("foo/"), + hasFileLen("foo/bar.txt", len(content)), + entryHasChildren("", "bar", "foo"), + entryHasChildren("foo", "bar.txt"), + hasChunkEntries("foo/bar.txt", 1), + hasEntryOwner("bar/", sampleOwner), + hasEntryOwner("foo/", sampleOwner), + hasEntryOwner("foo/bar.txt", sampleOwner), + ), + }, + { + name: "3dir", + in: tarOf( + dir("bar/"), + dir("foo/"), + dir("foo/bar/"), + ), + wantNumGz: 3, // 3 dirs, TOC, footer + want: checks( + hasDirLinkCount("bar/", 2), + hasDirLinkCount("foo/", 3), + hasDirLinkCount("foo/bar/", 2), + ), + }, + { + name: "symlink", + in: tarOf( + dir("foo/"), + symlink("foo/bar", "../../x"), + ), + wantNumGz: 3, // metas + TOC + footer + want: checks( + numTOCEntries(2), + hasSymlink("foo/bar", "../../x"), + entryHasChildren("", "foo"), + entryHasChildren("foo", "bar"), + ), + }, + { + name: "chunked_file", + chunkSize: 4, + in: tarOf( + dir("foo/"), + file("foo/big.txt", "This "+"is s"+"uch "+"a bi"+"g fi"+"le"), + ), + wantNumGz: 9, + want: checks( + numTOCEntries(7), // 1 for foo dir, 6 for the foo/big.txt file + hasDir("foo/"), + hasFileLen("foo/big.txt", len("This is such a big file")), + hasFileDigest("foo/big.txt", digestFor("This is such a big file")), + hasFileContentsRange("foo/big.txt", 0, "This is such a big file"), + hasFileContentsRange("foo/big.txt", 1, "his is such a big file"), + hasFileContentsRange("foo/big.txt", 2, "is is such a big file"), + hasFileContentsRange("foo/big.txt", 3, "s is such a big file"), + hasFileContentsRange("foo/big.txt", 4, " is such a big file"), + hasFileContentsRange("foo/big.txt", 5, "is such a big file"), + hasFileContentsRange("foo/big.txt", 6, "s such a big file"), + hasFileContentsRange("foo/big.txt", 7, " such a big file"), + hasFileContentsRange("foo/big.txt", 8, "such a big file"), + hasFileContentsRange("foo/big.txt", 9, "uch a big file"), + hasFileContentsRange("foo/big.txt", 10, "ch a big file"), + hasFileContentsRange("foo/big.txt", 11, "h a big file"), + hasFileContentsRange("foo/big.txt", 12, " a big file"), + hasFileContentsRange("foo/big.txt", len("This is such a big file")-1, ""), + hasChunkEntries("foo/big.txt", 6), + ), + }, + { + name: "recursive", + in: tarOf( + dir("/", sampleOwner), + dir("bar/", sampleOwner), + dir("foo/", sampleOwner), + file("foo/bar.txt", content, sampleOwner), + ), + wantNumGz: 4, // dirs, bar.txt alone, TOC, footer + want: checks( + maxDepth(2), // 0: root directory, 1: "foo/", 2: "bar.txt" + ), + }, + { + name: "block_char_fifo", + in: tarOf( + tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Name: prefix + "b", + Typeflag: tar.TypeBlock, + Devmajor: 123, + Devminor: 456, + }) + }), + tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Name: prefix + "c", + Typeflag: tar.TypeChar, + Devmajor: 111, + Devminor: 222, + }) + }), + tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Name: prefix + "f", + Typeflag: tar.TypeFifo, + }) + }), + ), + wantNumGz: 3, + want: checks( + lookupMatch("b", &TOCEntry{Name: "b", Type: "block", DevMajor: 123, DevMinor: 456, NumLink: 1}), + lookupMatch("c", &TOCEntry{Name: "c", Type: "char", DevMajor: 111, DevMinor: 222, NumLink: 1}), + lookupMatch("f", &TOCEntry{Name: "f", Type: "fifo", NumLink: 1}), + ), + }, + { + name: "modes", + in: tarOf( + dir("foo1/", 0755|os.ModeDir|os.ModeSetgid), + file("foo1/bar1", content, 0700|os.ModeSetuid), + file("foo1/bar2", content, 0755|os.ModeSetgid), + dir("foo2/", 0755|os.ModeDir|os.ModeSticky), + file("foo2/bar3", content, 0755|os.ModeSticky), + dir("foo3/", 0755|os.ModeDir), + file("foo3/bar4", content, os.FileMode(0700)), + file("foo3/bar5", content, os.FileMode(0755)), + ), + wantNumGz: 8, // dir, bar1 alone, bar2 alone + dir, bar3 alone + dir, bar4 alone, bar5 alone, TOC, footer + want: checks( + hasMode("foo1/", 0755|os.ModeDir|os.ModeSetgid), + hasMode("foo1/bar1", 0700|os.ModeSetuid), + hasMode("foo1/bar2", 0755|os.ModeSetgid), + hasMode("foo2/", 0755|os.ModeDir|os.ModeSticky), + hasMode("foo2/bar3", 0755|os.ModeSticky), + hasMode("foo3/", 0755|os.ModeDir), + hasMode("foo3/bar4", os.FileMode(0700)), + hasMode("foo3/bar5", os.FileMode(0755)), + ), + }, + } + + for _, tt := range tests { + for _, cl := range controllers { + cl := cl + for _, prefix := range allowedPrefix { + prefix := prefix + t.Run(tt.name+"-"+fmt.Sprintf("compression=%v-prefix=%q", cl, prefix), func(t *testing.T) { + tr, cancel := buildTar(t, tt.in, prefix) + defer cancel() + var stargzBuf bytes.Buffer + w := NewWriterWithCompressor(&stargzBuf, cl) + w.ChunkSize = tt.chunkSize + if err := w.AppendTar(tr); err != nil { + t.Fatalf("Append: %v", err) + } + if _, err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + b := stargzBuf.Bytes() + + diffID := w.DiffID() + wantDiffID := cl.DiffIDOf(t, b) + if diffID != wantDiffID { + t.Errorf("DiffID = %q; want %q", diffID, wantDiffID) + } + + got := cl.CountStreams(t, b) + if got != tt.wantNumGz { + t.Errorf("number of streams = %d; want %d", got, tt.wantNumGz) + } + + telemetry, checkCalled := newCalledTelemetry() + r, err := Open( + io.NewSectionReader(bytes.NewReader(b), 0, int64(len(b))), + WithDecompressors(cl), + WithTelemetry(telemetry), + ) + if err != nil { + t.Fatalf("stargz.Open: %v", err) + } + if err := checkCalled(); err != nil { + t.Errorf("telemetry failure: %v", err) + } + for _, want := range tt.want { + want.check(t, r) + } + }) + } + } + } +} + +func newCalledTelemetry() (telemetry *Telemetry, check func() error) { + var getFooterLatencyCalled bool + var getTocLatencyCalled bool + var deserializeTocLatencyCalled bool + return &Telemetry{ + func(time.Time) { getFooterLatencyCalled = true }, + func(time.Time) { getTocLatencyCalled = true }, + func(time.Time) { deserializeTocLatencyCalled = true }, + }, func() error { + var allErr []error + if !getFooterLatencyCalled { + allErr = append(allErr, fmt.Errorf("metrics GetFooterLatency isn't called")) + } + if !getTocLatencyCalled { + allErr = append(allErr, fmt.Errorf("metrics GetTocLatency isn't called")) + } + if !deserializeTocLatencyCalled { + allErr = append(allErr, fmt.Errorf("metrics DeserializeTocLatency isn't called")) + } + return errorutil.Aggregate(allErr) + } +} + +func digestFor(content string) string { + sum := sha256.Sum256([]byte(content)) + return fmt.Sprintf("sha256:%x", sum) +} + +type numTOCEntries int + +func (n numTOCEntries) check(t *testing.T, r *Reader) { + if r.toc == nil { + t.Fatal("nil TOC") + } + if got, want := len(r.toc.Entries), int(n); got != want { + t.Errorf("got %d TOC entries; want %d", got, want) + } + t.Logf("got TOC entries:") + for i, ent := range r.toc.Entries { + entj, _ := json.Marshal(ent) + t.Logf(" [%d]: %s\n", i, entj) + } + if t.Failed() { + t.FailNow() + } +} + +func checks(s ...stargzCheck) []stargzCheck { return s } + +type stargzCheck interface { + check(t *testing.T, r *Reader) +} + +type stargzCheckFn func(*testing.T, *Reader) + +func (f stargzCheckFn) check(t *testing.T, r *Reader) { f(t, r) } + +func maxDepth(max int) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + e, ok := r.Lookup("") + if !ok { + t.Fatal("root directory not found") + } + d, err := getMaxDepth(t, e, 0, 10*max) + if err != nil { + t.Errorf("failed to get max depth (wanted %d): %v", max, err) + return + } + if d != max { + t.Errorf("invalid depth %d; want %d", d, max) + return + } + }) +} + +func getMaxDepth(t *testing.T, e *TOCEntry, current, limit int) (max int, rErr error) { + if current > limit { + return -1, fmt.Errorf("walkMaxDepth: exceeds limit: current:%d > limit:%d", + current, limit) + } + max = current + e.ForeachChild(func(baseName string, ent *TOCEntry) bool { + t.Logf("%q(basename:%q) is child of %q\n", ent.Name, baseName, e.Name) + d, err := getMaxDepth(t, ent, current+1, limit) + if err != nil { + rErr = err + return false + } + if d > max { + max = d + } + return true + }) + return +} + +func hasFileLen(file string, wantLen int) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == file { + if ent.Type != "reg" { + t.Errorf("file type of %q is %q; want \"reg\"", file, ent.Type) + } else if ent.Size != int64(wantLen) { + t.Errorf("file size of %q = %d; want %d", file, ent.Size, wantLen) + } + return + } + } + t.Errorf("file %q not found", file) + }) +} + +func hasFileXattrs(file, name, value string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == file { + if ent.Type != "reg" { + t.Errorf("file type of %q is %q; want \"reg\"", file, ent.Type) + } + if ent.Xattrs == nil { + t.Errorf("file %q has no xattrs", file) + return + } + valueFound, found := ent.Xattrs[name] + if !found { + t.Errorf("file %q has no xattr %q", file, name) + return + } + if string(valueFound) != value { + t.Errorf("file %q has xattr %q with value %q instead of %q", file, name, valueFound, value) + } + + return + } + } + t.Errorf("file %q not found", file) + }) +} + +func hasFileDigest(file string, digest string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + ent, ok := r.Lookup(file) + if !ok { + t.Fatalf("didn't find TOCEntry for file %q", file) + } + if ent.Digest != digest { + t.Fatalf("Digest(%q) = %q, want %q", file, ent.Digest, digest) + } + }) +} + +func hasFileContentsRange(file string, offset int, want string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + f, err := r.OpenFile(file) + if err != nil { + t.Fatal(err) + } + got := make([]byte, len(want)) + n, err := f.ReadAt(got, int64(offset)) + if err != nil { + t.Fatalf("ReadAt(len %d, offset %d) = %v, %v", len(got), offset, n, err) + } + if string(got) != want { + t.Fatalf("ReadAt(len %d, offset %d) = %q, want %q", len(got), offset, got, want) + } + }) +} + +func hasChunkEntries(file string, wantChunks int) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + ent, ok := r.Lookup(file) + if !ok { + t.Fatalf("no file for %q", file) + } + if ent.Type != "reg" { + t.Fatalf("file %q has unexpected type %q; want reg", file, ent.Type) + } + chunks := r.getChunks(ent) + if len(chunks) != wantChunks { + t.Errorf("len(r.getChunks(%q)) = %d; want %d", file, len(chunks), wantChunks) + return + } + f := chunks[0] + + var gotChunks []*TOCEntry + var last *TOCEntry + for off := int64(0); off < f.Size; off++ { + e, ok := r.ChunkEntryForOffset(file, off) + if !ok { + t.Errorf("no ChunkEntryForOffset at %d", off) + return + } + if last != e { + gotChunks = append(gotChunks, e) + last = e + } + } + if !reflect.DeepEqual(chunks, gotChunks) { + t.Errorf("gotChunks=%d, want=%d; contents mismatch", len(gotChunks), wantChunks) + } + + // And verify the NextOffset + for i := 0; i < len(gotChunks)-1; i++ { + ci := gotChunks[i] + cnext := gotChunks[i+1] + if ci.NextOffset() != cnext.Offset { + t.Errorf("chunk %d NextOffset %d != next chunk's Offset of %d", i, ci.NextOffset(), cnext.Offset) + } + } + }) +} + +func entryHasChildren(dir string, want ...string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + want := append([]string(nil), want...) + var got []string + ent, ok := r.Lookup(dir) + if !ok { + t.Fatalf("didn't find TOCEntry for dir node %q", dir) + } + for baseName := range ent.children { + got = append(got, baseName) + } + sort.Strings(got) + sort.Strings(want) + if !reflect.DeepEqual(got, want) { + t.Errorf("children of %q = %q; want %q", dir, got, want) + } + }) +} + +func hasDir(file string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == cleanEntryName(file) { + if ent.Type != "dir" { + t.Errorf("file type of %q is %q; want \"dir\"", file, ent.Type) + } + return + } + } + t.Errorf("directory %q not found", file) + }) +} + +func hasDirLinkCount(file string, count int) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == cleanEntryName(file) { + if ent.Type != "dir" { + t.Errorf("file type of %q is %q; want \"dir\"", file, ent.Type) + return + } + if ent.NumLink != count { + t.Errorf("link count of %q = %d; want %d", file, ent.NumLink, count) + } + return + } + } + t.Errorf("directory %q not found", file) + }) +} + +func hasMode(file string, mode os.FileMode) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == cleanEntryName(file) { + if ent.Stat().Mode() != mode { + t.Errorf("invalid mode: got %v; want %v", ent.Stat().Mode(), mode) + return + } + return + } + } + t.Errorf("file %q not found", file) + }) +} + +func hasSymlink(file, target string) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + for _, ent := range r.toc.Entries { + if ent.Name == file { + if ent.Type != "symlink" { + t.Errorf("file type of %q is %q; want \"symlink\"", file, ent.Type) + } else if ent.LinkName != target { + t.Errorf("link target of symlink %q is %q; want %q", file, ent.LinkName, target) + } + return + } + } + t.Errorf("symlink %q not found", file) + }) +} + +func lookupMatch(name string, want *TOCEntry) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + e, ok := r.Lookup(name) + if !ok { + t.Fatalf("failed to Lookup entry %q", name) + } + if !reflect.DeepEqual(e, want) { + t.Errorf("entry %q mismatch.\n got: %+v\nwant: %+v\n", name, e, want) + } + + }) +} + +func hasEntryOwner(entry string, owner owner) stargzCheck { + return stargzCheckFn(func(t *testing.T, r *Reader) { + ent, ok := r.Lookup(strings.TrimSuffix(entry, "/")) + if !ok { + t.Errorf("entry %q not found", entry) + return + } + if ent.UID != owner.uid || ent.GID != owner.gid { + t.Errorf("entry %q has invalid owner (uid:%d, gid:%d) instead of (uid:%d, gid:%d)", entry, ent.UID, ent.GID, owner.uid, owner.gid) + return + } + }) +} + +func tarOf(s ...tarEntry) []tarEntry { return s } + +type tarEntry interface { + appendTar(tw *tar.Writer, prefix string) error +} + +type tarEntryFunc func(*tar.Writer, string) error + +func (f tarEntryFunc) appendTar(tw *tar.Writer, prefix string) error { return f(tw, prefix) } + +func buildTar(t *testing.T, ents []tarEntry, prefix string) (r io.Reader, cancel func()) { + pr, pw := io.Pipe() + go func() { + tw := tar.NewWriter(pw) + for _, ent := range ents { + if err := ent.appendTar(tw, prefix); err != nil { + t.Errorf("building input tar: %v", err) + pw.Close() + return + } + } + if err := tw.Close(); err != nil { + t.Errorf("closing write of input tar: %v", err) + } + pw.Close() + }() + return pr, func() { go pr.Close(); go pw.Close() } +} + +func buildTarStatic(t *testing.T, ents []tarEntry, prefix string) *io.SectionReader { + buf := new(bytes.Buffer) + tw := tar.NewWriter(buf) + for _, ent := range ents { + if err := ent.appendTar(tw, prefix); err != nil { + t.Fatalf("building input tar: %v", err) + } + } + if err := tw.Close(); err != nil { + t.Errorf("closing write of input tar: %v", err) + } + data := buf.Bytes() + return io.NewSectionReader(bytes.NewReader(data), 0, int64(len(data))) +} + +func dir(name string, opts ...interface{}) tarEntry { + return tarEntryFunc(func(tw *tar.Writer, prefix string) error { + var o owner + mode := os.FileMode(0755) + for _, opt := range opts { + switch v := opt.(type) { + case owner: + o = v + case os.FileMode: + mode = v + default: + return errors.New("unsupported opt") + } + } + if !strings.HasSuffix(name, "/") { + panic(fmt.Sprintf("missing trailing slash in dir %q ", name)) + } + tm, err := fileModeToTarMode(mode) + if err != nil { + return err + } + return tw.WriteHeader(&tar.Header{ + Typeflag: tar.TypeDir, + Name: prefix + name, + Mode: tm, + Uid: o.uid, + Gid: o.gid, + }) + }) +} + +// xAttr are extended attributes to set on test files created with the file func. +type xAttr map[string]string + +// owner is owner ot set on test files and directories with the file and dir functions. +type owner struct { + uid int + gid int +} + +func file(name, contents string, opts ...interface{}) tarEntry { + return tarEntryFunc(func(tw *tar.Writer, prefix string) error { + var xattrs xAttr + var o owner + mode := os.FileMode(0644) + for _, opt := range opts { + switch v := opt.(type) { + case xAttr: + xattrs = v + case owner: + o = v + case os.FileMode: + mode = v + default: + return errors.New("unsupported opt") + } + } + if strings.HasSuffix(name, "/") { + return fmt.Errorf("bogus trailing slash in file %q", name) + } + tm, err := fileModeToTarMode(mode) + if err != nil { + return err + } + if err := tw.WriteHeader(&tar.Header{ + Typeflag: tar.TypeReg, + Name: prefix + name, + Mode: tm, + Xattrs: xattrs, + Size: int64(len(contents)), + Uid: o.uid, + Gid: o.gid, + }); err != nil { + return err + } + _, err = io.WriteString(tw, contents) + return err + }) +} + +func symlink(name, target string) tarEntry { + return tarEntryFunc(func(tw *tar.Writer, prefix string) error { + return tw.WriteHeader(&tar.Header{ + Typeflag: tar.TypeSymlink, + Name: prefix + name, + Linkname: target, + Mode: 0644, + }) + }) +} + +func link(name string, linkname string) tarEntry { + now := time.Now() + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Typeflag: tar.TypeLink, + Name: prefix + name, + Linkname: linkname, + ModTime: now, + AccessTime: now, + ChangeTime: now, + }) + }) +} + +func chardev(name string, major, minor int64) tarEntry { + now := time.Now() + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Typeflag: tar.TypeChar, + Name: prefix + name, + Devmajor: major, + Devminor: minor, + ModTime: now, + AccessTime: now, + ChangeTime: now, + }) + }) +} + +func blockdev(name string, major, minor int64) tarEntry { + now := time.Now() + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Typeflag: tar.TypeBlock, + Name: prefix + name, + Devmajor: major, + Devminor: minor, + ModTime: now, + AccessTime: now, + ChangeTime: now, + }) + }) +} +func fifo(name string) tarEntry { + now := time.Now() + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + return w.WriteHeader(&tar.Header{ + Typeflag: tar.TypeFifo, + Name: prefix + name, + ModTime: now, + AccessTime: now, + ChangeTime: now, + }) + }) +} + +func prefetchLandmark() tarEntry { + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + if err := w.WriteHeader(&tar.Header{ + Name: PrefetchLandmark, + Typeflag: tar.TypeReg, + Size: int64(len([]byte{landmarkContents})), + }); err != nil { + return err + } + contents := []byte{landmarkContents} + if _, err := io.CopyN(w, bytes.NewReader(contents), int64(len(contents))); err != nil { + return err + } + return nil + }) +} + +func noPrefetchLandmark() tarEntry { + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + if err := w.WriteHeader(&tar.Header{ + Name: NoPrefetchLandmark, + Typeflag: tar.TypeReg, + Size: int64(len([]byte{landmarkContents})), + }); err != nil { + return err + } + contents := []byte{landmarkContents} + if _, err := io.CopyN(w, bytes.NewReader(contents), int64(len(contents))); err != nil { + return err + } + return nil + }) +} + +func regDigest(t *testing.T, name string, contentStr string, digestMap map[string]digest.Digest) tarEntry { + if digestMap == nil { + t.Fatalf("digest map mustn't be nil") + } + content := []byte(contentStr) + + var n int64 + for n < int64(len(content)) { + size := int64(chunkSize) + remain := int64(len(content)) - n + if remain < size { + size = remain + } + dgstr := digest.Canonical.Digester() + if _, err := io.CopyN(dgstr.Hash(), bytes.NewReader(content[n:n+size]), size); err != nil { + t.Fatalf("failed to calculate digest of %q (name=%q,offset=%d,size=%d)", + string(content[n:n+size]), name, n, size) + } + digestMap[chunkID(name, n, size)] = dgstr.Digest() + n += size + } + + return tarEntryFunc(func(w *tar.Writer, prefix string) error { + if err := w.WriteHeader(&tar.Header{ + Typeflag: tar.TypeReg, + Name: prefix + name, + Size: int64(len(content)), + }); err != nil { + return err + } + if _, err := io.CopyN(w, bytes.NewReader(content), int64(len(content))); err != nil { + return err + } + return nil + }) +} + +func fileModeToTarMode(mode os.FileMode) (int64, error) { + h, err := tar.FileInfoHeader(fileInfoOnlyMode(mode), "") + if err != nil { + return 0, err + } + return h.Mode, nil +} + +// fileInfoOnlyMode is os.FileMode that populates only file mode. +type fileInfoOnlyMode os.FileMode + +func (f fileInfoOnlyMode) Name() string { return "" } +func (f fileInfoOnlyMode) Size() int64 { return 0 } +func (f fileInfoOnlyMode) Mode() os.FileMode { return os.FileMode(f) } +func (f fileInfoOnlyMode) ModTime() time.Time { return time.Now() } +func (f fileInfoOnlyMode) IsDir() bool { return os.FileMode(f).IsDir() } +func (f fileInfoOnlyMode) Sys() interface{} { return nil } diff --git a/vendor/github.com/containerd/stargz-snapshotter/estargz/types.go b/vendor/github.com/containerd/stargz-snapshotter/estargz/types.go new file mode 100644 index 000000000..1b1075a64 --- /dev/null +++ b/vendor/github.com/containerd/stargz-snapshotter/estargz/types.go @@ -0,0 +1,314 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + Copyright 2019 The Go Authors. All rights reserved. + Use of this source code is governed by a BSD-style + license that can be found in the LICENSE file. +*/ + +package estargz + +import ( + "archive/tar" + "hash" + "io" + "os" + "path" + "time" + + digest "github.com/opencontainers/go-digest" +) + +const ( + // TOCTarName is the name of the JSON file in the tar archive in the + // table of contents gzip stream. + TOCTarName = "stargz.index.json" + + // FooterSize is the number of bytes in the footer + // + // The footer is an empty gzip stream with no compression and an Extra + // header of the form "%016xSTARGZ", where the 64 bit hex-encoded + // number is the offset to the gzip stream of JSON TOC. + // + // 51 comes from: + // + // 10 bytes gzip header + // 2 bytes XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ")) + // 2 bytes Extra: SI1 = 'S', SI2 = 'G' + // 2 bytes Extra: LEN = 22 (16 hex digits + len("STARGZ")) + // 22 bytes Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC) + // 5 bytes flate header + // 8 bytes gzip footer + // (End of the eStargz blob) + // + // NOTE: For Extra fields, subfield IDs SI1='S' SI2='G' is used for eStargz. + FooterSize = 51 + + // legacyFooterSize is the number of bytes in the legacy stargz footer. + // + // 47 comes from: + // + // 10 byte gzip header + + // 2 byte (LE16) length of extra, encoding 22 (16 hex digits + len("STARGZ")) == "\x16\x00" + + // 22 bytes of extra (fmt.Sprintf("%016xSTARGZ", tocGzipOffset)) + // 5 byte flate header + // 8 byte gzip footer (two little endian uint32s: digest, size) + legacyFooterSize = 47 + + // TOCJSONDigestAnnotation is an annotation for an image layer. This stores the + // digest of the TOC JSON. + // This annotation is valid only when it is specified in `.[]layers.annotations` + // of an image manifest. + TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest" + + // StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy + // pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size + // to the runtime but current OCI image doesn't ship this information by default. So we store this + // to the special annotation. + StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size" + + // PrefetchLandmark is a file entry which indicates the end position of + // prefetch in the stargz file. + PrefetchLandmark = ".prefetch.landmark" + + // NoPrefetchLandmark is a file entry which indicates that no prefetch should + // occur in the stargz file. + NoPrefetchLandmark = ".no.prefetch.landmark" + + landmarkContents = 0xf +) + +// JTOC is the JSON-serialized table of contents index of the files in the stargz file. +type JTOC struct { + Version int `json:"version"` + Entries []*TOCEntry `json:"entries"` +} + +// TOCEntry is an entry in the stargz file's TOC (Table of Contents). +type TOCEntry struct { + // Name is the tar entry's name. It is the complete path + // stored in the tar file, not just the base name. + Name string `json:"name"` + + // Type is one of "dir", "reg", "symlink", "hardlink", "char", + // "block", "fifo", or "chunk". + // The "chunk" type is used for regular file data chunks past the first + // TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset, + // ChunkOffset, and ChunkSize populated. + Type string `json:"type"` + + // Size, for regular files, is the logical size of the file. + Size int64 `json:"size,omitempty"` + + // ModTime3339 is the modification time of the tar entry. Empty + // means zero or unknown. Otherwise it's in UTC RFC3339 + // format. Use the ModTime method to access the time.Time value. + ModTime3339 string `json:"modtime,omitempty"` + modTime time.Time + + // LinkName, for symlinks and hardlinks, is the link target. + LinkName string `json:"linkName,omitempty"` + + // Mode is the permission and mode bits. + Mode int64 `json:"mode,omitempty"` + + // UID is the user ID of the owner. + UID int `json:"uid,omitempty"` + + // GID is the group ID of the owner. + GID int `json:"gid,omitempty"` + + // Uname is the username of the owner. + // + // In the serialized JSON, this field may only be present for + // the first entry with the same UID. + Uname string `json:"userName,omitempty"` + + // Gname is the group name of the owner. + // + // In the serialized JSON, this field may only be present for + // the first entry with the same GID. + Gname string `json:"groupName,omitempty"` + + // Offset, for regular files, provides the offset in the + // stargz file to the file's data bytes. See ChunkOffset and + // ChunkSize. + Offset int64 `json:"offset,omitempty"` + + nextOffset int64 // the Offset of the next entry with a non-zero Offset + + // DevMajor is the major device number for "char" and "block" types. + DevMajor int `json:"devMajor,omitempty"` + + // DevMinor is the major device number for "char" and "block" types. + DevMinor int `json:"devMinor,omitempty"` + + // NumLink is the number of entry names pointing to this entry. + // Zero means one name references this entry. + NumLink int + + // Xattrs are the extended attribute for the entry. + Xattrs map[string][]byte `json:"xattrs,omitempty"` + + // Digest stores the OCI checksum for regular files payload. + // It has the form "sha256:abcdef01234....". + Digest string `json:"digest,omitempty"` + + // ChunkOffset is non-zero if this is a chunk of a large, + // regular file. If so, the Offset is where the gzip header of + // ChunkSize bytes at ChunkOffset in Name begin. + // + // In serialized form, a "chunkSize" JSON field of zero means + // that the chunk goes to the end of the file. After reading + // from the stargz TOC, though, the ChunkSize is initialized + // to a non-zero file for when Type is either "reg" or + // "chunk". + ChunkOffset int64 `json:"chunkOffset,omitempty"` + ChunkSize int64 `json:"chunkSize,omitempty"` + + // ChunkDigest stores an OCI digest of the chunk. This must be formed + // as "sha256:0123abcd...". + ChunkDigest string `json:"chunkDigest,omitempty"` + + children map[string]*TOCEntry +} + +// ModTime returns the entry's modification time. +func (e *TOCEntry) ModTime() time.Time { return e.modTime } + +// NextOffset returns the position (relative to the start of the +// stargz file) of the next gzip boundary after e.Offset. +func (e *TOCEntry) NextOffset() int64 { return e.nextOffset } + +func (e *TOCEntry) addChild(baseName string, child *TOCEntry) { + if e.children == nil { + e.children = make(map[string]*TOCEntry) + } + if child.Type == "dir" { + e.NumLink++ // Entry ".." in the subdirectory links to this directory + } + e.children[baseName] = child +} + +// isDataType reports whether TOCEntry is a regular file or chunk (something that +// contains regular file data). +func (e *TOCEntry) isDataType() bool { return e.Type == "reg" || e.Type == "chunk" } + +// Stat returns a FileInfo value representing e. +func (e *TOCEntry) Stat() os.FileInfo { return fileInfo{e} } + +// ForeachChild calls f for each child item. If f returns false, iteration ends. +// If e is not a directory, f is not called. +func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool) { + for name, ent := range e.children { + if !f(name, ent) { + return + } + } +} + +// LookupChild returns the directory e's child by its base name. +func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool) { + child, ok = e.children[baseName] + return +} + +// fileInfo implements os.FileInfo using the wrapped *TOCEntry. +type fileInfo struct{ e *TOCEntry } + +var _ os.FileInfo = fileInfo{} + +func (fi fileInfo) Name() string { return path.Base(fi.e.Name) } +func (fi fileInfo) IsDir() bool { return fi.e.Type == "dir" } +func (fi fileInfo) Size() int64 { return fi.e.Size } +func (fi fileInfo) ModTime() time.Time { return fi.e.ModTime() } +func (fi fileInfo) Sys() interface{} { return fi.e } +func (fi fileInfo) Mode() (m os.FileMode) { + // TOCEntry.Mode is tar.Header.Mode so we can understand the these bits using `tar` pkg. + m = (&tar.Header{Mode: fi.e.Mode}).FileInfo().Mode() & + (os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky) + switch fi.e.Type { + case "dir": + m |= os.ModeDir + case "symlink": + m |= os.ModeSymlink + case "char": + m |= os.ModeDevice | os.ModeCharDevice + case "block": + m |= os.ModeDevice + case "fifo": + m |= os.ModeNamedPipe + } + return m +} + +// TOCEntryVerifier holds verifiers that are usable for verifying chunks contained +// in a eStargz blob. +type TOCEntryVerifier interface { + + // Verifier provides a content verifier that can be used for verifying the + // contents of the specified TOCEntry. + Verifier(ce *TOCEntry) (digest.Verifier, error) +} + +// Compression provides the compression helper to be used creating and parsing eStargz. +// This package provides gzip-based Compression by default, but any compression +// algorithm (e.g. zstd) can be used as long as it implements Compression. +type Compression interface { + Compressor + Decompressor +} + +// Compressor represents the helper mothods to be used for creating eStargz. +type Compressor interface { + // Writer returns WriteCloser to be used for writing a chunk to eStargz. + // Everytime a chunk is written, the WriteCloser is closed and Writer is + // called again for writing the next chunk. + Writer(w io.Writer) (io.WriteCloser, error) + + // WriteTOCAndFooter is called to write JTOC to the passed Writer. + // diffHash calculates the DiffID (uncompressed sha256 hash) of the blob + // WriteTOCAndFooter can optionally write anything that affects DiffID calculation + // (e.g. uncompressed TOC JSON). + // + // This function returns tocDgst that represents the digest of TOC that will be used + // to verify this blob when it's parsed. + WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error) +} + +// Deompressor represents the helper mothods to be used for parsing eStargz. +type Decompressor interface { + // Reader returns ReadCloser to be used for decompressing file payload. + Reader(r io.Reader) (io.ReadCloser, error) + + // FooterSize returns the size of the footer of this blob. + FooterSize() int64 + + // ParseFooter parses the footer and returns the offset and (compressed) size of TOC. + // + // Here, tocSize is optional. If tocSize <= 0, it's by default the size of the range + // from tocOffset until the beginning of the footer (blob size - tocOff - FooterSize). + ParseFooter(p []byte) (tocOffset, tocSize int64, err error) + + // ParseTOC parses TOC from the passed reader. The reader provides the partial contents + // of the underlying blob that has the range specified by ParseFooter method. + // + // This function returns tocDgst that represents the digest of TOC that will be used + // to verify this blob. This must match to the value returned from + // Compressor.WriteTOCAndFooter that is used when creating this blob. + ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) +} -- cgit v1.2.3-54-g00ecf