| author | dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> | 2021-09-13 12:29:39 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-13 12:29:39 +0000 |
| commit | eec59cea2c20cf5e515bdcbc8c35eba3e07da607 (patch) | |
| tree | 71ec92fd12f16d15f25facd599ef097c2650c5f9 /vendor/github.com/containers/storage/pkg/chunked | |
| parent | 0f3d3bd21da0b67542c44c832f0e1642c5f639cf (diff) | |
Bump github.com/containers/storage from 1.35.0 to 1.36.0
Bumps [github.com/containers/storage](https://github.com/containers/storage) from 1.35.0 to 1.36.0.
- [Release notes](https://github.com/containers/storage/releases)
- [Changelog](https://github.com/containers/storage/blob/main/docs/containers-storage-changes.md)
- [Commits](https://github.com/containers/storage/compare/v1.35.0...v1.36.0)
---
updated-dependencies:
- dependency-name: github.com/containers/storage
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
Diffstat (limited to 'vendor/github.com/containers/storage/pkg/chunked')
4 files changed, 386 insertions, 103 deletions
```diff
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compression.go b/vendor/github.com/containers/storage/pkg/chunked/compression.go
index f2811fb9a..96254bc4e 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compression.go
@@ -1,14 +1,18 @@
 package chunked
 
 import (
+	archivetar "archive/tar"
 	"bytes"
 	"encoding/binary"
 	"fmt"
 	"io"
+	"strconv"
 
+	"github.com/containerd/stargz-snapshotter/estargz"
 	"github.com/containers/storage/pkg/chunked/compressor"
 	"github.com/containers/storage/pkg/chunked/internal"
 	"github.com/klauspost/compress/zstd"
+	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/pkg/errors"
 	"github.com/vbatts/tar-split/archive/tar"
@@ -50,25 +54,129 @@ func isZstdChunkedFrameMagic(data []byte) bool {
 	return bytes.Equal(internal.ZstdChunkedFrameMagic, data[:8])
 }
 
+func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
+	// information on the format here https://github.com/containerd/stargz-snapshotter/blob/main/docs/stargz-estargz.md
+	footerSize := int64(51)
+	if blobSize <= footerSize {
+		return nil, 0, errors.New("blob too small")
+	}
+	chunk := ImageSourceChunk{
+		Offset: uint64(blobSize - footerSize),
+		Length: uint64(footerSize),
+	}
+	parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
+	if err != nil {
+		return nil, 0, err
+	}
+	var reader io.ReadCloser
+	select {
+	case r := <-parts:
+		reader = r
+	case err := <-errs:
+		return nil, 0, err
+	}
+	defer reader.Close()
+	footer := make([]byte, footerSize)
+	if _, err := io.ReadFull(reader, footer); err != nil {
+		return nil, 0, err
+	}
+
+	/* Read the ToC offset:
+	   - 10 bytes  gzip header
+	   - 2  bytes  XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
+	   - 2  bytes  Extra: SI1 = 'S', SI2 = 'G'
+	   - 2  bytes  Extra: LEN = 22 (16 hex digits + len("STARGZ"))
+	   - 22 bytes  Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
+	   - 5  bytes  flate header: BFINAL = 1(last block), BTYPE = 0(non-compressed block), LEN = 0
+	   - 8  bytes  gzip footer
+	*/
+	tocOffset, err := strconv.ParseInt(string(footer[16:16+22-6]), 16, 64)
+	if err != nil {
+		return nil, 0, errors.Wrap(err, "parse ToC offset")
+	}
+
+	size := int64(blobSize - footerSize - tocOffset)
+	// set a reasonable limit
+	if size > (1<<20)*50 {
+		return nil, 0, errors.New("manifest too big")
+	}
+
+	chunk = ImageSourceChunk{
+		Offset: uint64(tocOffset),
+		Length: uint64(size),
+	}
+	parts, errs, err = blobStream.GetBlobAt([]ImageSourceChunk{chunk})
+	if err != nil {
+		return nil, 0, err
+	}
+
+	var tocReader io.ReadCloser
+	select {
+	case r := <-parts:
+		tocReader = r
+	case err := <-errs:
+		return nil, 0, err
+	}
+	defer tocReader.Close()
+
+	r, err := pgzip.NewReader(tocReader)
+	if err != nil {
+		return nil, 0, err
+	}
+	defer r.Close()
+
+	aTar := archivetar.NewReader(r)
+
+	header, err := aTar.Next()
+	if err != nil {
+		return nil, 0, err
+	}
+	// set a reasonable limit
+	if header.Size > (1<<20)*50 {
+		return nil, 0, errors.New("manifest too big")
+	}
+
+	manifestUncompressed := make([]byte, header.Size)
+	if _, err := io.ReadFull(aTar, manifestUncompressed); err != nil {
+		return nil, 0, err
+	}
+
+	manifestDigester := digest.Canonical.Digester()
+	manifestChecksum := manifestDigester.Hash()
+	if _, err := manifestChecksum.Write(manifestUncompressed); err != nil {
+		return nil, 0, err
+	}
+
+	d, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
+	if err != nil {
+		return nil, 0, err
+	}
+	if manifestDigester.Digest() != d {
+		return nil, 0, errors.New("invalid manifest checksum")
+	}
+
+	return manifestUncompressed, tocOffset, nil
+}
+
 // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must
 // be specified.
 // This function uses the io.containers.zstd-chunked. annotations when specified.
-func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) {
+func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
 	footerSize := int64(internal.FooterSizeSupported)
 	if blobSize <= footerSize {
-		return nil, errors.New("blob too small")
+		return nil, 0, errors.New("blob too small")
 	}
 
 	manifestChecksumAnnotation := annotations[internal.ManifestChecksumKey]
 	if manifestChecksumAnnotation == "" {
-		return nil, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey)
+		return nil, 0, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey)
 	}
 
 	var offset, length, lengthUncompressed, manifestType uint64
 
 	if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" {
 		if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil {
-			return nil, err
+			return nil, 0, err
 		}
 	} else {
 		chunk := ImageSourceChunk{
@@ -77,18 +185,18 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
 		}
 		parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
 		if err != nil {
-			return nil, err
+			return nil, 0, err
 		}
 		var reader io.ReadCloser
 		select {
 		case r := <-parts:
 			reader = r
 		case err := <-errs:
-			return nil, err
+			return nil, 0, err
 		}
 		footer := make([]byte, footerSize)
 		if _, err := io.ReadFull(reader, footer); err != nil {
-			return nil, err
+			return nil, 0, err
 		}
 
 		offset = binary.LittleEndian.Uint64(footer[0:8])
@@ -96,20 +204,20 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
 		lengthUncompressed = binary.LittleEndian.Uint64(footer[16:24])
 		manifestType = binary.LittleEndian.Uint64(footer[24:32])
 		if !isZstdChunkedFrameMagic(footer[32:40]) {
-			return nil, errors.New("invalid magic number")
+			return nil, 0, errors.New("invalid magic number")
 		}
 	}
 
 	if manifestType != internal.ManifestTypeCRFS {
-		return nil, errors.New("invalid manifest type")
+		return nil, 0, errors.New("invalid manifest type")
 	}
 
 	// set a reasonable limit
 	if length > (1<<20)*50 {
-		return nil, errors.New("manifest too big")
+		return nil, 0, errors.New("manifest too big")
 	}
 	if lengthUncompressed > (1<<20)*50 {
-		return nil, errors.New("manifest too big")
+		return nil, 0, errors.New("manifest too big")
 	}
 
 	chunk := ImageSourceChunk{
@@ -119,47 +227,47 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
 	parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 	var reader io.ReadCloser
 	select {
 	case r := <-parts:
 		reader = r
 	case err := <-errs:
-		return nil, err
+		return nil, 0, err
 	}
 
 	manifest := make([]byte, length)
 	if _, err := io.ReadFull(reader, manifest); err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 
 	manifestDigester := digest.Canonical.Digester()
 	manifestChecksum := manifestDigester.Hash()
 	if _, err := manifestChecksum.Write(manifest); err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 
 	d, err := digest.Parse(manifestChecksumAnnotation)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 	if manifestDigester.Digest() != d {
-		return nil, errors.New("invalid manifest checksum")
+		return nil, 0, errors.New("invalid manifest checksum")
 	}
 
 	decoder, err := zstd.NewReader(nil)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 	defer decoder.Close()
 
 	b := make([]byte, 0, lengthUncompressed)
 	if decoded, err := decoder.DecodeAll(manifest, b); err == nil {
-		return decoded, nil
+		return decoded, int64(offset), nil
 	}
 
-	return manifest, nil
+	return manifest, int64(offset), nil
 }
 
 // ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
index a205b73fd..092cf584a 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
@@ -50,7 +50,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
 		return offset, nil
 	}
 
-	var metadata []internal.ZstdFileMetadata
+	var metadata []internal.FileMetadata
 	for {
 		hdr, err := tr.Next()
 		if err != nil {
@@ -112,7 +112,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
 		for k, v := range hdr.Xattrs {
 			xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
 		}
-		m := internal.ZstdFileMetadata{
+		m := internal.FileMetadata{
 			Type:     typ,
 			Name:     hdr.Name,
 			Linkname: hdr.Linkname,
diff --git a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
index af0025c20..c91c43d85 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
@@ -17,12 +17,12 @@ import (
 	"github.com/opencontainers/go-digest"
 )
 
-type ZstdTOC struct {
-	Version int                `json:"version"`
-	Entries []ZstdFileMetadata `json:"entries"`
+type TOC struct {
+	Version int            `json:"version"`
+	Entries []FileMetadata `json:"entries"`
 }
 
-type ZstdFileMetadata struct {
+type FileMetadata struct {
 	Type     string `json:"type"`
 	Name     string `json:"name"`
 	Linkname string `json:"linkName,omitempty"`
@@ -114,11 +114,11 @@ func appendZstdSkippableFrame(dest io.Writer, data []byte) error {
 	return nil
 }
 
-func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []ZstdFileMetadata, level int) error {
+func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []FileMetadata, level int) error {
 	// 8 is the size of the zstd skippable frame header + the frame size
 	manifestOffset := offset + 8
 
-	toc := ZstdTOC{
+	toc := TOC{
 		Version: 1,
 		Entries: metadata,
 	}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
index 3aea77f22..7bd804c44 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
@@ -10,19 +10,23 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
+	"reflect"
 	"sort"
 	"strings"
 	"syscall"
 	"time"
 
+	"github.com/containerd/stargz-snapshotter/estargz"
 	storage "github.com/containers/storage"
 	graphdriver "github.com/containers/storage/drivers"
 	driversCopy "github.com/containers/storage/drivers/copy"
 	"github.com/containers/storage/pkg/archive"
 	"github.com/containers/storage/pkg/chunked/internal"
 	"github.com/containers/storage/pkg/idtools"
+	"github.com/containers/storage/pkg/system"
 	"github.com/containers/storage/types"
 	"github.com/klauspost/compress/zstd"
+	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
@@ -35,13 +39,22 @@ const (
 	newFileFlags            = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
 	containersOverrideXattr = "user.containers.override_stat"
 	bigDataKey              = "zstd-chunked-manifest"
+
+	fileTypeZstdChunked = iota
+	fileTypeEstargz     = iota
 )
 
-type chunkedZstdDiffer struct {
+type compressedFileType int
+
+type chunkedDiffer struct {
 	stream         ImageSourceSeekable
 	manifest       []byte
-	layersMetadata map[string][]internal.ZstdFileMetadata
+	layersMetadata map[string][]internal.FileMetadata
 	layersTarget   map[string]string
+	tocOffset      int64
+	fileType       compressedFileType
+
+	gzipReader *pgzip.Reader
 }
 
 func timeToTimespec(time time.Time) (ts unix.Timespec) {
@@ -71,7 +84,10 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
 	defer destDir.Close()
 
 	doLink := func() error {
-		return unix.Linkat(srcFd, "", int(destDir.Fd()), destBase, unix.AT_EMPTY_PATH)
+		// Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
+		// /proc/self/fd doesn't and can be used with rootless.
+		srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFd)
+		return unix.Linkat(unix.AT_FDCWD, srcPath, int(destDir.Fd()), destBase, unix.AT_SYMLINK_FOLLOW)
 	}
 
 	err := doLink()
@@ -101,26 +117,28 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
 	return dstFile, st.Size(), err
 }
 
-func prepareOtherLayersCache(layersMetadata map[string][]internal.ZstdFileMetadata) map[string]map[string]*internal.ZstdFileMetadata {
-	maps := make(map[string]map[string]*internal.ZstdFileMetadata)
+func prepareOtherLayersCache(layersMetadata map[string][]internal.FileMetadata) map[string]map[string][]*internal.FileMetadata {
+	maps := make(map[string]map[string][]*internal.FileMetadata)
 	for layerID, v := range layersMetadata {
-		r := make(map[string]*internal.ZstdFileMetadata)
+		r := make(map[string][]*internal.FileMetadata)
 		for i := range v {
-			r[v[i].Digest] = &v[i]
+			if v[i].Digest != "" {
+				r[v[i].Digest] = append(r[v[i].Digest], &v[i])
+			}
 		}
 		maps[layerID] = r
 	}
 	return maps
 }
 
-func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata, map[string]string, error) {
+func getLayersCache(store storage.Store) (map[string][]internal.FileMetadata, map[string]string, error) {
 	allLayers, err := store.Layers()
 	if err != nil {
 		return nil, nil, err
 	}
 
-	layersMetadata := make(map[string][]internal.ZstdFileMetadata)
+	layersMetadata := make(map[string][]internal.FileMetadata)
 	layersTarget := make(map[string]string)
 	for _, r := range allLayers {
 		manifestReader, err := store.LayerBigData(r.ID, bigDataKey)
@@ -132,7 +150,7 @@ func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata
 		if err != nil {
 			return nil, nil, err
 		}
-		var toc internal.ZstdTOC
+		var toc internal.TOC
 		if err := json.Unmarshal(manifest, &toc); err != nil {
 			continue
 		}
@@ -152,11 +170,14 @@ func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotat
 	if _, ok := annotations[internal.ManifestChecksumKey]; ok {
 		return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss)
 	}
+	if _, ok := annotations[estargz.TOCJSONDigestAnnotation]; ok {
+		return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss)
+	}
 	return nil, errors.New("blob type not supported for partial retrieval")
 }
 
-func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedZstdDiffer, error) {
-	manifest, err := readZstdChunkedManifest(iss, blobSize, annotations)
+func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedDiffer, error) {
+	manifest, tocOffset, err := readZstdChunkedManifest(iss, blobSize, annotations)
 	if err != nil {
 		return nil, err
 	}
@@ -165,11 +186,33 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
 		return nil, err
 	}
 
-	return &chunkedZstdDiffer{
+	return &chunkedDiffer{
 		stream:         iss,
 		manifest:       manifest,
 		layersMetadata: layersMetadata,
 		layersTarget:   layersTarget,
+		tocOffset:      tocOffset,
+		fileType:       fileTypeZstdChunked,
+	}, nil
+}
+
+func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedDiffer, error) {
+	manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, annotations)
+	if err != nil {
+		return nil, err
+	}
+	layersMetadata, layersTarget, err := getLayersCache(store)
+	if err != nil {
+		return nil, err
+	}
+
+	return &chunkedDiffer{
+		stream:         iss,
+		manifest:       manifest,
+		layersMetadata: layersMetadata,
+		layersTarget:   layersTarget,
+		tocOffset:      tocOffset,
+		fileType:       fileTypeEstargz,
 	}, nil
 }
 
@@ -179,7 +222,7 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
 // otherFile contains the metadata for the file.
 // dirfd is an open file descriptor to the destination root directory.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func copyFileFromOtherLayer(file internal.ZstdFileMetadata, source string, otherFile *internal.ZstdFileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
 	if err != nil {
 		return false, nil, 0, err
@@ -199,30 +242,98 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFil
 	return true, dstFile, written, err
 }
 
+// canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile.
+// It checks that the two files have the same UID, GID, file mode and xattrs.
+func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool {
+	if file.UID != otherFile.UID {
+		return false
+	}
+	if file.GID != otherFile.GID {
+		return false
+	}
+	if file.Mode != otherFile.Mode {
+		return false
+	}
+	if !reflect.DeepEqual(file.Xattrs, otherFile.Xattrs) {
+		return false
+	}
+	return true
+}
+
+// canDedupFileWithHardLink checks if the specified file can be deduplicated by an
+// open file, given its descriptor and stat data.
+func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool {
+	st, ok := s.Sys().(*syscall.Stat_t)
+	if !ok {
+		return false
+	}
+
+	path := fmt.Sprintf("/proc/self/fd/%d", fd)
+
+	listXattrs, err := system.Llistxattr(path)
+	if err != nil {
+		return false
+	}
+
+	xattrsToIgnore := map[string]interface{}{
+		"security.selinux": true,
+	}
+
+	xattrs := make(map[string]string)
+	for _, x := range listXattrs {
+		v, err := system.Lgetxattr(path, x)
+		if err != nil {
+			return false
+		}
+
+		if _, found := xattrsToIgnore[x]; found {
+			continue
+		}
+		xattrs[x] = string(v)
+	}
+	// fill only the attributes used by canDedupMetadataWithHardLink.
+	otherFile := internal.FileMetadata{
+		UID:    int(st.Uid),
+		GID:    int(st.Gid),
+		Mode:   int64(st.Mode),
+		Xattrs: xattrs,
+	}
+	return canDedupMetadataWithHardLink(file, &otherFile)
+}
+
 // findFileInOtherLayers finds the specified file in other layers.
 // file is the file to look for.
 // dirfd is an open file descriptor to the checkout root directory.
 // layersMetadata contains the metadata for each layer in the storage.
 // layersTarget maps each layer to its checkout on disk.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOtherLayers(file internal.ZstdFileMetadata, dirfd int, layersMetadata map[string]map[string]*internal.ZstdFileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOtherLayers(file *internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
 	// this is ugly, needs to be indexed
 	for layerID, checksums := range layersMetadata {
-		m, found := checksums[file.Digest]
-		if !found {
-			continue
-		}
-
 		source, ok := layersTarget[layerID]
 		if !ok {
 			continue
 		}
+		files, found := checksums[file.Digest]
+		if !found {
+			continue
+		}
 
+		for _, candidate := range files {
+			// check if it is a valid candidate to dedup file
+			if useHardLinks && !canDedupMetadataWithHardLink(file, candidate) {
+				continue
+			}
 
-		found, dstFile, written, err := copyFileFromOtherLayer(file, source, m, dirfd, useHardLinks)
-		if found && err == nil {
-			return found, dstFile, written, err
+			found, dstFile, written, err := copyFileFromOtherLayer(file, source, candidate, dirfd, useHardLinks)
+			if found && err == nil {
+				return found, dstFile, written, err
+			}
 		}
 	}
+	// If hard links deduplication was used and it has failed, try again without hard links.
+	if useHardLinks {
+		return findFileInOtherLayers(file, dirfd, layersMetadata, layersTarget, false)
+	}
 	return false, nil, 0, nil
 }
@@ -240,7 +351,7 @@ func getFileDigest(f *os.File) (digest.Digest, error) {
 // file is the file to look for.
 // dirfd is an open fd to the destination checkout.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	sourceFile := filepath.Clean(filepath.Join("/", file.Name))
 	if !strings.HasPrefix(sourceFile, "/usr/") {
 		// limit host deduplication to files under /usr.
@@ -278,6 +389,9 @@ func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks b
 		return false, nil, 0, nil
 	}
 
+	// check if the open file can be deduplicated with hard links
+	useHardLinks = useHardLinks && canDedupFileWithHardLink(file, fd, st)
+
 	dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
 	if err != nil {
 		return false, nil, 0, nil
@@ -300,7 +414,7 @@ func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks b
 	return true, dstFile, written, nil
 }
 
-func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
 	if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
 		return nil
 	}
@@ -327,7 +441,7 @@ func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOp
 }
 
 type missingFile struct {
-	File *internal.ZstdFileMetadata
+	File *internal.FileMetadata
 	Gap  int64
 }
@@ -341,7 +455,7 @@ type missingChunk struct {
 }
 
 // setFileAttrs sets the file attributes for file given metadata
-func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
 	if file == nil || file.Fd() < 0 {
 		return errors.Errorf("invalid file")
 	}
@@ -401,7 +515,7 @@ func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (
 	return os.NewFile(uintptr(fd), name), nil
 }
 
-func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) (err error) {
+func (c *chunkedDiffer) createFileFromCompressedStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) (err error) {
 	file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
 	if err != nil {
 		return err
@@ -413,18 +527,48 @@ func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.
 		}
 	}()
 
-	z, err := zstd.NewReader(reader)
-	if err != nil {
-		return err
-	}
-	defer z.Close()
-
 	digester := digest.Canonical.Digester()
 	checksum := digester.Hash()
-	_, err = z.WriteTo(io.MultiWriter(file, checksum))
-	if err != nil {
-		return err
+	to := io.MultiWriter(file, checksum)
+
+	switch c.fileType {
+	case fileTypeZstdChunked:
+		z, err := zstd.NewReader(reader)
+		if err != nil {
+			return err
+		}
+		defer z.Close()
+
+		if _, err := io.Copy(to, io.LimitReader(z, metadata.Size)); err != nil {
+			return err
+		}
+		if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+			return err
+		}
+	case fileTypeEstargz:
+		if c.gzipReader == nil {
+			r, err := pgzip.NewReader(reader)
+			if err != nil {
+				return err
+			}
+			c.gzipReader = r
+		} else {
+			if err := c.gzipReader.Reset(reader); err != nil {
+				return err
+			}
+		}
+		defer c.gzipReader.Close()
+
+		if _, err := io.Copy(to, io.LimitReader(c.gzipReader, metadata.Size)); err != nil {
+			return err
+		}
+		if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+			return err
+		}
+	default:
+		return fmt.Errorf("unknown file type %q", c.fileType)
 	}
+
 	manifestChecksum, err := digest.Parse(metadata.Digest)
 	if err != nil {
 		return err
@@ -435,7 +579,7 @@ func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.
 	return setFileAttrs(file, mode, metadata, options)
 }
 
-func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
 	for mc := 0; ; mc++ {
 		var part io.ReadCloser
 		select {
@@ -448,9 +592,10 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
 			if mc == len(missingChunks) {
 				break
 			}
-			return errors.Errorf("invalid stream returned %d %d", mc, len(missingChunks))
+			return errors.Errorf("invalid stream returned")
 		}
 		if mc == len(missingChunks) {
+			part.Close()
 			return errors.Errorf("too many chunks returned")
 		}
 
@@ -459,6 +604,7 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
 				limitReader := io.LimitReader(part, mf.Gap)
 				_, err := io.Copy(ioutil.Discard, limitReader)
 				if err != nil {
+					part.Close()
 					return err
 				}
 				continue
@@ -466,7 +612,7 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
 
 			limitReader := io.LimitReader(part, mf.Length())
 
-			if err := createFileFromZstdStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
+			if err := c.createFileFromCompressedStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
 				part.Close()
 				return err
 			}
@@ -505,18 +651,20 @@ func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk
 			newMissingChunks = append(newMissingChunks, missingChunks[i])
 		} else {
 			prev := &newMissingChunks[len(newMissingChunks)-1]
-			gapFile := missingFile{
-				Gap: int64(gap),
-			}
 			prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length
-			prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...)
+			if gap > 0 {
+				gapFile := missingFile{
+					Gap: int64(gap),
+				}
+				prev.Files = append(prev.Files, gapFile)
+			}
+			prev.Files = append(prev.Files, missingChunks[i].Files...)
 		}
 	}
-
 	return newMissingChunks
 }
 
-func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
 	var chunksToRequest []ImageSourceChunk
 	for _, c := range missingChunks {
 		chunksToRequest = append(chunksToRequest, c.RawChunk)
@@ -527,7 +675,7 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss
 	var err error
 	var errs chan error
 	for {
-		streams, errs, err = input.stream.GetBlobAt(chunksToRequest)
+		streams, errs, err = c.stream.GetBlobAt(chunksToRequest)
 		if err == nil {
 			break
 		}
@@ -546,13 +694,13 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss
 		return err
 	}
 
-	if err := storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
+	if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
 		return err
 	}
 	return nil
 }
 
-func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
 	parent := filepath.Dir(metadata.Name)
 	base := filepath.Base(metadata.Name)
 
@@ -581,7 +729,7 @@ func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata,
 	return setFileAttrs(file, mode, metadata, options)
 }
 
-func safeLink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
 	sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_RDONLY, 0)
 	if err != nil {
 		return err
@@ -613,7 +761,7 @@ func safeLink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata,
 	return setFileAttrs(newFile, mode, metadata, options)
 }
 
-func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
 	destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
 	destDirFd := dirfd
 	if destDir != "." {
@@ -691,7 +839,7 @@ type hardLinkToCreate struct {
 	dest     string
 	dirfd    int
 	mode     os.FileMode
-	metadata *internal.ZstdFileMetadata
+	metadata *internal.FileMetadata
 }
 
 func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
@@ -701,12 +849,12 @@ func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bo
 	return def
 }
 
-func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
+func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
 	bigData := map[string][]byte{
-		bigDataKey: d.manifest,
+		bigDataKey: c.manifest,
 	}
 	output := graphdriver.DriverWithDifferOutput{
-		Differ:  d,
+		Differ:  c,
 		BigData: bigData,
 	}
 
@@ -726,30 +874,21 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 	useHardLinks := parseBooleanPullOption(&storeOpts, "use_hard_links", false)
 
 	// Generate the manifest
-	var toc internal.ZstdTOC
-	if err := json.Unmarshal(d.manifest, &toc); err != nil {
+	var toc internal.TOC
+	if err := json.Unmarshal(c.manifest, &toc); err != nil {
 		return output, err
 	}
 
 	whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
 
 	var missingChunks []missingChunk
-	var mergedEntries []internal.ZstdFileMetadata
 
-	if err := maybeDoIDRemap(toc.Entries, options); err != nil {
+	mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
+	if err != nil {
 		return output, err
 	}
-
-	for _, e := range toc.Entries {
-		if e.Type == TypeChunk {
-			l := len(mergedEntries)
-			if l == 0 || mergedEntries[l-1].Type != TypeReg {
-				return output, errors.New("chunk type without a regular file")
-			}
-			mergedEntries[l-1].EndOffset = e.EndOffset
-			continue
-		}
-		mergedEntries = append(mergedEntries, e)
+	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
+		return output, err
 	}
 
 	if options.ForceMask != nil {
@@ -768,7 +907,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 	}
 	defer unix.Close(dirfd)
 
-	otherLayersCache := prepareOtherLayersCache(d.layersMetadata)
+	otherLayersCache := prepareOtherLayersCache(c.layersMetadata)
 
 	// hardlinks can point to missing files. So create them after all files
 	// are retrieved
@@ -870,7 +1009,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 
 		totalChunksSize += r.Size
 
-		found, dstFile, _, err := findFileInOtherLayers(r, dirfd, otherLayersCache, d.layersTarget, useHardLinks)
+		found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
 		if err != nil {
 			return output, err
 		}
@@ -886,7 +1025,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 		}
 
 		if enableHostDedup {
-			found, dstFile, _, err = findFileOnTheHost(r, dirfd, useHardLinks)
+			found, dstFile, _, err = findFileOnTheHost(&r, dirfd, useHardLinks)
 			if err != nil {
 				return output, err
 			}
@@ -908,9 +1047,11 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 			Offset: uint64(r.Offset),
 			Length: uint64(r.EndOffset - r.Offset),
 		}
+
 		file := missingFile{
-			File: &toc.Entries[i],
+			File: &mergedEntries[i],
 		}
+
 		missingChunks = append(missingChunks, missingChunk{
 			RawChunk: rawChunk,
 			Files: []missingFile{
@@ -922,7 +1063,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 	// There are some missing files. Prepare a multirange request for the missing chunks.
 	if len(missingChunks) > 0 {
 		missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks)
-		if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, options); err != nil {
+		if err := c.retrieveMissingFiles(dest, dirfd, missingChunks, options); err != nil {
 			return output, err
 		}
 	}
@@ -938,3 +1079,37 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
 	}
 	return output, nil
 }
+
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, error) {
+	var mergedEntries []internal.FileMetadata
+	var prevEntry *internal.FileMetadata
+	for _, entry := range entries {
+		e := entry
+
+		// ignore the metadata files for the estargz format.
+		if fileType == fileTypeEstargz && (e.Name == estargz.PrefetchLandmark || e.Name == estargz.NoPrefetchLandmark || e.Name == estargz.TOCTarName) {
+			continue
+		}
+
+		if e.Type == TypeChunk {
+			if prevEntry == nil || prevEntry.Type != TypeReg {
+				return nil, errors.New("chunk type without a regular file")
+			}
+			prevEntry.EndOffset = e.EndOffset
+			continue
+		}
+		mergedEntries = append(mergedEntries, e)
+		prevEntry = &e
+	}
+	// stargz/estargz doesn't store EndOffset so let's calculate it here
+	lastOffset := c.tocOffset
+	for i := len(mergedEntries) - 1; i >= 0; i-- {
+		if mergedEntries[i].EndOffset == 0 {
+			mergedEntries[i].EndOffset = lastOffset
+		}
+		if mergedEntries[i].Offset != 0 {
+			lastOffset = mergedEntries[i].Offset
+		}
+	}
+	return mergedEntries, nil
+}
```
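A note on the estargz footer parsing that `readEstargzChunkedManifest` introduces in compression.go above: the 51-byte footer is a valid empty gzip member whose Extra field smuggles the ToC offset as `fmt.Sprintf("%016xSTARGZ", offsetOfTOC)`. The sketch below is illustrative only and assumes nothing beyond the layout comment in the diff; the helper names `makeFooter` and `parseTocOffset` are hypothetical and not part of containers/storage. It assembles a footer byte by byte and recovers the offset with the same `footer[16:16+22-6]` slice the diff uses:

```go
package main

import (
	"fmt"
	"strconv"
)

// estargzFooterSize matches the fixed footer size checked in the diff.
const estargzFooterSize = 51

// makeFooter builds a footer following the layout described in the diff's
// comment: gzip header, FEXTRA subfield carrying the ToC offset, an empty
// final stored deflate block, and the gzip trailer.
func makeFooter(tocOffset int64) []byte {
	footer := make([]byte, 0, estargzFooterSize)
	// 10 bytes gzip header: magic, CM=8 (deflate), FLG=4 (FEXTRA), MTIME=0, XFL=0, OS=0xff
	footer = append(footer, 0x1f, 0x8b, 0x08, 0x04, 0, 0, 0, 0, 0, 0xff)
	// 2 bytes XLEN = 26 (little endian): 4-byte subfield header + 16 hex digits + len("STARGZ")
	footer = append(footer, 26, 0)
	// 4 bytes subfield header: SI1='S', SI2='G', LEN=22 (little endian)
	footer = append(footer, 'S', 'G', 22, 0)
	// 22 bytes subfield payload: "%016xSTARGZ"
	footer = append(footer, fmt.Sprintf("%016xSTARGZ", tocOffset)...)
	// 5 bytes final stored deflate block: BFINAL=1, BTYPE=00, LEN=0, NLEN=0xffff
	footer = append(footer, 0x01, 0x00, 0x00, 0xff, 0xff)
	// 8 bytes gzip footer: CRC32 and ISIZE of the empty payload
	footer = append(footer, 0, 0, 0, 0, 0, 0, 0, 0)
	return footer
}

// parseTocOffset mirrors the single parsing line in the diff: bytes 16..32
// hold the 16 hex digits, skipping the trailing "STARGZ" (length 6) of the
// 22-byte subfield, hence footer[16 : 16+22-6].
func parseTocOffset(footer []byte) (int64, error) {
	if len(footer) != estargzFooterSize {
		return 0, fmt.Errorf("unexpected footer size %d", len(footer))
	}
	return strconv.ParseInt(string(footer[16:16+22-6]), 16, 64)
}

func main() {
	f := makeFooter(0x1234)
	off, err := parseTocOffset(f)
	if err != nil {
		panic(err)
	}
	fmt.Printf("footer is %d bytes, ToC offset = %#x\n", len(f), off)
}
```

Running it prints `footer is 51 bytes, ToC offset = 0x1234`, matching the fixed `footerSize := int64(51)` and the hex parse in the vendored code.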
