about | summary | refs | log | tree | commit | diff
path: root/vendor/github.com/containers/storage/pkg/chunked
diff options
context:
space:
mode:
author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> 2021-09-13 12:29:39 +0000
committer: GitHub <noreply@github.com> 2021-09-13 12:29:39 +0000
commit: eec59cea2c20cf5e515bdcbc8c35eba3e07da607 (patch)
tree: 71ec92fd12f16d15f25facd599ef097c2650c5f9 /vendor/github.com/containers/storage/pkg/chunked
parent: 0f3d3bd21da0b67542c44c832f0e1642c5f639cf (diff)
download: podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.tar.gz
podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.tar.bz2
podman-eec59cea2c20cf5e515bdcbc8c35eba3e07da607.zip
Bump github.com/containers/storage from 1.35.0 to 1.36.0
Bumps [github.com/containers/storage](https://github.com/containers/storage) from 1.35.0 to 1.36.0.
- [Release notes](https://github.com/containers/storage/releases)
- [Changelog](https://github.com/containers/storage/blob/main/docs/containers-storage-changes.md)
- [Commits](https://github.com/containers/storage/compare/v1.35.0...v1.36.0)

---
updated-dependencies:
- dependency-name: github.com/containers/storage
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Diffstat (limited to 'vendor/github.com/containers/storage/pkg/chunked')
-rw-r--r-- vendor/github.com/containers/storage/pkg/chunked/compression.go 148
-rw-r--r-- vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go 4
-rw-r--r-- vendor/github.com/containers/storage/pkg/chunked/internal/compression.go 12
-rw-r--r-- vendor/github.com/containers/storage/pkg/chunked/storage_linux.go 325
4 files changed, 386 insertions, 103 deletions
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compression.go b/vendor/github.com/containers/storage/pkg/chunked/compression.go
index f2811fb9a..96254bc4e 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compression.go
@@ -1,14 +1,18 @@
package chunked
import (
+ archivetar "archive/tar"
"bytes"
"encoding/binary"
"fmt"
"io"
+ "strconv"
+ "github.com/containerd/stargz-snapshotter/estargz"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
+ "github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/vbatts/tar-split/archive/tar"
@@ -50,25 +54,129 @@ func isZstdChunkedFrameMagic(data []byte) bool {
return bytes.Equal(internal.ZstdChunkedFrameMagic, data[:8])
}
+// readEstargzChunkedManifest reads the estargz table of contents (ToC) from the seekable stream
+// blobStream.  The blob total size must be specified.
+// The uncompressed ToC is validated against the digest stored in the
+// estargz.TOCJSONDigestAnnotation annotation.
+// It returns the uncompressed ToC and the offset in the blob where the ToC starts.
+func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
+	// information on the format here https://github.com/containerd/stargz-snapshotter/blob/main/docs/stargz-estargz.md
+	footerSize := int64(51)
+	if blobSize <= footerSize {
+		return nil, 0, errors.New("blob too small")
+	}
+	chunk := ImageSourceChunk{
+		Offset: uint64(blobSize - footerSize),
+		Length: uint64(footerSize),
+	}
+	parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
+	if err != nil {
+		return nil, 0, err
+	}
+	var reader io.ReadCloser
+	select {
+	case r := <-parts:
+		reader = r
+	case err := <-errs:
+		return nil, 0, err
+	}
+	defer reader.Close()
+	footer := make([]byte, footerSize)
+	if _, err := io.ReadFull(reader, footer); err != nil {
+		return nil, 0, err
+	}
+
+	/* Read the ToC offset:
+	   - 10 bytes  gzip header
+	   - 2  bytes  XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
+	   - 2  bytes  Extra: SI1 = 'S', SI2 = 'G'
+	   - 2  bytes  Extra: LEN = 22 (16 hex digits + len("STARGZ"))
+	   - 22 bytes  Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
+	   - 5  bytes  flate header: BFINAL = 1(last block), BTYPE = 0(non-compressed block), LEN = 0
+	   - 8  bytes  gzip footer
+	*/
+	tocOffset, err := strconv.ParseInt(string(footer[16:16+22-6]), 16, 64)
+	if err != nil {
+		return nil, 0, errors.Wrap(err, "parse ToC offset")
+	}
+	// The offset is read from the blob itself and ParseInt accepts a sign.  Reject any value
+	// that does not leave room for the ToC before the footer: it would make the size negative,
+	// slip through the size limit below and wrap around once converted to uint64.
+	if tocOffset < 0 || tocOffset >= blobSize-footerSize {
+		return nil, 0, errors.New("invalid ToC offset")
+	}
+
+	size := blobSize - footerSize - tocOffset
+	// set a reasonable limit
+	if size > (1<<20)*50 {
+		return nil, 0, errors.New("manifest too big")
+	}
+
+	chunk = ImageSourceChunk{
+		Offset: uint64(tocOffset),
+		Length: uint64(size),
+	}
+	parts, errs, err = blobStream.GetBlobAt([]ImageSourceChunk{chunk})
+	if err != nil {
+		return nil, 0, err
+	}
+
+	var tocReader io.ReadCloser
+	select {
+	case r := <-parts:
+		tocReader = r
+	case err := <-errs:
+		return nil, 0, err
+	}
+	defer tocReader.Close()
+
+	r, err := pgzip.NewReader(tocReader)
+	if err != nil {
+		return nil, 0, err
+	}
+	defer r.Close()
+
+	// The ToC is the first entry of the tar archive stored in the gzip stream.
+	aTar := archivetar.NewReader(r)
+
+	header, err := aTar.Next()
+	if err != nil {
+		return nil, 0, err
+	}
+	// set a reasonable limit
+	if header.Size > (1<<20)*50 {
+		return nil, 0, errors.New("manifest too big")
+	}
+
+	manifestUncompressed := make([]byte, header.Size)
+	if _, err := io.ReadFull(aTar, manifestUncompressed); err != nil {
+		return nil, 0, err
+	}
+
+	manifestDigester := digest.Canonical.Digester()
+	manifestChecksum := manifestDigester.Hash()
+	if _, err := manifestChecksum.Write(manifestUncompressed); err != nil {
+		return nil, 0, err
+	}
+
+	d, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
+	if err != nil {
+		return nil, 0, err
+	}
+	if manifestDigester.Digest() != d {
+		return nil, 0, errors.New("invalid manifest checksum")
+	}
+
+	return manifestUncompressed, tocOffset, nil
+}
+
// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must
// be specified.
// This function uses the io.containers.zstd-chunked. annotations when specified.
-func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) {
+func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
footerSize := int64(internal.FooterSizeSupported)
if blobSize <= footerSize {
- return nil, errors.New("blob too small")
+ return nil, 0, errors.New("blob too small")
}
manifestChecksumAnnotation := annotations[internal.ManifestChecksumKey]
if manifestChecksumAnnotation == "" {
- return nil, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey)
+ return nil, 0, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey)
}
var offset, length, lengthUncompressed, manifestType uint64
if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" {
if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil {
- return nil, err
+ return nil, 0, err
}
} else {
chunk := ImageSourceChunk{
@@ -77,18 +185,18 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
}
parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
if err != nil {
- return nil, err
+ return nil, 0, err
}
var reader io.ReadCloser
select {
case r := <-parts:
reader = r
case err := <-errs:
- return nil, err
+ return nil, 0, err
}
footer := make([]byte, footerSize)
if _, err := io.ReadFull(reader, footer); err != nil {
- return nil, err
+ return nil, 0, err
}
offset = binary.LittleEndian.Uint64(footer[0:8])
@@ -96,20 +204,20 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
lengthUncompressed = binary.LittleEndian.Uint64(footer[16:24])
manifestType = binary.LittleEndian.Uint64(footer[24:32])
if !isZstdChunkedFrameMagic(footer[32:40]) {
- return nil, errors.New("invalid magic number")
+ return nil, 0, errors.New("invalid magic number")
}
}
if manifestType != internal.ManifestTypeCRFS {
- return nil, errors.New("invalid manifest type")
+ return nil, 0, errors.New("invalid manifest type")
}
// set a reasonable limit
if length > (1<<20)*50 {
- return nil, errors.New("manifest too big")
+ return nil, 0, errors.New("manifest too big")
}
if lengthUncompressed > (1<<20)*50 {
- return nil, errors.New("manifest too big")
+ return nil, 0, errors.New("manifest too big")
}
chunk := ImageSourceChunk{
@@ -119,47 +227,47 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
parts, errs, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk})
if err != nil {
- return nil, err
+ return nil, 0, err
}
var reader io.ReadCloser
select {
case r := <-parts:
reader = r
case err := <-errs:
- return nil, err
+ return nil, 0, err
}
manifest := make([]byte, length)
if _, err := io.ReadFull(reader, manifest); err != nil {
- return nil, err
+ return nil, 0, err
}
manifestDigester := digest.Canonical.Digester()
manifestChecksum := manifestDigester.Hash()
if _, err := manifestChecksum.Write(manifest); err != nil {
- return nil, err
+ return nil, 0, err
}
d, err := digest.Parse(manifestChecksumAnnotation)
if err != nil {
- return nil, err
+ return nil, 0, err
}
if manifestDigester.Digest() != d {
- return nil, errors.New("invalid manifest checksum")
+ return nil, 0, errors.New("invalid manifest checksum")
}
decoder, err := zstd.NewReader(nil)
if err != nil {
- return nil, err
+ return nil, 0, err
}
defer decoder.Close()
b := make([]byte, 0, lengthUncompressed)
if decoded, err := decoder.DecodeAll(manifest, b); err == nil {
- return decoded, nil
+ return decoded, int64(offset), nil
}
- return manifest, nil
+ return manifest, int64(offset), nil
}
// ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
index a205b73fd..092cf584a 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
@@ -50,7 +50,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
return offset, nil
}
- var metadata []internal.ZstdFileMetadata
+ var metadata []internal.FileMetadata
for {
hdr, err := tr.Next()
if err != nil {
@@ -112,7 +112,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
- m := internal.ZstdFileMetadata{
+ m := internal.FileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
diff --git a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
index af0025c20..c91c43d85 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
@@ -17,12 +17,12 @@ import (
"github.com/opencontainers/go-digest"
)
-type ZstdTOC struct {
- Version int `json:"version"`
- Entries []ZstdFileMetadata `json:"entries"`
+// TOC is the table of contents of a chunked layer: a format version and one
+// FileMetadata entry per item, (de)serialized as JSON.
+type TOC struct {
+ Version int `json:"version"`
+ Entries []FileMetadata `json:"entries"`
}
-type ZstdFileMetadata struct {
+type FileMetadata struct {
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
@@ -114,11 +114,11 @@ func appendZstdSkippableFrame(dest io.Writer, data []byte) error {
return nil
}
-func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []ZstdFileMetadata, level int) error {
+func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []FileMetadata, level int) error {
// 8 is the size of the zstd skippable frame header + the frame size
manifestOffset := offset + 8
- toc := ZstdTOC{
+ toc := TOC{
Version: 1,
Entries: metadata,
}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
index 3aea77f22..7bd804c44 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
@@ -10,19 +10,23 @@ import (
"io/ioutil"
"os"
"path/filepath"
+ "reflect"
"sort"
"strings"
"syscall"
"time"
+ "github.com/containerd/stargz-snapshotter/estargz"
storage "github.com/containers/storage"
graphdriver "github.com/containers/storage/drivers"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/idtools"
+ "github.com/containers/storage/pkg/system"
"github.com/containers/storage/types"
"github.com/klauspost/compress/zstd"
+ "github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -35,13 +39,22 @@ const (
newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
containersOverrideXattr = "user.containers.override_stat"
bigDataKey = "zstd-chunked-manifest"
+
+ fileTypeZstdChunked = iota
+ fileTypeEstargz = iota
)
-type chunkedZstdDiffer struct {
+// compressedFileType identifies the format of the compressed layer handled by a chunkedDiffer.
+type compressedFileType int
+
+// chunkedDiffer applies a layer by retrieving only the missing chunks from a seekable blob.
+type chunkedDiffer struct {
+ // stream is the seekable blob the chunks are requested from.
stream ImageSourceSeekable
+ // manifest is the ToC of the layer; ApplyDiff decodes it as JSON and stores it as big data.
manifest []byte
- layersMetadata map[string][]internal.ZstdFileMetadata
+ // layersMetadata and layersTarget are the values returned by getLayersCache.
+ layersMetadata map[string][]internal.FileMetadata
layersTarget map[string]string
+ // tocOffset is the offset returned together with the manifest when it was read.
+ tocOffset int64
+ // fileType is fileTypeZstdChunked or fileTypeEstargz.
+ fileType compressedFileType
+
+ // gzipReader is created on first use and then Reset for each estargz file.
+ gzipReader *pgzip.Reader
}
func timeToTimespec(time time.Time) (ts unix.Timespec) {
@@ -71,7 +84,10 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
defer destDir.Close()
doLink := func() error {
- return unix.Linkat(srcFd, "", int(destDir.Fd()), destBase, unix.AT_EMPTY_PATH)
+ // Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
+ // /proc/self/fd doesn't and can be used with rootless.
+ srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFd)
+ return unix.Linkat(unix.AT_FDCWD, srcPath, int(destDir.Fd()), destBase, unix.AT_SYMLINK_FOLLOW)
}
err := doLink()
@@ -101,26 +117,28 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
return dstFile, st.Size(), err
}
-func prepareOtherLayersCache(layersMetadata map[string][]internal.ZstdFileMetadata) map[string]map[string]*internal.ZstdFileMetadata {
- maps := make(map[string]map[string]*internal.ZstdFileMetadata)
+// prepareOtherLayersCache builds, for each layer ID, an index from digest to all the
+// entries of that layer with that digest.  Entries with an empty digest are left out.
+// The index stores pointers into the slices of layersMetadata.
+func prepareOtherLayersCache(layersMetadata map[string][]internal.FileMetadata) map[string]map[string][]*internal.FileMetadata {
+ maps := make(map[string]map[string][]*internal.FileMetadata)
for layerID, v := range layersMetadata {
- r := make(map[string]*internal.ZstdFileMetadata)
+ r := make(map[string][]*internal.FileMetadata)
for i := range v {
- r[v[i].Digest] = &v[i]
+ if v[i].Digest != "" {
+ r[v[i].Digest] = append(r[v[i].Digest], &v[i])
+ }
}
maps[layerID] = r
}
return maps
}
-func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata, map[string]string, error) {
+func getLayersCache(store storage.Store) (map[string][]internal.FileMetadata, map[string]string, error) {
allLayers, err := store.Layers()
if err != nil {
return nil, nil, err
}
- layersMetadata := make(map[string][]internal.ZstdFileMetadata)
+ layersMetadata := make(map[string][]internal.FileMetadata)
layersTarget := make(map[string]string)
for _, r := range allLayers {
manifestReader, err := store.LayerBigData(r.ID, bigDataKey)
@@ -132,7 +150,7 @@ func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata
if err != nil {
return nil, nil, err
}
- var toc internal.ZstdTOC
+ var toc internal.TOC
if err := json.Unmarshal(manifest, &toc); err != nil {
continue
}
@@ -152,11 +170,14 @@ func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotat
if _, ok := annotations[internal.ManifestChecksumKey]; ok {
return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss)
}
+ if _, ok := annotations[estargz.TOCJSONDigestAnnotation]; ok {
+ return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss)
+ }
return nil, errors.New("blob type not supported for partial retrieval")
}
-func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedZstdDiffer, error) {
- manifest, err := readZstdChunkedManifest(iss, blobSize, annotations)
+func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedDiffer, error) {
+ manifest, tocOffset, err := readZstdChunkedManifest(iss, blobSize, annotations)
if err != nil {
return nil, err
}
@@ -165,11 +186,33 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
return nil, err
}
- return &chunkedZstdDiffer{
+ return &chunkedDiffer{
stream: iss,
manifest: manifest,
layersMetadata: layersMetadata,
layersTarget: layersTarget,
+ tocOffset: tocOffset,
+ fileType: fileTypeZstdChunked,
+ }, nil
+}
+
+// makeEstargzChunkedDiffer creates a chunkedDiffer for an estargz blob.
+// It reads the ToC from iss with readEstargzChunkedManifest and loads the layers cache
+// from store with getLayersCache; an error from either step is returned as is.
+func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*chunkedDiffer, error) {
+ manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, annotations)
+ if err != nil {
+ return nil, err
+ }
+ layersMetadata, layersTarget, err := getLayersCache(store)
+ if err != nil {
+ return nil, err
+ }
+
+ return &chunkedDiffer{
+ stream: iss,
+ manifest: manifest,
+ layersMetadata: layersMetadata,
+ layersTarget: layersTarget,
+ tocOffset: tocOffset,
+ fileType: fileTypeEstargz,
}, nil
}
@@ -179,7 +222,7 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
// otherFile contains the metadata for the file.
// dirfd is an open file descriptor to the destination root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
-func copyFileFromOtherLayer(file internal.ZstdFileMetadata, source string, otherFile *internal.ZstdFileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
if err != nil {
return false, nil, 0, err
@@ -199,30 +242,98 @@ func copyFileFromOtherLayer(file internal.ZstdFileMetadata, source string, other
return true, dstFile, written, err
}
+// canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile.
+// It checks that the two files have the same UID, GID, file mode and xattrs.
+// A missing (nil) xattrs map and an empty one are considered equal.
+func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool {
+	if file.UID != otherFile.UID || file.GID != otherFile.GID {
+		return false
+	}
+	if file.Mode != otherFile.Mode {
+		return false
+	}
+	// reflect.DeepEqual reports a nil map and an empty non-nil map as different, so
+	// handle "no xattrs on both sides" before comparing the content.
+	if len(file.Xattrs) == 0 && len(otherFile.Xattrs) == 0 {
+		return true
+	}
+	return reflect.DeepEqual(file.Xattrs, otherFile.Xattrs)
+}
+
+// canDedupFileWithHardLink checks if the specified file can be deduplicated by an
+// open file, given its descriptor and stat data.
+// The owner, the mode and the extended attributes of the open file (security.selinux
+// excluded) are compared with the ones of file through canDedupMetadataWithHardLink.
+// Any failure to retrieve the attributes of the open file makes it return false.
+func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool {
+	st, ok := s.Sys().(*syscall.Stat_t)
+	if !ok {
+		return false
+	}
+
+	path := fmt.Sprintf("/proc/self/fd/%d", fd)
+
+	listXattrs, err := system.Llistxattr(path)
+	if err != nil {
+		return false
+	}
+
+	xattrsToIgnore := map[string]struct{}{
+		"security.selinux": {},
+	}
+
+	xattrs := make(map[string]string)
+	for _, x := range listXattrs {
+		// Skip the ignored attributes before reading them: their value is not used and a
+		// failure to read it must not prevent the deduplication.
+		if _, found := xattrsToIgnore[x]; found {
+			continue
+		}
+
+		v, err := system.Lgetxattr(path, x)
+		if err != nil {
+			return false
+		}
+		// NOTE(review): the value is stored raw; confirm file.Xattrs uses the same encoding.
+		xattrs[x] = string(v)
+	}
+	// fill only the attributes used by canDedupMetadataWithHardLink.
+	// NOTE(review): st.Mode also carries the file type bits; confirm file.Mode is encoded
+	// the same way, otherwise the mode comparison cannot match.
+	otherFile := internal.FileMetadata{
+		UID:    int(st.Uid),
+		GID:    int(st.Gid),
+		Mode:   int64(st.Mode),
+		Xattrs: xattrs,
+	}
+	return canDedupMetadataWithHardLink(file, &otherFile)
+}
+
// findFileInOtherLayers finds the specified file in other layers.
// file is the file to look for.
// dirfd is an open file descriptor to the checkout root directory.
// layersMetadata contains the metadata for each layer in the storage.
// layersTarget maps each layer to its checkout on disk.
// useHardLinks defines whether the deduplication can be performed using hard links.
+// A candidate that cannot be copied is skipped without reporting its error.  When
+// useHardLinks is set and no candidate could be used, the lookup is repeated once
+// with useHardLinks disabled.
-func findFileInOtherLayers(file internal.ZstdFileMetadata, dirfd int, layersMetadata map[string]map[string]*internal.ZstdFileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOtherLayers(file *internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
// this is ugly, needs to be indexed
for layerID, checksums := range layersMetadata {
- m, found := checksums[file.Digest]
- if !found {
- continue
- }
-
source, ok := layersTarget[layerID]
if !ok {
continue
}
+ files, found := checksums[file.Digest]
+ if !found {
+ continue
+ }
+ for _, candidate := range files {
+ // check if it is a valid candidate to dedup file
+ if useHardLinks && !canDedupMetadataWithHardLink(file, candidate) {
+ continue
+ }
- found, dstFile, written, err := copyFileFromOtherLayer(file, source, m, dirfd, useHardLinks)
- if found && err == nil {
- return found, dstFile, written, err
+ found, dstFile, written, err := copyFileFromOtherLayer(file, source, candidate, dirfd, useHardLinks)
+ if found && err == nil {
+ return found, dstFile, written, err
+ }
}
}
+ // If hard links deduplication was used and it has failed, try again without hard links.
+ if useHardLinks {
+ return findFileInOtherLayers(file, dirfd, layersMetadata, layersTarget, false)
+ }
return false, nil, 0, nil
}
@@ -240,7 +351,7 @@ func getFileDigest(f *os.File) (digest.Digest, error) {
// file is the file to look for.
// dirfd is an open fd to the destination checkout.
// useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
sourceFile := filepath.Clean(filepath.Join("/", file.Name))
if !strings.HasPrefix(sourceFile, "/usr/") {
// limit host deduplication to files under /usr.
@@ -278,6 +389,9 @@ func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks b
return false, nil, 0, nil
}
+ // check if the open file can be deduplicated with hard links
+ useHardLinks = useHardLinks && canDedupFileWithHardLink(file, fd, st)
+
dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
if err != nil {
return false, nil, 0, nil
@@ -300,7 +414,7 @@ func findFileOnTheHost(file internal.ZstdFileMetadata, dirfd int, useHardLinks b
return true, dstFile, written, nil
}
-func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
return nil
}
@@ -327,7 +441,7 @@ func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOp
}
type missingFile struct {
- File *internal.ZstdFileMetadata
+ File *internal.FileMetadata
Gap int64
}
@@ -341,7 +455,7 @@ type missingChunk struct {
}
// setFileAttrs sets the file attributes for file given metadata
-func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
if file == nil || file.Fd() < 0 {
return errors.Errorf("invalid file")
}
@@ -401,7 +515,7 @@ func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (
return os.NewFile(uintptr(fd), name), nil
}
-func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) (err error) {
+func (c *chunkedDiffer) createFileFromCompressedStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) (err error) {
file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
if err != nil {
return err
@@ -413,18 +527,48 @@ func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.
}
}()
- z, err := zstd.NewReader(reader)
- if err != nil {
- return err
- }
- defer z.Close()
-
digester := digest.Canonical.Digester()
checksum := digester.Hash()
- _, err = z.WriteTo(io.MultiWriter(file, checksum))
- if err != nil {
- return err
+ to := io.MultiWriter(file, checksum)
+
+ switch c.fileType {
+ case fileTypeZstdChunked:
+ z, err := zstd.NewReader(reader)
+ if err != nil {
+ return err
+ }
+ defer z.Close()
+
+ if _, err := io.Copy(to, io.LimitReader(z, metadata.Size)); err != nil {
+ return err
+ }
+ if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+ return err
+ }
+ case fileTypeEstargz:
+ if c.gzipReader == nil {
+ r, err := pgzip.NewReader(reader)
+ if err != nil {
+ return err
+ }
+ c.gzipReader = r
+ } else {
+ if err := c.gzipReader.Reset(reader); err != nil {
+ return err
+ }
+ }
+ defer c.gzipReader.Close()
+
+ if _, err := io.Copy(to, io.LimitReader(c.gzipReader, metadata.Size)); err != nil {
+ return err
+ }
+ if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("unknown file type %q", c.fileType)
}
+
manifestChecksum, err := digest.Parse(metadata.Digest)
if err != nil {
return err
@@ -435,7 +579,7 @@ func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, mode os.
return setFileAttrs(file, mode, metadata, options)
}
-func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
for mc := 0; ; mc++ {
var part io.ReadCloser
select {
@@ -448,9 +592,10 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
if mc == len(missingChunks) {
break
}
- return errors.Errorf("invalid stream returned %d %d", mc, len(missingChunks))
+ return errors.Errorf("invalid stream returned")
}
if mc == len(missingChunks) {
+ part.Close()
return errors.Errorf("too many chunks returned")
}
@@ -459,6 +604,7 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
limitReader := io.LimitReader(part, mf.Gap)
_, err := io.Copy(ioutil.Discard, limitReader)
if err != nil {
+ part.Close()
return err
}
continue
@@ -466,7 +612,7 @@ func storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string,
limitReader := io.LimitReader(part, mf.Length())
- if err := createFileFromZstdStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
+ if err := c.createFileFromCompressedStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
part.Close()
return err
}
@@ -505,18 +651,20 @@ func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk
newMissingChunks = append(newMissingChunks, missingChunks[i])
} else {
prev := &newMissingChunks[len(newMissingChunks)-1]
- gapFile := missingFile{
- Gap: int64(gap),
- }
prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length
- prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...)
+ if gap > 0 {
+ gapFile := missingFile{
+ Gap: int64(gap),
+ }
+ prev.Files = append(prev.Files, gapFile)
+ }
+ prev.Files = append(prev.Files, missingChunks[i].Files...)
}
}
-
return newMissingChunks
}
-func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
var chunksToRequest []ImageSourceChunk
for _, c := range missingChunks {
chunksToRequest = append(chunksToRequest, c.RawChunk)
@@ -527,7 +675,7 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss
var err error
var errs chan error
for {
- streams, errs, err = input.stream.GetBlobAt(chunksToRequest)
+ streams, errs, err = c.stream.GetBlobAt(chunksToRequest)
if err == nil {
break
}
@@ -546,13 +694,13 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss
return err
}
- if err := storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
+ if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
return err
}
return nil
}
-func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
parent := filepath.Dir(metadata.Name)
base := filepath.Base(metadata.Name)
@@ -581,7 +729,7 @@ func safeMkdir(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata,
return setFileAttrs(file, mode, metadata, options)
}
-func safeLink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_RDONLY, 0)
if err != nil {
return err
@@ -613,7 +761,7 @@ func safeLink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata,
return setFileAttrs(newFile, mode, metadata, options)
}
-func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
+func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
@@ -691,7 +839,7 @@ type hardLinkToCreate struct {
dest string
dirfd int
mode os.FileMode
- metadata *internal.ZstdFileMetadata
+ metadata *internal.FileMetadata
}
func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
@@ -701,12 +849,12 @@ func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bo
return def
}
-func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
+func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
bigData := map[string][]byte{
- bigDataKey: d.manifest,
+ bigDataKey: c.manifest,
}
output := graphdriver.DriverWithDifferOutput{
- Differ: d,
+ Differ: c,
BigData: bigData,
}
@@ -726,30 +874,21 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
useHardLinks := parseBooleanPullOption(&storeOpts, "use_hard_links", false)
// Generate the manifest
- var toc internal.ZstdTOC
- if err := json.Unmarshal(d.manifest, &toc); err != nil {
+ var toc internal.TOC
+ if err := json.Unmarshal(c.manifest, &toc); err != nil {
return output, err
}
whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
var missingChunks []missingChunk
- var mergedEntries []internal.ZstdFileMetadata
- if err := maybeDoIDRemap(toc.Entries, options); err != nil {
+ mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
+ if err != nil {
return output, err
}
-
- for _, e := range toc.Entries {
- if e.Type == TypeChunk {
- l := len(mergedEntries)
- if l == 0 || mergedEntries[l-1].Type != TypeReg {
- return output, errors.New("chunk type without a regular file")
- }
- mergedEntries[l-1].EndOffset = e.EndOffset
- continue
- }
- mergedEntries = append(mergedEntries, e)
+ if err := maybeDoIDRemap(mergedEntries, options); err != nil {
+ return output, err
}
if options.ForceMask != nil {
@@ -768,7 +907,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
}
defer unix.Close(dirfd)
- otherLayersCache := prepareOtherLayersCache(d.layersMetadata)
+ otherLayersCache := prepareOtherLayersCache(c.layersMetadata)
// hardlinks can point to missing files. So create them after all files
// are retrieved
@@ -870,7 +1009,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
totalChunksSize += r.Size
- found, dstFile, _, err := findFileInOtherLayers(r, dirfd, otherLayersCache, d.layersTarget, useHardLinks)
+ found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
if err != nil {
return output, err
}
@@ -886,7 +1025,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
}
if enableHostDedup {
- found, dstFile, _, err = findFileOnTheHost(r, dirfd, useHardLinks)
+ found, dstFile, _, err = findFileOnTheHost(&r, dirfd, useHardLinks)
if err != nil {
return output, err
}
@@ -908,9 +1047,11 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
Offset: uint64(r.Offset),
Length: uint64(r.EndOffset - r.Offset),
}
+
file := missingFile{
- File: &toc.Entries[i],
+ File: &mergedEntries[i],
}
+
missingChunks = append(missingChunks, missingChunk{
RawChunk: rawChunk,
Files: []missingFile{
@@ -922,7 +1063,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
// There are some missing files. Prepare a multirange request for the missing chunks.
if len(missingChunks) > 0 {
missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks)
- if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, options); err != nil {
+ if err := c.retrieveMissingFiles(dest, dirfd, missingChunks, options); err != nil {
return output, err
}
}
@@ -938,3 +1079,37 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
}
return output, nil
}
+
+// mergeTocEntries returns the ToC entries with each chunk entry merged into the regular
+// file that precedes it: the chunk is dropped and its EndOffset is recorded in the file entry.
+// For the estargz format the landmark files and the ToC file itself are left out.
+// Entries that end up without an EndOffset get the offset of the next entry that has one,
+// or c.tocOffset for the trailing ones.
+// It returns an error if a chunk entry doesn't follow a regular file.
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, error) {
+	var mergedEntries []internal.FileMetadata
+	for _, e := range entries {
+		// ignore the metadata files for the estargz format.
+		if fileType == fileTypeEstargz && (e.Name == estargz.PrefetchLandmark || e.Name == estargz.NoPrefetchLandmark || e.Name == estargz.TOCTarName) {
+			continue
+		}
+
+		if e.Type == TypeChunk {
+			// Update the entry stored in mergedEntries.  Keeping a pointer to the loop
+			// variable would only modify a copy and the new EndOffset would be lost.
+			l := len(mergedEntries)
+			if l == 0 || mergedEntries[l-1].Type != TypeReg {
+				return nil, errors.New("chunk type without a regular file")
+			}
+			mergedEntries[l-1].EndOffset = e.EndOffset
+			continue
+		}
+		mergedEntries = append(mergedEntries, e)
+	}
+	// stargz/estargz doesn't store EndOffset so let's calculate it here
+	lastOffset := c.tocOffset
+	for i := len(mergedEntries) - 1; i >= 0; i-- {
+		if mergedEntries[i].EndOffset == 0 {
+			mergedEntries[i].EndOffset = lastOffset
+		}
+		if mergedEntries[i].Offset != 0 {
+			lastOffset = mergedEntries[i].Offset
+		}
+	}
+	return mergedEntries, nil
+}