summaryrefslogtreecommitdiff
path: root/vendor/github.com/vbatts/tar-split/tar/asm
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/vbatts/tar-split/tar/asm')
-rw-r--r--vendor/github.com/vbatts/tar-split/tar/asm/README.md44
-rw-r--r--vendor/github.com/vbatts/tar-split/tar/asm/assemble.go130
-rw-r--r--vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go141
-rw-r--r--vendor/github.com/vbatts/tar-split/tar/asm/doc.go9
4 files changed, 324 insertions, 0 deletions
diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/README.md b/vendor/github.com/vbatts/tar-split/tar/asm/README.md
new file mode 100644
index 000000000..2a3a5b56a
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/tar/asm/README.md
@@ -0,0 +1,44 @@
+asm
+===
+
+This library is for the assembly and disassembly of tar archives, facilitated by
+`github.com/vbatts/tar-split/tar/storage`.
+
+
+Concerns
+--------
+
+For completely safe assembly/disassembly, there will need to be a Content
+Addressable Storage (CAS) directory that maps to a checksum in the
+`storage.Entry` of `storage.FileType`.
+
+This is due to the fact that tar archives _can_ allow multiple records for the
+same path, but the last one effectively wins, even if the prior records had a
+different payload.
+
+In this way, when assembling an archive from relative paths, if the archive has
+multiple entries for the same path, then all payloads read in from a relative
+path would be identical.
+
+
+Thoughts
+--------
+
+Have a look-aside directory or storage. When a clobbering record is
+encountered in the tar stream, the payload of the prior/existing file is
+stored to the CAS. This way the clobbering record's file payload can be
+extracted, but we'll have preserved the payload needed to reassemble a precise
+tar archive.
+
+clobbered/path/to/file.[0-N]
+
+*alternatively*
+
+We could just _not_ support tar streams that have clobbering file paths.
+Appending records to the archive is not incredibly common, and doesn't happen
+by default for most implementations. Not supporting them wouldn't be a
+security concern either, as if it did occur, we would reassemble an archive
+that doesn't validate signature/checksum, so it shouldn't be trusted anyway.
+
+Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE.
+
diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go b/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go
new file mode 100644
index 000000000..d624450ab
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go
@@ -0,0 +1,130 @@
+package asm
+
+import (
+ "bytes"
+ "fmt"
+ "hash"
+ "hash/crc64"
+ "io"
+ "sync"
+
+ "github.com/vbatts/tar-split/tar/storage"
+)
+
+// NewOutputTarStream returns an io.ReadCloser from which the assembled tar
+// archive stream is read. It returns nil when fg or up is nil.
+//
+// fg is the storage.FileGetter used to open the file payloads, and up is the
+// storage.Unpacker that yields the raw segments and the entry order. Assembly
+// runs in a goroutine writing to an io.Pipe; an error from
+// WriteOutputTarStream is handed to the reader through CloseWithError.
+func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser {
+ // Interface arguments can be nil; bail out early rather than dereference one later.
+ if fg == nil || up == nil {
+ return nil
+ }
+ pr, pw := io.Pipe()
+ go func() {
+ err := WriteOutputTarStream(fg, up, pw)
+ if err != nil {
+ pw.CloseWithError(err)
+ } else {
+ pw.Close()
+ }
+ }()
+ return pr
+}
+
+// WriteOutputTarStream writes the assembled tar archive to w: segment entries are written verbatim, and each non-empty file entry is fetched from fg and checked against its stored CRC64.
+func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error {
+ // Interface arguments can be nil. NOTE(review): that case returns nil (success) without writing anything.
+ if fg == nil || up == nil {
+ return nil
+ }
+ var copyBuffer []byte
+ var crcHash hash.Hash
+ var crcSum []byte
+ var multiWriter io.Writer
+ for {
+ entry, err := up.Next()
+ if err != nil {
+ if err == io.EOF {
+ return nil // no more entries: the archive is complete
+ }
+ return err
+ }
+ switch entry.Type {
+ case storage.SegmentType:
+ if _, err := w.Write(entry.Payload); err != nil { // segment payload is written out as-is
+ return err
+ }
+ case storage.FileType:
+ if entry.Size == 0 {
+ continue // zero-size entries have no payload to fetch or verify
+ }
+ fh, err := fg.Get(entry.GetName())
+ if err != nil {
+ return err
+ }
+ if crcHash == nil { // hashing state is set up lazily, on the first non-empty file
+ crcHash = crc64.New(storage.CRCTable)
+ crcSum = make([]byte, 8)
+ multiWriter = io.MultiWriter(w, crcHash)
+ copyBuffer = byteBufferPool.Get().([]byte)
+ defer byteBufferPool.Put(copyBuffer) // handed back to the pool when this function returns
+ } else {
+ crcHash.Reset() // reuse the same hash for each subsequent file
+ }
+
+ if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { // payload goes to w and the hash in one pass
+ fh.Close()
+ return err
+ }
+
+ if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { // for file entries, Payload is the expected checksum
+ // A comparable sentinel (e.g. ErrInvalidChecksum) would be nicer, but
+ // the error may travel through a PipeReader, where the context of
+ // _which_ file failed would otherwise be lost, so the name is embedded.
+ fh.Close()
+ return fmt.Errorf("file integrity checksum failed for %q", entry.GetName())
+ }
+ fh.Close()
+ }
+ }
+}
+
+var byteBufferPool = &sync.Pool{ // reusable copy buffers for WriteOutputTarStream
+ New: func() interface{} {
+ return make([]byte, 32*1024) // 32 KiB per buffer
+ },
+}
+
+// copyWithBuffer copies from src to dst through buf until src reports io.EOF or an error occurs, and returns
+// the number of bytes written. Taken from the stdlib io.Copy implementation: https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367
+func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
+ for {
+ nr, er := src.Read(buf)
+ if nr > 0 { // write out whatever was read before looking at the read error
+ nw, ew := dst.Write(buf[0:nr])
+ if nw > 0 {
+ written += int64(nw)
+ }
+ if ew != nil {
+ err = ew
+ break
+ }
+ if nr != nw { // dst accepted fewer bytes than were read, without reporting an error
+ err = io.ErrShortWrite
+ break
+ }
+ }
+ if er == io.EOF { // EOF ends the copy and is not reported as an error
+ break
+ }
+ if er != nil {
+ err = er
+ break
+ }
+ }
+ return written, err
+}
diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go
new file mode 100644
index 000000000..54ef23aed
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go
@@ -0,0 +1,141 @@
+package asm
+
+import (
+ "io"
+ "io/ioutil"
+
+ "github.com/vbatts/tar-split/archive/tar"
+ "github.com/vbatts/tar-split/tar/storage"
+)
+
+// NewInputTarStream wraps the Reader stream of a tar archive and provides a
+// Reader stream of the same. The returned error is currently always nil.
+//
+// As the stream passes through, a goroutine packs the segments and file
+// metadata to the storage.Packer `p`; failures surface as read errors.
+//
+// The storage.FilePutter `fp` is where the payloads of files in the stream are
+// stashed. If this stashing is not needed, you can provide a nil
+// storage.FilePutter; since the checksums are still needed, a default
+// from storage.NewDiscardFilePutter will be used internally.
+func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
+ // What to do here... folks will want their own access to the Reader that is
+ // their tar archive stream, but we'll need that same stream to use our
+ // forked 'archive/tar'.
+ // Perhaps do an io.TeeReader that hands back an io.Reader for them to read
+ // from, and we'll MITM the stream to store metadata.
+ // We'll need a storage.FilePutter too ...
+
+ // Another concern, whether to do any storage.FilePutter operations, such that we
+ // don't extract any amount of the archive. But then again, we're not making
+ // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
+ // Perhaps we have a DiscardFilePutter that is a bit bucket.
+
+ // We return the pipe reader, since a TeeReader does not buffer and only
+ // reads what is read from outputRdr. Since tar archives have padding on
+ // the end, we want to be the one reading the padding, even if the user's
+ // `archive/tar` doesn't care.
+ pR, pW := io.Pipe()
+ outputRdr := io.TeeReader(r, pW) // every byte read through outputRdr is also written to the pipe
+
+ // we need a putter that will generate the crc64 sums of file payloads
+ if fp == nil {
+ fp = storage.NewDiscardFilePutter()
+ }
+
+ go func() {
+ tr := tar.NewReader(outputRdr)
+ tr.RawAccounting = true // presumably makes the forked reader keep raw bytes for RawBytes() — TODO confirm in the fork
+ for {
+ hdr, err := tr.Next()
+ if err != nil {
+ if err != io.EOF {
+ pW.CloseWithError(err)
+ return
+ }
+ // even when an EOF is reached, there is often 1024 null bytes on
+ // the end of an archive. Collect them too.
+ if b := tr.RawBytes(); len(b) > 0 {
+ _, err := p.AddEntry(storage.Entry{
+ Type: storage.SegmentType,
+ Payload: b,
+ })
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+ }
+ break // not return. We need the end of the reader.
+ }
+ if hdr == nil {
+ break // not return. We need the end of the reader.
+ }
+
+ if b := tr.RawBytes(); len(b) > 0 { // raw bytes accumulated up to this point become a segment entry
+ _, err := p.AddEntry(storage.Entry{
+ Type: storage.SegmentType,
+ Payload: b,
+ })
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+ }
+
+ var csum []byte
+ if hdr.Size > 0 { // only non-empty files go to the FilePutter; csum stays nil otherwise
+ var err error
+ _, csum, err = fp.Put(hdr.Name, tr)
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+ }
+
+ entry := storage.Entry{
+ Type: storage.FileType,
+ Size: hdr.Size,
+ Payload: csum,
+ }
+ // For proper marshalling of non-utf8 characters
+ entry.SetName(hdr.Name)
+
+ // File entries added, regardless of size
+ _, err = p.AddEntry(entry)
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+
+ if b := tr.RawBytes(); len(b) > 0 { // raw bytes accumulated since the previous RawBytes call
+ _, err = p.AddEntry(storage.Entry{
+ Type: storage.SegmentType,
+ Payload: b,
+ })
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+ }
+ }
+
+ // it is allowable, and not uncommon that there is further padding on the
+ // end of an archive, apart from the expected 1024 null bytes.
+ remainder, err := ioutil.ReadAll(outputRdr)
+ if err != nil && err != io.EOF { // NOTE(review): ReadAll does not report io.EOF as an error, so the second check looks redundant
+ pW.CloseWithError(err)
+ return
+ }
+ _, err = p.AddEntry(storage.Entry{ // NOTE(review): this trailing segment is added even when remainder is empty
+ Type: storage.SegmentType,
+ Payload: remainder,
+ })
+ if err != nil {
+ pW.CloseWithError(err)
+ return
+ }
+ pW.Close()
+ }()
+
+ return pR, nil
+}
diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/doc.go b/vendor/github.com/vbatts/tar-split/tar/asm/doc.go
new file mode 100644
index 000000000..4367b9022
--- /dev/null
+++ b/vendor/github.com/vbatts/tar-split/tar/asm/doc.go
@@ -0,0 +1,9 @@
+/*
+Package asm provides the API for streaming assembly and disassembly of tar
+archives.
+
+It uses `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the
+metadata of a stream, as well as for an implementation of Getting/Putting the
+file entries' payload.
+*/
+package asm