diff options
Diffstat (limited to 'vendor/github.com/vbatts/tar-split/tar/asm')
4 files changed, 324 insertions, 0 deletions
diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/README.md b/vendor/github.com/vbatts/tar-split/tar/asm/README.md new file mode 100644 index 000000000..2a3a5b56a --- /dev/null +++ b/vendor/github.com/vbatts/tar-split/tar/asm/README.md @@ -0,0 +1,44 @@ +asm +=== + +This library for assembly and disassembly of tar archives, facilitated by +`github.com/vbatts/tar-split/tar/storage`. + + +Concerns +-------- + +For completely safe assembly/disassembly, there will need to be a Content +Addressable Storage (CAS) directory, that maps to a checksum in the +`storage.Entity` of `storage.FileType`. + +This is due to the fact that tar archives _can_ allow multiple records for the +same path, but the last one effectively wins. Even if the prior records had a +different payload. + +In this way, when assembling an archive from relative paths, if the archive has +multiple entries for the same path, then all payloads read in from a relative +path would be identical. + + +Thoughts +-------- + +Have a look-aside directory or storage. This way when a clobbering record is +encountered from the tar stream, then the payload of the prior/existing file is +stored to the CAS. This way the clobbering record's file payload can be +extracted, but we'll have preserved the payload needed to reassemble a precise +tar archive. + +clobbered/path/to/file.[0-N] + +*alternatively* + +We could just _not_ support tar streams that have clobbering file paths. +Appending records to the archive is not incredibly common, and doesn't happen +by default for most implementations. Not supporting them wouldn't be a +security concern either, as if it did occur, we would reassemble an archive +that doesn't validate signature/checksum, so it shouldn't be trusted anyway. + +Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE. + diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go b/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go new file mode 100644 index 000000000..d624450ab --- /dev/null +++ b/vendor/github.com/vbatts/tar-split/tar/asm/assemble.go @@ -0,0 +1,130 @@ +package asm + +import ( + "bytes" + "fmt" + "hash" + "hash/crc64" + "io" + "sync" + + "github.com/vbatts/tar-split/tar/storage" +) + +// NewOutputTarStream returns an io.ReadCloser that is an assembled tar archive +// stream. +// +// It takes a storage.FileGetter, for mapping the file payloads that are to be read in, +// and a storage.Unpacker, which has access to the rawbytes and file order +// metadata. With the combination of these two items, a precise assembled Tar +// archive is possible. +func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser { + // ... Since these are interfaces, this is possible, so let's not have a nil pointer + if fg == nil || up == nil { + return nil + } + pr, pw := io.Pipe() + go func() { + err := WriteOutputTarStream(fg, up, pw) + if err != nil { + pw.CloseWithError(err) + } else { + pw.Close() + } + }() + return pr +} + +// WriteOutputTarStream writes assembled tar archive to a writer. +func WriteOutputTarStream(fg storage.FileGetter, up storage.Unpacker, w io.Writer) error { + // ... Since these are interfaces, this is possible, so let's not have a nil pointer + if fg == nil || up == nil { + return nil + } + var copyBuffer []byte + var crcHash hash.Hash + var crcSum []byte + var multiWriter io.Writer + for { + entry, err := up.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + switch entry.Type { + case storage.SegmentType: + if _, err := w.Write(entry.Payload); err != nil { + return err + } + case storage.FileType: + if entry.Size == 0 { + continue + } + fh, err := fg.Get(entry.GetName()) + if err != nil { + return err + } + if crcHash == nil { + crcHash = crc64.New(storage.CRCTable) + crcSum = make([]byte, 8) + multiWriter = io.MultiWriter(w, crcHash) + copyBuffer = byteBufferPool.Get().([]byte) + defer byteBufferPool.Put(copyBuffer) + } else { + crcHash.Reset() + } + + if _, err := copyWithBuffer(multiWriter, fh, copyBuffer); err != nil { + fh.Close() + return err + } + + if !bytes.Equal(crcHash.Sum(crcSum[:0]), entry.Payload) { + // I would rather this be a comparable ErrInvalidChecksum or such, + // but since it's coming through the PipeReader, the context of + // _which_ file would be lost... + fh.Close() + return fmt.Errorf("file integrity checksum failed for %q", entry.GetName()) + } + fh.Close() + } + } +} + +var byteBufferPool = &sync.Pool{ + New: func() interface{} { + return make([]byte, 32*1024) + }, +} + +// copyWithBuffer is taken from stdlib io.Copy implementation +// https://github.com/golang/go/blob/go1.5.1/src/io/io.go#L367 +func copyWithBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { + for { + nr, er := src.Read(buf) + if nr > 0 { + nw, ew := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + return written, err +} diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go new file mode 100644 index 000000000..54ef23aed --- /dev/null +++ b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go @@ -0,0 +1,141 @@ +package asm + +import ( + "io" + "io/ioutil" + + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/storage" +) + +// NewInputTarStream wraps the Reader stream of a tar archive and provides a +// Reader stream of the same. +// +// In the middle it will pack the segments and file metadata to storage.Packer +// `p`. +// +// The the storage.FilePutter is where payload of files in the stream are +// stashed. If this stashing is not needed, you can provide a nil +// storage.FilePutter. Since the checksumming is still needed, then a default +// of NewDiscardFilePutter will be used internally +func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { + // What to do here... folks will want their own access to the Reader that is + // their tar archive stream, but we'll need that same stream to use our + // forked 'archive/tar'. + // Perhaps do an io.TeeReader that hands back an io.Reader for them to read + // from, and we'll MITM the stream to store metadata. + // We'll need a storage.FilePutter too ... + + // Another concern, whether to do any storage.FilePutter operations, such that we + // don't extract any amount of the archive. But then again, we're not making + // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. + // Perhaps we have a DiscardFilePutter that is a bit bucket. + + // we'll return the pipe reader, since TeeReader does not buffer and will + // only read what the outputRdr Read's. Since Tar archives have padding on + // the end, we want to be the one reading the padding, even if the user's + // `archive/tar` doesn't care. + pR, pW := io.Pipe() + outputRdr := io.TeeReader(r, pW) + + // we need a putter that will generate the crc64 sums of file payloads + if fp == nil { + fp = storage.NewDiscardFilePutter() + } + + go func() { + tr := tar.NewReader(outputRdr) + tr.RawAccounting = true + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + pW.CloseWithError(err) + return + } + // even when an EOF is reached, there is often 1024 null bytes on + // the end of an archive. Collect them too. + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } + } + break // not return. We need the end of the reader. + } + if hdr == nil { + break // not return. We need the end of the reader. + } + + if b := tr.RawBytes(); len(b) > 0 { + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } + } + + var csum []byte + if hdr.Size > 0 { + var err error + _, csum, err = fp.Put(hdr.Name, tr) + if err != nil { + pW.CloseWithError(err) + return + } + } + + entry := storage.Entry{ + Type: storage.FileType, + Size: hdr.Size, + Payload: csum, + } + // For proper marshalling of non-utf8 characters + entry.SetName(hdr.Name) + + // File entries added, regardless of size + _, err = p.AddEntry(entry) + if err != nil { + pW.CloseWithError(err) + return + } + + if b := tr.RawBytes(); len(b) > 0 { + _, err = p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } + } + } + + // it is allowable, and not uncommon that there is further padding on the + // end of an archive, apart from the expected 1024 null bytes. + remainder, err := ioutil.ReadAll(outputRdr) + if err != nil && err != io.EOF { + pW.CloseWithError(err) + return + } + _, err = p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: remainder, + }) + if err != nil { + pW.CloseWithError(err) + return + } + pW.Close() + }() + + return pR, nil +} diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/doc.go b/vendor/github.com/vbatts/tar-split/tar/asm/doc.go new file mode 100644 index 000000000..4367b9022 --- /dev/null +++ b/vendor/github.com/vbatts/tar-split/tar/asm/doc.go @@ -0,0 +1,9 @@ +/* +Package asm provides the API for streaming assembly and disassembly of tar +archives. + +Using the `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the +metadata for a stream, as well as an implementation of Getting/Putting the file +entries' payload. +*/ +package asm |