aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/vbatts/tar-split/tar/storage/packer.go
blob: aba694818549b65dddf999a719d5f38c108bde5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package storage

import (
	"encoding/json"
	"errors"
	"io"
	"path/filepath"
	"unicode/utf8"
)

// ErrDuplicatePath occurs when a tar archive has more than one entry for the
// same file path. The JSON packer and unpacker in this file return it when a
// FileType entry's cleaned name has already been seen.
var ErrDuplicatePath = errors.New("duplicates of file paths not supported")

// Packer describes the methods to pack Entries to a storage destination.
type Packer interface {
	// AddEntry packs the Entry and returns its position. The jsonPacker
	// implementation in this file returns -1 as the position whenever it
	// returns a non-nil error.
	AddEntry(e Entry) (int, error)
}

// Unpacker describes the methods to read Entries from a source.
type Unpacker interface {
	// Next returns the next Entry being unpacked, or error, until io.EOF.
	// The jsonUnpacker implementation in this file returns a nil Entry
	// whenever it returns a non-nil error.
	Next() (*Entry, error)
}

/* TODO(vbatts) figure out a good model for this
type PackUnpacker interface {
	Packer
	Unpacker
}
*/

// jsonUnpacker is the Unpacker returned by NewJSONUnpacker. It decodes
// Entries from a JSON stream and rejects repeated file paths.
type jsonUnpacker struct {
	// seen holds the cleaned names of the FileType entries decoded so far.
	seen seenNames
	// dec is the decoder the Entries are read from.
	dec  *json.Decoder
}

// Next decodes the next Entry from the underlying JSON decoder.
//
// An error from the decoder is handed back unchanged together with a nil
// Entry. A FileType entry whose cleaned name has already been seen by this
// unpacker yields a nil Entry and ErrDuplicatePath.
func (jup *jsonUnpacker) Next() (*Entry, error) {
	entry := new(Entry)
	if err := jup.dec.Decode(entry); err != nil {
		return nil, err
	}

	// Only file entries take part in duplicate-path detection.
	if entry.Type != FileType {
		return entry, nil
	}

	key := filepath.Clean(entry.GetName())
	if _, dup := jup.seen[key]; dup {
		return nil, ErrDuplicatePath
	}
	jup.seen[key] = struct{}{}

	return entry, nil
}

// NewJSONUnpacker returns an Unpacker that reads Entries (SegmentType and
// FileType) from r as a stream of JSON documents.
//
// Each Entry is expected to be delimited by a new line.
func NewJSONUnpacker(r io.Reader) Unpacker {
	unpacker := new(jsonUnpacker)
	unpacker.dec = json.NewDecoder(r)
	// Start with an empty set of seen paths.
	unpacker.seen = make(seenNames)
	return unpacker
}

// jsonPacker is the Packer returned by NewJSONPacker. It writes each Entry
// as a JSON document and rejects repeated file paths.
type jsonPacker struct {
	// w is the writer handed to NewJSONPacker. AddEntry does not use it
	// directly; it writes through e.
	w    io.Writer
	// e is the JSON encoder, created over w, that AddEntry writes with.
	e    *json.Encoder
	// pos is the position given to the next Entry; it advances by one after
	// each successful AddEntry.
	pos  int
	// seen holds the cleaned names of the FileType entries added so far.
	seen seenNames
}

// seenNames is a set of cleaned entry names, used by the packer and unpacker
// to detect duplicate file paths.
type seenNames map[string]struct{}

// AddEntry encodes e as one JSON document and returns the position assigned
// to it.
//
// e is received by value, so the adjustments made here (Name/NameRaw and
// Position) are not visible to the caller. On a duplicate FileType path the
// result is -1 and ErrDuplicatePath; on an encoder failure the result is -1
// and the encoder's error.
func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
	// A non-empty name that is not valid UTF-8 is carried as raw bytes
	// instead of as a string.
	if e.Name != "" && !utf8.ValidString(e.Name) {
		e.NameRaw = []byte(e.Name)
		e.Name = ""
	}

	// Reject a repeated file path before anything is written.
	if e.Type == FileType {
		key := filepath.Clean(e.GetName())
		if _, dup := jp.seen[key]; dup {
			return -1, ErrDuplicatePath
		}
		jp.seen[key] = struct{}{}
	}

	e.Position = jp.pos
	if err := jp.e.Encode(e); err != nil {
		return -1, err
	}

	// The counter only advances once the entry has been encoded.
	jp.pos++
	return e.Position, nil
}

// NewJSONPacker returns a Packer that writes each Entry (SegmentType and
// FileType) to w as a JSON document.
//
// The Entries are delimited by new line.
func NewJSONPacker(w io.Writer) Packer {
	packer := new(jsonPacker)
	packer.w = w
	packer.e = json.NewEncoder(w)
	// Start with an empty set of seen paths; pos keeps its zero value.
	packer.seen = make(seenNames)
	return packer
}

/*
TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgpack
(https://github.com/ugorji/go)


Then again, since our jsonUnpacker and jsonPacker just take an
io.Reader/io.Writer, we can get away with passing them a
gzip.Reader/gzip.Writer
*/