Browse code

Finalize TarSum Version 1 w/ refactor

The current Dev version of TarSum includes hashing of extended
file attributes and omits inclusion of modified time headers.

I refactored the logic around the version differences to make it
more clear that the difference between versions is in how tar
headers are selected and ordered.

TarSum Version 1 is now declared with the new Dev version continuing
to track it.

Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)

Josh Hawn authored on 2014/10/31 05:47:31
Showing 3 changed files
... ...
@@ -7,8 +7,6 @@ import (
7 7
 	"encoding/hex"
8 8
 	"hash"
9 9
 	"io"
10
-	"sort"
11
-	"strconv"
12 10
 	"strings"
13 11
 
14 12
 	"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
... ...
@@ -29,18 +27,20 @@ const (
29 29
 // including the byte payload of the image's json metadata as well, and for
30 30
 // calculating the checksums for buildcache.
31 31
 func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
32
-	if _, ok := tarSumVersions[v]; !ok {
33
-		return nil, ErrVersionNotImplemented
32
+	headerSelector, err := getTarHeaderSelector(v)
33
+	if err != nil {
34
+		return nil, err
34 35
 	}
35
-	return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v}, nil
36
+	return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector}, nil
36 37
 }
37 38
 
38 39
 // Create a new TarSum, providing a THash to use rather than the DefaultTHash
39 40
 func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
40
-	if _, ok := tarSumVersions[v]; !ok {
41
-		return nil, ErrVersionNotImplemented
41
+	headerSelector, err := getTarHeaderSelector(v)
42
+	if err != nil {
43
+		return nil, err
42 44
 	}
43
-	return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, tHash: tHash}, nil
45
+	return &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}, nil
44 46
 }
45 47
 
46 48
 // TarSum is the generic interface for calculating fixed time
... ...
@@ -69,8 +69,9 @@ type tarSum struct {
69 69
 	currentFile        string
70 70
 	finished           bool
71 71
 	first              bool
72
-	DisableCompression bool    // false by default. When false, the output gzip compressed.
73
-	tarSumVersion      Version // this field is not exported so it can not be mutated during use
72
+	DisableCompression bool              // false by default. When false, the output gzip compressed.
73
+	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
74
+	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
74 75
 }
75 76
 
76 77
 func (ts tarSum) Hash() THash {
... ...
@@ -103,49 +104,12 @@ type simpleTHash struct {
103 103
 func (sth simpleTHash) Name() string    { return sth.n }
104 104
 func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
105 105
 
106
-func (ts tarSum) selectHeaders(h *tar.Header, v Version) (set [][2]string) {
107
-	for _, elem := range [][2]string{
108
-		{"name", h.Name},
109
-		{"mode", strconv.Itoa(int(h.Mode))},
110
-		{"uid", strconv.Itoa(h.Uid)},
111
-		{"gid", strconv.Itoa(h.Gid)},
112
-		{"size", strconv.Itoa(int(h.Size))},
113
-		{"mtime", strconv.Itoa(int(h.ModTime.UTC().Unix()))},
114
-		{"typeflag", string([]byte{h.Typeflag})},
115
-		{"linkname", h.Linkname},
116
-		{"uname", h.Uname},
117
-		{"gname", h.Gname},
118
-		{"devmajor", strconv.Itoa(int(h.Devmajor))},
119
-		{"devminor", strconv.Itoa(int(h.Devminor))},
120
-	} {
121
-		if v >= VersionDev && elem[0] == "mtime" {
122
-			continue
123
-		}
124
-		set = append(set, elem)
125
-	}
126
-	return
127
-}
128
-
129 106
 func (ts *tarSum) encodeHeader(h *tar.Header) error {
130
-	for _, elem := range ts.selectHeaders(h, ts.Version()) {
107
+	for _, elem := range ts.headerSelector.selectHeaders(h) {
131 108
 		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
132 109
 			return err
133 110
 		}
134 111
 	}
135
-
136
-	// include the additional pax headers, from an ordered list
137
-	if ts.Version() >= VersionDev {
138
-		var keys []string
139
-		for k := range h.Xattrs {
140
-			keys = append(keys, k)
141
-		}
142
-		sort.Strings(keys)
143
-		for _, k := range keys {
144
-			if _, err := ts.h.Write([]byte(k + h.Xattrs[k])); err != nil {
145
-				return err
146
-			}
147
-		}
148
-	}
149 112
 	return nil
150 113
 }
151 114
 
... ...
@@ -2,7 +2,11 @@ package tarsum
2 2
 
3 3
 import (
4 4
 	"errors"
5
+	"sort"
6
+	"strconv"
5 7
 	"strings"
8
+
9
+	"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
6 10
 )
7 11
 
8 12
 // versioning of the TarSum algorithm
... ...
@@ -10,11 +14,11 @@ import (
10 10
 // i.e. "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"
11 11
 type Version int
12 12
 
13
+// Prefix of "tarsum"
13 14
 const (
14
-	// Prefix of "tarsum"
15 15
 	Version0 Version = iota
16
-	// Prefix of "tarsum.dev"
17
-	// NOTE: this variable will be of an unsettled next-version of the TarSum calculation
16
+	Version1
17
+	// NOTE: this variable will be either the latest or an unsettled next-version of the TarSum calculation
18 18
 	VersionDev
19 19
 )
20 20
 
... ...
@@ -28,8 +32,9 @@ func GetVersions() []Version {
28 28
 }
29 29
 
30 30
 var tarSumVersions = map[Version]string{
31
-	0: "tarsum",
32
-	1: "tarsum.dev",
31
+	Version0:   "tarsum",
32
+	Version1:   "tarsum.v1",
33
+	VersionDev: "tarsum.dev",
33 34
 }
34 35
 
35 36
 func (tsv Version) String() string {
... ...
@@ -50,7 +55,78 @@ func GetVersionFromTarsum(tarsum string) (Version, error) {
50 50
 	return -1, ErrNotVersion
51 51
 }
52 52
 
53
+// Errors that may be returned by functions in this package
53 54
 var (
54 55
 	ErrNotVersion            = errors.New("string does not include a TarSum Version")
55 56
 	ErrVersionNotImplemented = errors.New("TarSum Version is not yet implemented")
56 57
 )
58
+
59
// tarHeaderSelector abstracts the per-version policy for choosing which tar
// header fields of an archive entry take part in the checksum, and in what
// order. Each tarsum version supplies its own implementation.
type tarHeaderSelector interface {
	// selectHeaders returns the chosen fields of h as ordered
	// {key, value} pairs.
	selectHeaders(h *tar.Header) (orderedHeaders [][2]string)
}

// tarHeaderSelectFunc adapts an ordinary function so that it satisfies the
// tarHeaderSelector interface.
type tarHeaderSelectFunc func(h *tar.Header) (orderedHeaders [][2]string)

// selectHeaders calls f with h and hands back its result unchanged.
func (f tarHeaderSelectFunc) selectHeaders(h *tar.Header) [][2]string {
	pairs := f(h)
	return pairs
}
71
+
72
// v0TarHeaderSelect returns the header fields hashed by TarSum Version 0 as
// ordered {key, value} pairs: name, mode, uid, gid, size, mtime, typeflag,
// linkname, uname, gname, devmajor and devminor. Numeric fields are rendered
// in base 10 and mtime as Unix seconds.
func v0TarHeaderSelect(h *tar.Header) (orderedHeaders [][2]string) {
	return [][2]string{
		{"name", h.Name},
		{"mode", strconv.Itoa(int(h.Mode))},
		{"uid", strconv.Itoa(h.Uid)},
		{"gid", strconv.Itoa(h.Gid)},
		{"size", strconv.Itoa(int(h.Size))},
		{"mtime", strconv.Itoa(int(h.ModTime.UTC().Unix()))},
		{"typeflag", string([]byte{h.Typeflag})},
		{"linkname", h.Linkname},
		{"uname", h.Uname},
		{"gname", h.Gname},
		{"devmajor", strconv.Itoa(int(h.Devmajor))},
		{"devminor", strconv.Itoa(int(h.Devminor))},
	}
}

// v1TarHeaderSelect returns the header fields hashed by TarSum Version 1 as
// ordered {key, value} pairs: every Version 0 field except "mtime", in the
// Version 0 order, followed by the extended attributes of h sorted by key.
func v1TarHeaderSelect(h *tar.Header) (orderedHeaders [][2]string) {
	// Collect the extended attribute keys. The slice must start with length
	// zero: creating it with length len(h.Xattrs) and then appending would
	// leave that many empty-string keys in front of the real ones, producing
	// spurious {"", ""} pairs in the result.
	xAttrKeys := make([]string, 0, len(h.Xattrs))
	for k := range h.Xattrs {
		xAttrKeys = append(xAttrKeys, k)
	}
	// Map iteration order is random; sort so the output is deterministic.
	sort.Strings(xAttrKeys)

	v0headers := v0TarHeaderSelect(h)

	// Room for every v0 header except mtime, plus one pair per xattr.
	orderedHeaders = make([][2]string, 0, len(v0headers)-1+len(xAttrKeys))

	// Copy the v0 headers, dropping mtime by key rather than by position so
	// this does not silently break if the v0 ordering is ever touched.
	for _, header := range v0headers {
		if header[0] == "mtime" {
			continue
		}
		orderedHeaders = append(orderedHeaders, header)
	}

	// Finally, append the sorted xattrs.
	for _, k := range xAttrKeys {
		orderedHeaders = append(orderedHeaders, [2]string{k, h.Xattrs[k]})
	}

	return orderedHeaders
}
113
+
114
+var registeredHeaderSelectors = map[Version]tarHeaderSelectFunc{
115
+	Version0:   v0TarHeaderSelect,
116
+	Version1:   v1TarHeaderSelect,
117
+	VersionDev: v1TarHeaderSelect,
118
+}
119
+
120
+func getTarHeaderSelector(v Version) (tarHeaderSelector, error) {
121
+	headerSelector, ok := registeredHeaderSelectors[v]
122
+	if !ok {
123
+		return nil, ErrVersionNotImplemented
124
+	}
125
+
126
+	return headerSelector, nil
127
+}
... ...
@@ -11,11 +11,17 @@ func TestVersion(t *testing.T) {
11 11
 		t.Errorf("expected %q, got %q", expected, v.String())
12 12
 	}
13 13
 
14
-	expected = "tarsum.dev"
14
+	expected = "tarsum.v1"
15 15
 	v = 1
16 16
 	if v.String() != expected {
17 17
 		t.Errorf("expected %q, got %q", expected, v.String())
18 18
 	}
19
+
20
+	expected = "tarsum.dev"
21
+	v = 2
22
+	if v.String() != expected {
23
+		t.Errorf("expected %q, got %q", expected, v.String())
24
+	}
19 25
 }
20 26
 
21 27
 func TestGetVersion(t *testing.T) {