Browse code

TarSum: versioning

This introduces Versions for TarSum checksums.
Fixes: https://github.com/docker/docker/issues/7526

It preserves current functionality and abstracts the interface for
future flexibility of hashing algorithms. As a POC, the VersionDev
Tarsum does not include the mtime in the checksum calculation, and would
solve https://github.com/docker/docker/issues/7387 though this is not a
settled Version and is subject to change until a version number is assigned.

Signed-off-by: Vincent Batts <vbatts@redhat.com>

Vincent Batts authored on 2014/08/22 05:12:52
Showing 8 changed files
... ...
@@ -93,12 +93,12 @@ type Builder struct {
93 93
 	// both of these are controlled by the Remove and ForceRemove options in BuildOpts
94 94
 	TmpContainers map[string]struct{} // a map of containers used for removes
95 95
 
96
-	dockerfile  *parser.Node   // the syntax tree of the dockerfile
97
-	image       string         // image name for commit processing
98
-	maintainer  string         // maintainer name. could probably be removed.
99
-	cmdSet      bool           // indicates is CMD was set in current Dockerfile
100
-	context     *tarsum.TarSum // the context is a tarball that is uploaded by the client
101
-	contextPath string         // the path of the temporary directory the local context is unpacked to (server side)
96
+	dockerfile  *parser.Node           // the syntax tree of the dockerfile
97
+	image       string                 // image name for commit processing
98
+	maintainer  string                 // maintainer name. could probably be removed.
99
+	cmdSet      bool                   // indicates is CMD was set in current Dockerfile
100
+	context     tarsum.TarSumInterface // the context is a tarball that is uploaded by the client
101
+	contextPath string                 // the path of the temporary directory the local context is unpacked to (server side)
102 102
 
103 103
 }
104 104
 
... ...
@@ -41,7 +41,9 @@ func (b *Builder) readContext(context io.Reader) error {
41 41
 		return err
42 42
 	}
43 43
 
44
-	b.context = &tarsum.TarSum{Reader: decompressedStream, DisableCompression: true}
44
+	if b.context, err = tarsum.NewTarSum(decompressedStream, true, tarsum.Version0); err != nil {
45
+		return err
46
+	}
45 47
 	if err := archive.Untar(b.context, tmpdirPath, nil); err != nil {
46 48
 		return err
47 49
 	}
... ...
@@ -22,6 +22,29 @@ const (
22 22
 	buf32K = 32 * 1024
23 23
 )
24 24
 
25
+// NewTarSum creates a new interface for calculating a fixed time checksum of a
26
+// tar archive.
27
+//
28
+// This is used for calculating checksums of layers of an image, in some cases
29
+// including the byte payload of the image's json metadata as well, and for
30
+// calculating the checksums for buildcache.
31
+func NewTarSum(r io.Reader, dc bool, v Version) (TarSumInterface, error) {
32
+	if _, ok := tarSumVersions[v]; !ok {
33
+		return nil, ErrVersionNotImplemented
34
+	}
35
+	return &TarSum{Reader: r, DisableCompression: dc, tarSumVersion: v}, nil
36
+}
37
+
38
+// TarSumInterface is the generic interface for calculating fixed time
39
+// checksums of a tar archive
40
+type TarSumInterface interface {
41
+	io.Reader
42
+	GetSums() map[string]string
43
+	Sum([]byte) string
44
+	Version() Version
45
+}
46
+
47
+// TarSum struct is the structure for a Version0 checksum calculation
25 48
 type TarSum struct {
26 49
 	io.Reader
27 50
 	tarR               *tar.Reader
... ...
@@ -35,27 +58,15 @@ type TarSum struct {
35 35
 	currentFile        string
36 36
 	finished           bool
37 37
 	first              bool
38
-	DisableCompression bool
39
-}
40
-
41
-type writeCloseFlusher interface {
42
-	io.WriteCloser
43
-	Flush() error
44
-}
45
-
46
-type nopCloseFlusher struct {
47
-	io.Writer
38
+	DisableCompression bool    // false by default. When false, the output gzip compressed.
39
+	tarSumVersion      Version // this field is not exported so it can not be mutated during use
48 40
 }
49 41
 
50
-func (n *nopCloseFlusher) Close() error {
51
-	return nil
42
+func (ts TarSum) Version() Version {
43
+	return ts.tarSumVersion
52 44
 }
53 45
 
54
-func (n *nopCloseFlusher) Flush() error {
55
-	return nil
56
-}
57
-
58
-func (ts *TarSum) encodeHeader(h *tar.Header) error {
46
+func (ts TarSum) selectHeaders(h *tar.Header, v Version) (set [][2]string) {
59 47
 	for _, elem := range [][2]string{
60 48
 		{"name", h.Name},
61 49
 		{"mode", strconv.Itoa(int(h.Mode))},
... ...
@@ -69,9 +80,17 @@ func (ts *TarSum) encodeHeader(h *tar.Header) error {
69 69
 		{"gname", h.Gname},
70 70
 		{"devmajor", strconv.Itoa(int(h.Devmajor))},
71 71
 		{"devminor", strconv.Itoa(int(h.Devminor))},
72
-		// {"atime", strconv.Itoa(int(h.AccessTime.UTC().Unix()))},
73
-		// {"ctime", strconv.Itoa(int(h.ChangeTime.UTC().Unix()))},
74 72
 	} {
73
+		if v == VersionDev && elem[0] == "mtime" {
74
+			continue
75
+		}
76
+		set = append(set, elem)
77
+	}
78
+	return
79
+}
80
+
81
+func (ts *TarSum) encodeHeader(h *tar.Header) error {
82
+	for _, elem := range ts.selectHeaders(h, ts.Version()) {
75 83
 		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
76 84
 			return err
77 85
 		}
... ...
@@ -193,7 +212,7 @@ func (ts *TarSum) Sum(extra []byte) string {
193 193
 		log.Debugf("-->%s<--", sum)
194 194
 		h.Write([]byte(sum))
195 195
 	}
196
-	checksum := "tarsum+sha256:" + hex.EncodeToString(h.Sum(nil))
196
+	checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
197 197
 	log.Debugf("checksum processed: %s", checksum)
198 198
 	return checksum
199 199
 }
... ...
@@ -18,16 +18,23 @@ type testLayer struct {
18 18
 	jsonfile string
19 19
 	gzip     bool
20 20
 	tarsum   string
21
+	version  Version
21 22
 }
22 23
 
23 24
 var testLayers = []testLayer{
24 25
 	{
25 26
 		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
26 27
 		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
28
+		version:  Version0,
27 29
 		tarsum:   "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"},
28 30
 	{
29 31
 		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
30 32
 		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
33
+		version:  VersionDev,
34
+		tarsum:   "tarsum.dev+sha256:486b86e25c4db4551228154848bc4663b15dd95784b1588980f4ba1cb42e83e9"},
35
+	{
36
+		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
37
+		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
31 38
 		gzip:     true,
32 39
 		tarsum:   "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"},
33 40
 	{
... ...
@@ -118,7 +125,11 @@ func TestTarSums(t *testing.T) {
118 118
 		}
119 119
 
120 120
 		//                                  double negatives!
121
-		ts := &TarSum{Reader: fh, DisableCompression: !layer.gzip}
121
+		ts, err := NewTarSum(fh, !layer.gzip, layer.version)
122
+		if err != nil {
123
+			t.Errorf("%q :: %q", err, layer.filename)
124
+			continue
125
+		}
122 126
 		_, err = io.Copy(ioutil.Discard, ts)
123 127
 		if err != nil {
124 128
 			t.Errorf("failed to copy from %s: %s", layer.filename, err)
... ...
@@ -160,7 +171,11 @@ func Benchmark9kTar(b *testing.B) {
160 160
 	b.SetBytes(n)
161 161
 	b.ResetTimer()
162 162
 	for i := 0; i < b.N; i++ {
163
-		ts := &TarSum{Reader: buf, DisableCompression: true}
163
+		ts, err := NewTarSum(buf, true, Version0)
164
+		if err != nil {
165
+			b.Error(err)
166
+			return
167
+		}
164 168
 		io.Copy(ioutil.Discard, ts)
165 169
 		ts.Sum(nil)
166 170
 	}
... ...
@@ -179,7 +194,11 @@ func Benchmark9kTarGzip(b *testing.B) {
179 179
 	b.SetBytes(n)
180 180
 	b.ResetTimer()
181 181
 	for i := 0; i < b.N; i++ {
182
-		ts := &TarSum{Reader: buf, DisableCompression: false}
182
+		ts, err := NewTarSum(buf, false, Version0)
183
+		if err != nil {
184
+			b.Error(err)
185
+			return
186
+		}
183 187
 		io.Copy(ioutil.Discard, ts)
184 188
 		ts.Sum(nil)
185 189
 	}
... ...
@@ -217,7 +236,11 @@ func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) {
217 217
 	b.SetBytes(opts.size * opts.num)
218 218
 	b.ResetTimer()
219 219
 	for i := 0; i < b.N; i++ {
220
-		ts := &TarSum{Reader: fh, DisableCompression: !isGzip}
220
+		ts, err := NewTarSum(fh, !isGzip, Version0)
221
+		if err != nil {
222
+			b.Error(err)
223
+			return
224
+		}
221 225
 		io.Copy(ioutil.Discard, ts)
222 226
 		ts.Sum(nil)
223 227
 		fh.Seek(0, 0)
224 228
new file mode 100644
... ...
@@ -0,0 +1,56 @@
0
+package tarsum
1
+
2
+import (
3
+	"errors"
4
+	"strings"
5
+)
6
+
7
+// versioning of the TarSum algorithm
8
+// based on the prefix of the hash used
9
+// i.e. "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b"
10
+type Version int
11
+
12
+const (
13
+	// Prefix of "tarsum"
14
+	Version0 Version = iota
15
+	// Prefix of "tarsum.dev"
16
+	// NOTE: this variable will be of an unsettled next-version of the TarSum calculation
17
+	VersionDev
18
+)
19
+
20
+// Get a list of all known tarsum Version
21
+func GetVersions() []Version {
22
+	v := []Version{}
23
+	for k := range tarSumVersions {
24
+		v = append(v, k)
25
+	}
26
+	return v
27
+}
28
+
29
+var tarSumVersions = map[Version]string{
30
+	0: "tarsum",
31
+	1: "tarsum.dev",
32
+}
33
+
34
+func (tsv Version) String() string {
35
+	return tarSumVersions[tsv]
36
+}
37
+
38
+// GetVersionFromTarsum returns the Version from the provided string
39
+func GetVersionFromTarsum(tarsum string) (Version, error) {
40
+	tsv := tarsum
41
+	if strings.Contains(tarsum, "+") {
42
+		tsv = strings.SplitN(tarsum, "+", 2)[0]
43
+	}
44
+	for v, s := range tarSumVersions {
45
+		if s == tsv {
46
+			return v, nil
47
+		}
48
+	}
49
+	return -1, ErrNotVersion
50
+}
51
+
52
+var (
53
+	ErrNotVersion            = errors.New("string does not include a TarSum Version")
54
+	ErrVersionNotImplemented = errors.New("TarSum Version is not yet implemented")
55
+)
0 56
new file mode 100644
... ...
@@ -0,0 +1,49 @@
0
+package tarsum
1
+
2
+import (
3
+	"testing"
4
+)
5
+
6
+func TestVersion(t *testing.T) {
7
+	expected := "tarsum"
8
+	var v Version
9
+	if v.String() != expected {
10
+		t.Errorf("expected %q, got %q", expected, v.String())
11
+	}
12
+
13
+	expected = "tarsum.dev"
14
+	v = 1
15
+	if v.String() != expected {
16
+		t.Errorf("expected %q, got %q", expected, v.String())
17
+	}
18
+}
19
+
20
+func TestGetVersion(t *testing.T) {
21
+	testSet := []struct {
22
+		Str      string
23
+		Expected Version
24
+	}{
25
+		{"tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", Version0},
26
+		{"tarsum+sha256", Version0},
27
+		{"tarsum", Version0},
28
+		{"tarsum.dev", VersionDev},
29
+		{"tarsum.dev+sha256:deadbeef", VersionDev},
30
+	}
31
+
32
+	for _, ts := range testSet {
33
+		v, err := GetVersionFromTarsum(ts.Str)
34
+		if err != nil {
35
+			t.Fatalf("%q : %s", err, ts.Str)
36
+		}
37
+		if v != ts.Expected {
38
+			t.Errorf("expected %d (%q), got %d (%q)", ts.Expected, ts.Expected, v, v)
39
+		}
40
+	}
41
+
42
+	// test one that does not exist, to ensure it errors
43
+	str := "weak+md5:abcdeabcde"
44
+	_, err := GetVersionFromTarsum(str)
45
+	if err != ErrNotVersion {
46
+		t.Fatalf("%q : %s", err, str)
47
+	}
48
+}
0 49
new file mode 100644
... ...
@@ -0,0 +1,22 @@
0
+package tarsum
1
+
2
+import (
3
+	"io"
4
+)
5
+
6
+type writeCloseFlusher interface {
7
+	io.WriteCloser
8
+	Flush() error
9
+}
10
+
11
+type nopCloseFlusher struct {
12
+	io.Writer
13
+}
14
+
15
+func (n *nopCloseFlusher) Close() error {
16
+	return nil
17
+}
18
+
19
+func (n *nopCloseFlusher) Flush() error {
20
+	return nil
21
+}
... ...
@@ -407,7 +407,10 @@ func (r *Session) PushImageLayerRegistry(imgID string, layer io.Reader, registry
407 407
 
408 408
 	log.Debugf("[registry] Calling PUT %s", registry+"images/"+imgID+"/layer")
409 409
 
410
-	tarsumLayer := &tarsum.TarSum{Reader: layer}
410
+	tarsumLayer, err := tarsum.NewTarSum(layer, false, tarsum.Version0)
411
+	if err != nil {
412
+		return "", "", err
413
+	}
411 414
 	h := sha256.New()
412 415
 	h.Write(jsonRaw)
413 416
 	h.Write([]byte{'\n'})