Browse code

tarsum: name collision fix

If a tar were constructed with duplicate file names, then depending on
the order, it could result in the same tarsum.

Signed-off-by: Vincent Batts <vbatts@redhat.com>

Vincent Batts authored on 2014/09/05 05:13:50
Showing 9 changed files
... ...
@@ -214,11 +214,11 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
214 214
 			return err
215 215
 		} else if fi.IsDir() {
216 216
 			var subfiles []string
217
-			for file, sum := range sums {
218
-				absFile := path.Join(b.contextPath, file)
217
+			for _, fileInfo := range sums {
218
+				absFile := path.Join(b.contextPath, fileInfo.Name())
219 219
 				absOrigPath := path.Join(b.contextPath, origPath)
220 220
 				if strings.HasPrefix(absFile, absOrigPath) {
221
-					subfiles = append(subfiles, sum)
221
+					subfiles = append(subfiles, fileInfo.Sum())
222 222
 				}
223 223
 			}
224 224
 			sort.Strings(subfiles)
... ...
@@ -230,8 +230,9 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp
230 230
 				origPath = origPath[1:]
231 231
 			}
232 232
 			origPath = strings.TrimPrefix(origPath, "./")
233
-			if h, ok := sums[origPath]; ok {
234
-				hash = "file:" + h
233
+			// This will match on the first file in sums of the archive
234
+			if fis := sums.GetFile(origPath); fis != nil {
235
+				hash = "file:" + fis.Sum()
235 236
 			}
236 237
 		}
237 238
 		b.Config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) %s %s in %s", cmdName, hash, dest)}
238 239
new file mode 100644
... ...
@@ -0,0 +1,125 @@
0
+package tarsum
1
+
2
+import "sort"
3
+
4
+// This info will be accessed through an interface so the actual name and sum cannot be meddled with
5
+type FileInfoSumInterface interface {
6
+	// File name
7
+	Name() string
8
+	// Checksum of this particular file and its headers
9
+	Sum() string
10
+	// Position of file in the tar
11
+	Pos() int64
12
+}
13
+
14
+type fileInfoSum struct {
15
+	name string
16
+	sum  string
17
+	pos  int64
18
+}
19
+
20
+func (fis fileInfoSum) Name() string {
21
+	return fis.name
22
+}
23
+func (fis fileInfoSum) Sum() string {
24
+	return fis.sum
25
+}
26
+func (fis fileInfoSum) Pos() int64 {
27
+	return fis.pos
28
+}
29
+
30
+type FileInfoSums []FileInfoSumInterface
31
+
32
+// GetFile returns the first FileInfoSumInterface with a matching name
33
+func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
34
+	for i := range fis {
35
+		if fis[i].Name() == name {
36
+			return fis[i]
37
+		}
38
+	}
39
+	return nil
40
+}
41
+
42
+// GetAllFile returns a FileInfoSums with all matching names
43
+func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
44
+	f := FileInfoSums{}
45
+	for i := range fis {
46
+		if fis[i].Name() == name {
47
+			f = append(f, fis[i])
48
+		}
49
+	}
50
+	return f
51
+}
52
+
53
+func contains(s []string, e string) bool {
54
+	for _, a := range s {
55
+		if a == e {
56
+			return true
57
+		}
58
+	}
59
+	return false
60
+}
61
+
62
+func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
63
+	seen := make(map[string]int, len(fis)) // allocate early. no need to grow this map.
64
+	for i := range fis {
65
+		f := fis[i]
66
+		if _, ok := seen[f.Name()]; ok {
67
+			dups = append(dups, f)
68
+		} else {
69
+			seen[f.Name()] = 0
70
+		}
71
+	}
72
+	return dups
73
+}
74
+
75
+func (fis FileInfoSums) Len() int      { return len(fis) }
76
+func (fis FileInfoSums) Swap(i, j int) { fis[i], fis[j] = fis[j], fis[i] }
77
+
78
+func (fis FileInfoSums) SortByPos() {
79
+	sort.Sort(byPos{fis})
80
+}
81
+
82
+func (fis FileInfoSums) SortByNames() {
83
+	sort.Sort(byName{fis})
84
+}
85
+
86
+func (fis FileInfoSums) SortBySums() {
87
+	dups := fis.GetDuplicatePaths()
88
+	if len(dups) > 0 {
89
+		sort.Sort(bySum{fis, dups})
90
+	} else {
91
+		sort.Sort(bySum{fis, nil})
92
+	}
93
+}
94
+
95
+// byName is a sort.Sort helper for sorting by file names.
96
+// If names are the same, order them by their appearance in the tar archive
97
+type byName struct{ FileInfoSums }
98
+
99
+func (bn byName) Less(i, j int) bool {
100
+	if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
101
+		return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos()
102
+	}
103
+	return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name()
104
+}
105
+
106
+// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive
107
+type bySum struct {
108
+	FileInfoSums
109
+	dups FileInfoSums
110
+}
111
+
112
+func (bs bySum) Less(i, j int) bool {
113
+	if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() {
114
+		return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos()
115
+	}
116
+	return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum()
117
+}
118
+
119
+// byPos is a sort.Sort helper for sorting all the fileinfos by their original order (position) in the tar archive
120
+type byPos struct{ FileInfoSums }
121
+
122
+func (bp byPos) Less(i, j int) bool {
123
+	return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos()
124
+}
0 125
new file mode 100644
... ...
@@ -0,0 +1,45 @@
0
+package tarsum
1
+
2
+import "testing"
3
+
4
+func newFileInfoSums() FileInfoSums {
5
+	return FileInfoSums{
6
+		fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
7
+		fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
8
+		fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
9
+		fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
10
+		fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
11
+		fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
12
+	}
13
+}
14
+
15
+func TestSortFileInfoSums(t *testing.T) {
16
+	dups := newFileInfoSums().GetAllFile("dup1")
17
+	if len(dups) != 2 {
18
+		t.Errorf("expected length 2, got %d", len(dups))
19
+	}
20
+	dups.SortByNames()
21
+	if dups[0].Pos() != 4 {
22
+		t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
23
+	}
24
+
25
+	fis := newFileInfoSums()
26
+	expected := "0abcdef1234567890"
27
+	fis.SortBySums()
28
+	got := fis[0].Sum()
29
+	if got != expected {
30
+		t.Errorf("Expected %q, got %q", expected, got)
31
+	}
32
+
33
+	fis = newFileInfoSums()
34
+	expected = "dup1"
35
+	fis.SortByNames()
36
+	gotFis := fis[0]
37
+	if gotFis.Name() != expected {
38
+		t.Errorf("Expected %q, got %q", expected, gotFis.Name())
39
+	}
40
+	// since a duplicate is first, ensure it is ordered first by position too
41
+	if gotFis.Pos() != 4 {
42
+		t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
43
+	}
44
+}
... ...
@@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
39 39
 // checksums of a tar archive
40 40
 type TarSum interface {
41 41
 	io.Reader
42
-	GetSums() map[string]string
42
+	GetSums() FileInfoSums
43 43
 	Sum([]byte) string
44 44
 	Version() Version
45 45
 }
... ...
@@ -54,7 +54,8 @@ type tarSum struct {
54 54
 	bufGz              *bytes.Buffer
55 55
 	bufData            []byte
56 56
 	h                  hash.Hash
57
-	sums               map[string]string
57
+	sums               FileInfoSums
58
+	fileCounter        int64
58 59
 	currentFile        string
59 60
 	finished           bool
60 61
 	first              bool
... ...
@@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
126 126
 		ts.h = sha256.New()
127 127
 		ts.h.Reset()
128 128
 		ts.first = true
129
-		ts.sums = make(map[string]string)
129
+		ts.sums = FileInfoSums{}
130 130
 	}
131 131
 
132 132
 	if ts.finished {
... ...
@@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
153 153
 				return 0, err
154 154
 			}
155 155
 			if !ts.first {
156
-				ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
156
+				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
157
+				ts.fileCounter++
157 158
 				ts.h.Reset()
158 159
 			} else {
159 160
 				ts.first = false
... ...
@@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
218 218
 }
219 219
 
220 220
 func (ts *tarSum) Sum(extra []byte) string {
221
-	var sums []string
222
-
223
-	for _, sum := range ts.sums {
224
-		sums = append(sums, sum)
225
-	}
226
-	sort.Strings(sums)
221
+	ts.sums.SortBySums()
227 222
 	h := sha256.New()
228 223
 	if extra != nil {
229 224
 		h.Write(extra)
230 225
 	}
231
-	for _, sum := range sums {
232
-		log.Debugf("-->%s<--", sum)
233
-		h.Write([]byte(sum))
226
+	for _, fis := range ts.sums {
227
+		log.Debugf("-->%s<--", fis.Sum())
228
+		h.Write([]byte(fis.Sum()))
234 229
 	}
235 230
 	checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil))
236 231
 	log.Debugf("checksum processed: %s", checksum)
237 232
 	return checksum
238 233
 }
239 234
 
240
-func (ts *tarSum) GetSums() map[string]string {
235
+func (ts *tarSum) GetSums() FileInfoSums {
241 236
 	return ts.sums
242 237
 }
... ...
@@ -59,6 +59,22 @@ var testLayers = []testLayer{
59 59
 	{
60 60
 		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
61 61
 		tarsum:  "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
62
+	{
63
+		// this tar has two files with the same path
64
+		filename: "testdata/collision/collision-0.tar",
65
+		tarsum:   "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
66
+	{
67
+		// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
68
+		filename: "testdata/collision/collision-1.tar",
69
+		tarsum:   "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
70
+	{
71
+		// this tar is a newer version of collision-0.tar, ensuring it has a different hash
72
+		filename: "testdata/collision/collision-2.tar",
73
+		tarsum:   "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
74
+	{
75
+		// this tar is a newer version of collision-1.tar, ensuring it has a different hash
76
+		filename: "testdata/collision/collision-3.tar",
77
+		tarsum:   "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
62 78
 }
63 79
 
64 80
 type sizedOptions struct {
65 81
new file mode 100644
66 82
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-0.tar differ
67 83
new file mode 100644
68 84
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-1.tar differ
69 85
new file mode 100644
70 86
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-2.tar differ
71 87
new file mode 100644
72 88
Binary files /dev/null and b/pkg/tarsum/testdata/collision/collision-3.tar differ