If a tar were constructed with duplicate file names, then depending on
the order, it could result in the same tarsum.
Signed-off-by: Vincent Batts <vbatts@redhat.com>
| ... | ... |
@@ -214,11 +214,11 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp |
| 214 | 214 |
return err |
| 215 | 215 |
} else if fi.IsDir() {
|
| 216 | 216 |
var subfiles []string |
| 217 |
- for file, sum := range sums {
|
|
| 218 |
- absFile := path.Join(b.contextPath, file) |
|
| 217 |
+ for _, fileInfo := range sums {
|
|
| 218 |
+ absFile := path.Join(b.contextPath, fileInfo.Name()) |
|
| 219 | 219 |
absOrigPath := path.Join(b.contextPath, origPath) |
| 220 | 220 |
if strings.HasPrefix(absFile, absOrigPath) {
|
| 221 |
- subfiles = append(subfiles, sum) |
|
| 221 |
+ subfiles = append(subfiles, fileInfo.Sum()) |
|
| 222 | 222 |
} |
| 223 | 223 |
} |
| 224 | 224 |
sort.Strings(subfiles) |
| ... | ... |
@@ -230,8 +230,9 @@ func (b *Builder) runContextCommand(args []string, allowRemote bool, allowDecomp |
| 230 | 230 |
origPath = origPath[1:] |
| 231 | 231 |
} |
| 232 | 232 |
origPath = strings.TrimPrefix(origPath, "./") |
| 233 |
- if h, ok := sums[origPath]; ok {
|
|
| 234 |
- hash = "file:" + h |
|
| 233 |
+ // This will match on the first file in sums of the archive |
|
| 234 |
+ if fis := sums.GetFile(origPath); fis != nil {
|
|
| 235 |
+ hash = "file:" + fis.Sum() |
|
| 235 | 236 |
} |
| 236 | 237 |
} |
| 237 | 238 |
b.Config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) %s %s in %s", cmdName, hash, dest)}
|
| 238 | 239 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,125 @@ |
| 0 |
+package tarsum |
|
| 1 |
+ |
|
| 2 |
+import "sort" |
|
| 3 |
+ |
|
| 4 |
+// This info will be accessed through an interface so the actual name and sum cannot be meddled with |
|
| 5 |
+type FileInfoSumInterface interface {
|
|
| 6 |
+ // File name |
|
| 7 |
+ Name() string |
|
| 8 |
+ // Checksum of this particular file and its headers |
|
| 9 |
+ Sum() string |
|
| 10 |
+ // Position of file in the tar |
|
| 11 |
+ Pos() int64 |
|
| 12 |
+} |
|
| 13 |
+ |
|
| 14 |
+type fileInfoSum struct {
|
|
| 15 |
+ name string |
|
| 16 |
+ sum string |
|
| 17 |
+ pos int64 |
|
| 18 |
+} |
|
| 19 |
+ |
|
| 20 |
+func (fis fileInfoSum) Name() string {
|
|
| 21 |
+ return fis.name |
|
| 22 |
+} |
|
| 23 |
+func (fis fileInfoSum) Sum() string {
|
|
| 24 |
+ return fis.sum |
|
| 25 |
+} |
|
| 26 |
+func (fis fileInfoSum) Pos() int64 {
|
|
| 27 |
+ return fis.pos |
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
+type FileInfoSums []FileInfoSumInterface |
|
| 31 |
+ |
|
| 32 |
+// GetFile returns the first FileInfoSumInterface with a matching name |
|
| 33 |
+func (fis FileInfoSums) GetFile(name string) FileInfoSumInterface {
|
|
| 34 |
+ for i := range fis {
|
|
| 35 |
+ if fis[i].Name() == name {
|
|
| 36 |
+ return fis[i] |
|
| 37 |
+ } |
|
| 38 |
+ } |
|
| 39 |
+ return nil |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 42 |
+// GetAllFile returns a FileInfoSums with all matching names |
|
| 43 |
+func (fis FileInfoSums) GetAllFile(name string) FileInfoSums {
|
|
| 44 |
+ f := FileInfoSums{}
|
|
| 45 |
+ for i := range fis {
|
|
| 46 |
+ if fis[i].Name() == name {
|
|
| 47 |
+ f = append(f, fis[i]) |
|
| 48 |
+ } |
|
| 49 |
+ } |
|
| 50 |
+ return f |
|
| 51 |
+} |
|
| 52 |
+ |
|
| 53 |
+func contains(s []string, e string) bool {
|
|
| 54 |
+ for _, a := range s {
|
|
| 55 |
+ if a == e {
|
|
| 56 |
+ return true |
|
| 57 |
+ } |
|
| 58 |
+ } |
|
| 59 |
+ return false |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+func (fis FileInfoSums) GetDuplicatePaths() (dups FileInfoSums) {
|
|
| 63 |
+ seen := make(map[string]int, len(fis)) // allocate early; no need to grow this map. |
|
| 64 |
+ for i := range fis {
|
|
| 65 |
+ f := fis[i] |
|
| 66 |
+ if _, ok := seen[f.Name()]; ok {
|
|
| 67 |
+ dups = append(dups, f) |
|
| 68 |
+ } else {
|
|
| 69 |
+ seen[f.Name()] = 0 |
|
| 70 |
+ } |
|
| 71 |
+ } |
|
| 72 |
+ return dups |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+func (fis FileInfoSums) Len() int { return len(fis) }
|
|
| 76 |
+func (fis FileInfoSums) Swap(i, j int) { fis[i], fis[j] = fis[j], fis[i] }
|
|
| 77 |
+ |
|
| 78 |
+func (fis FileInfoSums) SortByPos() {
|
|
| 79 |
+ sort.Sort(byPos{fis})
|
|
| 80 |
+} |
|
| 81 |
+ |
|
| 82 |
+func (fis FileInfoSums) SortByNames() {
|
|
| 83 |
+ sort.Sort(byName{fis})
|
|
| 84 |
+} |
|
| 85 |
+ |
|
| 86 |
+func (fis FileInfoSums) SortBySums() {
|
|
| 87 |
+ dups := fis.GetDuplicatePaths() |
|
| 88 |
+ if len(dups) > 0 {
|
|
| 89 |
+ sort.Sort(bySum{fis, dups})
|
|
| 90 |
+ } else {
|
|
| 91 |
+ sort.Sort(bySum{fis, nil})
|
|
| 92 |
+ } |
|
| 93 |
+} |
|
| 94 |
+ |
|
| 95 |
+// byName is a sort.Sort helper for sorting by file names. |
|
| 96 |
+// If names are the same, order them by their appearance in the tar archive |
|
| 97 |
+type byName struct{ FileInfoSums }
|
|
| 98 |
+ |
|
| 99 |
+func (bn byName) Less(i, j int) bool {
|
|
| 100 |
+ if bn.FileInfoSums[i].Name() == bn.FileInfoSums[j].Name() {
|
|
| 101 |
+ return bn.FileInfoSums[i].Pos() < bn.FileInfoSums[j].Pos() |
|
| 102 |
+ } |
|
| 103 |
+ return bn.FileInfoSums[i].Name() < bn.FileInfoSums[j].Name() |
|
| 104 |
+} |
|
| 105 |
+ |
|
| 106 |
+// bySum is a sort.Sort helper for sorting by the sums of all the fileinfos in the tar archive |
|
| 107 |
+type bySum struct {
|
|
| 108 |
+ FileInfoSums |
|
| 109 |
+ dups FileInfoSums |
|
| 110 |
+} |
|
| 111 |
+ |
|
| 112 |
+func (bs bySum) Less(i, j int) bool {
|
|
| 113 |
+ if bs.dups != nil && bs.FileInfoSums[i].Name() == bs.FileInfoSums[j].Name() {
|
|
| 114 |
+ return bs.FileInfoSums[i].Pos() < bs.FileInfoSums[j].Pos() |
|
| 115 |
+ } |
|
| 116 |
+ return bs.FileInfoSums[i].Sum() < bs.FileInfoSums[j].Sum() |
|
| 117 |
+} |
|
| 118 |
+ |
|
| 119 |
+// byPos is a sort.Sort helper for sorting the fileinfos by their original position in the tar archive |
|
| 120 |
+type byPos struct{ FileInfoSums }
|
|
| 121 |
+ |
|
| 122 |
+func (bp byPos) Less(i, j int) bool {
|
|
| 123 |
+ return bp.FileInfoSums[i].Pos() < bp.FileInfoSums[j].Pos() |
|
| 124 |
+} |
| 0 | 125 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,45 @@ |
| 0 |
+package tarsum |
|
| 1 |
+ |
|
| 2 |
+import "testing" |
|
| 3 |
+ |
|
| 4 |
+func newFileInfoSums() FileInfoSums {
|
|
| 5 |
+ return FileInfoSums{
|
|
| 6 |
+ fileInfoSum{name: "file3", sum: "2abcdef1234567890", pos: 2},
|
|
| 7 |
+ fileInfoSum{name: "dup1", sum: "deadbeef1", pos: 5},
|
|
| 8 |
+ fileInfoSum{name: "file1", sum: "0abcdef1234567890", pos: 0},
|
|
| 9 |
+ fileInfoSum{name: "file4", sum: "3abcdef1234567890", pos: 3},
|
|
| 10 |
+ fileInfoSum{name: "dup1", sum: "deadbeef0", pos: 4},
|
|
| 11 |
+ fileInfoSum{name: "file2", sum: "1abcdef1234567890", pos: 1},
|
|
| 12 |
+ } |
|
| 13 |
+} |
|
| 14 |
+ |
|
| 15 |
+func TestSortFileInfoSums(t *testing.T) {
|
|
| 16 |
+ dups := newFileInfoSums().GetAllFile("dup1")
|
|
| 17 |
+ if len(dups) != 2 {
|
|
| 18 |
+ t.Errorf("expected length 2, got %d", len(dups))
|
|
| 19 |
+ } |
|
| 20 |
+ dups.SortByNames() |
|
| 21 |
+ if dups[0].Pos() != 4 {
|
|
| 22 |
+ t.Errorf("sorted dups should be ordered by position. Expected 4, got %d", dups[0].Pos())
|
|
| 23 |
+ } |
|
| 24 |
+ |
|
| 25 |
+ fis := newFileInfoSums() |
|
| 26 |
+ expected := "0abcdef1234567890" |
|
| 27 |
+ fis.SortBySums() |
|
| 28 |
+ got := fis[0].Sum() |
|
| 29 |
+ if got != expected {
|
|
| 30 |
+ t.Errorf("Expected %q, got %q", expected, got)
|
|
| 31 |
+ } |
|
| 32 |
+ |
|
| 33 |
+ fis = newFileInfoSums() |
|
| 34 |
+ expected = "dup1" |
|
| 35 |
+ fis.SortByNames() |
|
| 36 |
+ gotFis := fis[0] |
|
| 37 |
+ if gotFis.Name() != expected {
|
|
| 38 |
+ t.Errorf("Expected %q, got %q", expected, gotFis.Name())
|
|
| 39 |
+ } |
|
| 40 |
+ // since a duplicate is first, ensure it is ordered first by position too |
|
| 41 |
+ if gotFis.Pos() != 4 {
|
|
| 42 |
+ t.Errorf("Expected %d, got %d", 4, gotFis.Pos())
|
|
| 43 |
+ } |
|
| 44 |
+} |
| ... | ... |
@@ -39,7 +39,7 @@ func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
|
| 39 | 39 |
// checksums of a tar archive |
| 40 | 40 |
type TarSum interface {
|
| 41 | 41 |
io.Reader |
| 42 |
- GetSums() map[string]string |
|
| 42 |
+ GetSums() FileInfoSums |
|
| 43 | 43 |
Sum([]byte) string |
| 44 | 44 |
Version() Version |
| 45 | 45 |
} |
| ... | ... |
@@ -54,7 +54,8 @@ type tarSum struct {
|
| 54 | 54 |
bufGz *bytes.Buffer |
| 55 | 55 |
bufData []byte |
| 56 | 56 |
h hash.Hash |
| 57 |
- sums map[string]string |
|
| 57 |
+ sums FileInfoSums |
|
| 58 |
+ fileCounter int64 |
|
| 58 | 59 |
currentFile string |
| 59 | 60 |
finished bool |
| 60 | 61 |
first bool |
| ... | ... |
@@ -126,7 +127,7 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
| 126 | 126 |
ts.h = sha256.New() |
| 127 | 127 |
ts.h.Reset() |
| 128 | 128 |
ts.first = true |
| 129 |
- ts.sums = make(map[string]string) |
|
| 129 |
+ ts.sums = FileInfoSums{}
|
|
| 130 | 130 |
} |
| 131 | 131 |
|
| 132 | 132 |
if ts.finished {
|
| ... | ... |
@@ -153,7 +154,8 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
| 153 | 153 |
return 0, err |
| 154 | 154 |
} |
| 155 | 155 |
if !ts.first {
|
| 156 |
- ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil)) |
|
| 156 |
+ ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
|
|
| 157 |
+ ts.fileCounter++ |
|
| 157 | 158 |
ts.h.Reset() |
| 158 | 159 |
} else {
|
| 159 | 160 |
ts.first = false |
| ... | ... |
@@ -218,25 +220,20 @@ func (ts *tarSum) Read(buf []byte) (int, error) {
|
| 218 | 218 |
} |
| 219 | 219 |
|
| 220 | 220 |
func (ts *tarSum) Sum(extra []byte) string {
|
| 221 |
- var sums []string |
|
| 222 |
- |
|
| 223 |
- for _, sum := range ts.sums {
|
|
| 224 |
- sums = append(sums, sum) |
|
| 225 |
- } |
|
| 226 |
- sort.Strings(sums) |
|
| 221 |
+ ts.sums.SortBySums() |
|
| 227 | 222 |
h := sha256.New() |
| 228 | 223 |
if extra != nil {
|
| 229 | 224 |
h.Write(extra) |
| 230 | 225 |
} |
| 231 |
- for _, sum := range sums {
|
|
| 232 |
- log.Debugf("-->%s<--", sum)
|
|
| 233 |
- h.Write([]byte(sum)) |
|
| 226 |
+ for _, fis := range ts.sums {
|
|
| 227 |
+ log.Debugf("-->%s<--", fis.Sum())
|
|
| 228 |
+ h.Write([]byte(fis.Sum())) |
|
| 234 | 229 |
} |
| 235 | 230 |
checksum := ts.Version().String() + "+sha256:" + hex.EncodeToString(h.Sum(nil)) |
| 236 | 231 |
log.Debugf("checksum processed: %s", checksum)
|
| 237 | 232 |
return checksum |
| 238 | 233 |
} |
| 239 | 234 |
|
| 240 |
-func (ts *tarSum) GetSums() map[string]string {
|
|
| 235 |
+func (ts *tarSum) GetSums() FileInfoSums {
|
|
| 241 | 236 |
return ts.sums |
| 242 | 237 |
} |
| ... | ... |
@@ -59,6 +59,22 @@ var testLayers = []testLayer{
|
| 59 | 59 |
{
|
| 60 | 60 |
options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
|
| 61 | 61 |
tarsum: "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"}, |
| 62 |
+ {
|
|
| 63 |
+ // this tar has two files with the same path |
|
| 64 |
+ filename: "testdata/collision/collision-0.tar", |
|
| 65 |
+ tarsum: "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"}, |
|
| 66 |
+ {
|
|
| 67 |
+ // this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above |
|
| 68 |
+ filename: "testdata/collision/collision-1.tar", |
|
| 69 |
+ tarsum: "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"}, |
|
| 70 |
+ {
|
|
| 71 |
+ // this tar has newer of collision-0.tar, ensuring it has a different hash |
|
| 72 |
+ filename: "testdata/collision/collision-2.tar", |
|
| 73 |
+ tarsum: "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"}, |
|
| 74 |
+ {
|
|
| 75 |
+ // this tar has newer of collision-1.tar, ensuring it has a different hash |
|
| 76 |
+ filename: "testdata/collision/collision-3.tar", |
|
| 77 |
+ tarsum: "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"}, |
|
| 62 | 78 |
} |
| 63 | 79 |
|
| 64 | 80 |
type sizedOptions struct {
|