Browse code

Merge pull request #3353 from creack/improve_add_cache

Improve add cache

Guillaume J. Charmes authored on 2014/01/03 09:07:33
Showing 3 changed files
... ...
@@ -1,7 +1,6 @@
1 1
 package docker
2 2
 
3 3
 import (
4
-	"archive/tar"
5 4
 	"crypto/sha256"
6 5
 	"encoding/hex"
7 6
 	"encoding/json"
... ...
@@ -18,8 +17,8 @@ import (
18 18
 	"path/filepath"
19 19
 	"reflect"
20 20
 	"regexp"
21
+	"sort"
21 22
 	"strings"
22
-	"time"
23 23
 )
24 24
 
25 25
 var (
... ...
@@ -36,10 +35,13 @@ type buildFile struct {
36 36
 	runtime *Runtime
37 37
 	srv     *Server
38 38
 
39
-	image        string
40
-	maintainer   string
41
-	config       *Config
42
-	context      string
39
+	image      string
40
+	maintainer string
41
+	config     *Config
42
+
43
+	contextPath string
44
+	context     *utils.TarSum
45
+
43 46
 	verbose      bool
44 47
 	utilizeCache bool
45 48
 	rm           bool
... ...
@@ -118,66 +120,6 @@ func (b *buildFile) probeCache() (bool, error) {
118 118
 	return false, nil
119 119
 }
120 120
 
121
-// hashPath calculates a strong hash (sha256) value for a file tree located
122
-// at `basepth`/`pth`, including all attributes that would normally be
123
-// captured by `tar`. The path to hash is passed in two pieces only to
124
-// permit logging the second piece in isolation, assuming the first is a
125
-// temporary directory in which docker is running. If `clobberTimes` is
126
-// true and hashPath is applied to a single file, the ctime/atime/mtime of
127
-// the file is considered to be unix time 0, for purposes of hashing.
128
-func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) {
129
-
130
-	p := path.Join(basePth, pth)
131
-
132
-	st, err := os.Stat(p)
133
-	if err != nil {
134
-		return "", err
135
-	}
136
-
137
-	h := sha256.New()
138
-
139
-	if st.IsDir() {
140
-		tarRd, err := archive.Tar(p, archive.Uncompressed)
141
-		if err != nil {
142
-			return "", err
143
-		}
144
-		_, err = io.Copy(h, tarRd)
145
-		if err != nil {
146
-			return "", err
147
-		}
148
-
149
-	} else {
150
-		hdr, err := tar.FileInfoHeader(st, "")
151
-		if err != nil {
152
-			return "", err
153
-		}
154
-		if clobberTimes {
155
-			hdr.AccessTime = time.Unix(0, 0)
156
-			hdr.ChangeTime = time.Unix(0, 0)
157
-			hdr.ModTime = time.Unix(0, 0)
158
-		}
159
-		hdr.Name = filepath.Base(p)
160
-		tarWr := tar.NewWriter(h)
161
-		if err := tarWr.WriteHeader(hdr); err != nil {
162
-			return "", err
163
-		}
164
-
165
-		fileRd, err := os.Open(p)
166
-		if err != nil {
167
-			return "", err
168
-		}
169
-
170
-		if _, err = io.Copy(tarWr, fileRd); err != nil {
171
-			return "", err
172
-		}
173
-		tarWr.Close()
174
-	}
175
-
176
-	hstr := hex.EncodeToString(h.Sum(nil))
177
-	fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr)
178
-	return hstr, nil
179
-}
180
-
181 121
 func (b *buildFile) CmdRun(args string) error {
182 122
 	if b.image == "" {
183 123
 		return fmt.Errorf("Please provide a source image with `from` prior to run")
... ...
@@ -347,8 +289,8 @@ func (b *buildFile) CmdVolume(args string) error {
347 347
 }
348 348
 
349 349
 func (b *buildFile) checkPathForAddition(orig string) error {
350
-	origPath := path.Join(b.context, orig)
351
-	if !strings.HasPrefix(origPath, b.context) {
350
+	origPath := path.Join(b.contextPath, orig)
351
+	if !strings.HasPrefix(origPath, b.contextPath) {
352 352
 		return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
353 353
 	}
354 354
 	_, err := os.Stat(origPath)
... ...
@@ -359,8 +301,10 @@ func (b *buildFile) checkPathForAddition(orig string) error {
359 359
 }
360 360
 
361 361
 func (b *buildFile) addContext(container *Container, orig, dest string) error {
362
-	origPath := path.Join(b.context, orig)
363
-	destPath := path.Join(container.RootfsPath(), dest)
362
+	var (
363
+		origPath = path.Join(b.contextPath, orig)
364
+		destPath = path.Join(container.RootfsPath(), dest)
365
+	)
364 366
 	// Preserve the trailing '/'
365 367
 	if strings.HasSuffix(dest, "/") {
366 368
 		destPath = destPath + "/"
... ...
@@ -388,7 +332,7 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error {
388 388
 }
389 389
 
390 390
 func (b *buildFile) CmdAdd(args string) error {
391
-	if b.context == "" {
391
+	if b.context == nil {
392 392
 		return fmt.Errorf("No context given. Impossible to use ADD")
393 393
 	}
394 394
 	tmp := strings.SplitN(args, " ", 2)
... ...
@@ -408,22 +352,20 @@ func (b *buildFile) CmdAdd(args string) error {
408 408
 
409 409
 	cmd := b.config.Cmd
410 410
 	b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)}
411
-
412 411
 	b.config.Image = b.image
413 412
 
414
-	origPath := orig
415
-	destPath := dest
416
-	clobberTimes := false
413
+	// FIXME: do we really need this?
414
+	var (
415
+		origPath = orig
416
+		destPath = dest
417
+	)
417 418
 
418 419
 	if utils.IsURL(orig) {
419
-
420
-		clobberTimes = true
421
-
422 420
 		resp, err := utils.Download(orig)
423 421
 		if err != nil {
424 422
 			return err
425 423
 		}
426
-		tmpDirName, err := ioutil.TempDir(b.context, "docker-remote")
424
+		tmpDirName, err := ioutil.TempDir(b.contextPath, "docker-remote")
427 425
 		if err != nil {
428 426
 			return err
429 427
 		}
... ...
@@ -464,9 +406,25 @@ func (b *buildFile) CmdAdd(args string) error {
464 464
 
465 465
 	// Hash path and check the cache
466 466
 	if b.utilizeCache {
467
-		hash, err := b.hashPath(b.context, origPath, clobberTimes)
468
-		if err != nil {
467
+		var (
468
+			hash string
469
+			sums = b.context.GetSums()
470
+		)
471
+		if fi, err := os.Stat(path.Join(b.contextPath, origPath)); err != nil {
469 472
 			return err
473
+		} else if fi.IsDir() {
474
+			var subfiles []string
475
+			for file, sum := range sums {
476
+				if strings.HasPrefix(file, origPath) {
477
+					subfiles = append(subfiles, sum)
478
+				}
479
+			}
480
+			sort.Strings(subfiles)
481
+			hasher := sha256.New()
482
+			hasher.Write([]byte(strings.Join(subfiles, ",")))
483
+			hash = "dir:" + hex.EncodeToString(hasher.Sum(nil))
484
+		} else {
485
+			hash = "file:" + sums[origPath]
470 486
 		}
471 487
 		b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)}
472 488
 		hit, err := b.probeCache()
... ...
@@ -635,17 +593,17 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error {
635 635
 var lineContinuation = regexp.MustCompile(`\s*\\\s*\n`)
636 636
 
637 637
 func (b *buildFile) Build(context io.Reader) (string, error) {
638
-	// FIXME: @creack "name" is a terrible variable name
639
-	name, err := ioutil.TempDir("", "docker-build")
638
+	tmpdirPath, err := ioutil.TempDir("", "docker-build")
640 639
 	if err != nil {
641 640
 		return "", err
642 641
 	}
643
-	if err := archive.Untar(context, name, nil); err != nil {
642
+	b.context = &utils.TarSum{Reader: context}
643
+	if err := archive.Untar(b.context, tmpdirPath, nil); err != nil {
644 644
 		return "", err
645 645
 	}
646
-	defer os.RemoveAll(name)
647
-	b.context = name
648
-	filename := path.Join(name, "Dockerfile")
646
+	defer os.RemoveAll(tmpdirPath)
647
+	b.contextPath = tmpdirPath
648
+	filename := path.Join(tmpdirPath, "Dockerfile")
649 649
 	if _, err := os.Stat(filename); os.IsNotExist(err) {
650 650
 		return "", fmt.Errorf("Can't build a directory with no Dockerfile")
651 651
 	}
... ...
@@ -21,6 +21,7 @@ import (
21 21
 	"path"
22 22
 	"path/filepath"
23 23
 	"runtime"
24
+	"sort"
24 25
 	"strconv"
25 26
 	"strings"
26 27
 	"sync"
... ...
@@ -1695,16 +1696,13 @@ func (srv *Server) ImageGetCached(imgID string, config *Config) (*Image, error)
1695 1695
 	}
1696 1696
 
1697 1697
 	// Store the tree in a map of map (map[parentId][childId])
1698
-	imageMap := make(map[string]map[string]struct{})
1698
+	imageMap := make(map[string][]string)
1699 1699
 	for _, img := range images {
1700
-		if _, exists := imageMap[img.Parent]; !exists {
1701
-			imageMap[img.Parent] = make(map[string]struct{})
1702
-		}
1703
-		imageMap[img.Parent][img.ID] = struct{}{}
1700
+		imageMap[img.Parent] = append(imageMap[img.Parent], img.ID)
1704 1701
 	}
1705
-
1702
+	sort.Strings(imageMap[imgID])
1706 1703
 	// Loop on the children of the given image and check the config
1707
-	for elem := range imageMap[imgID] {
1704
+	for _, elem := range imageMap[imgID] {
1708 1705
 		img, err := srv.runtime.graph.Get(elem)
1709 1706
 		if err != nil {
1710 1707
 			return nil, err
... ...
@@ -1,38 +1,30 @@
1 1
 package utils
2 2
 
3 3
 import (
4
+	"archive/tar"
4 5
 	"bytes"
5 6
 	"compress/gzip"
6 7
 	"crypto/sha256"
7 8
 	"encoding/hex"
8
-	"archive/tar"
9 9
 	"hash"
10 10
 	"io"
11 11
 	"sort"
12 12
 	"strconv"
13
+	"strings"
13 14
 )
14 15
 
15
-type verboseHash struct {
16
-	hash.Hash
17
-}
18
-
19
-func (h verboseHash) Write(buf []byte) (int, error) {
20
-	Debugf("--->%s<---", buf)
21
-	return h.Hash.Write(buf)
22
-}
23
-
24 16
 type TarSum struct {
25 17
 	io.Reader
26
-	tarR     *tar.Reader
27
-	tarW     *tar.Writer
28
-	gz       *gzip.Writer
29
-	bufTar   *bytes.Buffer
30
-	bufGz    *bytes.Buffer
31
-	h        hash.Hash
32
-	h2       verboseHash
33
-	sums     []string
34
-	finished bool
35
-	first    bool
18
+	tarR        *tar.Reader
19
+	tarW        *tar.Writer
20
+	gz          *gzip.Writer
21
+	bufTar      *bytes.Buffer
22
+	bufGz       *bytes.Buffer
23
+	h           hash.Hash
24
+	sums        map[string]string
25
+	currentFile string
26
+	finished    bool
27
+	first       bool
36 28
 }
37 29
 
38 30
 func (ts *TarSum) encodeHeader(h *tar.Header) error {
... ...
@@ -52,7 +44,6 @@ func (ts *TarSum) encodeHeader(h *tar.Header) error {
52 52
 		// {"atime", strconv.Itoa(int(h.AccessTime.UTC().Unix()))},
53 53
 		// {"ctime", strconv.Itoa(int(h.ChangeTime.UTC().Unix()))},
54 54
 	} {
55
-		//		Debugf("-->%s<-- -->%s<--", elem[0], elem[1])
56 55
 		if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
57 56
 			return err
58 57
 		}
... ...
@@ -68,9 +59,9 @@ func (ts *TarSum) Read(buf []byte) (int, error) {
68 68
 		ts.tarW = tar.NewWriter(ts.bufTar)
69 69
 		ts.gz = gzip.NewWriter(ts.bufGz)
70 70
 		ts.h = sha256.New()
71
-		//		ts.h = verboseHash{sha256.New()}
72 71
 		ts.h.Reset()
73 72
 		ts.first = true
73
+		ts.sums = make(map[string]string)
74 74
 	}
75 75
 
76 76
 	if ts.finished {
... ...
@@ -85,7 +76,7 @@ func (ts *TarSum) Read(buf []byte) (int, error) {
85 85
 				return 0, err
86 86
 			}
87 87
 			if !ts.first {
88
-				ts.sums = append(ts.sums, hex.EncodeToString(ts.h.Sum(nil)))
88
+				ts.sums[ts.currentFile] = hex.EncodeToString(ts.h.Sum(nil))
89 89
 				ts.h.Reset()
90 90
 			} else {
91 91
 				ts.first = false
... ...
@@ -102,6 +93,7 @@ func (ts *TarSum) Read(buf []byte) (int, error) {
102 102
 				}
103 103
 				return n, err
104 104
 			}
105
+			ts.currentFile = strings.TrimSuffix(strings.TrimPrefix(currentHeader.Name, "./"), "/")
105 106
 			if err := ts.encodeHeader(currentHeader); err != nil {
106 107
 				return 0, err
107 108
 			}
... ...
@@ -143,12 +135,17 @@ func (ts *TarSum) Read(buf []byte) (int, error) {
143 143
 }
144 144
 
145 145
 func (ts *TarSum) Sum(extra []byte) string {
146
-	sort.Strings(ts.sums)
146
+	var sums []string
147
+
148
+	for _, sum := range ts.sums {
149
+		sums = append(sums, sum)
150
+	}
151
+	sort.Strings(sums)
147 152
 	h := sha256.New()
148 153
 	if extra != nil {
149 154
 		h.Write(extra)
150 155
 	}
151
-	for _, sum := range ts.sums {
156
+	for _, sum := range sums {
152 157
 		Debugf("-->%s<--", sum)
153 158
 		h.Write([]byte(sum))
154 159
 	}
... ...
@@ -156,3 +153,7 @@ func (ts *TarSum) Sum(extra []byte) string {
156 156
 	Debugf("checksum processed: %s", checksum)
157 157
 	return checksum
158 158
 }
159
+
160
+func (ts *TarSum) GetSums() map[string]string {
161
+	return ts.sums
162
+}