Browse code

Merge pull request #2809 from graydon/880-cache-ADD-commands-in-dockerfiles

Issue #880 - cache ADD commands in dockerfiles

Guillaume J. Charmes authored on 2013/12/25 09:22:51
Showing 4 changed files
... ...
@@ -69,6 +69,7 @@ Francisco Souza <f@souza.cc>
69 69
 Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
70 70
 Gabriel Monroy <gabriel@opdemand.com>
71 71
 Gareth Rushgrove <gareth@morethanseven.net>
72
+Graydon Hoare <graydon@pobox.com>
72 73
 Greg Thornton <xdissent@me.com>
73 74
 Guillaume J. Charmes <guillaume.charmes@dotcloud.com>
74 75
 Gurjeet Singh <gurjeet@singh.im>
... ...
@@ -1,5 +1,9 @@
1 1
 # Changelog
2 2
 
3
+#### Builder
4
+
5
+- ADD now uses image cache, based on sha256 of added content.
6
+
3 7
 ## 0.7.2 (2013-12-16)
4 8
 
5 9
 #### Runtime
... ...
@@ -1,6 +1,9 @@
1 1
 package docker
2 2
 
3 3
 import (
4
+	"archive/tar"
5
+	"crypto/sha256"
6
+	"encoding/hex"
4 7
 	"encoding/json"
5 8
 	"errors"
6 9
 	"fmt"
... ...
@@ -12,9 +15,11 @@ import (
12 12
 	"net/url"
13 13
 	"os"
14 14
 	"path"
15
+	"path/filepath"
15 16
 	"reflect"
16 17
 	"regexp"
17 18
 	"strings"
19
+	"time"
18 20
 )
19 21
 
20 22
 var (
... ...
@@ -92,6 +97,87 @@ func (b *buildFile) CmdMaintainer(name string) error {
92 92
 	return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name))
93 93
 }
94 94
 
95
+// probeCache checks to see if image-caching is enabled (`b.utilizeCache`)
96
+// and if so attempts to look up the current `b.image` and `b.config` pair
97
+// in the current server `b.srv`. If an image is found, probeCache returns
98
+// `(true, nil)`. If no image is found, it returns `(false, nil)`. If there
99
+// is any error, it returns `(false, err)`.
100
+func (b *buildFile) probeCache() (bool, error) {
101
+	if b.utilizeCache {
102
+		if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
103
+			return false, err
104
+		} else if cache != nil {
105
+			fmt.Fprintf(b.outStream, " ---> Using cache\n")
106
+			utils.Debugf("[BUILDER] Use cached version")
107
+			b.image = cache.ID
108
+			return true, nil
109
+		} else {
110
+			utils.Debugf("[BUILDER] Cache miss")
111
+		}
112
+	}
113
+	return false, nil
114
+}
115
+
116
+// hashPath calculates a strong hash (sha256) value for a file tree located
117
+// at `basePth`/`pth`, including all attributes that would normally be
118
+// captured by `tar`. The path to hash is passed in two pieces only to
119
+// permit logging the second piece in isolation, assuming the first is a
120
+// temporary directory in which docker is running. If `clobberTimes` is
121
+// true and hashPath is applied to a single file, the ctime/atime/mtime of
122
+// the file are considered to be unix time 0, for purposes of hashing.
123
+func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) {
124
+
125
+	p := path.Join(basePth, pth)
126
+
127
+	st, err := os.Stat(p)
128
+	if err != nil {
129
+		return "", err
130
+	}
131
+
132
+	h := sha256.New()
133
+
134
+	if st.IsDir() {
135
+		tarRd, err := archive.Tar(p, archive.Uncompressed)
136
+		if err != nil {
137
+			return "", err
138
+		}
139
+		_, err = io.Copy(h, tarRd)
140
+		if err != nil {
141
+			return "", err
142
+		}
143
+
144
+	} else {
145
+		hdr, err := tar.FileInfoHeader(st, "")
146
+		if err != nil {
147
+			return "", err
148
+		}
149
+		if clobberTimes {
150
+			hdr.AccessTime = time.Unix(0, 0)
151
+			hdr.ChangeTime = time.Unix(0, 0)
152
+			hdr.ModTime = time.Unix(0, 0)
153
+		}
154
+		hdr.Name = filepath.Base(p)
155
+		tarWr := tar.NewWriter(h)
156
+		if err := tarWr.WriteHeader(hdr); err != nil {
157
+			return "", err
158
+		}
159
+
160
+		fileRd, err := os.Open(p)
161
+		if err != nil {
162
+			return "", err
163
+		}
164
+
165
+		if _, err = io.Copy(tarWr, fileRd); err != nil {
166
+			return "", err
167
+		}
168
+		tarWr.Close()
169
+	}
170
+
171
+	hstr := hex.EncodeToString(h.Sum(nil))
172
+	fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr)
173
+	return hstr, nil
174
+}
175
+
95 176
 func (b *buildFile) CmdRun(args string) error {
96 177
 	if b.image == "" {
97 178
 		return fmt.Errorf("Please provide a source image with `from` prior to run")
... ...
@@ -109,17 +195,12 @@ func (b *buildFile) CmdRun(args string) error {
109 109
 
110 110
 	utils.Debugf("Command to be executed: %v", b.config.Cmd)
111 111
 
112
-	if b.utilizeCache {
113
-		if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
114
-			return err
115
-		} else if cache != nil {
116
-			fmt.Fprintf(b.outStream, " ---> Using cache\n")
117
-			utils.Debugf("[BUILDER] Use cached version")
118
-			b.image = cache.ID
119
-			return nil
120
-		} else {
121
-			utils.Debugf("[BUILDER] Cache miss")
122
-		}
112
+	hit, err := b.probeCache()
113
+	if err != nil {
114
+		return err
115
+	}
116
+	if hit {
117
+		return nil
123 118
 	}
124 119
 
125 120
 	cid, err := b.run()
... ...
@@ -265,32 +346,16 @@ func (b *buildFile) CmdVolume(args string) error {
265 265
 	return nil
266 266
 }
267 267
 
268
-func (b *buildFile) addRemote(container *Container, orig, dest string) error {
269
-	file, err := utils.Download(orig)
270
-	if err != nil {
271
-		return err
268
+func (b *buildFile) checkPathForAddition(orig string) error {
269
+	origPath := path.Join(b.context, orig)
270
+	if !strings.HasPrefix(origPath, b.context) {
271
+		return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
272 272
 	}
273
-	defer file.Body.Close()
274
-
275
-	// If the destination is a directory, figure out the filename.
276
-	if strings.HasSuffix(dest, "/") {
277
-		u, err := url.Parse(orig)
278
-		if err != nil {
279
-			return err
280
-		}
281
-		path := u.Path
282
-		if strings.HasSuffix(path, "/") {
283
-			path = path[:len(path)-1]
284
-		}
285
-		parts := strings.Split(path, "/")
286
-		filename := parts[len(parts)-1]
287
-		if filename == "" {
288
-			return fmt.Errorf("cannot determine filename from url: %s", u)
289
-		}
290
-		dest = dest + filename
273
+	_, err := os.Stat(origPath)
274
+	if err != nil {
275
+		return fmt.Errorf("%s: no such file or directory", orig)
291 276
 	}
292
-
293
-	return container.Inject(file.Body, dest)
277
+	return nil
294 278
 }
295 279
 
296 280
 func (b *buildFile) addContext(container *Container, orig, dest string) error {
... ...
@@ -300,9 +365,6 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error {
300 300
 	if strings.HasSuffix(dest, "/") {
301 301
 		destPath = destPath + "/"
302 302
 	}
303
-	if !strings.HasPrefix(origPath, b.context) {
304
-		return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
305
-	}
306 303
 	fi, err := os.Stat(origPath)
307 304
 	if err != nil {
308 305
 		return fmt.Errorf("%s: no such file or directory", orig)
... ...
@@ -348,6 +410,74 @@ func (b *buildFile) CmdAdd(args string) error {
348 348
 	b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)}
349 349
 
350 350
 	b.config.Image = b.image
351
+
352
+	origPath := orig
353
+	destPath := dest
354
+	clobberTimes := false
355
+
356
+	if utils.IsURL(orig) {
357
+
358
+		clobberTimes = true
359
+
360
+		resp, err := utils.Download(orig)
361
+		if err != nil {
362
+			return err
363
+		}
364
+		tmpDirName, err := ioutil.TempDir(b.context, "docker-remote")
365
+		if err != nil {
366
+			return err
367
+		}
368
+		tmpFileName := path.Join(tmpDirName, "tmp")
369
+		tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
370
+		if err != nil {
371
+			return err
372
+		}
373
+		defer os.RemoveAll(tmpDirName)
374
+		if _, err = io.Copy(tmpFile, resp.Body); err != nil {
375
+			return err
376
+		}
377
+		origPath = path.Join(filepath.Base(tmpDirName), filepath.Base(tmpFileName))
378
+		tmpFile.Close()
379
+
380
+		// If the destination is a directory, figure out the filename.
381
+		if strings.HasSuffix(dest, "/") {
382
+			u, err := url.Parse(orig)
383
+			if err != nil {
384
+				return err
385
+			}
386
+			path := u.Path
387
+			if strings.HasSuffix(path, "/") {
388
+				path = path[:len(path)-1]
389
+			}
390
+			parts := strings.Split(path, "/")
391
+			filename := parts[len(parts)-1]
392
+			if filename == "" {
393
+				return fmt.Errorf("cannot determine filename from url: %s", u)
394
+			}
395
+			destPath = dest + filename
396
+		}
397
+	}
398
+
399
+	if err := b.checkPathForAddition(origPath); err != nil {
400
+		return err
401
+	}
402
+
403
+	// Hash path and check the cache
404
+	if b.utilizeCache {
405
+		hash, err := b.hashPath(b.context, origPath, clobberTimes)
406
+		if err != nil {
407
+			return err
408
+		}
409
+		b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)}
410
+		hit, err := b.probeCache()
411
+		if err != nil {
412
+			return err
413
+		}
414
+		if hit {
415
+			return nil
416
+		}
417
+	}
418
+
351 419
 	// Create the container and start it
352 420
 	container, _, err := b.runtime.Create(b.config, "")
353 421
 	if err != nil {
... ...
@@ -360,14 +490,8 @@ func (b *buildFile) CmdAdd(args string) error {
360 360
 	}
361 361
 	defer container.Unmount()
362 362
 
363
-	if utils.IsURL(orig) {
364
-		if err := b.addRemote(container, orig, dest); err != nil {
365
-			return err
366
-		}
367
-	} else {
368
-		if err := b.addContext(container, orig, dest); err != nil {
369
-			return err
370
-		}
363
+	if err := b.addContext(container, origPath, destPath); err != nil {
364
+		return err
371 365
 	}
372 366
 
373 367
 	if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil {
... ...
@@ -465,17 +589,12 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error {
465 465
 		b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment}
466 466
 		defer func(cmd []string) { b.config.Cmd = cmd }(cmd)
467 467
 
468
-		if b.utilizeCache {
469
-			if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
470
-				return err
471
-			} else if cache != nil {
472
-				fmt.Fprintf(b.outStream, " ---> Using cache\n")
473
-				utils.Debugf("[BUILDER] Use cached version")
474
-				b.image = cache.ID
475
-				return nil
476
-			} else {
477
-				utils.Debugf("[BUILDER] Cache miss")
478
-			}
468
+		hit, err := b.probeCache()
469
+		if err != nil {
470
+			return err
471
+		}
472
+		if hit {
473
+			return nil
479 474
 		}
480 475
 
481 476
 		container, warnings, err := b.runtime.Create(b.config, "")
... ...
@@ -425,16 +425,10 @@ func TestBuildEntrypointRunCleanup(t *testing.T) {
425 425
 	}
426 426
 }
427 427
 
428
-func TestBuildImageWithCache(t *testing.T) {
428
+func checkCacheBehavior(t *testing.T, template testContextTemplate, expectHit bool) {
429 429
 	eng := NewTestEngine(t)
430 430
 	defer nuke(mkRuntimeFromEngine(eng, t))
431 431
 
432
-	template := testContextTemplate{`
433
-        from {IMAGE}
434
-        maintainer dockerio
435
-        `,
436
-		nil, nil}
437
-
438 432
 	img, err := buildImage(template, t, eng, true)
439 433
 	if err != nil {
440 434
 		t.Fatal(err)
... ...
@@ -443,43 +437,115 @@ func TestBuildImageWithCache(t *testing.T) {
443 443
 	imageId := img.ID
444 444
 
445 445
 	img = nil
446
-	img, err = buildImage(template, t, eng, true)
446
+	img, err = buildImage(template, t, eng, expectHit)
447 447
 	if err != nil {
448 448
 		t.Fatal(err)
449 449
 	}
450 450
 
451
-	if imageId != img.ID {
452
-		t.Logf("Image ids should match: %s != %s", imageId, img.ID)
451
+	hit := imageId == img.ID
452
+	if hit != expectHit {
453
+		t.Logf("Cache misbehavior, got hit=%t, expected hit=%t: (first: %s, second %s)",
454
+			hit, expectHit, imageId, img.ID)
453 455
 		t.Fail()
454 456
 	}
455 457
 }
456 458
 
457
-func TestBuildImageWithoutCache(t *testing.T) {
458
-	eng := NewTestEngine(t)
459
-	defer nuke(mkRuntimeFromEngine(eng, t))
459
+func TestBuildImageWithCache(t *testing.T) {
460
+	template := testContextTemplate{`
461
+        from {IMAGE}
462
+        maintainer dockerio
463
+        `,
464
+		nil, nil}
465
+	checkCacheBehavior(t, template, true)
466
+}
460 467
 
468
+func TestBuildImageWithoutCache(t *testing.T) {
461 469
 	template := testContextTemplate{`
462 470
         from {IMAGE}
463 471
         maintainer dockerio
464 472
         `,
465 473
 		nil, nil}
474
+	checkCacheBehavior(t, template, false)
475
+}
466 476
 
467
-	img, err := buildImage(template, t, eng, true)
468
-	if err != nil {
469
-		t.Fatal(err)
470
-	}
471
-	imageId := img.ID
477
+func TestBuildADDLocalFileWithCache(t *testing.T) {
478
+	template := testContextTemplate{`
479
+        from {IMAGE}
480
+        maintainer dockerio
481
+        run echo "first"
482
+        add foo /usr/lib/bla/bar
483
+        run echo "second"
484
+        `,
485
+		[][2]string{{"foo", "hello"}},
486
+		nil}
487
+	checkCacheBehavior(t, template, true)
488
+}
472 489
 
473
-	img = nil
474
-	img, err = buildImage(template, t, eng, false)
475
-	if err != nil {
476
-		t.Fatal(err)
477
-	}
490
+func TestBuildADDLocalFileWithoutCache(t *testing.T) {
491
+	template := testContextTemplate{`
492
+        from {IMAGE}
493
+        maintainer dockerio
494
+        run echo "first"
495
+        add foo /usr/lib/bla/bar
496
+        run echo "second"
497
+        `,
498
+		[][2]string{{"foo", "hello"}},
499
+		nil}
500
+	checkCacheBehavior(t, template, false)
501
+}
478 502
 
479
-	if imageId == img.ID {
480
-		t.Logf("Image ids should not match: %s == %s", imageId, img.ID)
481
-		t.Fail()
482
-	}
503
+func TestBuildADDRemoteFileWithCache(t *testing.T) {
504
+	template := testContextTemplate{`
505
+        from {IMAGE}
506
+        maintainer dockerio
507
+        run echo "first"
508
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
509
+        run echo "second"
510
+        `,
511
+		nil,
512
+		[][2]string{{"/baz", "world!"}}}
513
+	checkCacheBehavior(t, template, true)
514
+}
515
+
516
+func TestBuildADDRemoteFileWithoutCache(t *testing.T) {
517
+	template := testContextTemplate{`
518
+        from {IMAGE}
519
+        maintainer dockerio
520
+        run echo "first"
521
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
522
+        run echo "second"
523
+        `,
524
+		nil,
525
+		[][2]string{{"/baz", "world!"}}}
526
+	checkCacheBehavior(t, template, false)
527
+}
528
+
529
+func TestBuildADDLocalAndRemoteFilesWithCache(t *testing.T) {
530
+	template := testContextTemplate{`
531
+        from {IMAGE}
532
+        maintainer dockerio
533
+        run echo "first"
534
+        add foo /usr/lib/bla/bar
535
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
536
+        run echo "second"
537
+        `,
538
+		[][2]string{{"foo", "hello"}},
539
+		[][2]string{{"/baz", "world!"}}}
540
+	checkCacheBehavior(t, template, true)
541
+}
542
+
543
+func TestBuildADDLocalAndRemoteFilesWithoutCache(t *testing.T) {
544
+	template := testContextTemplate{`
545
+        from {IMAGE}
546
+        maintainer dockerio
547
+        run echo "first"
548
+        add foo /usr/lib/bla/bar
549
+        add http://{SERVERADDR}/baz /usr/lib/baz/quux
550
+        run echo "second"
551
+        `,
552
+		[][2]string{{"foo", "hello"}},
553
+		[][2]string{{"/baz", "world!"}}}
554
+	checkCacheBehavior(t, template, false)
483 555
 }
484 556
 
485 557
 func TestForbiddenContextPath(t *testing.T) {