Browse code

Refactor to optimize storage driver ApplyDiff()

To avoid an expensive call to archive.ChangesDirs() which walks two directory
trees and compares every entry, archive.ApplyLayer() has been extended to
also return the size of the layer changes.

Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)

Josh Hawn authored on 2014/12/18 11:26:03
Showing 9 changed files
... ...
@@ -312,7 +312,7 @@ func (a *Driver) applyDiff(id string, diff archive.ArchiveReader) error {
312 312
 // DiffSize calculates the changes between the specified id
313 313
 // and its parent and returns the size in bytes of the changes
314 314
 // relative to its base filesystem directory.
315
-func (a *Driver) DiffSize(id, parent string) (bytes int64, err error) {
315
+func (a *Driver) DiffSize(id, parent string) (size int64, err error) {
316 316
 	// AUFS doesn't need the parent layer to calculate the diff size.
317 317
 	return utils.TreeSize(path.Join(a.rootPath(), "diff", id))
318 318
 }
... ...
@@ -320,7 +320,7 @@ func (a *Driver) DiffSize(id, parent string) (bytes int64, err error) {
320 320
 // ApplyDiff extracts the changeset from the given diff into the
321 321
 // layer with the specified id and parent, returning the size of the
322 322
 // new layer in bytes.
323
-func (a *Driver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
323
+func (a *Driver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) {
324 324
 	// AUFS doesn't need the parent id to apply the diff.
325 325
 	if err = a.applyDiff(id, diff); err != nil {
326 326
 		return
... ...
@@ -63,11 +63,11 @@ type Driver interface {
63 63
 	// ApplyDiff extracts the changeset from the given diff into the
64 64
 	// layer with the specified id and parent, returning the size of the
65 65
 	// new layer in bytes.
66
-	ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
66
+	ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error)
67 67
 	// DiffSize calculates the changes between the specified id
68 68
 	// and its parent and returns the size in bytes of the changes
69 69
 	// relative to its base filesystem directory.
70
-	DiffSize(id, parent string) (bytes int64, err error)
70
+	DiffSize(id, parent string) (size int64, err error)
71 71
 }
72 72
 
73 73
 var (
... ...
@@ -3,14 +3,12 @@
3 3
 package graphdriver
4 4
 
5 5
 import (
6
-	"fmt"
7 6
 	"time"
8 7
 
9 8
 	log "github.com/Sirupsen/logrus"
10 9
 	"github.com/docker/docker/pkg/archive"
11 10
 	"github.com/docker/docker/pkg/chrootarchive"
12 11
 	"github.com/docker/docker/pkg/ioutils"
13
-	"github.com/docker/docker/utils"
14 12
 )
15 13
 
16 14
 // naiveDiffDriver takes a ProtoDriver and adds the
... ...
@@ -27,8 +25,8 @@ type naiveDiffDriver struct {
27 27
 // it may or may not support on its own:
28 28
 //     Diff(id, parent string) (archive.Archive, error)
29 29
 //     Changes(id, parent string) ([]archive.Change, error)
30
-//     ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
31
-//     DiffSize(id, parent string) (bytes int64, err error)
30
+//     ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error)
31
+//     DiffSize(id, parent string) (size int64, err error)
32 32
 func NaiveDiffDriver(driver ProtoDriver) Driver {
33 33
 	return &naiveDiffDriver{ProtoDriver: driver}
34 34
 }
... ...
@@ -111,7 +109,7 @@ func (gdw *naiveDiffDriver) Changes(id, parent string) ([]archive.Change, error)
111 111
 // ApplyDiff extracts the changeset from the given diff into the
112 112
 // layer with the specified id and parent, returning the size of the
113 113
 // new layer in bytes.
114
-func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
114
+func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) {
115 115
 	driver := gdw.ProtoDriver
116 116
 
117 117
 	// Mount the root filesystem so we can apply the diff/layer.
... ...
@@ -123,34 +121,18 @@ func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveRea
123 123
 
124 124
 	start := time.Now().UTC()
125 125
 	log.Debugf("Start untar layer")
126
-	if err = chrootarchive.ApplyLayer(layerFs, diff); err != nil {
126
+	if size, err = chrootarchive.ApplyLayer(layerFs, diff); err != nil {
127 127
 		return
128 128
 	}
129 129
 	log.Debugf("Untar time: %vs", time.Now().UTC().Sub(start).Seconds())
130 130
 
131
-	if parent == "" {
132
-		return utils.TreeSize(layerFs)
133
-	}
134
-
135
-	parentFs, err := driver.Get(parent, "")
136
-	if err != nil {
137
-		err = fmt.Errorf("Driver %s failed to get image parent %s: %s", driver, parent, err)
138
-		return
139
-	}
140
-	defer driver.Put(parent)
141
-
142
-	changes, err := archive.ChangesDirs(layerFs, parentFs)
143
-	if err != nil {
144
-		return
145
-	}
146
-
147
-	return archive.ChangesSize(layerFs, changes), nil
131
+	return
148 132
 }
149 133
 
150 134
 // DiffSize calculates the changes between the specified layer
151 135
 // and its parent and returns the size in bytes of the changes
152 136
 // relative to its base filesystem directory.
153
-func (gdw *naiveDiffDriver) DiffSize(id, parent string) (bytes int64, err error) {
137
+func (gdw *naiveDiffDriver) DiffSize(id, parent string) (size int64, err error) {
154 138
 	driver := gdw.ProtoDriver
155 139
 
156 140
 	changes, err := gdw.Changes(id, parent)
... ...
@@ -28,7 +28,7 @@ var (
28 28
 
29 29
 type ApplyDiffProtoDriver interface {
30 30
 	graphdriver.ProtoDriver
31
-	ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
31
+	ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error)
32 32
 }
33 33
 
34 34
 type naiveDiffDriverWithApply struct {
... ...
@@ -309,7 +309,7 @@ func (d *Driver) Put(id string) {
309 309
 	delete(d.active, id)
310 310
 }
311 311
 
312
-func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
312
+func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (size int64, err error) {
313 313
 	dir := d.dir(id)
314 314
 
315 315
 	if parent == "" {
... ...
@@ -347,7 +347,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader)
347 347
 		return 0, err
348 348
 	}
349 349
 
350
-	if err := chrootarchive.ApplyLayer(tmpRootDir, diff); err != nil {
350
+	if size, err = chrootarchive.ApplyLayer(tmpRootDir, diff); err != nil {
351 351
 		return 0, err
352 352
 	}
353 353
 
... ...
@@ -356,12 +356,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader)
356 356
 		return 0, err
357 357
 	}
358 358
 
359
-	changes, err := archive.ChangesDirs(rootDir, parentRootDir)
360
-	if err != nil {
361
-		return 0, err
362
-	}
363
-
364
-	return archive.ChangesSize(rootDir, changes), nil
359
+	return
365 360
 }
366 361
 
367 362
 func (d *Driver) Exists(id string) bool {
... ...
@@ -286,7 +286,7 @@ func TestApplyLayer(t *testing.T) {
286 286
 		t.Fatal(err)
287 287
 	}
288 288
 
289
-	if err := ApplyLayer(src, layerCopy); err != nil {
289
+	if _, err := ApplyLayer(src, layerCopy); err != nil {
290 290
 		t.Fatal(err)
291 291
 	}
292 292
 
... ...
@@ -15,7 +15,7 @@ import (
15 15
 	"github.com/docker/docker/pkg/system"
16 16
 )
17 17
 
18
-func UnpackLayer(dest string, layer ArchiveReader) error {
18
+func UnpackLayer(dest string, layer ArchiveReader) (size int64, err error) {
19 19
 	tr := tar.NewReader(layer)
20 20
 	trBuf := pools.BufioReader32KPool.Get(tr)
21 21
 	defer pools.BufioReader32KPool.Put(trBuf)
... ...
@@ -33,9 +33,11 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
33 33
 			break
34 34
 		}
35 35
 		if err != nil {
36
-			return err
36
+			return 0, err
37 37
 		}
38 38
 
39
+		size += hdr.Size
40
+
39 41
 		// Normalize name, for safety and for a simple is-root check
40 42
 		hdr.Name = filepath.Clean(hdr.Name)
41 43
 
... ...
@@ -48,7 +50,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
48 48
 			if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) {
49 49
 				err = os.MkdirAll(parentPath, 0600)
50 50
 				if err != nil {
51
-					return err
51
+					return 0, err
52 52
 				}
53 53
 			}
54 54
 		}
... ...
@@ -63,12 +65,12 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
63 63
 				aufsHardlinks[basename] = hdr
64 64
 				if aufsTempdir == "" {
65 65
 					if aufsTempdir, err = ioutil.TempDir("", "dockerplnk"); err != nil {
66
-						return err
66
+						return 0, err
67 67
 					}
68 68
 					defer os.RemoveAll(aufsTempdir)
69 69
 				}
70 70
 				if err := createTarFile(filepath.Join(aufsTempdir, basename), dest, hdr, tr, true); err != nil {
71
-					return err
71
+					return 0, err
72 72
 				}
73 73
 			}
74 74
 			continue
... ...
@@ -77,10 +79,10 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
77 77
 		path := filepath.Join(dest, hdr.Name)
78 78
 		rel, err := filepath.Rel(dest, path)
79 79
 		if err != nil {
80
-			return err
80
+			return 0, err
81 81
 		}
82 82
 		if strings.HasPrefix(rel, "..") {
83
-			return breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest))
83
+			return 0, breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest))
84 84
 		}
85 85
 		base := filepath.Base(path)
86 86
 
... ...
@@ -88,7 +90,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
88 88
 			originalBase := base[len(".wh."):]
89 89
 			originalPath := filepath.Join(filepath.Dir(path), originalBase)
90 90
 			if err := os.RemoveAll(originalPath); err != nil {
91
-				return err
91
+				return 0, err
92 92
 			}
93 93
 		} else {
94 94
 			// If path exits we almost always just want to remove and replace it.
... ...
@@ -98,7 +100,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
98 98
 			if fi, err := os.Lstat(path); err == nil {
99 99
 				if !(fi.IsDir() && hdr.Typeflag == tar.TypeDir) {
100 100
 					if err := os.RemoveAll(path); err != nil {
101
-						return err
101
+						return 0, err
102 102
 					}
103 103
 				}
104 104
 			}
... ...
@@ -113,18 +115,18 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
113 113
 				linkBasename := filepath.Base(hdr.Linkname)
114 114
 				srcHdr = aufsHardlinks[linkBasename]
115 115
 				if srcHdr == nil {
116
-					return fmt.Errorf("Invalid aufs hardlink")
116
+					return 0, fmt.Errorf("Invalid aufs hardlink")
117 117
 				}
118 118
 				tmpFile, err := os.Open(filepath.Join(aufsTempdir, linkBasename))
119 119
 				if err != nil {
120
-					return err
120
+					return 0, err
121 121
 				}
122 122
 				defer tmpFile.Close()
123 123
 				srcData = tmpFile
124 124
 			}
125 125
 
126 126
 			if err := createTarFile(path, dest, srcHdr, srcData, true); err != nil {
127
-				return err
127
+				return 0, err
128 128
 			}
129 129
 
130 130
 			// Directory mtimes must be handled at the end to avoid further
... ...
@@ -139,27 +141,29 @@ func UnpackLayer(dest string, layer ArchiveReader) error {
139 139
 		path := filepath.Join(dest, hdr.Name)
140 140
 		ts := []syscall.Timespec{timeToTimespec(hdr.AccessTime), timeToTimespec(hdr.ModTime)}
141 141
 		if err := syscall.UtimesNano(path, ts); err != nil {
142
-			return err
142
+			return 0, err
143 143
 		}
144 144
 	}
145
-	return nil
145
+
146
+	return size, nil
146 147
 }
147 148
 
148 149
 // ApplyLayer parses a diff in the standard layer format from `layer`, and
149
-// applies it to the directory `dest`.
150
-func ApplyLayer(dest string, layer ArchiveReader) error {
150
+// applies it to the directory `dest`. Returns the size in bytes of the
151
+// contents of the layer.
152
+func ApplyLayer(dest string, layer ArchiveReader) (int64, error) {
151 153
 	dest = filepath.Clean(dest)
152 154
 
153 155
 	// We need to be able to set any perms
154 156
 	oldmask, err := system.Umask(0)
155 157
 	if err != nil {
156
-		return err
158
+		return 0, err
157 159
 	}
158 160
 	defer system.Umask(oldmask) // ignore err, ErrNotSupportedPlatform
159 161
 
160 162
 	layer, err = DecompressStream(layer)
161 163
 	if err != nil {
162
-		return err
164
+		return 0, err
163 165
 	}
164 166
 	return UnpackLayer(dest, layer)
165 167
 }
... ...
@@ -17,7 +17,8 @@ var testUntarFns = map[string]func(string, io.Reader) error{
17 17
 		return Untar(r, dest, nil)
18 18
 	},
19 19
 	"applylayer": func(dest string, r io.Reader) error {
20
-		return ApplyLayer(dest, ArchiveReader(r))
20
+		_, err := ApplyLayer(dest, ArchiveReader(r))
21
+		return err
21 22
 	},
22 23
 }
23 24
 
... ...
@@ -95,7 +95,7 @@ func TestChrootApplyEmptyArchiveFromSlowReader(t *testing.T) {
95 95
 		t.Fatal(err)
96 96
 	}
97 97
 	stream := &slowEmptyTarReader{size: 10240, chunkSize: 1024}
98
-	if err := ApplyLayer(dest, stream); err != nil {
98
+	if _, err := ApplyLayer(dest, stream); err != nil {
99 99
 		t.Fatal(err)
100 100
 	}
101 101
 }
... ...
@@ -1,6 +1,8 @@
1 1
 package chrootarchive
2 2
 
3 3
 import (
4
+	"bytes"
5
+	"encoding/json"
4 6
 	"flag"
5 7
 	"fmt"
6 8
 	"io"
... ...
@@ -14,6 +16,10 @@ import (
14 14
 	"github.com/docker/docker/pkg/reexec"
15 15
 )
16 16
 
17
+type applyLayerResponse struct {
18
+	LayerSize int64 `json:"layerSize"`
19
+}
20
+
17 21
 func applyLayer() {
18 22
 	runtime.LockOSThread()
19 23
 	flag.Parse()
... ...
@@ -21,6 +27,7 @@ func applyLayer() {
21 21
 	if err := chroot(flag.Arg(0)); err != nil {
22 22
 		fatal(err)
23 23
 	}
24
+
24 25
 	// We need to be able to set any perms
25 26
 	oldmask := syscall.Umask(0)
26 27
 	defer syscall.Umask(oldmask)
... ...
@@ -28,33 +35,53 @@ func applyLayer() {
28 28
 	if err != nil {
29 29
 		fatal(err)
30 30
 	}
31
+
31 32
 	os.Setenv("TMPDIR", tmpDir)
32
-	err = archive.UnpackLayer("/", os.Stdin)
33
+	size, err := archive.UnpackLayer("/", os.Stdin)
33 34
 	os.RemoveAll(tmpDir)
34 35
 	if err != nil {
35 36
 		fatal(err)
36 37
 	}
37
-	os.RemoveAll(tmpDir)
38
+
39
+	encoder := json.NewEncoder(os.Stdout)
40
+	if err := encoder.Encode(applyLayerResponse{size}); err != nil {
41
+		fatal(fmt.Errorf("unable to encode layerSize JSON: %s", err))
42
+	}
43
+
44
+	flush(os.Stdout)
38 45
 	flush(os.Stdin)
39 46
 	os.Exit(0)
40 47
 }
41 48
 
42
-func ApplyLayer(dest string, layer archive.ArchiveReader) error {
49
+func ApplyLayer(dest string, layer archive.ArchiveReader) (size int64, err error) {
43 50
 	dest = filepath.Clean(dest)
44 51
 	decompressed, err := archive.DecompressStream(layer)
45 52
 	if err != nil {
46
-		return err
53
+		return 0, err
47 54
 	}
55
+
48 56
 	defer func() {
49 57
 		if c, ok := decompressed.(io.Closer); ok {
50 58
 			c.Close()
51 59
 		}
52 60
 	}()
61
+
53 62
 	cmd := reexec.Command("docker-applyLayer", dest)
54 63
 	cmd.Stdin = decompressed
55
-	out, err := cmd.CombinedOutput()
56
-	if err != nil {
57
-		return fmt.Errorf("ApplyLayer %s %s", err, out)
64
+
65
+	outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer)
66
+	cmd.Stdout, cmd.Stderr = outBuf, errBuf
67
+
68
+	if err = cmd.Run(); err != nil {
69
+		return 0, fmt.Errorf("ApplyLayer %s stdout: %s stderr: %s", err, outBuf, errBuf)
70
+	}
71
+
72
+	// Stdout should be a valid JSON struct representing an applyLayerResponse.
73
+	response := applyLayerResponse{}
74
+	decoder := json.NewDecoder(outBuf)
75
+	if err = decoder.Decode(&response); err != nil {
76
+		return 0, fmt.Errorf("unable to decode ApplyLayer JSON response: %s", err)
58 77
 	}
59
-	return nil
78
+
79
+	return response.LayerSize, nil
60 80
 }