Browse code

Add overlayfs graph backend

This backend uses the overlayfs union filesystem for containers
plus hard link file sharing for images.

Each container/image can have a "root" subdirectory which is a plain
filesystem hierarchy, or they can use overlayfs.

If they use overlayfs there is an "upper" directory and a "lower-id"
file, as well as "merged" and "work" directories. The "upper"
directory has the upper layer of the overlay, and "lower-id" contains
the id of the parent whose "root" directory shall be used as the lower
layer in the overlay. The overlay itself is mounted in the "merged"
directory, and the "work" dir is needed for overlayfs to work.

When an overlay layer is created there are two cases. If the
parent has a "root" dir, we start out with an empty "upper"
directory overlaid on the parent's root. This is typically the
case with the init layer of a container which is based on an image.
If there is no "root" in the parent, we inherit the lower-id from
the parent and start by making a copy of the parent's "upper" dir.
This is typically the case for a container layer which copies
its parent -init upper layer.

Additionally we also have a custom implementation of ApplyLayer
which makes a recursive copy of the parent "root" layer using
hardlinks to share file data, and then applies the layer on top
of that. This means all child images share file (but not directory)
data with the parent.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)

Alexander Larsson authored on 2014/08/19 18:23:55
Showing 5 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+// +build !exclude_graphdriver_overlayfs
1
+
2
+package daemon
3
+
4
+import (
5
+	_ "github.com/docker/docker/daemon/graphdriver/overlayfs"
6
+)
... ...
@@ -81,6 +81,8 @@ var (
81 81
 		"btrfs",
82 82
 		"devicemapper",
83 83
 		"vfs",
84
+		// experimental, has to be enabled manually for now
85
+		"overlayfs",
84 86
 	}
85 87
 
86 88
 	ErrNotSupported   = errors.New("driver not supported")
87 89
new file mode 100644
... ...
@@ -0,0 +1,157 @@
0
+// +build linux
1
+
2
+package overlayfs
3
+
4
+import (
5
+	"fmt"
6
+	"io"
7
+	"os"
8
+	"path/filepath"
9
+	"syscall"
10
+
11
+	"github.com/docker/docker/pkg/system"
12
+)
13
+
14
// CopyFlags is a bit set of options that tune how copyDir copies a tree.
type CopyFlags int

// Flag bits accepted by copyDir.
const (
	// CopyHardlink makes copyDir hard-link regular files into the
	// destination instead of copying their contents.
	CopyHardlink CopyFlags = 1 << iota
)
19
+
20
// copyRegular copies the contents of the regular file at srcPath into
// dstPath. The destination is opened write-only and created with the given
// mode if it does not exist yet; an existing destination is not truncated.
//
// It returns the first error encountered while opening either file, copying
// the data, or closing the destination.
func copyRegular(srcPath, dstPath string, mode os.FileMode) (err error) {
	srcFile, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE, mode)
	if err != nil {
		return err
	}
	defer func() {
		// Closing a written file can surface a write error; report it
		// unless an earlier error is already being returned.
		if closeErr := dstFile.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(dstFile, srcFile)
	return err
}
37
+
38
+func copyXattr(srcPath, dstPath, attr string) error {
39
+	data, err := system.Lgetxattr(srcPath, attr)
40
+	if err != nil {
41
+		return err
42
+	}
43
+	if data != nil {
44
+		if err := system.Lsetxattr(dstPath, attr, data, 0); err != nil {
45
+			return err
46
+		}
47
+	}
48
+	return nil
49
+}
50
+
51
+func copyDir(srcDir, dstDir string, flags CopyFlags) error {
52
+	err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
53
+		if err != nil {
54
+			return err
55
+		}
56
+
57
+		// Rebase path
58
+		relPath, err := filepath.Rel(srcDir, srcPath)
59
+		if err != nil {
60
+			return err
61
+		}
62
+
63
+		dstPath := filepath.Join(dstDir, relPath)
64
+		if err != nil {
65
+			return err
66
+		}
67
+
68
+		stat, ok := f.Sys().(*syscall.Stat_t)
69
+		if !ok {
70
+			return fmt.Errorf("Unable to get raw syscall.Stat_t data for %s", srcPath)
71
+		}
72
+
73
+		switch f.Mode() & os.ModeType {
74
+		case 0: // Regular file
75
+			if flags&CopyHardlink != 0 {
76
+				if err := os.Link(srcPath, dstPath); err != nil {
77
+					return err
78
+				}
79
+			} else {
80
+				if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil {
81
+					return err
82
+				}
83
+			}
84
+
85
+		case os.ModeDir:
86
+			if err := os.Mkdir(dstPath, f.Mode()); err != nil && !os.IsExist(err) {
87
+				return err
88
+			}
89
+
90
+		case os.ModeSymlink:
91
+			link, err := os.Readlink(srcPath)
92
+			if err != nil {
93
+				return err
94
+			}
95
+
96
+			if err := os.Symlink(link, dstPath); err != nil {
97
+				return err
98
+			}
99
+
100
+		case os.ModeNamedPipe:
101
+			fallthrough
102
+		case os.ModeSocket:
103
+			if err := syscall.Mkfifo(dstPath, stat.Mode); err != nil {
104
+				return err
105
+			}
106
+
107
+		case os.ModeDevice:
108
+			if err := syscall.Mknod(dstPath, stat.Mode, int(stat.Rdev)); err != nil {
109
+				return err
110
+			}
111
+
112
+		default:
113
+			return fmt.Errorf("Unknown file type for %s\n", srcPath)
114
+		}
115
+
116
+		if err := os.Lchown(dstPath, int(stat.Uid), int(stat.Gid)); err != nil {
117
+			return err
118
+		}
119
+
120
+		if err := copyXattr(srcPath, dstPath, "security.capability"); err != nil {
121
+			return err
122
+		}
123
+
124
+		// We need to copy this attribute if it appears in an overlayfs upper layer, as
125
+		// this function is used to copy those. It is set by overlayfs if a directory
126
+		// is removed and then re-created and should not inherit anything from the
127
+		// same dir in the lower dir.
128
+		if err := copyXattr(srcPath, dstPath, "trusted.overlay.opaque"); err != nil {
129
+			return err
130
+		}
131
+
132
+		isSymlink := f.Mode()&os.ModeSymlink != 0
133
+
134
+		// There is no LChmod, so ignore mode for symlink. Also, this
135
+		// must happen after chown, as that can modify the file mode
136
+		if !isSymlink {
137
+			if err := os.Chmod(dstPath, f.Mode()); err != nil {
138
+				return err
139
+			}
140
+		}
141
+
142
+		ts := []syscall.Timespec{stat.Atim, stat.Mtim}
143
+		// syscall.UtimesNano doesn't support a NOFOLLOW flag atm, and
144
+		if !isSymlink {
145
+			if err := system.UtimesNano(dstPath, ts); err != nil {
146
+				return err
147
+			}
148
+		} else {
149
+			if err := system.LUtimesNano(dstPath, ts); err != nil {
150
+				return err
151
+			}
152
+		}
153
+		return nil
154
+	})
155
+	return err
156
+}
0 157
new file mode 100644
... ...
@@ -0,0 +1,369 @@
0
+// +build linux
1
+
2
+package overlayfs
3
+
4
+import (
5
+	"bufio"
6
+	"fmt"
7
+	"io/ioutil"
8
+	"os"
9
+	"os/exec"
10
+	"path"
11
+	"strings"
12
+	"sync"
13
+	"syscall"
14
+
15
+	log "github.com/Sirupsen/logrus"
16
+	"github.com/docker/docker/daemon/graphdriver"
17
+	"github.com/docker/docker/pkg/archive"
18
+	"github.com/docker/libcontainer/label"
19
+)
20
+
21
+// This is a small wrapper over the NaiveDiffDriver that lets us have a custom
22
+// implementation of ApplyDiff()
23
+
24
// ErrApplyDiffFallback is returned by a driver's ApplyDiff to signal that it
// did not handle the diff and that the naive ApplyDiff implementation should
// be used instead.
var ErrApplyDiffFallback = fmt.Errorf("Fall back to normal ApplyDiff")
27
+
28
+type ApplyDiffProtoDriver interface {
29
+	graphdriver.ProtoDriver
30
+	ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error)
31
+}
32
+
33
+type naiveDiffDriverWithApply struct {
34
+	graphdriver.Driver
35
+	applyDiff ApplyDiffProtoDriver
36
+}
37
+
38
+func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver {
39
+	return &naiveDiffDriverWithApply{
40
+		Driver:    graphdriver.NaiveDiffDriver(driver),
41
+		applyDiff: driver,
42
+	}
43
+}
44
+
45
+func (d *naiveDiffDriverWithApply) ApplyDiff(id, parent string, diff archive.ArchiveReader) (int64, error) {
46
+	b, err := d.applyDiff.ApplyDiff(id, parent, diff)
47
+	if err == ErrApplyDiffFallback {
48
+		return d.Driver.ApplyDiff(id, parent, diff)
49
+	}
50
+	return b, err
51
+}
52
+
53
+// This backend uses the overlayfs union filesystem for containers
54
+// plus hard link file sharing for images.
55
+
56
+// Each container/image can have a "root" subdirectory which is a plain
57
+// filesystem hierarchy, or they can use overlayfs.
58
+
59
+// If they use overlayfs there is an "upper" directory and a "lower-id"
60
+// file, as well as "merged" and "work" directories. The "upper"
61
+// directory has the upper layer of the overlay, and "lower-id" contains
62
+// the id of the parent whose "root" directory shall be used as the lower
63
+// layer in the overlay. The overlay itself is mounted in the "merged"
64
+// directory, and the "work" dir is needed for overlayfs to work.
65
+
66
+// When an overlay layer is created there are two cases. If the
67
+// parent has a "root" dir, we start out with an empty "upper"
68
+// directory overlaid on the parent's root. This is typically the
69
+// case with the init layer of a container which is based on an image.
70
+// If there is no "root" in the parent, we inherit the lower-id from
71
+// the parent and start by making a copy of the parent's "upper" dir.
72
+// This is typically the case for a container layer which copies
73
+// its parent -init upper layer.
74
+
75
+// Additionally we also have a custom implementation of ApplyLayer
76
+// which makes a recursive copy of the parent "root" layer using
77
+// hardlinks to share file data, and then applies the layer on top
78
+// of that. This means all child images share file (but not directory)
79
+// data with the parent.
80
+
81
// ActiveMount tracks one layer that has been handed out by Driver.Get and
// not yet fully released by Driver.Put.
type ActiveMount struct {
	// count is incremented by each Get and decremented by each Put.
	count int
	// path is the directory returned to callers of Get.
	path string
	// mounted is true when Get mounted an overlay at path, in which case
	// the final Put unmounts it.
	mounted bool
}
86
+type Driver struct {
87
+	home       string
88
+	sync.Mutex // Protects concurrent modification to active
89
+	active     map[string]*ActiveMount
90
+}
91
+
92
+func init() {
93
+	graphdriver.Register("overlayfs", Init)
94
+}
95
+
96
+func Init(home string, options []string) (graphdriver.Driver, error) {
97
+	if err := supportsOverlayfs(); err != nil {
98
+		return nil, graphdriver.ErrNotSupported
99
+	}
100
+
101
+	// Create the driver home dir
102
+	if err := os.MkdirAll(home, 0755); err != nil && !os.IsExist(err) {
103
+		return nil, err
104
+	}
105
+
106
+	d := &Driver{
107
+		home:   home,
108
+		active: make(map[string]*ActiveMount),
109
+	}
110
+
111
+	return NaiveDiffDriverWithApply(d), nil
112
+}
113
+
114
+func supportsOverlayfs() error {
115
+	// We can try to modprobe overlayfs first before looking at
116
+	// proc/filesystems for when overlayfs is supported
117
+	exec.Command("modprobe", "overlayfs").Run()
118
+
119
+	f, err := os.Open("/proc/filesystems")
120
+	if err != nil {
121
+		return err
122
+	}
123
+	defer f.Close()
124
+
125
+	s := bufio.NewScanner(f)
126
+	for s.Scan() {
127
+		if strings.Contains(s.Text(), "overlayfs") {
128
+			return nil
129
+		}
130
+	}
131
+	return graphdriver.ErrNotSupported
132
+}
133
+
134
+func (d *Driver) String() string {
135
+	return "overlayfs"
136
+}
137
+
138
+func (d *Driver) Status() [][2]string {
139
+	return nil
140
+}
141
+
142
+func (d *Driver) Cleanup() error {
143
+	return nil
144
+}
145
+
146
+func (d *Driver) Create(id string, parent string) (retErr error) {
147
+	dir := d.dir(id)
148
+	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
149
+		return err
150
+	}
151
+	if err := os.Mkdir(dir, 0700); err != nil {
152
+		return err
153
+	}
154
+
155
+	defer func() {
156
+		// Clean up on failure
157
+		if retErr != nil {
158
+			os.RemoveAll(dir)
159
+		}
160
+	}()
161
+
162
+	// Toplevel images are just a "root" dir
163
+	if parent == "" {
164
+		if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil {
165
+			return err
166
+		}
167
+		return nil
168
+	}
169
+
170
+	parentDir := d.dir(parent)
171
+
172
+	// Ensure parent exists
173
+	if _, err := os.Lstat(parentDir); err != nil {
174
+		return err
175
+	}
176
+
177
+	// If parent has a root, just do a overlayfs to it
178
+	parentRoot := path.Join(parentDir, "root")
179
+
180
+	if s, err := os.Lstat(parentRoot); err == nil {
181
+		if err := os.Mkdir(path.Join(dir, "upper"), s.Mode()); err != nil {
182
+			return err
183
+		}
184
+		if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
185
+			return err
186
+		}
187
+		if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
188
+			return err
189
+		}
190
+		if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil {
191
+			return err
192
+		}
193
+		return nil
194
+	}
195
+
196
+	// Otherwise, copy the upper and the lower-id from the parent
197
+
198
+	lowerId, err := ioutil.ReadFile(path.Join(parentDir, "lower-id"))
199
+	if err != nil {
200
+		return err
201
+	}
202
+
203
+	if err := ioutil.WriteFile(path.Join(dir, "lower-id"), lowerId, 0666); err != nil {
204
+		return err
205
+	}
206
+
207
+	parentUpperDir := path.Join(parentDir, "upper")
208
+	s, err := os.Lstat(parentUpperDir)
209
+	if err != nil {
210
+		return err
211
+	}
212
+
213
+	upperDir := path.Join(dir, "upper")
214
+	if err := os.Mkdir(upperDir, s.Mode()); err != nil {
215
+		return err
216
+	}
217
+	if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
218
+		return err
219
+	}
220
+	if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
221
+		return err
222
+	}
223
+
224
+	return copyDir(parentUpperDir, upperDir, 0)
225
+}
226
+
227
+func (d *Driver) dir(id string) string {
228
+	return path.Join(d.home, id)
229
+}
230
+
231
+func (d *Driver) Remove(id string) error {
232
+	dir := d.dir(id)
233
+	if _, err := os.Stat(dir); err != nil {
234
+		return err
235
+	}
236
+	return os.RemoveAll(dir)
237
+}
238
+
239
+func (d *Driver) Get(id string, mountLabel string) (string, error) {
240
+	// Protect the d.active from concurrent access
241
+	d.Lock()
242
+	defer d.Unlock()
243
+
244
+	mount := d.active[id]
245
+	if mount != nil {
246
+		mount.count++
247
+		return mount.path, nil
248
+	} else {
249
+		mount = &ActiveMount{count: 1}
250
+	}
251
+
252
+	dir := d.dir(id)
253
+	if _, err := os.Stat(dir); err != nil {
254
+		return "", err
255
+	}
256
+
257
+	// If id has a root, just return it
258
+	rootDir := path.Join(dir, "root")
259
+	if _, err := os.Stat(rootDir); err == nil {
260
+		mount.path = rootDir
261
+		d.active[id] = mount
262
+		return mount.path, nil
263
+	}
264
+
265
+	lowerId, err := ioutil.ReadFile(path.Join(dir, "lower-id"))
266
+	if err != nil {
267
+		return "", err
268
+	}
269
+	lowerDir := path.Join(d.dir(string(lowerId)), "root")
270
+	upperDir := path.Join(dir, "upper")
271
+	workDir := path.Join(dir, "work")
272
+	mergedDir := path.Join(dir, "merged")
273
+
274
+	opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, upperDir, workDir)
275
+	if err := syscall.Mount("overlayfs", mergedDir, "overlayfs", 0, label.FormatMountLabel(opts, mountLabel)); err != nil {
276
+		return "", err
277
+	}
278
+	mount.path = mergedDir
279
+	mount.mounted = true
280
+	d.active[id] = mount
281
+
282
+	return mount.path, nil
283
+}
284
+
285
+func (d *Driver) Put(id string) {
286
+	// Protect the d.active from concurrent access
287
+	d.Lock()
288
+	defer d.Unlock()
289
+
290
+	mount := d.active[id]
291
+	if mount == nil {
292
+		log.Debugf("Put on a non-mounted device %s", id)
293
+		return
294
+	}
295
+
296
+	mount.count--
297
+	if mount.count > 0 {
298
+		return
299
+	}
300
+
301
+	if mount.mounted {
302
+		if err := syscall.Unmount(mount.path, 0); err != nil {
303
+			log.Debugf("Failed to unmount %s overlayfs: %v", id, err)
304
+		}
305
+	}
306
+
307
+	delete(d.active, id)
308
+}
309
+
310
+func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) {
311
+	dir := d.dir(id)
312
+
313
+	if parent == "" {
314
+		return 0, ErrApplyDiffFallback
315
+	}
316
+
317
+	parentRootDir := path.Join(d.dir(parent), "root")
318
+	if _, err := os.Stat(parentRootDir); err != nil {
319
+		return 0, ErrApplyDiffFallback
320
+	}
321
+
322
+	// We now know there is a parent, and it has a "root" directory containing
323
+	// the full root filesystem. We can just hardlink it and apply the
324
+	// layer. This relies on two things:
325
+	// 1) ApplyDiff is only run once on a clean (no writes to upper layer) container
326
+	// 2) ApplyDiff doesn't do any in-place writes to files (would break hardlinks)
327
+	// These are all currently true and are not expected to break
328
+
329
+	tmpRootDir, err := ioutil.TempDir(dir, "tmproot")
330
+	if err != nil {
331
+		return 0, err
332
+	}
333
+	defer func() {
334
+		if err != nil {
335
+			os.RemoveAll(tmpRootDir)
336
+		} else {
337
+			os.RemoveAll(path.Join(dir, "upper"))
338
+			os.RemoveAll(path.Join(dir, "work"))
339
+			os.RemoveAll(path.Join(dir, "merged"))
340
+			os.RemoveAll(path.Join(dir, "lower-id"))
341
+		}
342
+	}()
343
+
344
+	if err = copyDir(parentRootDir, tmpRootDir, CopyHardlink); err != nil {
345
+		return 0, err
346
+	}
347
+
348
+	if err := archive.ApplyLayer(tmpRootDir, diff); err != nil {
349
+		return 0, err
350
+	}
351
+
352
+	rootDir := path.Join(dir, "root")
353
+	if err := os.Rename(tmpRootDir, rootDir); err != nil {
354
+		return 0, err
355
+	}
356
+
357
+	changes, err := archive.ChangesDirs(rootDir, parentRootDir)
358
+	if err != nil {
359
+		return 0, err
360
+	}
361
+
362
+	return archive.ChangesSize(rootDir, changes), nil
363
+}
364
+
365
+func (d *Driver) Exists(id string) bool {
366
+	_, err := os.Stat(d.dir(id))
367
+	return err == nil
368
+}
0 369
new file mode 100644
... ...
@@ -0,0 +1,28 @@
0
+package overlayfs
1
+
2
+import (
3
+	"github.com/docker/docker/daemon/graphdriver/graphtest"
4
+	"testing"
5
+)
6
+
7
+// This avoids creating a new driver for each test if all tests are run
8
+// Make sure to put new tests between TestOverlayfsSetup and TestOverlayfsTeardown
9
+func TestOverlayfsSetup(t *testing.T) {
10
+	graphtest.GetDriver(t, "overlayfs")
11
+}
12
+
13
+func TestOverlayfsCreateEmpty(t *testing.T) {
14
+	graphtest.DriverTestCreateEmpty(t, "overlayfs")
15
+}
16
+
17
+func TestOverlayfsCreateBase(t *testing.T) {
18
+	graphtest.DriverTestCreateBase(t, "overlayfs")
19
+}
20
+
21
+func TestOverlayfsCreateSnap(t *testing.T) {
22
+	graphtest.DriverTestCreateSnap(t, "overlayfs")
23
+}
24
+
25
+func TestOverlayfsTeardown(t *testing.T) {
26
+	graphtest.PutDriver(t)
27
+}