Browse code

Add zero-copy support to copy module

This changeset allows Docker's VFS and Overlay drivers to take advantage
of Linux's zero-copy APIs.

The copy function first tries to use the FICLONE ioctl, for two reasons:
- clones do not allow partial success (aka short writes)
- clones are expected to be a fast metadata operation
See: http://oss.sgi.com/archives/xfs/2015-12/msg00356.html

If the clone fails, we fall back to copy_file_range, which internally
may fall back to splice, which has an upper limit on the size
of copy it can perform. Given that, we have to loop until the copy
is done.

For a given DirCopy operation, if the clone fails, we will not try
it again for any other file in that operation. The same is true of
copy_file_range.

If all else fails, we fall back to traditional copy.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>

Sargun Dhillon authored on 2017/08/31 11:07:02
Showing 2 changed files
... ...
@@ -2,8 +2,17 @@
2 2
 
3 3
 package copy
4 4
 
5
+/*
6
+#include <linux/fs.h>
7
+
8
+#ifndef FICLONE
9
+#define FICLONE		_IOW(0x94, 9, int)
10
+#endif
11
+*/
12
+import "C"
5 13
 import (
6 14
 	"fmt"
15
+	"io"
7 16
 	"os"
8 17
 	"path/filepath"
9 18
 	"syscall"
... ...
@@ -15,6 +24,7 @@ import (
15 15
 	"golang.org/x/sys/unix"
16 16
 )
17 17
 
18
+// Mode indicates whether to use hardlink or copy content
18 19
 type Mode int
19 20
 
20 21
 const (
... ...
@@ -24,20 +34,61 @@ const (
24 24
 	Hardlink
25 25
 )
26 26
 
27
-func copyRegular(srcPath, dstPath string, mode os.FileMode) error {
27
+func copyRegular(srcPath, dstPath string, fileinfo os.FileInfo, copyWithFileRange, copyWithFileClone *bool) error {
28 28
 	srcFile, err := os.Open(srcPath)
29 29
 	if err != nil {
30 30
 		return err
31 31
 	}
32 32
 	defer srcFile.Close()
33 33
 
34
-	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE, mode)
34
+	// If the destination file already exists, we shouldn't blow it away
35
+	dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileinfo.Mode())
35 36
 	if err != nil {
36 37
 		return err
37 38
 	}
38 39
 	defer dstFile.Close()
39 40
 
40
-	_, err = pools.Copy(dstFile, srcFile)
41
+	if *copyWithFileClone {
42
+		_, _, err = unix.Syscall(unix.SYS_IOCTL, dstFile.Fd(), C.FICLONE, srcFile.Fd())
43
+		if err == nil {
44
+			return nil
45
+		}
46
+
47
+		*copyWithFileClone = false
48
+		if err == unix.EXDEV {
49
+			*copyWithFileRange = false
50
+		}
51
+	}
52
+	if *copyWithFileRange {
53
+		err = doCopyWithFileRange(srcFile, dstFile, fileinfo)
54
+		// Trying the file_clone may not have caught the exdev case
55
+		// as the ioctl may not have been available (therefore EINVAL)
56
+		if err == unix.EXDEV || err == unix.ENOSYS {
57
+			*copyWithFileRange = false
58
+		} else if err != nil {
59
+			return err
60
+		}
61
+	}
62
+	return legacyCopy(srcFile, dstFile)
63
+}
64
+
65
+func doCopyWithFileRange(srcFile, dstFile *os.File, fileinfo os.FileInfo) error {
66
+	amountLeftToCopy := fileinfo.Size()
67
+
68
+	for amountLeftToCopy > 0 {
69
+		n, err := unix.CopyFileRange(int(srcFile.Fd()), nil, int(dstFile.Fd()), nil, int(amountLeftToCopy), 0)
70
+		if err != nil {
71
+			return err
72
+		}
73
+
74
+		amountLeftToCopy = amountLeftToCopy - int64(n)
75
+	}
76
+
77
+	return nil
78
+}
79
+
80
+func legacyCopy(srcFile io.Reader, dstFile io.Writer) error {
81
+	_, err := pools.Copy(dstFile, srcFile)
41 82
 
42 83
 	return err
43 84
 }
... ...
@@ -58,6 +109,8 @@ func copyXattr(srcPath, dstPath, attr string) error {
58 58
 // DirCopy copies or hardlinks the contents of one directory to another,
59 59
 // properly handling xattrs, and soft links
60 60
 func DirCopy(srcDir, dstDir string, copyMode Mode) error {
61
+	copyWithFileRange := true
62
+	copyWithFileClone := true
61 63
 	err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
62 64
 		if err != nil {
63 65
 			return err
... ...
@@ -85,13 +138,12 @@ func DirCopy(srcDir, dstDir string, copyMode Mode) error {
85 85
 		case 0: // Regular file
86 86
 			if copyMode == Hardlink {
87 87
 				isHardlink = true
88
-				if err := os.Link(srcPath, dstPath); err != nil {
89
-					return err
88
+				if err2 := os.Link(srcPath, dstPath); err2 != nil {
89
+					return err2
90 90
 				}
91 91
 			} else {
92
-				// Always fall back to Content copymode
93
-				if err := copyRegular(srcPath, dstPath, f.Mode()); err != nil {
94
-					return err
92
+				if err2 := copyRegular(srcPath, dstPath, f, &copyWithFileRange, &copyWithFileClone); err2 != nil {
93
+					return err2
95 94
 				}
96 95
 			}
97 96
 
98 97
new file mode 100644
... ...
@@ -0,0 +1,67 @@
0
+// +build linux
1
+
2
+package copy
3
+
4
+import (
5
+	"io/ioutil"
6
+	"math/rand"
7
+	"os"
8
+	"path/filepath"
9
+	"testing"
10
+
11
+	"github.com/docker/docker/pkg/parsers/kernel"
12
+	"github.com/stretchr/testify/assert"
13
+	"github.com/stretchr/testify/require"
14
+)
15
+
16
+func TestIsCopyFileRangeSyscallAvailable(t *testing.T) {
17
+	// Verifies:
18
+	// 1. That copyFileRangeEnabled is being set to true when copy_file_range syscall is available
19
+	// 2. That isCopyFileRangeSyscallAvailable() works on "new" kernels
20
+	v, err := kernel.GetKernelVersion()
21
+	require.NoError(t, err)
22
+
23
+	copyWithFileRange := true
24
+	copyWithFileClone := false
25
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
26
+
27
+	if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 5, Minor: 0}) < 0 {
28
+		assert.False(t, copyWithFileRange)
29
+	} else {
30
+		assert.True(t, copyWithFileRange)
31
+	}
32
+
33
+}
34
+
35
+func TestCopy(t *testing.T) {
36
+	copyWithFileRange := true
37
+	copyWithFileClone := true
38
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
39
+}
40
+
41
+func TestCopyWithoutRange(t *testing.T) {
42
+	copyWithFileRange := false
43
+	copyWithFileClone := false
44
+	doCopyTest(t, &copyWithFileRange, &copyWithFileClone)
45
+}
46
+
47
+func doCopyTest(t *testing.T, copyWithFileRange, copyWithFileClone *bool) {
48
+	dir, err := ioutil.TempDir("", "docker-copy-check")
49
+	require.NoError(t, err)
50
+	defer os.RemoveAll(dir)
51
+	srcFilename := filepath.Join(dir, "srcFilename")
52
+	dstFilename := filepath.Join(dir, "dstilename")
53
+
54
+	r := rand.New(rand.NewSource(0))
55
+	buf := make([]byte, 1024)
56
+	_, err = r.Read(buf)
57
+	require.NoError(t, err)
58
+	require.NoError(t, ioutil.WriteFile(srcFilename, buf, 0777))
59
+	fileinfo, err := os.Stat(srcFilename)
60
+	require.NoError(t, err)
61
+
62
+	require.NoError(t, copyRegular(srcFilename, dstFilename, fileinfo, copyWithFileRange, copyWithFileClone))
63
+	readBuf, err := ioutil.ReadFile(dstFilename)
64
+	require.NoError(t, err)
65
+	assert.Equal(t, buf, readBuf)
66
+}