Browse code

Add user namespace enable flag `--userns-remap` in experimental build

This adds the capability to turn on user namespace support when using an
experimental build Docker daemon binary using the `--userns-remap` flag.

Also documentation is added to the experimental docs.

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)

Phil Estes authored on 2015/10/09 00:57:30
Showing 7 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,119 @@
0
+// +build experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"fmt"
6
+	"strconv"
7
+	"strings"
8
+
9
+	"github.com/docker/docker/pkg/idtools"
10
+	flag "github.com/docker/docker/pkg/mflag"
11
+	"github.com/opencontainers/runc/libcontainer/user"
12
+)
13
+
14
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
15
+	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
16
+}
17
+
18
// defaultIDSpecifier is the --userns-remap value that asks Docker to use
// (and, if needed, create) its own remap user instead of a caller-chosen one.
const defaultIDSpecifier string = "default"

// defaultRemappedID is the user and group name substituted when
// defaultIDSpecifier is given.
const defaultRemappedID string = "dockremap"
22
+
23
+// Parse the remapped root (user namespace) option, which can be one of:
24
+//   username            - valid username from /etc/passwd
25
+//   username:groupname  - valid username; valid groupname from /etc/group
26
+//   uid                 - 32-bit unsigned int valid Linux UID value
27
+//   uid:gid             - uid value; 32-bit unsigned int Linux GID value
28
+//
29
+//  If no groupname is specified, and a username is specified, an attempt
30
+//  will be made to lookup a gid for that username as a groupname
31
+//
32
+//  If names are used, they are verified to exist in passwd/group
33
+func parseRemappedRoot(usergrp string) (string, string, error) {
34
+
35
+	var (
36
+		userID, groupID     int
37
+		username, groupname string
38
+	)
39
+
40
+	idparts := strings.Split(usergrp, ":")
41
+	if len(idparts) > 2 {
42
+		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
43
+	}
44
+
45
+	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
46
+		// must be a uid; take it as valid
47
+		userID = int(uid)
48
+		luser, err := user.LookupUid(userID)
49
+		if err != nil {
50
+			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
51
+		}
52
+		username = luser.Name
53
+		if len(idparts) == 1 {
54
+			// if the uid was numeric and no gid was specified, take the uid as the gid
55
+			groupID = userID
56
+			lgrp, err := user.LookupGid(groupID)
57
+			if err != nil {
58
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
59
+			}
60
+			groupname = lgrp.Name
61
+		}
62
+	} else {
63
+		lookupName := idparts[0]
64
+		// special case: if the user specified "default", they want Docker to create or
65
+		// use (after creation) the "dockremap" user/group for root remapping
66
+		if lookupName == defaultIDSpecifier {
67
+			lookupName = defaultRemappedID
68
+		}
69
+		luser, err := user.LookupUser(lookupName)
70
+		if err != nil && idparts[0] != defaultIDSpecifier {
71
+			// error if the name requested isn't the special "dockremap" ID
72
+			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
73
+		} else if err != nil {
74
+			// special case-- if the username == "default", then we have been asked
75
+			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
76
+			// ranges will be used for the user and group mappings in user namespaced containers
77
+			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
78
+			if err == nil {
79
+				return defaultRemappedID, defaultRemappedID, nil
80
+			}
81
+			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
82
+		}
83
+		userID = luser.Uid
84
+		username = luser.Name
85
+		if len(idparts) == 1 {
86
+			// we only have a string username, and no group specified; look up gid from username as group
87
+			group, err := user.LookupGroup(lookupName)
88
+			if err != nil {
89
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
90
+			}
91
+			groupID = group.Gid
92
+			groupname = group.Name
93
+		}
94
+	}
95
+
96
+	if len(idparts) == 2 {
97
+		// groupname or gid is separately specified and must be resolved
98
+		// to a unsigned 32-bit gid
99
+		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
100
+			// must be a gid, take it as valid
101
+			groupID = int(gid)
102
+			lgrp, err := user.LookupGid(groupID)
103
+			if err != nil {
104
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
105
+			}
106
+			groupname = lgrp.Name
107
+		} else {
108
+			// not a number; attempt a lookup
109
+			group, err := user.LookupGroup(idparts[1])
110
+			if err != nil {
111
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
112
+			}
113
+			groupID = group.Gid
114
+			groupname = idparts[1]
115
+		}
116
+	}
117
+	return username, groupname, nil
118
+}
0 119
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+// +build !experimental
1
+
2
+package daemon
3
+
4
+import flag "github.com/docker/docker/pkg/mflag"
5
+
6
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
7
+}
0 8
new file mode 100644
... ...
@@ -0,0 +1,110 @@
0
+// +build experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"fmt"
6
+	"os"
7
+	"path/filepath"
8
+	"runtime"
9
+
10
+	"github.com/Sirupsen/logrus"
11
+	"github.com/docker/docker/pkg/directory"
12
+	"github.com/docker/docker/pkg/idtools"
13
+	"github.com/docker/docker/runconfig"
14
+)
15
+
16
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
17
+	if config.ExecDriver != "native" && config.RemappedRoot != "" {
18
+		return nil, nil, fmt.Errorf("User namespace remapping is only supported with the native execdriver")
19
+	}
20
+	if runtime.GOOS == "windows" && config.RemappedRoot != "" {
21
+		return nil, nil, fmt.Errorf("User namespaces are not supported on Windows")
22
+	}
23
+
24
+	// if the daemon was started with remapped root option, parse
25
+	// the config option to the int uid,gid values
26
+	var (
27
+		uidMaps, gidMaps []idtools.IDMap
28
+	)
29
+	if config.RemappedRoot != "" {
30
+		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
31
+		if err != nil {
32
+			return nil, nil, err
33
+		}
34
+		if username == "root" {
35
+			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
36
+			// effectively
37
+			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
38
+			return uidMaps, gidMaps, nil
39
+		}
40
+		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
41
+		// update remapped root setting now that we have resolved them to actual names
42
+		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
43
+
44
+		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
45
+		if err != nil {
46
+			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
47
+		}
48
+	}
49
+	return uidMaps, gidMaps, nil
50
+}
51
+
52
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
53
+	// the main docker root needs to be accessible by all users, as user namespace support
54
+	// will create subdirectories owned by either a) the real system root (when no remapping
55
+	// is setup) or b) the remapped root host ID (when --root=uid:gid is used)
56
+	// for "first time" users of user namespaces, we need to migrate the current directory
57
+	// contents to the "0.0" (root == root "namespace" daemon root)
58
+	nsRoot := "0.0"
59
+	if _, err := os.Stat(rootDir); err == nil {
60
+		// root current exists; we need to check for a prior migration
61
+		if _, err := os.Stat(filepath.Join(rootDir, nsRoot)); err != nil && os.IsNotExist(err) {
62
+			// need to migrate current root to "0.0" subroot
63
+			// 1. create non-usernamespaced root as "0.0"
64
+			if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil {
65
+				return fmt.Errorf("Cannot create daemon root %q: %v", filepath.Join(rootDir, nsRoot), err)
66
+			}
67
+			// 2. move current root content to "0.0" new subroot
68
+			if err := directory.MoveToSubdir(rootDir, nsRoot); err != nil {
69
+				return fmt.Errorf("Cannot migrate current daemon root %q for user namespaces: %v", rootDir, err)
70
+			}
71
+			// 3. chmod outer root to 755
72
+			if chmodErr := os.Chmod(rootDir, 0755); chmodErr != nil {
73
+				return chmodErr
74
+			}
75
+		}
76
+	} else if os.IsNotExist(err) {
77
+		// no root exists yet, create it 0755 with root:root ownership
78
+		if err := os.MkdirAll(rootDir, 0755); err != nil {
79
+			return err
80
+		}
81
+		// create the "0.0" subroot (so no future "migration" happens of the root)
82
+		if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil {
83
+			return err
84
+		}
85
+	}
86
+
87
+	// for user namespaces we will create a subtree underneath the specified root
88
+	// with any/all specified remapped root uid/gid options on the daemon creating
89
+	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
90
+	// `chdir()` to work for containers namespaced to that uid/gid)
91
+	if config.RemappedRoot != "" {
92
+		nsRoot = fmt.Sprintf("%d.%d", rootUID, rootGID)
93
+	}
94
+	config.Root = filepath.Join(rootDir, nsRoot)
95
+	logrus.Debugf("Creating actual daemon root: %s", config.Root)
96
+
97
+	// Create the root directory if it doesn't exists
98
+	if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
99
+		return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
100
+	}
101
+	return nil
102
+}
103
+
104
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
105
+	if hostConfig.Privileged && daemon.config().RemappedRoot != "" {
106
+		return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
107
+	}
108
+	return nil, nil
109
+}
0 110
new file mode 100644
... ...
@@ -0,0 +1,28 @@
0
+// +build !experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"os"
6
+
7
+	"github.com/docker/docker/pkg/idtools"
8
+	"github.com/docker/docker/pkg/system"
9
+	"github.com/docker/docker/runconfig"
10
+)
11
+
12
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
13
+	return nil, nil, nil
14
+}
15
+
16
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
17
+	config.Root = rootDir
18
+	// Create the root directory if it doesn't exists
19
+	if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
20
+		return err
21
+	}
22
+	return nil
23
+}
24
+
25
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
26
+	return nil, nil
27
+}
... ...
@@ -71,6 +71,9 @@ to build a Docker binary with the experimental features enabled:
71 71
 
72 72
 ## Current experimental features
73 73
 
74
+ * [External graphdriver plugins](plugins_graphdriver.md)
75
+ * [User namespaces](userns.md)
76
+
74 77
 ## How to comment on an experimental feature
75 78
 
76 79
 Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.  
77 80
new file mode 100644
... ...
@@ -0,0 +1,120 @@
0
+# Experimental: User namespace support
1
+
2
+Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
3
+a process--and therefore a container--to have a unique range of user and
4
+group IDs which are outside the traditional user and group range utilized by
5
+the host system. Potentially the most important security improvement is that,
6
+by default, container processes running as the `root` user will have expected
7
+administrative privilege (with some restrictions) inside the container but will
8
+effectively be mapped to an unprivileged `uid` on the host.
9
+
10
+In this experimental phase, the Docker daemon creates a single daemon-wide mapping
11
+for all containers running on the same engine instance. The mappings will
12
+utilize the existing subordinate user and group ID feature available on all modern
13
+Linux distributions.
14
+The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and 
15
+[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
16
+read for the user, and optional group, specified to the `--userns-remap` 
17
+parameter.  If you do not wish to specify your own user and/or group, you can 
18
+provide `default` as the value to this flag, and a user will be created on your behalf
19
+and provided subordinate uid and gid ranges. This default user will be named
20
+`dockremap`, and entries will be created for it in `/etc/passwd` and 
21
+`/etc/group` using your distro's standard user and group creation tools.
22
+
23
+> **Note**: The single mapping per-daemon restriction exists for this experimental
24
+> phase because Docker shares image layers from its local cache across all
25
+> containers running on the engine instance.  Since file ownership must be
26
+> the same for all containers sharing the same layer content, the decision
27
+> was made to map the file ownership on `docker pull` to the daemon's user and
28
+> group mappings so that there is no delay for running containers once the
29
+> content is downloaded--exactly the same performance characteristics as with
30
+> user namespaces disabled.
31
+
32
+## Starting the daemon with user namespaces enabled
33
+To enable this experimental user namespace support for a Docker daemon instance,
34
+start the daemon with the aforementioned `--userns-remap` flag, which accepts
35
+values in the following formats:
36
+
37
+ - uid
38
+ - uid:gid
39
+ - username
40
+ - username:groupname
41
+
42
+If numeric IDs are provided, translation back to valid user or group names
43
+will occur so that the subordinate uid and gid information can be read, given
44
+these resources are name-based, not id-based.  If the numeric ID information
45
+provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
46
+startup will fail with an error message.
47
+
48
+*An example: starting with default Docker user management:*
49
+
50
+```
51
+     $ docker daemon --userns-remap=default
52
+```    
53
+In this case, Docker will create--or find the existing--user and group
54
+named `dockremap`. If the user is created, and the Linux distribution has
55
+appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
56
+with a contiguous 65536 length range of subordinate user and group IDs, starting
57
+at an offset based on prior entries in those files.  For example, Ubuntu will
58
+create the following range, based on an existing user already having the first
59
+65536 range:
60
+
61
+```
62
+     $ cat /etc/subuid
63
+     user1:100000:65536
64
+     dockremap:165536:65536
65
+```
66
+
67
+> **Note:** On a fresh Fedora install, we found that we had to `touch` the
68
+> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
69
+> were created.  Once these files existed, range assignment on user creation
70
+> worked properly.
71
+
72
+If you have a preferred/self-managed user with subordinate ID mappings already
73
+configured, you can provide that username or uid to the `--userns-remap` flag.
74
+If you have a group that doesn't match the username, you may provide the `gid`
75
+or group name as well; otherwise the username will be used as the group name
76
+when querying the system for the subordinate group ID range.
77
+
78
+## Detailed information on `subuid`/`subgid` ranges
79
+
80
+Given there may be advanced use of the subordinate ID ranges by power users, we will
81
+describe how the Docker daemon uses the range entries within these files under the
82
+current experimental user namespace support.
83
+
84
+The simplest case exists where only one contiguous range is defined for the
85
+provided user or group. In this case, Docker will use that entire contiguous
86
+range for the mapping of host uids and gids to the container process.  This 
87
+means that the first ID in the range will be the remapped root user, and the
88
+IDs above that initial ID will map host ID 1 through the end of the range.
89
+
90
+From the example `/etc/subuid` content shown above, that means the remapped root
91
+user would be uid 165536.
92
+
93
+If the system administrator has set up multiple ranges for a single user or
94
+group, the Docker daemon will read all the available ranges and use the
95
+following algorithm to create the mapping ranges:
96
+
97
+1. The ranges will be sorted by *start ID* ascending
98
+2. Maps will be created from each range where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on.  This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
99
+3. Range segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
100
+
101
+## User namespace known restrictions
102
+
103
+The following standard Docker features are currently incompatible when
104
+running a Docker daemon with experimental user namespaces enabled:
105
+
106
+ - sharing namespaces with the host (--pid=host, --net=host, etc.)
107
+ - sharing namespaces with other containers (--net=container:*other*)
108
+ - A `--read-only` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
109
+ - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
110
+ - Using `--privileged` mode containers
111
+ - Using the lxc execdriver (only the `native` execdriver is enabled to use user namespaces)
112
+ - volume use without pre-arranging proper file ownership in mounted volumes
113
+
114
+Additionally, while the `root` user inside a user namespaced container
115
+process has many of the privileges of the administrative root user, the
116
+following operations will fail:
117
+
118
+ - Use of `mknod` - permission is denied for device creation by the container root
119
+ - others will be listed here when fully tested
... ...
@@ -3,8 +3,15 @@
3 3
 package main
4 4
 
5 5
 import (
6
+	"fmt"
7
+	"io/ioutil"
8
+	"os"
9
+	"os/exec"
10
+	"path/filepath"
11
+	"strconv"
6 12
 	"strings"
7 13
 
14
+	"github.com/docker/docker/pkg/system"
8 15
 	"github.com/go-check/check"
9 16
 )
10 17
 
... ...
@@ -21,3 +28,57 @@ func (s *DockerSuite) TestExperimentalVersion(c *check.C) {
21 21
 		c.Fatalf("docker version did not contain experimental: %s", out)
22 22
 	}
23 23
 }
24
+
25
+// user namespaces test: run daemon with remapped root setting
26
+// 1. validate uid/gid maps are set properly
27
+// 2. verify that files created are owned by remapped root
28
+func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
29
+	testRequires(c, NativeExecDriver)
30
+	testRequires(c, SameHostDaemon)
31
+
32
+	c.Assert(s.d.StartWithBusybox("--userns-remap", "default"), check.IsNil)
33
+
34
+	tmpDir, err := ioutil.TempDir("", "userns")
35
+	if err != nil {
36
+		c.Fatal(err)
37
+	}
38
+	defer os.RemoveAll(tmpDir)
39
+
40
+	// we need to find the uid and gid of the remapped root from the daemon's root dir info
41
+	uidgid := strings.Split(filepath.Base(s.d.root), ".")
42
+	c.Assert(len(uidgid), check.Equals, 2, check.Commentf("Should have gotten uid/gid strings from root dirname: %s", filepath.Base(s.d.root)))
43
+	uid, err := strconv.Atoi(uidgid[0])
44
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse uid: %v", err))
45
+	gid, err := strconv.Atoi(uidgid[1])
46
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse gid: %v", err))
47
+
48
+	//writeable by the remapped root UID/GID pair
49
+	c.Assert(os.Chown(tmpDir, uid, gid), check.IsNil)
50
+
51
+	out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
52
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
53
+
54
+	pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
55
+	if err != nil {
56
+		c.Fatalf("Could not inspect running container: out: %q; err: %v", pid, err)
57
+	}
58
+	// check the uid and gid maps for the PID to ensure root is remapped
59
+	// (cmd = cat /proc/<pid>/uid_map | grep -E '0\s+9999\s+1')
60
+	out, rc1, err := runCommandPipelineWithOutput(
61
+		exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/uid_map"),
62
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", uid)))
63
+	c.Assert(rc1, check.Equals, 0, check.Commentf("Didn't match uid_map: output: %s", out))
64
+
65
+	out, rc2, err := runCommandPipelineWithOutput(
66
+		exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/gid_map"),
67
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", gid)))
68
+	c.Assert(rc2, check.Equals, 0, check.Commentf("Didn't match gid_map: output: %s", out))
69
+
70
+	// check that the touched file is owned by remapped uid:gid
71
+	stat, err := system.Stat(filepath.Join(tmpDir, "testfile"))
72
+	if err != nil {
73
+		c.Fatal(err)
74
+	}
75
+	c.Assert(stat.UID(), check.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
76
+	c.Assert(stat.Gid(), check.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
77
+}