Browse code

Merge pull request #19187 from estesp/lets-do-this

User namespaces: graduate from experimental

Sebastiaan van Stijn authored on 2016/01/13 02:34:19
Showing 12 changed files
... ...
@@ -2,118 +2,7 @@
2 2
 
3 3
 package daemon
4 4
 
5
-import (
6
-	"fmt"
7
-	"strconv"
8
-	"strings"
9
-
10
-	"github.com/docker/docker/pkg/idtools"
11
-	flag "github.com/docker/docker/pkg/mflag"
12
-	"github.com/opencontainers/runc/libcontainer/user"
13
-)
5
+import flag "github.com/docker/docker/pkg/mflag"
14 6
 
15 7
 func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
16
-	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
17
-}
18
-
19
-const (
20
-	defaultIDSpecifier string = "default"
21
-	defaultRemappedID  string = "dockremap"
22
-)
23
-
24
-// Parse the remapped root (user namespace) option, which can be one of:
25
-//   username            - valid username from /etc/passwd
26
-//   username:groupname  - valid username; valid groupname from /etc/group
27
-//   uid                 - 32-bit unsigned int valid Linux UID value
28
-//   uid:gid             - uid value; 32-bit unsigned int Linux GID value
29
-//
30
-//  If no groupname is specified, and a username is specified, an attempt
31
-//  will be made to lookup a gid for that username as a groupname
32
-//
33
-//  If names are used, they are verified to exist in passwd/group
34
-func parseRemappedRoot(usergrp string) (string, string, error) {
35
-
36
-	var (
37
-		userID, groupID     int
38
-		username, groupname string
39
-	)
40
-
41
-	idparts := strings.Split(usergrp, ":")
42
-	if len(idparts) > 2 {
43
-		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
44
-	}
45
-
46
-	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
47
-		// must be a uid; take it as valid
48
-		userID = int(uid)
49
-		luser, err := user.LookupUid(userID)
50
-		if err != nil {
51
-			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
52
-		}
53
-		username = luser.Name
54
-		if len(idparts) == 1 {
55
-			// if the uid was numeric and no gid was specified, take the uid as the gid
56
-			groupID = userID
57
-			lgrp, err := user.LookupGid(groupID)
58
-			if err != nil {
59
-				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
60
-			}
61
-			groupname = lgrp.Name
62
-		}
63
-	} else {
64
-		lookupName := idparts[0]
65
-		// special case: if the user specified "default", they want Docker to create or
66
-		// use (after creation) the "dockremap" user/group for root remapping
67
-		if lookupName == defaultIDSpecifier {
68
-			lookupName = defaultRemappedID
69
-		}
70
-		luser, err := user.LookupUser(lookupName)
71
-		if err != nil && idparts[0] != defaultIDSpecifier {
72
-			// error if the name requested isn't the special "dockremap" ID
73
-			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
74
-		} else if err != nil {
75
-			// special case-- if the username == "default", then we have been asked
76
-			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
77
-			// ranges will be used for the user and group mappings in user namespaced containers
78
-			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
79
-			if err == nil {
80
-				return defaultRemappedID, defaultRemappedID, nil
81
-			}
82
-			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
83
-		}
84
-		userID = luser.Uid
85
-		username = luser.Name
86
-		if len(idparts) == 1 {
87
-			// we only have a string username, and no group specified; look up gid from username as group
88
-			group, err := user.LookupGroup(lookupName)
89
-			if err != nil {
90
-				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
91
-			}
92
-			groupID = group.Gid
93
-			groupname = group.Name
94
-		}
95
-	}
96
-
97
-	if len(idparts) == 2 {
98
-		// groupname or gid is separately specified and must be resolved
99
-		// to a unsigned 32-bit gid
100
-		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
101
-			// must be a gid, take it as valid
102
-			groupID = int(gid)
103
-			lgrp, err := user.LookupGid(groupID)
104
-			if err != nil {
105
-				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
106
-			}
107
-			groupname = lgrp.Name
108
-		} else {
109
-			// not a number; attempt a lookup
110
-			group, err := user.LookupGroup(idparts[1])
111
-			if err != nil {
112
-				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
113
-			}
114
-			groupID = group.Gid
115
-			groupname = idparts[1]
116
-		}
117
-	}
118
-	return username, groupname, nil
119 8
 }
... ...
@@ -79,6 +79,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
79 79
 	cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header"))
80 80
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
81 81
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
82
+	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
82 83
 
83 84
 	config.attachExperimentalFlags(cmd, usageFn)
84 85
 }
... ...
@@ -2,88 +2,8 @@
2 2
 
3 3
 package daemon
4 4
 
5
-import (
6
-	"fmt"
7
-	"os"
8
-	"path/filepath"
9
-	"runtime"
10
-
11
-	"github.com/Sirupsen/logrus"
12
-	"github.com/docker/docker/pkg/idtools"
13
-	"github.com/docker/engine-api/types/container"
14
-)
15
-
16
-func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
17
-	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
18
-		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
19
-	}
20
-
21
-	// if the daemon was started with remapped root option, parse
22
-	// the config option to the int uid,gid values
23
-	var (
24
-		uidMaps, gidMaps []idtools.IDMap
25
-	)
26
-	if config.RemappedRoot != "" {
27
-		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
28
-		if err != nil {
29
-			return nil, nil, err
30
-		}
31
-		if username == "root" {
32
-			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
33
-			// effectively
34
-			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
35
-			return uidMaps, gidMaps, nil
36
-		}
37
-		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
38
-		// update remapped root setting now that we have resolved them to actual names
39
-		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
40
-
41
-		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
42
-		if err != nil {
43
-			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
44
-		}
45
-	}
46
-	return uidMaps, gidMaps, nil
47
-}
48
-
49
-func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
50
-	config.Root = rootDir
51
-	// the docker root metadata directory needs to have execute permissions for all users (o+x)
52
-	// so that syscalls executing as non-root, operating on subdirectories of the graph root
53
-	// (e.g. mounted layers of a container) can traverse this path.
54
-	// The user namespace support will create subdirectories for the remapped root host uid:gid
55
-	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
56
-	// layer content subtrees.
57
-	if _, err := os.Stat(rootDir); err == nil {
58
-		// root current exists; verify the access bits are correct by setting them
59
-		if err = os.Chmod(rootDir, 0701); err != nil {
60
-			return err
61
-		}
62
-	} else if os.IsNotExist(err) {
63
-		// no root exists yet, create it 0701 with root:root ownership
64
-		if err := os.MkdirAll(rootDir, 0701); err != nil {
65
-			return err
66
-		}
67
-	}
68
-
69
-	// if user namespaces are enabled we will create a subtree underneath the specified root
70
-	// with any/all specified remapped root uid/gid options on the daemon creating
71
-	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
72
-	// `chdir()` to work for containers namespaced to that uid/gid)
73
-	if config.RemappedRoot != "" {
74
-		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
75
-		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
76
-		// Create the root directory if it doesn't exists
77
-		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
78
-			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
79
-		}
80
-	}
81
-	return nil
82
-}
5
+import "github.com/docker/engine-api/types/container"
83 6
 
84 7
 func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
85
-	if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" {
86
-		return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
87
-	}
88 8
 	return nil, nil
89 9
 }
... ...
@@ -2,26 +2,7 @@
2 2
 
3 3
 package daemon
4 4
 
5
-import (
6
-	"os"
7
-
8
-	"github.com/docker/docker/pkg/idtools"
9
-	"github.com/docker/docker/pkg/system"
10
-	"github.com/docker/engine-api/types/container"
11
-)
12
-
13
-func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
14
-	return nil, nil, nil
15
-}
16
-
17
-func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
18
-	config.Root = rootDir
19
-	// Create the root directory if it doesn't exists
20
-	if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
21
-		return err
22
-	}
23
-	return nil
24
-}
5
+import "github.com/docker/engine-api/types/container"
25 6
 
26 7
 func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) {
27 8
 	return nil, nil
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"net"
8 8
 	"os"
9 9
 	"path/filepath"
10
+	"runtime"
10 11
 	"strconv"
11 12
 	"strings"
12 13
 	"syscall"
... ...
@@ -33,6 +34,7 @@ import (
33 33
 	"github.com/docker/libnetwork/types"
34 34
 	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
35 35
 	"github.com/opencontainers/runc/libcontainer/label"
36
+	"github.com/opencontainers/runc/libcontainer/user"
36 37
 )
37 38
 
38 39
 const (
... ...
@@ -42,6 +44,9 @@ const (
42 42
 	platformSupported = true
43 43
 	// It's not kernel limit, we want this 4M limit to supply a reasonable functional container
44 44
 	linuxMinMemory = 4194304
45
+	// constants for remapped root settings
46
+	defaultIDSpecifier string = "default"
47
+	defaultRemappedID  string = "dockremap"
45 48
 )
46 49
 
47 50
 func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
... ...
@@ -375,6 +380,24 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
375 375
 		warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.")
376 376
 		logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
377 377
 	}
378
+	// check for various conflicting options with user namespaces
379
+	if daemon.configStore.RemappedRoot != "" {
380
+		if hostConfig.Privileged {
381
+			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces.")
382
+		}
383
+		if hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsContainer() {
384
+			return warnings, fmt.Errorf("Cannot share the host or a container's network namespace when user namespaces are enabled.")
385
+		}
386
+		if hostConfig.PidMode.IsHost() {
387
+			return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled.")
388
+		}
389
+		if hostConfig.IpcMode.IsContainer() {
390
+			return warnings, fmt.Errorf("Cannot share a container's IPC namespace when user namespaces are enabled.")
391
+		}
392
+		if hostConfig.ReadonlyRootfs {
393
+			return warnings, fmt.Errorf("Cannot use the --read-only option when user namespaces are enabled.")
394
+		}
395
+	}
378 396
 	return warnings, nil
379 397
 }
380 398
 
... ...
@@ -674,6 +697,171 @@ func setupInitLayer(initLayer string, rootUID, rootGID int) error {
674 674
 	return nil
675 675
 }
676 676
 
677
+// Parse the remapped root (user namespace) option, which can be one of:
678
+//   username            - valid username from /etc/passwd
679
+//   username:groupname  - valid username; valid groupname from /etc/group
680
+//   uid                 - 32-bit unsigned int valid Linux UID value
681
+//   uid:gid             - uid value; 32-bit unsigned int Linux GID value
682
+//
683
+//  If no groupname is specified, and a username is specified, an attempt
684
+//  will be made to lookup a gid for that username as a groupname
685
+//
686
+//  If names are used, they are verified to exist in passwd/group
687
+func parseRemappedRoot(usergrp string) (string, string, error) {
688
+
689
+	var (
690
+		userID, groupID     int
691
+		username, groupname string
692
+	)
693
+
694
+	idparts := strings.Split(usergrp, ":")
695
+	if len(idparts) > 2 {
696
+		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
697
+	}
698
+
699
+	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
700
+		// must be a uid; take it as valid
701
+		userID = int(uid)
702
+		luser, err := user.LookupUid(userID)
703
+		if err != nil {
704
+			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
705
+		}
706
+		username = luser.Name
707
+		if len(idparts) == 1 {
708
+			// if the uid was numeric and no gid was specified, take the uid as the gid
709
+			groupID = userID
710
+			lgrp, err := user.LookupGid(groupID)
711
+			if err != nil {
712
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
713
+			}
714
+			groupname = lgrp.Name
715
+		}
716
+	} else {
717
+		lookupName := idparts[0]
718
+		// special case: if the user specified "default", they want Docker to create or
719
+		// use (after creation) the "dockremap" user/group for root remapping
720
+		if lookupName == defaultIDSpecifier {
721
+			lookupName = defaultRemappedID
722
+		}
723
+		luser, err := user.LookupUser(lookupName)
724
+		if err != nil && idparts[0] != defaultIDSpecifier {
725
+			// error if the name requested isn't the special "dockremap" ID
726
+			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
727
+		} else if err != nil {
728
+			// special case-- if the username == "default", then we have been asked
729
+			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
730
+			// ranges will be used for the user and group mappings in user namespaced containers
731
+			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
732
+			if err == nil {
733
+				return defaultRemappedID, defaultRemappedID, nil
734
+			}
735
+			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
736
+		}
737
+		userID = luser.Uid
738
+		username = luser.Name
739
+		if len(idparts) == 1 {
740
+			// we only have a string username, and no group specified; look up gid from username as group
741
+			group, err := user.LookupGroup(lookupName)
742
+			if err != nil {
743
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
744
+			}
745
+			groupID = group.Gid
746
+			groupname = group.Name
747
+		}
748
+	}
749
+
750
+	if len(idparts) == 2 {
751
+		// groupname or gid is separately specified and must be resolved
752
+		// to a unsigned 32-bit gid
753
+		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
754
+			// must be a gid, take it as valid
755
+			groupID = int(gid)
756
+			lgrp, err := user.LookupGid(groupID)
757
+			if err != nil {
758
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
759
+			}
760
+			groupname = lgrp.Name
761
+		} else {
762
+			// not a number; attempt a lookup
763
+			group, err := user.LookupGroup(idparts[1])
764
+			if err != nil {
765
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
766
+			}
767
+			groupID = group.Gid
768
+			groupname = idparts[1]
769
+		}
770
+	}
771
+	return username, groupname, nil
772
+}
773
+
774
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
775
+	if runtime.GOOS != "linux" && config.RemappedRoot != "" {
776
+		return nil, nil, fmt.Errorf("User namespaces are only supported on Linux")
777
+	}
778
+
779
+	// if the daemon was started with remapped root option, parse
780
+	// the config option to the int uid,gid values
781
+	var (
782
+		uidMaps, gidMaps []idtools.IDMap
783
+	)
784
+	if config.RemappedRoot != "" {
785
+		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
786
+		if err != nil {
787
+			return nil, nil, err
788
+		}
789
+		if username == "root" {
790
+			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
791
+			// effectively
792
+			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
793
+			return uidMaps, gidMaps, nil
794
+		}
795
+		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
796
+		// update remapped root setting now that we have resolved them to actual names
797
+		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
798
+
799
+		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
800
+		if err != nil {
801
+			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
802
+		}
803
+	}
804
+	return uidMaps, gidMaps, nil
805
+}
806
+
807
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
808
+	config.Root = rootDir
809
+	// the docker root metadata directory needs to have execute permissions for all users (o+x)
810
+	// so that syscalls executing as non-root, operating on subdirectories of the graph root
811
+	// (e.g. mounted layers of a container) can traverse this path.
812
+	// The user namespace support will create subdirectories for the remapped root host uid:gid
813
+	// pair owned by that same uid:gid pair for proper write access to those needed metadata and
814
+	// layer content subtrees.
815
+	if _, err := os.Stat(rootDir); err == nil {
816
+		// root current exists; verify the access bits are correct by setting them
817
+		if err = os.Chmod(rootDir, 0701); err != nil {
818
+			return err
819
+		}
820
+	} else if os.IsNotExist(err) {
821
+		// no root exists yet, create it 0701 with root:root ownership
822
+		if err := os.MkdirAll(rootDir, 0701); err != nil {
823
+			return err
824
+		}
825
+	}
826
+
827
+	// if user namespaces are enabled we will create a subtree underneath the specified root
828
+	// with any/all specified remapped root uid/gid options on the daemon creating
829
+	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
830
+	// `chdir()` to work for containers namespaced to that uid/gid)
831
+	if config.RemappedRoot != "" {
832
+		config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID))
833
+		logrus.Debugf("Creating user namespaced daemon root: %s", config.Root)
834
+		// Create the root directory if it doesn't exists
835
+		if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
836
+			return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
837
+		}
838
+	}
839
+	return nil
840
+}
841
+
677 842
 // registerLinks writes the links to a file.
678 843
 func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error {
679 844
 	if hostConfig == nil {
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"encoding/json"
5 5
 	"errors"
6 6
 	"fmt"
7
+	"os"
7 8
 	"path/filepath"
8 9
 	"runtime"
9 10
 	"strings"
... ...
@@ -18,6 +19,7 @@ import (
18 18
 	containertypes "github.com/docker/engine-api/types/container"
19 19
 	// register the windows graph driver
20 20
 	"github.com/docker/docker/daemon/graphdriver/windows"
21
+	"github.com/docker/docker/pkg/idtools"
21 22
 	"github.com/docker/docker/pkg/system"
22 23
 	"github.com/docker/libnetwork"
23 24
 	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
... ...
@@ -135,6 +137,19 @@ func (daemon *Daemon) cleanupMounts() error {
135 135
 	return nil
136 136
 }
137 137
 
138
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
139
+	return nil, nil, nil
140
+}
141
+
142
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
143
+	config.Root = rootDir
144
+	// Create the root directory if it doesn't exists
145
+	if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
146
+		return err
147
+	}
148
+	return nil
149
+}
150
+
138 151
 // conditionalMountOnStart is a platform specific helper function during the
139 152
 // container start to call mount.
140 153
 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
... ...
@@ -62,6 +62,7 @@ weight = -1
62 62
       --tlscert="~/.docker/cert.pem"         Path to TLS certificate file
63 63
       --tlskey="~/.docker/key.pem"           Path to TLS key file
64 64
       --tlsverify                            Use TLS and verify the remote
65
+      --userns-remap="default"               Enable user namespace remapping
65 66
       --userland-proxy=true                  Use userland proxy for loopback traffic
66 67
 
67 68
 Options with [] may be specified multiple times.
... ...
@@ -632,6 +633,133 @@ For information about how to create an authorization plugin, see [authorization
632 632
 plugin](../../extend/authorization.md) section in the Docker extend section of this documentation.
633 633
 
634 634
 
635
+## Daemon user namespace options
636
+
637
+The Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
638
+a process, and therefore a container, to have a unique range of user and
639
+group IDs which are outside the traditional user and group range utilized by
640
+the host system. Potentially the most important security improvement is that,
641
+by default, container processes running as the `root` user will have expected
642
+administrative privilege (with some restrictions) inside the container but will
643
+effectively be mapped to an unprivileged `uid` on the host.
644
+
645
+When user namespace support is enabled, Docker creates a single daemon-wide mapping
646
+for all containers running on the same engine instance. The mappings will
647
+utilize the existing subordinate user and group ID feature available on all modern
648
+Linux distributions.
649
+The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
650
+[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
651
+read for the user, and optional group, specified to the `--userns-remap`
652
+parameter.  If you do not wish to specify your own user and/or group, you can
653
+provide `default` as the value to this flag, and a user will be created on your behalf
654
+and provided subordinate uid and gid ranges. This default user will be named
655
+`dockremap`, and entries will be created for it in `/etc/passwd` and
656
+`/etc/group` using your distro's standard user and group creation tools.
657
+
658
+> **Note**: The single mapping per-daemon restriction is in place for now
659
+> because Docker shares image layers from its local cache across all
660
+> containers running on the engine instance.  Since file ownership must be
661
+> the same for all containers sharing the same layer content, the decision
662
+> was made to map the file ownership on `docker pull` to the daemon's user and
663
+> group mappings so that there is no delay for running containers once the
664
+> content is downloaded. This design preserves the same performance for `docker
665
+> pull`, `docker push`, and container startup as users expect with
666
+> user namespaces disabled.
667
+
668
+### Starting the daemon with user namespaces enabled
669
+
670
+To enable user namespace support, start the daemon with the
671
+`--userns-remap` flag, which accepts values in the following formats:
672
+
673
+ - uid
674
+ - uid:gid
675
+ - username
676
+ - username:groupname
677
+
678
+If numeric IDs are provided, translation back to valid user or group names
679
+will occur so that the subordinate uid and gid information can be read, given
680
+these resources are name-based, not id-based.  If the numeric ID information
681
+provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
682
+startup will fail with an error message.
683
+
684
+*Example: starting with default Docker user management:*
685
+
686
+```
687
+     $ docker daemon --userns-remap=default
688
+```    
689
+When `default` is provided, Docker will create - or find the existing - user and group
690
+named `dockremap`. If the user is created, and the Linux distribution has
691
+appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
692
+with a contiguous 65536 length range of subordinate user and group IDs, starting
693
+at an offset based on prior entries in those files.  For example, Ubuntu will
694
+create the following range, based on an existing user named `user1` already owning
695
+the first 65536 range:
696
+
697
+```
698
+     $ cat /etc/subuid
699
+     user1:100000:65536
700
+     dockremap:165536:65536
701
+```
702
+
703
+> **Note:** On a fresh Fedora install, we had to `touch` the
704
+> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
705
+> were created.  Once these files existed, range assignment on user creation
706
+> worked properly.
707
+
708
+If you have a preferred/self-managed user with subordinate ID mappings already
709
+configured, you can provide that username or uid to the `--userns-remap` flag.
710
+If you have a group that doesn't match the username, you may provide the `gid`
711
+or group name as well; otherwise the username will be used as the group name
712
+when querying the system for the subordinate group ID range.
713
+
714
+### Detailed information on `subuid`/`subgid` ranges
715
+
716
+Given potential advanced use of the subordinate ID ranges by power users, the 
717
+following paragraphs define how the Docker daemon currently uses the range entries
718
+found within the subordinate range files.
719
+
720
+The simplest case is that only one contiguous range is defined for the
721
+provided user or group. In this case, Docker will use that entire contiguous
722
+range for the mapping of host uids and gids to the container process.  This
723
+means that the first ID in the range will be the remapped root user, and the
724
+IDs above that initial ID will map host ID 1 through the end of the range.
725
+
726
+From the example `/etc/subuid` content shown above, the remapped root
727
+user would be uid 165536.
728
+
729
+If the system administrator has set up multiple ranges for a single user or
730
+group, the Docker daemon will read all the available ranges and use the
731
+following algorithm to create the mapping ranges:
732
+
733
+1. The range segments found for the particular user will be sorted by *start ID* ascending.
734
+2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1099 -> 99 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 100 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
735
+3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `/proc/self/gid_map`.
736
+
737
+### User namespace known restrictions
738
+
739
+The following standard Docker features are currently incompatible when
740
+running a Docker daemon with user namespaces enabled:
741
+
742
+ - sharing PID or NET namespaces with the host (`--pid=host` or `--net=host`)
743
+ - sharing a network namespace with an existing container (`--net=container:*other*`)
744
+ - sharing an IPC namespace with an existing container (`--ipc=container:*other*`)
745
+ - A `--read-only` container filesystem (this is a Linux kernel restriction against remounting with modified flags of a currently mounted filesystem when inside a user namespace)
746
+ - external (volume or graph) drivers which are unaware/incapable of using daemon user mappings
747
+ - Using `--privileged` mode flag on `docker run`
748
+
749
+In general, user namespaces are an advanced feature and will require
750
+coordination with other capabilities. For example, if volumes are mounted from
751
+the host, file ownership will have to be pre-arranged if the user or
752
+administrator wishes the containers to have expected access to the volume
753
+contents.
754
+
755
+Finally, while the `root` user inside a user namespaced container process has
756
+many of the expected admin privileges that go along with being the superuser, the
757
+Linux kernel has restrictions based on internal knowledge that this is a user namespaced
758
+process. The most notable restriction that we are aware of at this time is the
759
+inability to use `mknod`. Permission will be denied for device creation even as
760
+container `root` inside a user namespace.
761
+
635 762
 ## Miscellaneous options
636 763
 
637 764
 IP masquerading uses address translation to allow containers without a public
... ...
@@ -72,7 +72,7 @@ to build a Docker binary with the experimental features enabled:
72 72
 ## Current experimental features
73 73
 
74 74
  * [External graphdriver plugins](plugins_graphdriver.md)
75
- * [User namespaces](userns.md)
75
+ * The user namespaces feature has graduated from experimental.
76 76
 
77 77
 ## How to comment on an experimental feature
78 78
 
79 79
deleted file mode 100644
... ...
@@ -1,119 +0,0 @@
1
-# Experimental: User namespace support
2
-
3
-Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
4
-a process--and therefore a container--to have a unique range of user and
5
-group IDs which are outside the traditional user and group range utilized by
6
-the host system. Potentially the most important security improvement is that,
7
-by default, container processes running as the `root` user will have expected
8
-administrative privilege (with some restrictions) inside the container but will
9
-effectively be mapped to an unprivileged `uid` on the host.
10
-
11
-In this experimental phase, the Docker daemon creates a single daemon-wide mapping
12
-for all containers running on the same engine instance. The mappings will
13
-utilize the existing subordinate user and group ID feature available on all modern
14
-Linux distributions.
15
-The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and
16
-[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
17
-read for the user, and optional group, specified to the `--userns-remap`
18
-parameter.  If you do not wish to specify your own user and/or group, you can
19
-provide `default` as the value to this flag, and a user will be created on your behalf
20
-and provided subordinate uid and gid ranges. This default user will be named
21
-`dockremap`, and entries will be created for it in `/etc/passwd` and
22
-`/etc/group` using your distro's standard user and group creation tools.
23
-
24
-> **Note**: The single mapping per-daemon restriction exists for this experimental
25
-> phase because Docker shares image layers from its local cache across all
26
-> containers running on the engine instance.  Since file ownership must be
27
-> the same for all containers sharing the same layer content, the decision
28
-> was made to map the file ownership on `docker pull` to the daemon's user and
29
-> group mappings so that there is no delay for running containers once the
30
-> content is downloaded--exactly the same performance characteristics as with
31
-> user namespaces disabled.
32
-
33
-## Starting the daemon with user namespaces enabled
34
-To enable this experimental user namespace support for a Docker daemon instance,
35
-start the daemon with the aforementioned `--userns-remap` flag, which accepts
36
-values in the following formats:
37
-
38
- - uid
39
- - uid:gid
40
- - username
41
- - username:groupname
42
-
43
-If numeric IDs are provided, translation back to valid user or group names
44
-will occur so that the subordinate uid and gid information can be read, given
45
-these resources are name-based, not id-based.  If the numeric ID information
46
-provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
47
-startup will fail with an error message.
48
-
49
-*An example: starting with default Docker user management:*
50
-
51
-```
52
-     $ docker daemon --userns-remap=default
53
-```    
54
-In this case, Docker will create--or find the existing--user and group
55
-named `dockremap`. If the user is created, and the Linux distribution has
56
-appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
57
-with a contiguous 65536 length range of subordinate user and group IDs, starting
58
-at an offset based on prior entries in those files.  For example, Ubuntu will
59
-create the following range, based on an existing user already having the first
60
-65536 range:
61
-
62
-```
63
-     $ cat /etc/subuid
64
-     user1:100000:65536
65
-     dockremap:165536:65536
66
-```
67
-
68
-> **Note:** On a fresh Fedora install, we found that we had to `touch` the
69
-> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
70
-> were created.  Once these files existed, range assignment on user creation
71
-> worked properly.
72
-
73
-If you have a preferred/self-managed user with subordinate ID mappings already
74
-configured, you can provide that username or uid to the `--userns-remap` flag.
75
-If you have a group that doesn't match the username, you may provide the `gid`
76
-or group name as well; otherwise the username will be used as the group name
77
-when querying the system for the subordinate group ID range.
78
-
79
-## Detailed information on `subuid`/`subgid` ranges
80
-
81
-Given there may be advanced use of the subordinate ID ranges by power users, we will
82
-describe how the Docker daemon uses the range entries within these files under the
83
-current experimental user namespace support.
84
-
85
-The simplest case exists where only one contiguous range is defined for the
86
-provided user or group. In this case, Docker will use that entire contiguous
87
-range for the mapping of host uids and gids to the container process.  This
88
-means that the first ID in the range will be the remapped root user, and the
89
-IDs above that initial ID will map host ID 1 through the end of the range.
90
-
91
-From the example `/etc/subid` content shown above, that means the remapped root
92
-user would be uid 165536.
93
-
94
-If the system administrator has set up multiple ranges for a single user or
95
-group, the Docker daemon will read all the available ranges and use the
96
-following algorithm to create the mapping ranges:
97
-
98
-1. The ranges will be sorted by *start ID* ascending
99
-2. Maps will be created from each range with where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on.  This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
100
-3. Ranges segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
101
-
102
-## User namespace known restrictions
103
-
104
-The following standard Docker features are currently incompatible when
105
-running a Docker daemon with experimental user namespaces enabled:
106
-
107
- - sharing namespaces with the host (--pid=host, --net=host, etc.)
108
- - sharing namespaces with other containers (--net=container:*other*)
109
- - A `--readonly` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
110
- - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
111
- - Using `--privileged` mode containers
112
- - volume use without pre-arranging proper file ownership in mounted volumes
113
-
114
-Additionally, while the `root` user inside a user namespaced container
115
-process has many of the privileges of the administrative root user, the
116
-following operations will fail:
117
-
118
- - Use of `mknod` - permission is denied for device creation by the container root
119
- - others will be listed here when fully tested
... ...
@@ -99,7 +99,7 @@ if [ ! "$GOPATH" ]; then
99 99
 	exit 1
100 100
 fi
101 101
 
102
-if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then
102
+if [ "$DOCKER_EXPERIMENTAL" ]; then
103 103
 	echo >&2 '# WARNING! DOCKER_EXPERIMENTAL is set: building experimental features'
104 104
 	echo >&2
105 105
 	DOCKER_BUILDTAGS+=" experimental pkcs11"
... ...
@@ -652,10 +652,14 @@ func (s *DockerSuite) TestContainerApiCreateWithDomainName(c *check.C) {
652 652
 	c.Assert(containerJSON.Config.Domainname, checker.Equals, domainName, check.Commentf("Mismatched Domainname"))
653 653
 }
654 654
 
655
-func (s *DockerSuite) TestContainerApiCreateNetworkMode(c *check.C) {
655
+func (s *DockerSuite) TestContainerApiCreateBridgeNetworkMode(c *check.C) {
656 656
 	testRequires(c, DaemonIsLinux)
657
-	UtilCreateNetworkMode(c, "host")
658 657
 	UtilCreateNetworkMode(c, "bridge")
658
+}
659
+
660
+func (s *DockerSuite) TestContainerApiCreateOtherNetworkModes(c *check.C) {
661
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
662
+	UtilCreateNetworkMode(c, "host")
659 663
 	UtilCreateNetworkMode(c, "container:web1")
660 664
 }
661 665
 
... ...
@@ -53,6 +53,7 @@ docker-daemon - Enable daemon mode
53 53
 [**--tlskey**[=*~/.docker/key.pem*]]
54 54
 [**--tlsverify**]
55 55
 [**--userland-proxy**[=*true*]]
56
+[**--userns-remap**[=*default*]]
56 57
 
57 58
 # DESCRIPTION
58 59
 **docker** has two distinct functions. It is used for starting the Docker
... ...
@@ -223,6 +224,9 @@ unix://[/path/to/socket] to use.
223 223
 **--userland-proxy**=*true*|*false*
224 224
     Rely on a userland proxy implementation for inter-container and outside-to-container loopback communications. Default is true.
225 225
 
226
+**--userns-remap**=*default*|*uid:gid*|*user:group*|*user*|*uid*
227
+    Enable user namespaces for containers on the daemon. Specifying "default" will cause a new user and group to be created to handle UID and GID range remapping for the user namespace mappings used for contained processes. Specifying a user (or uid) and optionally a group (or gid) will cause the daemon to look up the user and group's subordinate ID ranges for use as the user namespace mappings for contained processes.
228
+
226 229
 # STORAGE DRIVER OPTIONS
227 230
 
228 231
 Docker uses storage backends (known as "graphdrivers" in the Docker