Browse code

Merge pull request #12648 from estesp/userns-impl

Phase 1 implementation of user namespaces as a remapped container root

Arnaud Porterie authored on 2015/10/11 03:38:07
Showing 83 changed files
... ...
@@ -9,6 +9,7 @@ DOCKER_ENVS := \
9 9
 	-e DOCKER_DEBUG \
10 10
 	-e DOCKER_EXECDRIVER \
11 11
 	-e DOCKER_EXPERIMENTAL \
12
+	-e DOCKER_REMAP_ROOT \
12 13
 	-e DOCKER_GRAPHDRIVER \
13 14
 	-e DOCKER_STORAGE_OPTS \
14 15
 	-e DOCKER_USERLANDPROXY \
... ...
@@ -18,6 +18,8 @@ import (
18 18
 	"github.com/docker/docker/daemon/daemonbuilder"
19 19
 	"github.com/docker/docker/graph"
20 20
 	"github.com/docker/docker/graph/tags"
21
+	"github.com/docker/docker/pkg/archive"
22
+	"github.com/docker/docker/pkg/chrootarchive"
21 23
 	"github.com/docker/docker/pkg/ioutils"
22 24
 	"github.com/docker/docker/pkg/parsers"
23 25
 	"github.com/docker/docker/pkg/progressreader"
... ...
@@ -393,7 +395,13 @@ func (s *router) postBuild(ctx context.Context, w http.ResponseWriter, r *http.R
393 393
 		}
394 394
 	}()
395 395
 
396
-	docker := daemonbuilder.Docker{s.daemon, output, authConfigs}
396
+	uidMaps, gidMaps := s.daemon.GetUIDGIDMaps()
397
+	defaultArchiver := &archive.Archiver{
398
+		Untar:   chrootarchive.Untar,
399
+		UIDMaps: uidMaps,
400
+		GIDMaps: gidMaps,
401
+	}
402
+	docker := daemonbuilder.Docker{s.daemon, output, authConfigs, defaultArchiver}
397 403
 
398 404
 	b, err := dockerfile.NewBuilder(buildConfig, docker, builder.DockerIgnoreContext{context}, nil)
399 405
 	if err != nil {
... ...
@@ -30,6 +30,7 @@ type CommonConfig struct {
30 30
 	LogConfig      runconfig.LogConfig
31 31
 	Mtu            int
32 32
 	Pidfile        string
33
+	RemappedRoot   string
33 34
 	Root           string
34 35
 	TrustKeyPath   string
35 36
 	DefaultNetwork string
36 37
new file mode 100644
... ...
@@ -0,0 +1,119 @@
0
+// +build experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"fmt"
6
+	"strconv"
7
+	"strings"
8
+
9
+	"github.com/docker/docker/pkg/idtools"
10
+	flag "github.com/docker/docker/pkg/mflag"
11
+	"github.com/opencontainers/runc/libcontainer/user"
12
+)
13
+
14
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
15
+	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
16
+}
17
+
18
+const (
19
+	defaultIDSpecifier string = "default"
20
+	defaultRemappedID  string = "dockremap"
21
+)
22
+
23
+// Parse the remapped root (user namespace) option, which can be one of:
24
+//   username            - valid username from /etc/passwd
25
+//   username:groupname  - valid username; valid groupname from /etc/group
26
+//   uid                 - 32-bit unsigned int valid Linux UID value
27
+//   uid:gid             - uid value; 32-bit unsigned int Linux GID value
28
+//
29
+//  If no groupname is specified, and a username is specified, an attempt
30
+//  will be made to lookup a gid for that username as a groupname
31
+//
32
+//  If names are used, they are verified to exist in passwd/group
33
+func parseRemappedRoot(usergrp string) (string, string, error) {
34
+
35
+	var (
36
+		userID, groupID     int
37
+		username, groupname string
38
+	)
39
+
40
+	idparts := strings.Split(usergrp, ":")
41
+	if len(idparts) > 2 {
42
+		return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp)
43
+	}
44
+
45
+	if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil {
46
+		// must be a uid; take it as valid
47
+		userID = int(uid)
48
+		luser, err := user.LookupUid(userID)
49
+		if err != nil {
50
+			return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err)
51
+		}
52
+		username = luser.Name
53
+		if len(idparts) == 1 {
54
+			// if the uid was numeric and no gid was specified, take the uid as the gid
55
+			groupID = userID
56
+			lgrp, err := user.LookupGid(groupID)
57
+			if err != nil {
58
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err)
59
+			}
60
+			groupname = lgrp.Name
61
+		}
62
+	} else {
63
+		lookupName := idparts[0]
64
+		// special case: if the user specified "default", they want Docker to create or
65
+		// use (after creation) the "dockremap" user/group for root remapping
66
+		if lookupName == defaultIDSpecifier {
67
+			lookupName = defaultRemappedID
68
+		}
69
+		luser, err := user.LookupUser(lookupName)
70
+		if err != nil && idparts[0] != defaultIDSpecifier {
71
+			// error if the name requested isn't the special "dockremap" ID
72
+			return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err)
73
+		} else if err != nil {
74
+			// special case-- if the username == "default", then we have been asked
75
+			// to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid}
76
+			// ranges will be used for the user and group mappings in user namespaced containers
77
+			_, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID)
78
+			if err == nil {
79
+				return defaultRemappedID, defaultRemappedID, nil
80
+			}
81
+			return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err)
82
+		}
83
+		userID = luser.Uid
84
+		username = luser.Name
85
+		if len(idparts) == 1 {
86
+			// we only have a string username, and no group specified; look up gid from username as group
87
+			group, err := user.LookupGroup(lookupName)
88
+			if err != nil {
89
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err)
90
+			}
91
+			groupID = group.Gid
92
+			groupname = group.Name
93
+		}
94
+	}
95
+
96
+	if len(idparts) == 2 {
97
+		// groupname or gid is separately specified and must be resolved
98
+		// to a unsigned 32-bit gid
99
+		if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil {
100
+			// must be a gid, take it as valid
101
+			groupID = int(gid)
102
+			lgrp, err := user.LookupGid(groupID)
103
+			if err != nil {
104
+				return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err)
105
+			}
106
+			groupname = lgrp.Name
107
+		} else {
108
+			// not a number; attempt a lookup
109
+			group, err := user.LookupGroup(idparts[1])
110
+			if err != nil {
111
+				return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err)
112
+			}
113
+			groupID = group.Gid
114
+			groupname = idparts[1]
115
+		}
116
+	}
117
+	return username, groupname, nil
118
+}
0 119
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+// +build !experimental
1
+
2
+package daemon
3
+
4
+import flag "github.com/docker/docker/pkg/mflag"
5
+
6
+func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) {
7
+}
... ...
@@ -27,6 +27,7 @@ type Config struct {
27 27
 	CorsHeaders          string
28 28
 	EnableCors           bool
29 29
 	EnableSelinuxSupport bool
30
+	RemappedRoot         string
30 31
 	SocketGroup          string
31 32
 	Ulimits              map[string]*ulimit.Ulimit
32 33
 }
... ...
@@ -77,4 +78,6 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
77 77
 	cmd.BoolVar(&config.Bridge.EnableUserlandProxy, []string{"-userland-proxy"}, true, usageFn("Use userland proxy for loopback traffic"))
78 78
 	cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header"))
79 79
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
80
+
81
+	config.attachExperimentalFlags(cmd, usageFn)
80 82
 }
... ...
@@ -553,7 +553,12 @@ func (container *Container) export() (archive.Archive, error) {
553 553
 		return nil, err
554 554
 	}
555 555
 
556
-	archive, err := archive.Tar(container.basefs, archive.Uncompressed)
556
+	uidMaps, gidMaps := container.daemon.GetUIDGIDMaps()
557
+	archive, err := archive.TarWithOptions(container.basefs, &archive.TarOptions{
558
+		Compression: archive.Uncompressed,
559
+		UIDMaps:     uidMaps,
560
+		GIDMaps:     gidMaps,
561
+	})
557 562
 	if err != nil {
558 563
 		container.Unmount()
559 564
 		return nil, err
... ...
@@ -20,6 +20,7 @@ import (
20 20
 	"github.com/docker/docker/daemon/network"
21 21
 	derr "github.com/docker/docker/errors"
22 22
 	"github.com/docker/docker/pkg/directory"
23
+	"github.com/docker/docker/pkg/idtools"
23 24
 	"github.com/docker/docker/pkg/nat"
24 25
 	"github.com/docker/docker/pkg/stringid"
25 26
 	"github.com/docker/docker/pkg/system"
... ...
@@ -302,6 +303,14 @@ func populateCommand(c *Container, env []string) error {
302 302
 	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
303 303
 	processConfig.Env = env
304 304
 
305
+	remappedRoot := &execdriver.User{}
306
+	rootUID, rootGID := c.daemon.GetRemappedUIDGID()
307
+	if rootUID != 0 {
308
+		remappedRoot.UID = rootUID
309
+		remappedRoot.GID = rootGID
310
+	}
311
+	uidMap, gidMap := c.daemon.GetUIDGIDMaps()
312
+
305 313
 	c.command = &execdriver.Command{
306 314
 		ID:                 c.ID,
307 315
 		Rootfs:             c.rootfsPath(),
... ...
@@ -310,6 +319,9 @@ func populateCommand(c *Container, env []string) error {
310 310
 		WorkingDir:         c.Config.WorkingDir,
311 311
 		Network:            en,
312 312
 		Ipc:                ipc,
313
+		UIDMapping:         uidMap,
314
+		GIDMapping:         gidMap,
315
+		RemappedRoot:       remappedRoot,
313 316
 		Pid:                pid,
314 317
 		UTS:                uts,
315 318
 		Resources:          resources,
... ...
@@ -1343,19 +1355,23 @@ func (container *Container) hasMountFor(path string) bool {
1343 1343
 }
1344 1344
 
1345 1345
 func (container *Container) setupIpcDirs() error {
1346
+	rootUID, rootGID := container.daemon.GetRemappedUIDGID()
1346 1347
 	if !container.hasMountFor("/dev/shm") {
1347 1348
 		shmPath, err := container.shmPath()
1348 1349
 		if err != nil {
1349 1350
 			return err
1350 1351
 		}
1351 1352
 
1352
-		if err := os.MkdirAll(shmPath, 0700); err != nil {
1353
+		if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
1353 1354
 			return err
1354 1355
 		}
1355 1356
 
1356 1357
 		if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel("mode=1777,size=65536k", container.getMountLabel())); err != nil {
1357 1358
 			return fmt.Errorf("mounting shm tmpfs: %s", err)
1358 1359
 		}
1360
+		if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
1361
+			return err
1362
+		}
1359 1363
 	}
1360 1364
 
1361 1365
 	if !container.hasMountFor("/dev/mqueue") {
... ...
@@ -1364,13 +1380,16 @@ func (container *Container) setupIpcDirs() error {
1364 1364
 			return err
1365 1365
 		}
1366 1366
 
1367
-		if err := os.MkdirAll(mqueuePath, 0700); err != nil {
1367
+		if err := idtools.MkdirAllAs(mqueuePath, 0700, rootUID, rootGID); err != nil {
1368 1368
 			return err
1369 1369
 		}
1370 1370
 
1371 1371
 		if err := syscall.Mount("mqueue", mqueuePath, "mqueue", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), ""); err != nil {
1372 1372
 			return fmt.Errorf("mounting mqueue mqueue : %s", err)
1373 1373
 		}
1374
+		if err := os.Chown(mqueuePath, rootUID, rootGID); err != nil {
1375
+			return err
1376
+		}
1374 1377
 	}
1375 1378
 
1376 1379
 	return nil
... ...
@@ -37,6 +37,7 @@ import (
37 37
 	"github.com/docker/docker/pkg/discovery"
38 38
 	"github.com/docker/docker/pkg/fileutils"
39 39
 	"github.com/docker/docker/pkg/graphdb"
40
+	"github.com/docker/docker/pkg/idtools"
40 41
 	"github.com/docker/docker/pkg/ioutils"
41 42
 	"github.com/docker/docker/pkg/namesgenerator"
42 43
 	"github.com/docker/docker/pkg/nat"
... ...
@@ -121,6 +122,8 @@ type Daemon struct {
121 121
 	discoveryWatcher discovery.Watcher
122 122
 	root             string
123 123
 	shutdown         bool
124
+	uidMaps          []idtools.IDMap
125
+	gidMaps          []idtools.IDMap
124 126
 }
125 127
 
126 128
 // Get looks for a container using the provided information, which could be
... ...
@@ -632,6 +635,15 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
632 632
 	// on Windows to dump Go routine stacks
633 633
 	setupDumpStackTrap()
634 634
 
635
+	uidMaps, gidMaps, err := setupRemappedRoot(config)
636
+	if err != nil {
637
+		return nil, err
638
+	}
639
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
640
+	if err != nil {
641
+		return nil, err
642
+	}
643
+
635 644
 	// get the canonical path to the Docker root directory
636 645
 	var realRoot string
637 646
 	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
... ...
@@ -642,14 +654,13 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
642 642
 			return nil, fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err)
643 643
 		}
644 644
 	}
645
-	config.Root = realRoot
646
-	// Create the root directory if it doesn't exists
647
-	if err := system.MkdirAll(config.Root, 0700); err != nil {
645
+
646
+	if err = setupDaemonRoot(config, realRoot, rootUID, rootGID); err != nil {
648 647
 		return nil, err
649 648
 	}
650 649
 
651 650
 	// set up the tmpDir to use a canonical path
652
-	tmp, err := tempDir(config.Root)
651
+	tmp, err := tempDir(config.Root, rootUID, rootGID)
653 652
 	if err != nil {
654 653
 		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
655 654
 	}
... ...
@@ -663,7 +674,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
663 663
 	graphdriver.DefaultDriver = config.GraphDriver
664 664
 
665 665
 	// Load storage driver
666
-	driver, err := graphdriver.New(config.Root, config.GraphOptions)
666
+	driver, err := graphdriver.New(config.Root, config.GraphOptions, uidMaps, gidMaps)
667 667
 	if err != nil {
668 668
 		return nil, fmt.Errorf("error initializing graphdriver: %v", err)
669 669
 	}
... ...
@@ -696,7 +707,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
696 696
 
697 697
 	daemonRepo := filepath.Join(config.Root, "containers")
698 698
 
699
-	if err := system.MkdirAll(daemonRepo, 0700); err != nil {
699
+	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
700 700
 		return nil, err
701 701
 	}
702 702
 
... ...
@@ -706,13 +717,13 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
706 706
 	}
707 707
 
708 708
 	logrus.Debug("Creating images graph")
709
-	g, err := graph.NewGraph(filepath.Join(config.Root, "graph"), d.driver)
709
+	g, err := graph.NewGraph(filepath.Join(config.Root, "graph"), d.driver, uidMaps, gidMaps)
710 710
 	if err != nil {
711 711
 		return nil, err
712 712
 	}
713 713
 
714 714
 	// Configure the volumes driver
715
-	volStore, err := configureVolumes(config)
715
+	volStore, err := configureVolumes(config, rootUID, rootGID)
716 716
 	if err != nil {
717 717
 		return nil, err
718 718
 	}
... ...
@@ -777,7 +788,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
777 777
 
778 778
 	var sysInitPath string
779 779
 	if config.ExecDriver == "lxc" {
780
-		initPath, err := configureSysInit(config)
780
+		initPath, err := configureSysInit(config, rootUID, rootGID)
781 781
 		if err != nil {
782 782
 			return nil, err
783 783
 		}
... ...
@@ -812,6 +823,8 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
812 812
 	d.EventsService = eventsService
813 813
 	d.volumes = volStore
814 814
 	d.root = config.Root
815
+	d.uidMaps = uidMaps
816
+	d.gidMaps = gidMaps
815 817
 
816 818
 	if err := d.cleanupMounts(); err != nil {
817 819
 		return nil, err
... ...
@@ -974,7 +987,11 @@ func (daemon *Daemon) diff(container *Container) (archive.Archive, error) {
974 974
 func (daemon *Daemon) createRootfs(container *Container) error {
975 975
 	// Step 1: create the container directory.
976 976
 	// This doubles as a barrier to avoid race conditions.
977
-	if err := os.Mkdir(container.root, 0700); err != nil {
977
+	rootUID, rootGID, err := idtools.GetRootUIDGID(daemon.uidMaps, daemon.gidMaps)
978
+	if err != nil {
979
+		return err
980
+	}
981
+	if err := idtools.MkdirAs(container.root, 0700, rootUID, rootGID); err != nil {
978 982
 		return err
979 983
 	}
980 984
 	initID := fmt.Sprintf("%s-init", container.ID)
... ...
@@ -986,7 +1003,7 @@ func (daemon *Daemon) createRootfs(container *Container) error {
986 986
 		return err
987 987
 	}
988 988
 
989
-	if err := setupInitLayer(initPath); err != nil {
989
+	if err := setupInitLayer(initPath, rootUID, rootGID); err != nil {
990 990
 		daemon.driver.Put(initID)
991 991
 		return err
992 992
 	}
... ...
@@ -1105,6 +1122,21 @@ func (daemon *Daemon) containerGraph() *graphdb.Database {
1105 1105
 	return daemon.containerGraphDB
1106 1106
 }
1107 1107
 
1108
+// GetUIDGIDMaps returns the current daemon's user namespace settings
1109
+// for the full uid and gid maps which will be applied to containers
1110
+// started in this instance.
1111
+func (daemon *Daemon) GetUIDGIDMaps() ([]idtools.IDMap, []idtools.IDMap) {
1112
+	return daemon.uidMaps, daemon.gidMaps
1113
+}
1114
+
1115
+// GetRemappedUIDGID returns the current daemon's uid and gid values
1116
+// if user namespaces are in use for this daemon instance.  If not
1117
+// this function will return "real" root values of 0, 0.
1118
+func (daemon *Daemon) GetRemappedUIDGID() (int, int) {
1119
+	uid, gid, _ := idtools.GetRootUIDGID(daemon.uidMaps, daemon.gidMaps)
1120
+	return uid, gid
1121
+}
1122
+
1108 1123
 // ImageGetCached returns the earliest created image that is a child
1109 1124
 // of the image with imgID, that had the same config when it was
1110 1125
 // created. nil is returned if a child cannot be found. An error is
... ...
@@ -1139,12 +1171,12 @@ func (daemon *Daemon) ImageGetCached(imgID string, config *runconfig.Config) (*i
1139 1139
 }
1140 1140
 
1141 1141
 // tempDir returns the default directory to use for temporary files.
1142
-func tempDir(rootDir string) (string, error) {
1142
+func tempDir(rootDir string, rootUID, rootGID int) (string, error) {
1143 1143
 	var tmpDir string
1144 1144
 	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
1145 1145
 		tmpDir = filepath.Join(rootDir, "tmp")
1146 1146
 	}
1147
-	return tmpDir, system.MkdirAll(tmpDir, 0700)
1147
+	return tmpDir, idtools.MkdirAllAs(tmpDir, 0700, rootUID, rootGID)
1148 1148
 }
1149 1149
 
1150 1150
 func (daemon *Daemon) setHostConfig(container *Container, hostConfig *runconfig.HostConfig) error {
... ...
@@ -1228,8 +1260,8 @@ func (daemon *Daemon) verifyContainerSettings(hostConfig *runconfig.HostConfig,
1228 1228
 	return verifyPlatformContainerSettings(daemon, hostConfig, config)
1229 1229
 }
1230 1230
 
1231
-func configureVolumes(config *Config) (*store.VolumeStore, error) {
1232
-	volumesDriver, err := local.New(config.Root)
1231
+func configureVolumes(config *Config, rootUID, rootGID int) (*store.VolumeStore, error) {
1232
+	volumesDriver, err := local.New(config.Root, rootUID, rootGID)
1233 1233
 	if err != nil {
1234 1234
 		return nil, err
1235 1235
 	}
1236 1236
new file mode 100644
... ...
@@ -0,0 +1,110 @@
0
+// +build experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"fmt"
6
+	"os"
7
+	"path/filepath"
8
+	"runtime"
9
+
10
+	"github.com/Sirupsen/logrus"
11
+	"github.com/docker/docker/pkg/directory"
12
+	"github.com/docker/docker/pkg/idtools"
13
+	"github.com/docker/docker/runconfig"
14
+)
15
+
16
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
17
+	if config.ExecDriver != "native" && config.RemappedRoot != "" {
18
+		return nil, nil, fmt.Errorf("User namespace remapping is only supported with the native execdriver")
19
+	}
20
+	if runtime.GOOS == "windows" && config.RemappedRoot != "" {
21
+		return nil, nil, fmt.Errorf("User namespaces are not supported on Windows")
22
+	}
23
+
24
+	// if the daemon was started with remapped root option, parse
25
+	// the config option to the int uid,gid values
26
+	var (
27
+		uidMaps, gidMaps []idtools.IDMap
28
+	)
29
+	if config.RemappedRoot != "" {
30
+		username, groupname, err := parseRemappedRoot(config.RemappedRoot)
31
+		if err != nil {
32
+			return nil, nil, err
33
+		}
34
+		if username == "root" {
35
+			// Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op
36
+			// effectively
37
+			logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF")
38
+			return uidMaps, gidMaps, nil
39
+		}
40
+		logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname)
41
+		// update remapped root setting now that we have resolved them to actual names
42
+		config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname)
43
+
44
+		uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname)
45
+		if err != nil {
46
+			return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err)
47
+		}
48
+	}
49
+	return uidMaps, gidMaps, nil
50
+}
51
+
52
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
53
+	// the main docker root needs to be accessible by all users, as user namespace support
54
+	// will create subdirectories owned by either a) the real system root (when no remapping
55
+	// is setup) or b) the remapped root host ID (when --root=uid:gid is used)
56
+	// for "first time" users of user namespaces, we need to migrate the current directory
57
+	// contents to the "0.0" (root == root "namespace" daemon root)
58
+	nsRoot := "0.0"
59
+	if _, err := os.Stat(rootDir); err == nil {
60
+		// root current exists; we need to check for a prior migration
61
+		if _, err := os.Stat(filepath.Join(rootDir, nsRoot)); err != nil && os.IsNotExist(err) {
62
+			// need to migrate current root to "0.0" subroot
63
+			// 1. create non-usernamespaced root as "0.0"
64
+			if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil {
65
+				return fmt.Errorf("Cannot create daemon root %q: %v", filepath.Join(rootDir, nsRoot), err)
66
+			}
67
+			// 2. move current root content to "0.0" new subroot
68
+			if err := directory.MoveToSubdir(rootDir, nsRoot); err != nil {
69
+				return fmt.Errorf("Cannot migrate current daemon root %q for user namespaces: %v", rootDir, err)
70
+			}
71
+			// 3. chmod outer root to 755
72
+			if chmodErr := os.Chmod(rootDir, 0755); chmodErr != nil {
73
+				return chmodErr
74
+			}
75
+		}
76
+	} else if os.IsNotExist(err) {
77
+		// no root exists yet, create it 0755 with root:root ownership
78
+		if err := os.MkdirAll(rootDir, 0755); err != nil {
79
+			return err
80
+		}
81
+		// create the "0.0" subroot (so no future "migration" happens of the root)
82
+		if err := os.Mkdir(filepath.Join(rootDir, nsRoot), 0700); err != nil {
83
+			return err
84
+		}
85
+	}
86
+
87
+	// for user namespaces we will create a subtree underneath the specified root
88
+	// with any/all specified remapped root uid/gid options on the daemon creating
89
+	// a new subdirectory with ownership set to the remapped uid/gid (so as to allow
90
+	// `chdir()` to work for containers namespaced to that uid/gid)
91
+	if config.RemappedRoot != "" {
92
+		nsRoot = fmt.Sprintf("%d.%d", rootUID, rootGID)
93
+	}
94
+	config.Root = filepath.Join(rootDir, nsRoot)
95
+	logrus.Debugf("Creating actual daemon root: %s", config.Root)
96
+
97
+	// Create the root directory if it doesn't exists
98
+	if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil {
99
+		return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err)
100
+	}
101
+	return nil
102
+}
103
+
104
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
105
+	if hostConfig.Privileged && daemon.config().RemappedRoot != "" {
106
+		return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings")
107
+	}
108
+	return nil, nil
109
+}
0 110
new file mode 100644
... ...
@@ -0,0 +1,28 @@
0
+// +build !experimental
1
+
2
+package daemon
3
+
4
+import (
5
+	"os"
6
+
7
+	"github.com/docker/docker/pkg/idtools"
8
+	"github.com/docker/docker/pkg/system"
9
+	"github.com/docker/docker/runconfig"
10
+)
11
+
12
+func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) {
13
+	return nil, nil, nil
14
+}
15
+
16
+func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error {
17
+	config.Root = rootDir
18
+	// Create the root directory if it doesn't exists
19
+	if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) {
20
+		return err
21
+	}
22
+	return nil
23
+}
24
+
25
+func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *runconfig.HostConfig, config *runconfig.Config) ([]string, error) {
26
+	return nil, nil
27
+}
... ...
@@ -509,7 +509,7 @@ func initDaemonForVolumesTest(tmp string) (*Daemon, error) {
509 509
 		volumes:    store.New(),
510 510
 	}
511 511
 
512
-	volumesDriver, err := local.New(tmp)
512
+	volumesDriver, err := local.New(tmp, 0, 0)
513 513
 	if err != nil {
514 514
 		return nil, err
515 515
 	}
... ...
@@ -15,10 +15,10 @@ import (
15 15
 	"github.com/docker/docker/daemon/graphdriver"
16 16
 	derr "github.com/docker/docker/errors"
17 17
 	"github.com/docker/docker/pkg/fileutils"
18
+	"github.com/docker/docker/pkg/idtools"
18 19
 	"github.com/docker/docker/pkg/parsers"
19 20
 	"github.com/docker/docker/pkg/parsers/kernel"
20 21
 	"github.com/docker/docker/pkg/sysinfo"
21
-	"github.com/docker/docker/pkg/system"
22 22
 	"github.com/docker/docker/runconfig"
23 23
 	"github.com/docker/docker/utils"
24 24
 	"github.com/docker/libnetwork"
... ...
@@ -121,6 +121,11 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *runconfig.HostC
121 121
 	warnings := []string{}
122 122
 	sysInfo := sysinfo.New(true)
123 123
 
124
+	warnings, err := daemon.verifyExperimentalContainerSettings(hostConfig, config)
125
+	if err != nil {
126
+		return warnings, err
127
+	}
128
+
124 129
 	if hostConfig.LxcConf.Len() > 0 && !strings.Contains(daemon.ExecutionDriver().Name(), "lxc") {
125 130
 		return warnings, fmt.Errorf("Cannot use --lxc-conf with execdriver: %s", daemon.ExecutionDriver().Name())
126 131
 	}
... ...
@@ -275,7 +280,7 @@ func migrateIfDownlevel(driver graphdriver.Driver, root string) error {
275 275
 	return migrateIfAufs(driver, root)
276 276
 }
277 277
 
278
-func configureSysInit(config *Config) (string, error) {
278
+func configureSysInit(config *Config, rootUID, rootGID int) (string, error) {
279 279
 	localCopy := filepath.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
280 280
 	sysInitPath := utils.DockerInitPath(localCopy)
281 281
 	if sysInitPath == "" {
... ...
@@ -284,7 +289,7 @@ func configureSysInit(config *Config) (string, error) {
284 284
 
285 285
 	if sysInitPath != localCopy {
286 286
 		// When we find a suitable dockerinit binary (even if it's our local binary), we copy it into config.Root at localCopy for future use (so that the original can go away without that being a problem, for example during a package upgrade).
287
-		if err := os.Mkdir(filepath.Dir(localCopy), 0700); err != nil && !os.IsExist(err) {
287
+		if err := idtools.MkdirAs(filepath.Dir(localCopy), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
288 288
 			return "", err
289 289
 		}
290 290
 		if _, err := fileutils.CopyFile(sysInitPath, localCopy); err != nil {
... ...
@@ -455,7 +460,7 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e
455 455
 //
456 456
 // This extra layer is used by all containers as the top-most ro layer. It protects
457 457
 // the container from unwanted side-effects on the rw layer.
458
-func setupInitLayer(initLayer string) error {
458
+func setupInitLayer(initLayer string, rootUID, rootGID int) error {
459 459
 	for pth, typ := range map[string]string{
460 460
 		"/dev/pts":         "dir",
461 461
 		"/dev/shm":         "dir",
... ...
@@ -478,12 +483,12 @@ func setupInitLayer(initLayer string) error {
478 478
 
479 479
 		if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
480 480
 			if os.IsNotExist(err) {
481
-				if err := system.MkdirAll(filepath.Join(initLayer, filepath.Dir(pth)), 0755); err != nil {
481
+				if err := idtools.MkdirAllAs(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootUID, rootGID); err != nil {
482 482
 					return err
483 483
 				}
484 484
 				switch typ {
485 485
 				case "dir":
486
-					if err := system.MkdirAll(filepath.Join(initLayer, pth), 0755); err != nil {
486
+					if err := idtools.MkdirAllAs(filepath.Join(initLayer, pth), 0755, rootUID, rootGID); err != nil {
487 487
 						return err
488 488
 					}
489 489
 				case "file":
... ...
@@ -492,6 +497,7 @@ func setupInitLayer(initLayer string) error {
492 492
 						return err
493 493
 					}
494 494
 					f.Close()
495
+					f.Chown(rootUID, rootGID)
495 496
 				default:
496 497
 					if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
497 498
 						return err
... ...
@@ -25,7 +25,7 @@ func parseSecurityOpt(container *Container, config *runconfig.HostConfig) error
25 25
 	return nil
26 26
 }
27 27
 
28
-func setupInitLayer(initLayer string) error {
28
+func setupInitLayer(initLayer string, rootUID, rootGID int) error {
29 29
 	return nil
30 30
 }
31 31
 
... ...
@@ -89,7 +89,7 @@ func migrateIfDownlevel(driver graphdriver.Driver, root string) error {
89 89
 	return nil
90 90
 }
91 91
 
92
-func configureSysInit(config *Config) (string, error) {
92
+func configureSysInit(config *Config, rootUID, rootGID int) (string, error) {
93 93
 	// TODO Windows.
94 94
 	return os.Getenv("TEMP"), nil
95 95
 }
... ...
@@ -16,7 +16,6 @@ import (
16 16
 	"github.com/docker/docker/graph"
17 17
 	"github.com/docker/docker/image"
18 18
 	"github.com/docker/docker/pkg/archive"
19
-	"github.com/docker/docker/pkg/chrootarchive"
20 19
 	"github.com/docker/docker/pkg/httputils"
21 20
 	"github.com/docker/docker/pkg/ioutils"
22 21
 	"github.com/docker/docker/pkg/parsers"
... ...
@@ -32,6 +31,7 @@ type Docker struct {
32 32
 	Daemon      *daemon.Daemon
33 33
 	OutOld      io.Writer
34 34
 	AuthConfigs map[string]cliconfig.AuthConfig
35
+	Archiver    *archive.Archiver
35 36
 }
36 37
 
37 38
 // ensure Docker implements builder.Docker
... ...
@@ -121,6 +121,7 @@ func (d Docker) Release(sessionID string, activeImages []string) {
121 121
 func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo, decompress bool) error {
122 122
 	srcPath := src.Path()
123 123
 	destExists := true
124
+	rootUID, rootGID := d.Daemon.GetRemappedUIDGID()
124 125
 
125 126
 	// Work in daemon-local OS specific file paths
126 127
 	destPath = filepath.FromSlash(destPath)
... ...
@@ -149,10 +150,10 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
149 149
 
150 150
 	if src.IsDir() {
151 151
 		// copy as directory
152
-		if err := chrootarchive.CopyWithTar(srcPath, destPath); err != nil {
152
+		if err := d.Archiver.CopyWithTar(srcPath, destPath); err != nil {
153 153
 			return err
154 154
 		}
155
-		return fixPermissions(srcPath, destPath, 0, 0, destExists)
155
+		return fixPermissions(srcPath, destPath, rootUID, rootGID, destExists)
156 156
 	}
157 157
 	if decompress {
158 158
 		// Only try to untar if it is a file and that we've been told to decompress (when ADD-ing a remote file)
... ...
@@ -167,7 +168,7 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
167 167
 		}
168 168
 
169 169
 		// try to successfully untar the orig
170
-		if err := chrootarchive.UntarPath(srcPath, tarDest); err == nil {
170
+		if err := d.Archiver.UntarPath(srcPath, tarDest); err == nil {
171 171
 			return nil
172 172
 		} else if err != io.EOF {
173 173
 			logrus.Debugf("Couldn't untar to %s: %v", tarDest, err)
... ...
@@ -182,11 +183,11 @@ func (d Docker) Copy(c *daemon.Container, destPath string, src builder.FileInfo,
182 182
 	if err := system.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
183 183
 		return err
184 184
 	}
185
-	if err := chrootarchive.CopyFileWithTar(srcPath, destPath); err != nil {
185
+	if err := d.Archiver.CopyFileWithTar(srcPath, destPath); err != nil {
186 186
 		return err
187 187
 	}
188 188
 
189
-	return fixPermissions(srcPath, destPath, 0, 0, destExists)
189
+	return fixPermissions(srcPath, destPath, rootUID, rootGID, destExists)
190 190
 }
191 191
 
192 192
 // GetCachedImage returns a reference to a cached image whose parent equals `parent`
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"os/exec"
7 7
 	"time"
8 8
 
9
+	"github.com/docker/docker/pkg/idtools"
9 10
 	// TODO Windows: Factor out ulimit
10 11
 	"github.com/docker/docker/pkg/ulimit"
11 12
 	"github.com/opencontainers/runc/libcontainer"
... ...
@@ -173,6 +174,12 @@ type Mount struct {
173 173
 	Slave       bool   `json:"slave"`
174 174
 }
175 175
 
176
+// User contains the uid and gid representing a Unix user
177
+type User struct {
178
+	UID int `json:"root_uid"`
179
+	GID int `json:"root_gid"`
180
+}
181
+
176 182
 // ProcessConfig describes a process that will be run inside a container.
177 183
 type ProcessConfig struct {
178 184
 	exec.Cmd `json:"-"`
... ...
@@ -202,6 +209,9 @@ type Command struct {
202 202
 	Ipc                *Ipc              `json:"ipc"`
203 203
 	Pid                *Pid              `json:"pid"`
204 204
 	UTS                *UTS              `json:"uts"`
205
+	RemappedRoot       *User             `json:"remap_root"`
206
+	UIDMapping         []idtools.IDMap   `json:"uidmapping"`
207
+	GIDMapping         []idtools.IDMap   `json:"gidmapping"`
205 208
 	Resources          *Resources        `json:"resources"`
206 209
 	Mounts             []Mount           `json:"mounts"`
207 210
 	AllowedDevices     []*configs.Device `json:"allowed_devices"`
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"syscall"
9 9
 
10 10
 	"github.com/docker/docker/daemon/execdriver"
11
+
11 12
 	"github.com/opencontainers/runc/libcontainer/apparmor"
12 13
 	"github.com/opencontainers/runc/libcontainer/configs"
13 14
 	"github.com/opencontainers/runc/libcontainer/devices"
... ...
@@ -30,6 +31,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
30 30
 		return nil, err
31 31
 	}
32 32
 
33
+	if err := d.setupRemappedRoot(container, c); err != nil {
34
+		return nil, err
35
+	}
36
+
33 37
 	if err := d.createNetwork(container, c, hooks); err != nil {
34 38
 		return nil, err
35 39
 	}
... ...
@@ -193,6 +198,40 @@ func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) err
193 193
 	return nil
194 194
 }
195 195
 
196
+func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
197
+	if c.RemappedRoot.UID == 0 {
198
+		container.Namespaces.Remove(configs.NEWUSER)
199
+		return nil
200
+	}
201
+
202
+	// convert the Docker daemon id map to the libcontainer variant of the same struct
203
+	// this keeps us from having to import libcontainer code across Docker client + daemon packages
204
+	cuidMaps := []configs.IDMap{}
205
+	cgidMaps := []configs.IDMap{}
206
+	for _, idMap := range c.UIDMapping {
207
+		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
208
+	}
209
+	for _, idMap := range c.GIDMapping {
210
+		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
211
+	}
212
+	container.UidMappings = cuidMaps
213
+	container.GidMappings = cgidMaps
214
+
215
+	for _, node := range container.Devices {
216
+		node.Uid = uint32(c.RemappedRoot.UID)
217
+		node.Gid = uint32(c.RemappedRoot.GID)
218
+	}
219
+	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
220
+	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
221
+	for i := range container.Mounts {
222
+		if container.Mounts[i].Device == "cgroup" {
223
+			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
224
+		}
225
+	}
226
+
227
+	return nil
228
+}
229
+
196 230
 func (d *Driver) setPrivileged(container *configs.Config) (err error) {
197 231
 	container.Capabilities = execdriver.GetAllCapabilities()
198 232
 	container.Cgroups.AllowAllDevices = true
... ...
@@ -255,6 +294,7 @@ func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) e
255 255
 		if m.Slave {
256 256
 			flags |= syscall.MS_SLAVE
257 257
 		}
258
+
258 259
 		container.Mounts = append(container.Mounts, &configs.Mount{
259 260
 			Source:      m.Source,
260 261
 			Destination: m.Destination,
... ...
@@ -443,22 +443,35 @@ func (t *TtyConsole) Close() error {
443 443
 }
444 444
 
445 445
 func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
446
-	var term execdriver.Terminal
447
-	var err error
446
+
447
+	rootuid, err := container.HostUID()
448
+	if err != nil {
449
+		return err
450
+	}
448 451
 
449 452
 	if processConfig.Tty {
450
-		rootuid, err := container.HostUID()
453
+		cons, err := p.NewConsole(rootuid)
451 454
 		if err != nil {
452 455
 			return err
453 456
 		}
454
-		cons, err := p.NewConsole(rootuid)
457
+		term, err := NewTtyConsole(cons, pipes)
455 458
 		if err != nil {
456 459
 			return err
457 460
 		}
458
-		term, err = NewTtyConsole(cons, pipes)
459
-	} else {
461
+		processConfig.Terminal = term
462
+		return nil
463
+	}
464
+	// not a tty--set up stdio pipes
465
+	term := &execdriver.StdConsole{}
466
+	processConfig.Terminal = term
467
+
468
+	// if we are not in a user namespace, there is no reason to go through
469
+	// the hassle of setting up os-level pipes with proper (remapped) ownership
470
+	// so we will do the prior shortcut for non-userns containers
471
+	if rootuid == 0 {
460 472
 		p.Stdout = pipes.Stdout
461 473
 		p.Stderr = pipes.Stderr
474
+
462 475
 		r, w, err := os.Pipe()
463 476
 		if err != nil {
464 477
 			return err
... ...
@@ -470,12 +483,57 @@ func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConf
470 470
 			}()
471 471
 			p.Stdin = r
472 472
 		}
473
-		term = &execdriver.StdConsole{}
473
+		return nil
474 474
 	}
475
+
476
+	// if we have user namespaces enabled (rootuid != 0), we will set
477
+	// up os pipes for stderr, stdout, stdin so we can chown them to
478
+	// the proper ownership to allow for proper access to the underlying
479
+	// fds
480
+	var fds []int
481
+
482
+	//setup stdout
483
+	r, w, err := os.Pipe()
475 484
 	if err != nil {
476 485
 		return err
477 486
 	}
478
-	processConfig.Terminal = term
487
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
488
+	if pipes.Stdout != nil {
489
+		go io.Copy(pipes.Stdout, r)
490
+	}
491
+	term.Closers = append(term.Closers, r)
492
+	p.Stdout = w
493
+
494
+	//setup stderr
495
+	r, w, err = os.Pipe()
496
+	if err != nil {
497
+		return err
498
+	}
499
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
500
+	if pipes.Stderr != nil {
501
+		go io.Copy(pipes.Stderr, r)
502
+	}
503
+	term.Closers = append(term.Closers, r)
504
+	p.Stderr = w
505
+
506
+	//setup stdin
507
+	r, w, err = os.Pipe()
508
+	if err != nil {
509
+		return err
510
+	}
511
+	fds = append(fds, int(r.Fd()), int(w.Fd()))
512
+	if pipes.Stdin != nil {
513
+		go func() {
514
+			io.Copy(w, pipes.Stdin)
515
+			w.Close()
516
+		}()
517
+		p.Stdin = r
518
+	}
519
+	for _, fd := range fds {
520
+		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
521
+			return fmt.Errorf("Failed to chown pipes fd: %v", err)
522
+		}
523
+	}
479 524
 	return nil
480 525
 }
481 526
 
... ...
@@ -26,11 +26,18 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo
26 26
 		return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
27 27
 	}
28 28
 
29
+	user := processConfig.User
30
+	if c.RemappedRoot.UID != 0 && user == "" {
31
+		//if user namespaces are enabled, set user explicitly so uid/gid is set to 0
32
+		//otherwise we end up with the overflow id and no permissions (65534)
33
+		user = "0"
34
+	}
35
+
29 36
 	p := &libcontainer.Process{
30 37
 		Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
31 38
 		Env:  c.ProcessConfig.Env,
32 39
 		Cwd:  c.WorkingDir,
33
-		User: processConfig.User,
40
+		User: user,
34 41
 	}
35 42
 
36 43
 	if processConfig.Privileged {
... ...
@@ -34,6 +34,7 @@ func New() *configs.Config {
34 34
 			{Type: "NEWIPC"},
35 35
 			{Type: "NEWPID"},
36 36
 			{Type: "NEWNET"},
37
+			{Type: "NEWUSER"},
37 38
 		}),
38 39
 		Cgroups: &configs.Cgroup{
39 40
 			Parent:           "docker",
... ...
@@ -7,6 +7,8 @@ import (
7 7
 
8 8
 // StdConsole defines standard console operations for execdriver
9 9
 type StdConsole struct {
10
+	// Closers holds io.Closer references for closing at terminal close time
11
+	Closers []io.Closer
10 12
 }
11 13
 
12 14
 // NewStdConsole returns a new StdConsole struct
... ...
@@ -46,6 +48,8 @@ func (s *StdConsole) Resize(h, w int) error {
46 46
 
47 47
 // Close implements Close method of Terminal interface
48 48
 func (s *StdConsole) Close() error {
49
-	// nothing to close here
49
+	for _, c := range s.Closers {
50
+		c.Close()
51
+	}
50 52
 	return nil
51 53
 }
... ...
@@ -34,12 +34,15 @@ import (
34 34
 	"syscall"
35 35
 
36 36
 	"github.com/Sirupsen/logrus"
37
+
37 38
 	"github.com/docker/docker/daemon/graphdriver"
38 39
 	"github.com/docker/docker/pkg/archive"
39 40
 	"github.com/docker/docker/pkg/chrootarchive"
40 41
 	"github.com/docker/docker/pkg/directory"
42
+	"github.com/docker/docker/pkg/idtools"
41 43
 	mountpk "github.com/docker/docker/pkg/mount"
42 44
 	"github.com/docker/docker/pkg/stringid"
45
+
43 46
 	"github.com/opencontainers/runc/libcontainer/label"
44 47
 )
45 48
 
... ...
@@ -71,13 +74,15 @@ type data struct {
71 71
 // active maps mount id to the count
72 72
 type Driver struct {
73 73
 	root       string
74
+	uidMaps    []idtools.IDMap
75
+	gidMaps    []idtools.IDMap
74 76
 	sync.Mutex // Protects concurrent modification to active
75 77
 	active     map[string]*data
76 78
 }
77 79
 
78 80
 // Init returns a new AUFS driver.
79 81
 // An error is returned if AUFS is not supported.
80
-func Init(root string, options []string) (graphdriver.Driver, error) {
82
+func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
81 83
 
82 84
 	// Try to load the aufs kernel module
83 85
 	if err := supportsAufs(); err != nil {
... ...
@@ -105,12 +110,23 @@ func Init(root string, options []string) (graphdriver.Driver, error) {
105 105
 	}
106 106
 
107 107
 	a := &Driver{
108
-		root:   root,
109
-		active: make(map[string]*data),
108
+		root:    root,
109
+		active:  make(map[string]*data),
110
+		uidMaps: uidMaps,
111
+		gidMaps: gidMaps,
110 112
 	}
111 113
 
112
-	// Create the root aufs driver dir
113
-	if err := os.MkdirAll(root, 0755); err != nil {
114
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
115
+	if err != nil {
116
+		return nil, err
117
+	}
118
+	// Create the root aufs driver dir and return
119
+	// if it already exists
120
+	// If not populate the dir structure
121
+	if err := idtools.MkdirAllAs(root, 0755, rootUID, rootGID); err != nil {
122
+		if os.IsExist(err) {
123
+			return a, nil
124
+		}
114 125
 		return nil, err
115 126
 	}
116 127
 
... ...
@@ -120,7 +136,7 @@ func Init(root string, options []string) (graphdriver.Driver, error) {
120 120
 
121 121
 	// Populate the dir structure
122 122
 	for _, p := range paths {
123
-		if err := os.MkdirAll(path.Join(root, p), 0755); err != nil {
123
+		if err := idtools.MkdirAllAs(path.Join(root, p), 0755, rootUID, rootGID); err != nil {
124 124
 			return nil, err
125 125
 		}
126 126
 	}
... ...
@@ -221,8 +237,12 @@ func (a *Driver) createDirsFor(id string) error {
221 221
 		"diff",
222 222
 	}
223 223
 
224
+	rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
225
+	if err != nil {
226
+		return err
227
+	}
224 228
 	for _, p := range paths {
225
-		if err := os.MkdirAll(path.Join(a.rootPath(), p, id), 0755); err != nil {
229
+		if err := idtools.MkdirAllAs(path.Join(a.rootPath(), p, id), 0755, rootUID, rootGID); err != nil {
226 230
 			return err
227 231
 		}
228 232
 	}
... ...
@@ -334,11 +354,16 @@ func (a *Driver) Diff(id, parent string) (archive.Archive, error) {
334 334
 	return archive.TarWithOptions(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
335 335
 		Compression:     archive.Uncompressed,
336 336
 		ExcludePatterns: []string{archive.WhiteoutMetaPrefix + "*", "!" + archive.WhiteoutOpaqueDir},
337
+		UIDMaps:         a.uidMaps,
338
+		GIDMaps:         a.gidMaps,
337 339
 	})
338 340
 }
339 341
 
340 342
 func (a *Driver) applyDiff(id string, diff archive.Reader) error {
341
-	return chrootarchive.UntarUncompressed(diff, path.Join(a.rootPath(), "diff", id), nil)
343
+	return chrootarchive.UntarUncompressed(diff, path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
344
+		UIDMaps: a.uidMaps,
345
+		GIDMaps: a.gidMaps,
346
+	})
342 347
 }
343 348
 
344 349
 // DiffSize calculates the changes between the specified id
... ...
@@ -26,7 +26,7 @@ func init() {
26 26
 }
27 27
 
28 28
 func testInit(dir string, t *testing.T) graphdriver.Driver {
29
-	d, err := Init(dir, nil)
29
+	d, err := Init(dir, nil, nil, nil)
30 30
 	if err != nil {
31 31
 		if err == graphdriver.ErrNotSupported {
32 32
 			t.Skip(err)
... ...
@@ -8,6 +8,8 @@ import (
8 8
 	"io/ioutil"
9 9
 	"os"
10 10
 	"path"
11
+
12
+	"github.com/docker/docker/pkg/idtools"
11 13
 )
12 14
 
13 15
 type metadata struct {
... ...
@@ -38,7 +40,7 @@ func pathExists(pth string) bool {
38 38
 // For the migration we try to move the folder containing the layer files, if that
39 39
 // fails because the data is currently mounted we will fallback to creating a
40 40
 // symlink.
41
-func (a *Driver) Migrate(pth string, setupInit func(p string) error) error {
41
+func (a *Driver) Migrate(pth string, setupInit func(p string, rootUID, rootGID int) error) error {
42 42
 	if pathExists(path.Join(pth, "graph")) {
43 43
 		if err := a.migrateRepositories(pth); err != nil {
44 44
 			return err
... ...
@@ -59,12 +61,17 @@ func (a *Driver) migrateRepositories(pth string) error {
59 59
 	return nil
60 60
 }
61 61
 
62
-func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) error {
62
+func (a *Driver) migrateContainers(pth string, setupInit func(p string, rootUID, rootGID int) error) error {
63 63
 	fis, err := ioutil.ReadDir(pth)
64 64
 	if err != nil {
65 65
 		return err
66 66
 	}
67 67
 
68
+	rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
69
+	if err != nil {
70
+		return err
71
+	}
72
+
68 73
 	for _, fi := range fis {
69 74
 		if id := fi.Name(); fi.IsDir() && pathExists(path.Join(pth, id, "rw")) {
70 75
 			if err := tryRelocate(path.Join(pth, id, "rw"), path.Join(a.rootPath(), "diff", id)); err != nil {
... ...
@@ -88,7 +95,7 @@ func (a *Driver) migrateContainers(pth string, setupInit func(p string) error) e
88 88
 					return err
89 89
 				}
90 90
 				// setup init layer
91
-				if err := setupInit(initPath); err != nil {
91
+				if err := setupInit(initPath, rootUID, rootGID); err != nil {
92 92
 					return err
93 93
 				}
94 94
 
... ...
@@ -19,6 +19,7 @@ import (
19 19
 	"unsafe"
20 20
 
21 21
 	"github.com/docker/docker/daemon/graphdriver"
22
+	"github.com/docker/docker/pkg/idtools"
22 23
 	"github.com/docker/docker/pkg/mount"
23 24
 )
24 25
 
... ...
@@ -28,7 +29,7 @@ func init() {
28 28
 
29 29
 // Init returns a new BTRFS driver.
30 30
 // An error is returned if BTRFS is not supported.
31
-func Init(home string, options []string) (graphdriver.Driver, error) {
31
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
32 32
 	rootdir := path.Dir(home)
33 33
 
34 34
 	var buf syscall.Statfs_t
... ...
@@ -40,7 +41,11 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
40 40
 		return nil, graphdriver.ErrPrerequisites
41 41
 	}
42 42
 
43
-	if err := os.MkdirAll(home, 0700); err != nil {
43
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
44
+	if err != nil {
45
+		return nil, err
46
+	}
47
+	if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
44 48
 		return nil, err
45 49
 	}
46 50
 
... ...
@@ -49,16 +54,20 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
49 49
 	}
50 50
 
51 51
 	driver := &Driver{
52
-		home: home,
52
+		home:    home,
53
+		uidMaps: uidMaps,
54
+		gidMaps: gidMaps,
53 55
 	}
54 56
 
55
-	return graphdriver.NewNaiveDiffDriver(driver), nil
57
+	return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil
56 58
 }
57 59
 
58 60
 // Driver contains information about the filesystem mounted.
59 61
 type Driver struct {
60 62
 	//root of the file system
61
-	home string
63
+	home    string
64
+	uidMaps []idtools.IDMap
65
+	gidMaps []idtools.IDMap
62 66
 }
63 67
 
64 68
 // String prints the name of the driver (btrfs).
... ...
@@ -226,7 +235,11 @@ func (d *Driver) subvolumesDirID(id string) string {
226 226
 // Create the filesystem with given id.
227 227
 func (d *Driver) Create(id string, parent string) error {
228 228
 	subvolumes := path.Join(d.home, "subvolumes")
229
-	if err := os.MkdirAll(subvolumes, 0700); err != nil {
229
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
230
+	if err != nil {
231
+		return err
232
+	}
233
+	if err := idtools.MkdirAllAs(subvolumes, 0700, rootUID, rootGID); err != nil {
230 234
 		return err
231 235
 	}
232 236
 	if parent == "" {
... ...
@@ -19,11 +19,14 @@ import (
19 19
 	"time"
20 20
 
21 21
 	"github.com/Sirupsen/logrus"
22
+
22 23
 	"github.com/docker/docker/daemon/graphdriver"
23 24
 	"github.com/docker/docker/pkg/devicemapper"
25
+	"github.com/docker/docker/pkg/idtools"
24 26
 	"github.com/docker/docker/pkg/mount"
25 27
 	"github.com/docker/docker/pkg/parsers"
26 28
 	"github.com/docker/docker/pkg/units"
29
+
27 30
 	"github.com/opencontainers/runc/libcontainer/label"
28 31
 )
29 32
 
... ...
@@ -113,6 +116,8 @@ type DeviceSet struct {
113 113
 	BaseDeviceUUID        string //save UUID of base device
114 114
 	nrDeletedDevices      uint   //number of deleted devices
115 115
 	deletionWorkerTicker  *time.Ticker
116
+	uidMaps               []idtools.IDMap
117
+	gidMaps               []idtools.IDMap
116 118
 }
117 119
 
118 120
 // DiskUsage contains information about disk usage and is used when reporting Status of a device.
... ...
@@ -250,7 +255,11 @@ func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
250 250
 	dirname := devices.loopbackDir()
251 251
 	filename := path.Join(dirname, name)
252 252
 
253
-	if err := os.MkdirAll(dirname, 0700); err != nil {
253
+	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
254
+	if err != nil {
255
+		return "", err
256
+	}
257
+	if err := idtools.MkdirAllAs(dirname, 0700, uid, gid); err != nil && !os.IsExist(err) {
254 258
 		return "", err
255 259
 	}
256 260
 
... ...
@@ -1448,7 +1457,16 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
1448 1448
 		logrus.Warn("Udev sync is not supported. This will lead to unexpected behavior, data loss and errors. For more information, see https://docs.docker.com/reference/commandline/daemon/#daemon-storage-driver-option")
1449 1449
 	}
1450 1450
 
1451
-	if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil {
1451
+	//create the root dir of the devmapper driver ownership to match this
1452
+	//daemon's remapped root uid/gid so containers can start properly
1453
+	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
1454
+	if err != nil {
1455
+		return err
1456
+	}
1457
+	if err := idtools.MkdirAs(devices.root, 0700, uid, gid); err != nil && !os.IsExist(err) {
1458
+		return err
1459
+	}
1460
+	if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil && !os.IsExist(err) {
1452 1461
 		return err
1453 1462
 	}
1454 1463
 
... ...
@@ -2230,7 +2248,7 @@ func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, er
2230 2230
 }
2231 2231
 
2232 2232
 // NewDeviceSet creates the device set based on the options provided.
2233
-func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error) {
2233
+func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) {
2234 2234
 	devicemapper.SetDevDir("/dev")
2235 2235
 
2236 2236
 	devices := &DeviceSet{
... ...
@@ -2245,6 +2263,8 @@ func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error
2245 2245
 		thinpBlockSize:        defaultThinpBlockSize,
2246 2246
 		deviceIDMap:           make([]byte, deviceIDMapSz),
2247 2247
 		deletionWorkerTicker:  time.NewTicker(time.Second * 30),
2248
+		uidMaps:               uidMaps,
2249
+		gidMaps:               gidMaps,
2248 2250
 	}
2249 2251
 
2250 2252
 	foundBlkDiscard := false
... ...
@@ -67,7 +67,7 @@ func testChangeLoopBackSize(t *testing.T, delta, expectDataSize, expectMetaDataS
67 67
 	d, err := Init(driver.home, []string{
68 68
 		fmt.Sprintf("dm.loopdatasize=%d", defaultDataLoopbackSize+delta),
69 69
 		fmt.Sprintf("dm.loopmetadatasize=%d", defaultMetaDataLoopbackSize+delta),
70
-	})
70
+	}, nil, nil)
71 71
 	if err != nil {
72 72
 		t.Fatalf("error creating devicemapper driver: %v", err)
73 73
 	}
... ...
@@ -10,8 +10,10 @@ import (
10 10
 	"strconv"
11 11
 
12 12
 	"github.com/Sirupsen/logrus"
13
+
13 14
 	"github.com/docker/docker/daemon/graphdriver"
14 15
 	"github.com/docker/docker/pkg/devicemapper"
16
+	"github.com/docker/docker/pkg/idtools"
15 17
 	"github.com/docker/docker/pkg/mount"
16 18
 	"github.com/docker/docker/pkg/units"
17 19
 )
... ...
@@ -28,13 +30,15 @@ func init() {
28 28
 // Driver contains the device set mounted and the home directory
29 29
 type Driver struct {
30 30
 	*DeviceSet
31
-	home string
31
+	home    string
32
+	uidMaps []idtools.IDMap
33
+	gidMaps []idtools.IDMap
32 34
 }
33 35
 
34 36
 var backingFs = "<unknown>"
35 37
 
36 38
 // Init creates a driver with the given home and the set of options.
37
-func Init(home string, options []string) (graphdriver.Driver, error) {
39
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
38 40
 	fsMagic, err := graphdriver.GetFSMagic(home)
39 41
 	if err != nil {
40 42
 		return nil, err
... ...
@@ -43,7 +47,7 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
43 43
 		backingFs = fsName
44 44
 	}
45 45
 
46
-	deviceSet, err := NewDeviceSet(home, true, options)
46
+	deviceSet, err := NewDeviceSet(home, true, options, uidMaps, gidMaps)
47 47
 	if err != nil {
48 48
 		return nil, err
49 49
 	}
... ...
@@ -55,9 +59,11 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
55 55
 	d := &Driver{
56 56
 		DeviceSet: deviceSet,
57 57
 		home:      home,
58
+		uidMaps:   uidMaps,
59
+		gidMaps:   gidMaps,
58 60
 	}
59 61
 
60
-	return graphdriver.NewNaiveDiffDriver(d), nil
62
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
61 63
 }
62 64
 
63 65
 func (d *Driver) String() string {
... ...
@@ -160,8 +166,15 @@ func (d *Driver) Remove(id string) error {
160 160
 func (d *Driver) Get(id, mountLabel string) (string, error) {
161 161
 	mp := path.Join(d.home, "mnt", id)
162 162
 
163
+	uid, gid, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
164
+	if err != nil {
165
+		return "", err
166
+	}
163 167
 	// Create the target directories if they don't exist
164
-	if err := os.MkdirAll(mp, 0755); err != nil {
168
+	if err := idtools.MkdirAllAs(path.Join(d.home, "mnt"), 0755, uid, gid); err != nil && !os.IsExist(err) {
169
+		return "", err
170
+	}
171
+	if err := idtools.MkdirAs(mp, 0755, uid, gid); err != nil && !os.IsExist(err) {
165 172
 		return "", err
166 173
 	}
167 174
 
... ...
@@ -171,7 +184,7 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
171 171
 	}
172 172
 
173 173
 	rootFs := path.Join(mp, "rootfs")
174
-	if err := os.MkdirAll(rootFs, 0755); err != nil {
174
+	if err := idtools.MkdirAllAs(rootFs, 0755, uid, gid); err != nil && !os.IsExist(err) {
175 175
 		d.DeviceSet.UnmountDevice(id)
176 176
 		return "", err
177 177
 	}
... ...
@@ -8,7 +8,9 @@ import (
8 8
 	"strings"
9 9
 
10 10
 	"github.com/Sirupsen/logrus"
11
+
11 12
 	"github.com/docker/docker/pkg/archive"
13
+	"github.com/docker/docker/pkg/idtools"
12 14
 )
13 15
 
14 16
 // FsMagic unsigned id of the filesystem in use.
... ...
@@ -34,7 +36,7 @@ var (
34 34
 )
35 35
 
36 36
 // InitFunc initializes the storage driver.
37
-type InitFunc func(root string, options []string) (Driver, error)
37
+type InitFunc func(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error)
38 38
 
39 39
 // ProtoDriver defines the basic capabilities of a driver.
40 40
 // This interface exists solely to be a minimum set of methods
... ...
@@ -46,7 +48,6 @@ type ProtoDriver interface {
46 46
 	// String returns a string representation of this driver.
47 47
 	String() string
48 48
 	// Create creates a new, empty, filesystem layer with the
49
-	// specified id and parent. Parent may be "".
50 49
 	Create(id, parent string) error
51 50
 	// Remove attempts to remove the filesystem layer with this id.
52 51
 	Remove(id string) error
... ...
@@ -107,9 +108,9 @@ func Register(name string, initFunc InitFunc) error {
107 107
 }
108 108
 
109 109
 // GetDriver initializes and returns the registered driver
110
-func GetDriver(name, home string, options []string) (Driver, error) {
110
+func GetDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
111 111
 	if initFunc, exists := drivers[name]; exists {
112
-		return initFunc(filepath.Join(home, name), options)
112
+		return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
113 113
 	}
114 114
 	if pluginDriver, err := lookupPlugin(name, home, options); err == nil {
115 115
 		return pluginDriver, nil
... ...
@@ -119,20 +120,20 @@ func GetDriver(name, home string, options []string) (Driver, error) {
119 119
 }
120 120
 
121 121
 // getBuiltinDriver initalizes and returns the registered driver, but does not try to load from plugins
122
-func getBuiltinDriver(name, home string, options []string) (Driver, error) {
122
+func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
123 123
 	if initFunc, exists := drivers[name]; exists {
124
-		return initFunc(filepath.Join(home, name), options)
124
+		return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
125 125
 	}
126 126
 	logrus.Errorf("Failed to built-in GetDriver graph %s %s", name, home)
127 127
 	return nil, ErrNotSupported
128 128
 }
129 129
 
130 130
 // New creates the driver and initializes it at the specified root.
131
-func New(root string, options []string) (driver Driver, err error) {
131
+func New(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (driver Driver, err error) {
132 132
 	for _, name := range []string{os.Getenv("DOCKER_DRIVER"), DefaultDriver} {
133 133
 		if name != "" {
134 134
 			logrus.Debugf("[graphdriver] trying provided driver %q", name) // so the logs show specified driver
135
-			return GetDriver(name, root, options)
135
+			return GetDriver(name, root, options, uidMaps, gidMaps)
136 136
 		}
137 137
 	}
138 138
 
... ...
@@ -147,7 +148,7 @@ func New(root string, options []string) (driver Driver, err error) {
147 147
 			// of the state found from prior drivers, check in order of our priority
148 148
 			// which we would prefer
149 149
 			if prior == name {
150
-				driver, err = getBuiltinDriver(name, root, options)
150
+				driver, err = getBuiltinDriver(name, root, options, uidMaps, gidMaps)
151 151
 				if err != nil {
152 152
 					// unlike below, we will return error here, because there is prior
153 153
 					// state, and now it is no longer supported/prereq/compatible, so
... ...
@@ -167,7 +168,7 @@ func New(root string, options []string) (driver Driver, err error) {
167 167
 
168 168
 	// Check for priority drivers first
169 169
 	for _, name := range priority {
170
-		driver, err = getBuiltinDriver(name, root, options)
170
+		driver, err = getBuiltinDriver(name, root, options, uidMaps, gidMaps)
171 171
 		if err != nil {
172 172
 			if err == ErrNotSupported || err == ErrPrerequisites || err == ErrIncompatibleFS {
173 173
 				continue
... ...
@@ -179,7 +180,7 @@ func New(root string, options []string) (driver Driver, err error) {
179 179
 
180 180
 	// Check all registered drivers if no priority driver is found
181 181
 	for _, initFunc := range drivers {
182
-		if driver, err = initFunc(root, options); err != nil {
182
+		if driver, err = initFunc(root, options, uidMaps, gidMaps); err != nil {
183 183
 			if err == ErrNotSupported || err == ErrPrerequisites || err == ErrIncompatibleFS {
184 184
 				continue
185 185
 			}
... ...
@@ -6,8 +6,10 @@ import (
6 6
 	"time"
7 7
 
8 8
 	"github.com/Sirupsen/logrus"
9
+
9 10
 	"github.com/docker/docker/pkg/archive"
10 11
 	"github.com/docker/docker/pkg/chrootarchive"
12
+	"github.com/docker/docker/pkg/idtools"
11 13
 	"github.com/docker/docker/pkg/ioutils"
12 14
 )
13 15
 
... ...
@@ -18,6 +20,8 @@ import (
18 18
 // Notably, the AUFS driver doesn't need to be wrapped like this.
19 19
 type NaiveDiffDriver struct {
20 20
 	ProtoDriver
21
+	uidMaps []idtools.IDMap
22
+	gidMaps []idtools.IDMap
21 23
 }
22 24
 
23 25
 // NewNaiveDiffDriver returns a fully functional driver that wraps the
... ...
@@ -27,8 +31,10 @@ type NaiveDiffDriver struct {
27 27
 //     Changes(id, parent string) ([]archive.Change, error)
28 28
 //     ApplyDiff(id, parent string, diff archive.Reader) (size int64, err error)
29 29
 //     DiffSize(id, parent string) (size int64, err error)
30
-func NewNaiveDiffDriver(driver ProtoDriver) Driver {
31
-	return &NaiveDiffDriver{ProtoDriver: driver}
30
+func NewNaiveDiffDriver(driver ProtoDriver, uidMaps, gidMaps []idtools.IDMap) Driver {
31
+	return &NaiveDiffDriver{ProtoDriver: driver,
32
+		uidMaps: uidMaps,
33
+		gidMaps: gidMaps}
32 34
 }
33 35
 
34 36
 // Diff produces an archive of the changes between the specified
... ...
@@ -70,7 +76,7 @@ func (gdw *NaiveDiffDriver) Diff(id, parent string) (arch archive.Archive, err e
70 70
 		return nil, err
71 71
 	}
72 72
 
73
-	archive, err := archive.ExportChanges(layerFs, changes)
73
+	archive, err := archive.ExportChanges(layerFs, changes, gdw.uidMaps, gdw.gidMaps)
74 74
 	if err != nil {
75 75
 		return nil, err
76 76
 	}
... ...
@@ -119,9 +125,11 @@ func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, diff archive.Reader) (s
119 119
 	}
120 120
 	defer driver.Put(id)
121 121
 
122
+	options := &archive.TarOptions{UIDMaps: gdw.uidMaps,
123
+		GIDMaps: gdw.gidMaps}
122 124
 	start := time.Now().UTC()
123 125
 	logrus.Debugf("Start untar layer")
124
-	if size, err = chrootarchive.ApplyUncompressedLayer(layerFs, diff); err != nil {
126
+	if size, err = chrootarchive.ApplyUncompressedLayer(layerFs, diff, options); err != nil {
125 127
 		return
126 128
 	}
127 129
 	logrus.Debugf("Untar time: %vs", time.Now().UTC().Sub(start).Seconds())
... ...
@@ -74,7 +74,7 @@ func newDriver(t *testing.T, name string) *Driver {
74 74
 		t.Fatal(err)
75 75
 	}
76 76
 
77
-	d, err := graphdriver.GetDriver(name, root, nil)
77
+	d, err := graphdriver.GetDriver(name, root, nil, nil, nil)
78 78
 	if err != nil {
79 79
 		t.Logf("graphdriver: %v\n", err)
80 80
 		if err == graphdriver.ErrNotSupported || err == graphdriver.ErrPrerequisites || err == graphdriver.ErrIncompatibleFS {
... ...
@@ -13,9 +13,12 @@ import (
13 13
 	"syscall"
14 14
 
15 15
 	"github.com/Sirupsen/logrus"
16
+
16 17
 	"github.com/docker/docker/daemon/graphdriver"
17 18
 	"github.com/docker/docker/pkg/archive"
18 19
 	"github.com/docker/docker/pkg/chrootarchive"
20
+	"github.com/docker/docker/pkg/idtools"
21
+
19 22
 	"github.com/opencontainers/runc/libcontainer/label"
20 23
 )
21 24
 
... ...
@@ -41,9 +44,9 @@ type naiveDiffDriverWithApply struct {
41 41
 }
42 42
 
43 43
 // NaiveDiffDriverWithApply returns a NaiveDiff driver with custom ApplyDiff.
44
-func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver) graphdriver.Driver {
44
+func NaiveDiffDriverWithApply(driver ApplyDiffProtoDriver, uidMaps, gidMaps []idtools.IDMap) graphdriver.Driver {
45 45
 	return &naiveDiffDriverWithApply{
46
-		Driver:    graphdriver.NewNaiveDiffDriver(driver),
46
+		Driver:    graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps),
47 47
 		applyDiff: driver,
48 48
 	}
49 49
 }
... ...
@@ -98,6 +101,8 @@ type Driver struct {
98 98
 	home       string
99 99
 	sync.Mutex // Protects concurrent modification to active
100 100
 	active     map[string]*ActiveMount
101
+	uidMaps    []idtools.IDMap
102
+	gidMaps    []idtools.IDMap
101 103
 }
102 104
 
103 105
 var backingFs = "<unknown>"
... ...
@@ -109,7 +114,7 @@ func init() {
109 109
 // Init returns the NaiveDiffDriver, a native diff driver for overlay filesystem.
110 110
 // If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
111 111
 // If a overlay filesystem is not supported over a existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
112
-func Init(home string, options []string) (graphdriver.Driver, error) {
112
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
113 113
 
114 114
 	if err := supportsOverlay(); err != nil {
115 115
 		return nil, graphdriver.ErrNotSupported
... ...
@@ -136,17 +141,23 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
136 136
 		return nil, graphdriver.ErrIncompatibleFS
137 137
 	}
138 138
 
139
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
140
+	if err != nil {
141
+		return nil, err
142
+	}
139 143
 	// Create the driver home dir
140
-	if err := os.MkdirAll(home, 0755); err != nil {
144
+	if err := idtools.MkdirAllAs(home, 0755, rootUID, rootGID); err != nil && !os.IsExist(err) {
141 145
 		return nil, err
142 146
 	}
143 147
 
144 148
 	d := &Driver{
145
-		home:   home,
146
-		active: make(map[string]*ActiveMount),
149
+		home:    home,
150
+		active:  make(map[string]*ActiveMount),
151
+		uidMaps: uidMaps,
152
+		gidMaps: gidMaps,
147 153
 	}
148 154
 
149
-	return NaiveDiffDriverWithApply(d), nil
155
+	return NaiveDiffDriverWithApply(d, uidMaps, gidMaps), nil
150 156
 }
151 157
 
152 158
 func supportsOverlay() error {
... ...
@@ -221,10 +232,15 @@ func (d *Driver) Cleanup() error {
221 221
 // The parent filesystem is used to configure these directories for the overlay.
222 222
 func (d *Driver) Create(id string, parent string) (retErr error) {
223 223
 	dir := d.dir(id)
224
-	if err := os.MkdirAll(path.Dir(dir), 0700); err != nil {
224
+
225
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
226
+	if err != nil {
227
+		return err
228
+	}
229
+	if err := idtools.MkdirAllAs(path.Dir(dir), 0700, rootUID, rootGID); err != nil {
225 230
 		return err
226 231
 	}
227
-	if err := os.Mkdir(dir, 0700); err != nil {
232
+	if err := idtools.MkdirAs(dir, 0700, rootUID, rootGID); err != nil {
228 233
 		return err
229 234
 	}
230 235
 
... ...
@@ -237,7 +253,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
237 237
 
238 238
 	// Toplevel images are just a "root" dir
239 239
 	if parent == "" {
240
-		if err := os.Mkdir(path.Join(dir, "root"), 0755); err != nil {
240
+		if err := idtools.MkdirAs(path.Join(dir, "root"), 0755, rootUID, rootGID); err != nil {
241 241
 			return err
242 242
 		}
243 243
 		return nil
... ...
@@ -260,7 +276,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
260 260
 		if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
261 261
 			return err
262 262
 		}
263
-		if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
263
+		if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
264 264
 			return err
265 265
 		}
266 266
 		if err := ioutil.WriteFile(path.Join(dir, "lower-id"), []byte(parent), 0666); err != nil {
... ...
@@ -293,7 +309,7 @@ func (d *Driver) Create(id string, parent string) (retErr error) {
293 293
 	if err := os.Mkdir(path.Join(dir, "work"), 0700); err != nil {
294 294
 		return err
295 295
 	}
296
-	if err := os.Mkdir(path.Join(dir, "merged"), 0700); err != nil {
296
+	if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
297 297
 		return err
298 298
 	}
299 299
 
... ...
@@ -349,6 +365,12 @@ func (d *Driver) Get(id string, mountLabel string) (string, error) {
349 349
 	if err := syscall.Mount("overlay", mergedDir, "overlay", 0, label.FormatMountLabel(opts, mountLabel)); err != nil {
350 350
 		return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
351 351
 	}
352
+	// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
353
+	// user namespace requires this to move a directory from lower to upper.
354
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
355
+	if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
356
+		return "", err
357
+	}
352 358
 	mount.path = mergedDir
353 359
 	mount.mounted = true
354 360
 	d.active[id] = mount
... ...
@@ -431,7 +453,8 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.Reader) (size
431 431
 		return 0, err
432 432
 	}
433 433
 
434
-	if size, err = chrootarchive.ApplyUncompressedLayer(tmpRootDir, diff); err != nil {
434
+	options := &archive.TarOptions{UIDMaps: d.uidMaps, GIDMaps: d.gidMaps}
435
+	if size, err = chrootarchive.ApplyUncompressedLayer(tmpRootDir, diff, options); err != nil {
435 436
 		return 0, err
436 437
 	}
437 438
 
... ...
@@ -9,7 +9,8 @@ import (
9 9
 
10 10
 	"github.com/docker/docker/daemon/graphdriver"
11 11
 	"github.com/docker/docker/pkg/chrootarchive"
12
-	"github.com/docker/docker/pkg/system"
12
+	"github.com/docker/docker/pkg/idtools"
13
+
13 14
 	"github.com/opencontainers/runc/libcontainer/label"
14 15
 )
15 16
 
... ...
@@ -19,11 +20,20 @@ func init() {
19 19
 
20 20
 // Init returns a new VFS driver.
21 21
 // This sets the home directory for the driver and returns NaiveDiffDriver.
22
-func Init(home string, options []string) (graphdriver.Driver, error) {
22
+func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
23 23
 	d := &Driver{
24
-		home: home,
24
+		home:    home,
25
+		uidMaps: uidMaps,
26
+		gidMaps: gidMaps,
27
+	}
28
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
29
+	if err != nil {
30
+		return nil, err
31
+	}
32
+	if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
33
+		return nil, err
25 34
 	}
26
-	return graphdriver.NewNaiveDiffDriver(d), nil
35
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
27 36
 }
28 37
 
29 38
 // Driver holds information about the driver, home directory of the driver.
... ...
@@ -31,7 +41,9 @@ func Init(home string, options []string) (graphdriver.Driver, error) {
31 31
 // In order to support layering, files are copied from the parent layer into the new layer. There is no copy-on-write support.
32 32
 // Driver must be wrapped in NaiveDiffDriver to be used as a graphdriver.Driver
33 33
 type Driver struct {
34
-	home string
34
+	home    string
35
+	uidMaps []idtools.IDMap
36
+	gidMaps []idtools.IDMap
35 37
 }
36 38
 
37 39
 func (d *Driver) String() string {
... ...
@@ -56,10 +68,14 @@ func (d *Driver) Cleanup() error {
56 56
 // Create prepares the filesystem for the VFS driver and copies the directory for the given id under the parent.
57 57
 func (d *Driver) Create(id, parent string) error {
58 58
 	dir := d.dir(id)
59
-	if err := system.MkdirAll(filepath.Dir(dir), 0700); err != nil {
59
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
60
+	if err != nil {
61
+		return err
62
+	}
63
+	if err := idtools.MkdirAllAs(filepath.Dir(dir), 0700, rootUID, rootGID); err != nil {
60 64
 		return err
61 65
 	}
62
-	if err := os.Mkdir(dir, 0755); err != nil {
66
+	if err := idtools.MkdirAs(dir, 0755, rootUID, rootGID); err != nil {
63 67
 		return err
64 68
 	}
65 69
 	opts := []string{"level:s0"}
... ...
@@ -21,6 +21,7 @@ import (
21 21
 	"github.com/docker/docker/image"
22 22
 	"github.com/docker/docker/pkg/archive"
23 23
 	"github.com/docker/docker/pkg/chrootarchive"
24
+	"github.com/docker/docker/pkg/idtools"
24 25
 	"github.com/docker/docker/pkg/ioutils"
25 26
 	"github.com/docker/docker/pkg/random"
26 27
 	"github.com/microsoft/hcsshim"
... ...
@@ -50,7 +51,7 @@ type Driver struct {
50 50
 }
51 51
 
52 52
 // InitFilter returns a new Windows storage filter driver.
53
-func InitFilter(home string, options []string) (graphdriver.Driver, error) {
53
+func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
54 54
 	logrus.Debugf("WindowsGraphDriver InitFilter at %s", home)
55 55
 	d := &Driver{
56 56
 		info: hcsshim.DriverInfo{
... ...
@@ -63,7 +64,7 @@ func InitFilter(home string, options []string) (graphdriver.Driver, error) {
63 63
 }
64 64
 
65 65
 // InitDiff returns a new Windows differencing disk driver.
66
-func InitDiff(home string, options []string) (graphdriver.Driver, error) {
66
+func InitDiff(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
67 67
 	logrus.Debugf("WindowsGraphDriver InitDiff at %s", home)
68 68
 	d := &Driver{
69 69
 		info: hcsshim.DriverInfo{
... ...
@@ -328,7 +329,7 @@ func (d *Driver) ApplyDiff(id, parent string, diff archive.Reader) (size int64,
328 328
 		logrus.Debugf("WindowsGraphDriver ApplyDiff: Start untar layer")
329 329
 		destination := d.dir(id)
330 330
 		destination = filepath.Dir(destination)
331
-		if size, err = chrootarchive.ApplyUncompressedLayer(destination, diff); err != nil {
331
+		if size, err = chrootarchive.ApplyUncompressedLayer(destination, diff, nil); err != nil {
332 332
 			return
333 333
 		}
334 334
 		logrus.Debugf("WindowsGraphDriver ApplyDiff: Untar time: %vs", time.Now().UTC().Sub(start).Seconds())
... ...
@@ -15,6 +15,7 @@ import (
15 15
 
16 16
 	"github.com/Sirupsen/logrus"
17 17
 	"github.com/docker/docker/daemon/graphdriver"
18
+	"github.com/docker/docker/pkg/idtools"
18 19
 	"github.com/docker/docker/pkg/mount"
19 20
 	"github.com/docker/docker/pkg/parsers"
20 21
 	zfs "github.com/mistifyio/go-zfs"
... ...
@@ -41,7 +42,7 @@ func (*Logger) Log(cmd []string) {
41 41
 // Init returns a new ZFS driver.
42 42
 // It takes base mount path and a array of options which are represented as key value pairs.
43 43
 // Each option is in the for key=value. 'zfs.fsname' is expected to be a valid key in the options.
44
-func Init(base string, opt []string) (graphdriver.Driver, error) {
44
+func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
45 45
 	var err error
46 46
 
47 47
 	if _, err := exec.LookPath("zfs"); err != nil {
... ...
@@ -102,8 +103,10 @@ func Init(base string, opt []string) (graphdriver.Driver, error) {
102 102
 		dataset:          rootDataset,
103 103
 		options:          options,
104 104
 		filesystemsCache: filesystemsCache,
105
+		uidMaps:          uidMaps,
106
+		gidMaps:          gidMaps,
105 107
 	}
106
-	return graphdriver.NewNaiveDiffDriver(d), nil
108
+	return graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps), nil
107 109
 }
108 110
 
109 111
 func parseOptions(opt []string) (zfsOptions, error) {
... ...
@@ -156,6 +159,8 @@ type Driver struct {
156 156
 	options          zfsOptions
157 157
 	sync.Mutex       // protects filesystem cache against concurrent access
158 158
 	filesystemsCache map[string]bool
159
+	uidMaps          []idtools.IDMap
160
+	gidMaps          []idtools.IDMap
159 161
 }
160 162
 
161 163
 func (d *Driver) String() string {
... ...
@@ -294,12 +299,16 @@ func (d *Driver) Get(id, mountLabel string) (string, error) {
294 294
 	options := label.FormatMountLabel("", mountLabel)
295 295
 	logrus.Debugf(`[zfs] mount("%s", "%s", "%s")`, filesystem, mountpoint, options)
296 296
 
297
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
298
+	if err != nil {
299
+		return "", err
300
+	}
297 301
 	// Create the target directories if they don't exist
298
-	if err := os.MkdirAll(mountpoint, 0755); err != nil {
302
+	if err := idtools.MkdirAllAs(mountpoint, 0755, rootUID, rootGID); err != nil {
299 303
 		return "", err
300 304
 	}
301 305
 
302
-	err := mount.Mount(filesystem, mountpoint, "zfs", options)
306
+	err = mount.Mount(filesystem, mountpoint, "zfs", options)
303 307
 	if err != nil {
304 308
 		return "", fmt.Errorf("error creating zfs mount of %s to %s: %v", filesystem, mountpoint, err)
305 309
 	}
... ...
@@ -55,7 +55,17 @@ func (container *Container) setupMounts() ([]execdriver.Mount, error) {
55 55
 	}
56 56
 
57 57
 	mounts = sortMounts(mounts)
58
-	return append(mounts, container.networkMounts()...), nil
58
+	netMounts := container.networkMounts()
59
+	// if we are going to mount any of the network files from container
60
+	// metadata, the ownership must be set properly for potential container
61
+	// remapped root (user namespaces)
62
+	rootUID, rootGID := container.daemon.GetRemappedUIDGID()
63
+	for _, mount := range netMounts {
64
+		if err := os.Chown(mount.Source, rootUID, rootGID); err != nil {
65
+			return nil, err
66
+		}
67
+	}
68
+	return append(mounts, netMounts...), nil
59 69
 }
60 70
 
61 71
 // parseBindMount validates the configuration of mount information in runconfig is valid.
... ...
@@ -71,6 +71,9 @@ to build a Docker binary with the experimental features enabled:
71 71
 
72 72
 ## Current experimental features
73 73
 
74
+ * [External graphdriver plugins](plugins_graphdriver.md)
75
+ * [User namespaces](userns.md)
76
+
74 77
 ## How to comment on an experimental feature
75 78
 
76 79
 Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.  
77 80
new file mode 100644
... ...
@@ -0,0 +1,120 @@
0
+# Experimental: User namespace support
1
+
2
+Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling
3
+a process--and therefore a container--to have a unique range of user and
4
+group IDs which are outside the traditional user and group range utilized by
5
+the host system. Potentially the most important security improvement is that,
6
+by default, container processes running as the `root` user will have expected
7
+administrative privilege (with some restrictions) inside the container but will
8
+effectively be mapped to an unprivileged `uid` on the host.
9
+
10
+In this experimental phase, the Docker daemon creates a single daemon-wide mapping
11
+for all containers running on the same engine instance. The mappings will
12
+utilize the existing subordinate user and group ID feature available on all modern
13
+Linux distributions.
14
+The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and 
15
+[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be
16
+read for the user, and optional group, specified to the `--userns-remap` 
17
+parameter.  If you do not wish to specify your own user and/or group, you can 
18
+provide `default` as the value to this flag, and a user will be created on your behalf
19
+and provided subordinate uid and gid ranges. This default user will be named
20
+`dockremap`, and entries will be created for it in `/etc/passwd` and 
21
+`/etc/group` using your distro's standard user and group creation tools.
22
+
23
+> **Note**: The single mapping per-daemon restriction exists for this experimental
24
+> phase because Docker shares image layers from its local cache across all
25
+> containers running on the engine instance.  Since file ownership must be
26
+> the same for all containers sharing the same layer content, the decision
27
+> was made to map the file ownership on `docker pull` to the daemon's user and
28
+> group mappings so that there is no delay for running containers once the
29
+> content is downloaded--exactly the same performance characteristics as with
30
+> user namespaces disabled.
31
+
32
+## Starting the daemon with user namespaces enabled
33
+To enable this experimental user namespace support for a Docker daemon instance,
34
+start the daemon with the aforementioned `--userns-remap` flag, which accepts
35
+values in the following formats:
36
+
37
+ - uid
38
+ - uid:gid
39
+ - username
40
+ - username:groupname
41
+
42
+If numeric IDs are provided, translation back to valid user or group names
43
+will occur so that the subordinate uid and gid information can be read, given
44
+these resources are name-based, not id-based.  If the numeric ID information
45
+provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon
46
+startup will fail with an error message.
47
+
48
+*An example: starting with default Docker user management:*
49
+
50
+```
51
+     $ docker daemon --userns-remap=default
52
+```    
53
+In this case, Docker will create--or find the existing--user and group
54
+named `dockremap`. If the user is created, and the Linux distribution has
55
+appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated
56
+with a contiguous 65536 length range of subordinate user and group IDs, starting
57
+at an offset based on prior entries in those files.  For example, Ubuntu will
58
+create the following range, based on an existing user already having the first
59
+65536 range:
60
+
61
+```
62
+     $ cat /etc/subuid
63
+     user1:100000:65536
64
+     dockremap:165536:65536
65
+```
66
+
67
+> **Note:** On a fresh Fedora install, we found that we had to `touch` the
68
+> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users
69
+> were created.  Once these files existed, range assignment on user creation
70
+> worked properly.
71
+
72
+If you have a preferred/self-managed user with subordinate ID mappings already
73
+configured, you can provide that username or uid to the `--userns-remap` flag.
74
+If you have a group that doesn't match the username, you may provide the `gid`
75
+or group name as well; otherwise the username will be used as the group name
76
+when querying the system for the subordinate group ID range.
77
+
78
+## Detailed information on `subuid`/`subgid` ranges
79
+
80
+Given there may be advanced use of the subordinate ID ranges by power users, we will
81
+describe how the Docker daemon uses the range entries within these files under the
82
+current experimental user namespace support.
83
+
84
+The simplest case exists where only one contiguous range is defined for the
85
+provided user or group. In this case, Docker will use that entire contiguous
86
+range for the mapping of host uids and gids to the container process.  This 
87
+means that the first ID in the range will be the remapped root user, and the
88
+IDs above that initial ID will map host ID 1 through the end of the range.
89
+
90
+From the example `/etc/subuid` content shown above, that means the remapped root
91
+user would be uid 165536.
92
+
93
+If the system administrator has set up multiple ranges for a single user or
94
+group, the Docker daemon will read all the available ranges and use the
95
+following algorithm to create the mapping ranges:
96
+
97
+1. The ranges will be sorted by *start ID* ascending
98
+2. Maps will be created from each range, where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on.  This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths.
99
+3. Range segments beyond the first five will be ignored, as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`)
100
+
101
+## User namespace known restrictions
102
+
103
+The following standard Docker features are currently incompatible when
104
+running a Docker daemon with experimental user namespaces enabled:
105
+
106
+ - sharing namespaces with the host (--pid=host, --net=host, etc.)
107
+ - sharing namespaces with other containers (--net=container:*other*)
108
+ - A `--read-only` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace)
109
+ - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings
110
+ - Using `--privileged` mode containers
111
+ - Using the lxc execdriver (only the `native` execdriver is enabled to use user namespaces)
112
+ - volume use without pre-arranging proper file ownership in mounted volumes
113
+
114
+Additionally, while the `root` user inside a user namespaced container
115
+process has many of the privileges of the administrative root user, the
116
+following operations will fail:
117
+
118
+ - Use of `mknod` - permission is denied for device creation by the container root
119
+ - others will be listed here when fully tested
... ...
@@ -21,10 +21,10 @@ import (
21 21
 	"github.com/docker/docker/daemon/graphdriver"
22 22
 	"github.com/docker/docker/image"
23 23
 	"github.com/docker/docker/pkg/archive"
24
+	"github.com/docker/docker/pkg/idtools"
24 25
 	"github.com/docker/docker/pkg/progressreader"
25 26
 	"github.com/docker/docker/pkg/streamformatter"
26 27
 	"github.com/docker/docker/pkg/stringid"
27
-	"github.com/docker/docker/pkg/system"
28 28
 	"github.com/docker/docker/pkg/truncindex"
29 29
 	"github.com/docker/docker/runconfig"
30 30
 	"github.com/vbatts/tar-split/tar/asm"
... ...
@@ -82,6 +82,8 @@ type Graph struct {
82 82
 	imageMutex       imageMutex // protect images in driver.
83 83
 	retained         *retainedLayers
84 84
 	tarSplitDisabled bool
85
+	uidMaps          []idtools.IDMap
86
+	gidMaps          []idtools.IDMap
85 87
 }
86 88
 
87 89
 // file names for ./graph/<ID>/
... ...
@@ -101,13 +103,18 @@ var (
101 101
 
102 102
 // NewGraph instantiates a new graph at the given root path in the filesystem.
103 103
 // `root` will be created if it doesn't exist.
104
-func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
104
+func NewGraph(root string, driver graphdriver.Driver, uidMaps, gidMaps []idtools.IDMap) (*Graph, error) {
105 105
 	abspath, err := filepath.Abs(root)
106 106
 	if err != nil {
107 107
 		return nil, err
108 108
 	}
109
+
110
+	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
111
+	if err != nil {
112
+		return nil, err
113
+	}
109 114
 	// Create the root directory if it doesn't exists
110
-	if err := system.MkdirAll(root, 0700); err != nil {
115
+	if err := idtools.MkdirAllAs(root, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
111 116
 		return nil, err
112 117
 	}
113 118
 
... ...
@@ -116,6 +123,8 @@ func NewGraph(root string, driver graphdriver.Driver) (*Graph, error) {
116 116
 		idIndex:  truncindex.NewTruncIndex([]string{}),
117 117
 		driver:   driver,
118 118
 		retained: &retainedLayers{layerHolders: make(map[string]map[string]struct{})},
119
+		uidMaps:  uidMaps,
120
+		gidMaps:  gidMaps,
119 121
 	}
120 122
 
121 123
 	// Windows does not currently support tarsplit functionality.
... ...
@@ -325,7 +334,11 @@ func (graph *Graph) TempLayerArchive(id string, sf *streamformatter.StreamFormat
325 325
 // mktemp creates a temporary sub-directory inside the graph's filesystem.
326 326
 func (graph *Graph) mktemp() (string, error) {
327 327
 	dir := filepath.Join(graph.root, "_tmp", stringid.GenerateNonCryptoID())
328
-	if err := system.MkdirAll(dir, 0700); err != nil {
328
+	rootUID, rootGID, err := idtools.GetRootUIDGID(graph.uidMaps, graph.gidMaps)
329
+	if err != nil {
330
+		return "", err
331
+	}
332
+	if err := idtools.MkdirAllAs(dir, 0700, rootUID, rootGID); err != nil {
329 333
 		return "", err
330 334
 	}
331 335
 	return dir, nil
... ...
@@ -281,11 +281,11 @@ func tempGraph(t *testing.T) (*Graph, graphdriver.Driver) {
281 281
 	if err != nil {
282 282
 		t.Fatal(err)
283 283
 	}
284
-	driver, err := graphdriver.New(tmp, nil)
284
+	driver, err := graphdriver.New(tmp, nil, nil, nil)
285 285
 	if err != nil {
286 286
 		t.Fatal(err)
287 287
 	}
288
-	graph, err := NewGraph(tmp, driver)
288
+	graph, err := NewGraph(tmp, driver, nil, nil)
289 289
 	if err != nil {
290 290
 		t.Fatal(err)
291 291
 	}
... ...
@@ -54,11 +54,11 @@ func fakeTar() (io.Reader, error) {
54 54
 }
55 55
 
56 56
 func mkTestTagStore(root string, t *testing.T) *TagStore {
57
-	driver, err := graphdriver.New(root, nil)
57
+	driver, err := graphdriver.New(root, nil, nil, nil)
58 58
 	if err != nil {
59 59
 		t.Fatal(err)
60 60
 	}
61
-	graph, err := NewGraph(root, driver)
61
+	graph, err := NewGraph(root, driver, nil, nil)
62 62
 	if err != nil {
63 63
 		t.Fatal(err)
64 64
 	}
... ...
@@ -96,7 +96,7 @@ if [ ! "$GOPATH" ]; then
96 96
 	exit 1
97 97
 fi
98 98
 
99
-if [ "$DOCKER_EXPERIMENTAL" ]; then
99
+if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then
100 100
 	echo >&2 '# WARNING! DOCKER_EXPERIMENTAL is set: building experimental features'
101 101
 	echo >&2
102 102
 	DOCKER_BUILDTAGS+=" experimental"
... ...
@@ -220,6 +220,7 @@ test_env() {
220 220
 		DOCKER_GRAPHDRIVER="$DOCKER_GRAPHDRIVER" \
221 221
 		DOCKER_USERLANDPROXY="$DOCKER_USERLANDPROXY" \
222 222
 		DOCKER_HOST="$DOCKER_HOST" \
223
+		DOCKER_REMAP_ROOT="$DOCKER_REMAP_ROOT" \
223 224
 		DOCKER_REMOTE_DAEMON="$DOCKER_REMOTE_DAEMON" \
224 225
 		GOPATH="$GOPATH" \
225 226
 		HOME="$ABS_DEST/fake-HOME" \
... ...
@@ -26,6 +26,12 @@ if [ -n "$DOCKER_STORAGE_OPTS" ]; then
26 26
 	unset IFS
27 27
 fi
28 28
 
29
+# example usage: DOCKER_REMAP_ROOT=default
30
+extra_params=""
31
+if [ "$DOCKER_REMAP_ROOT" ]; then
32
+	extra_params="--userns-remap $DOCKER_REMAP_ROOT"
33
+fi
34
+
29 35
 if [ -z "$DOCKER_TEST_HOST" ]; then
30 36
 	# Start apparmor if it is enabled
31 37
 	if [ -e "/sys/module/apparmor/parameters/enabled" ] && [ "$(cat /sys/module/apparmor/parameters/enabled)" == "Y" ]; then
... ...
@@ -47,6 +53,7 @@ if [ -z "$DOCKER_TEST_HOST" ]; then
47 47
 		--pidfile "$DEST/docker.pid" \
48 48
 		--userland-proxy="$DOCKER_USERLANDPROXY" \
49 49
 		$storage_params \
50
+		$extra_params \
50 51
 			&> "$DEST/docker.log"
51 52
 	) &
52 53
 	# make sure that if the script exits unexpectedly, we stop this daemon we just started
... ...
@@ -45,6 +45,7 @@ func (s *DockerSuite) TestBuildApiDockerfilePath(c *check.C) {
45 45
 }
46 46
 
47 47
 func (s *DockerSuite) TestBuildApiDockerFileRemote(c *check.C) {
48
+	testRequires(c, NotUserNamespace)
48 49
 	testRequires(c, DaemonIsLinux)
49 50
 	server, err := fakeStorage(map[string]string{
50 51
 		"testD": `FROM busybox
... ...
@@ -1487,7 +1487,9 @@ func (s *DockerSuite) TestContainersApiCreateNoHostConfig118(c *check.C) {
1487 1487
 // extract an archive to a symlink in a writable volume which points to a
1488 1488
 // directory outside of the volume.
1489 1489
 func (s *DockerSuite) TestPutContainerArchiveErrSymlinkInVolumeToReadOnlyRootfs(c *check.C) {
1490
-	testRequires(c, SameHostDaemon) // Requires local volume mount bind.
1490
+	// Requires local volume mount bind.
1491
+	// --read-only + userns has remount issues
1492
+	testRequires(c, SameHostDaemon, NotUserNamespace)
1491 1493
 
1492 1494
 	testVol := getTestDir(c, "test-put-container-archive-err-symlink-in-volume-to-read-only-rootfs-")
1493 1495
 	defer os.RemoveAll(testVol)
... ...
@@ -2183,6 +2183,8 @@ func (s *DockerSuite) TestBuildWorkdirWithEnvVariables(c *check.C) {
2183 2183
 }
2184 2184
 
2185 2185
 func (s *DockerSuite) TestBuildRelativeCopy(c *check.C) {
2186
+	// cat /test1/test2/foo gets permission denied for the user
2187
+	testRequires(c, NotUserNamespace)
2186 2188
 	testRequires(c, DaemonIsLinux)
2187 2189
 	name := "testbuildrelativecopy"
2188 2190
 	dockerfile := `
... ...
@@ -2248,7 +2250,7 @@ func (s *DockerSuite) TestBuildContextCleanup(c *check.C) {
2248 2248
 	testRequires(c, SameHostDaemon)
2249 2249
 
2250 2250
 	name := "testbuildcontextcleanup"
2251
-	entries, err := ioutil.ReadDir("/var/lib/docker/tmp")
2251
+	entries, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
2252 2252
 	if err != nil {
2253 2253
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
2254 2254
 	}
... ...
@@ -2259,7 +2261,7 @@ func (s *DockerSuite) TestBuildContextCleanup(c *check.C) {
2259 2259
 	if err != nil {
2260 2260
 		c.Fatal(err)
2261 2261
 	}
2262
-	entriesFinal, err := ioutil.ReadDir("/var/lib/docker/tmp")
2262
+	entriesFinal, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
2263 2263
 	if err != nil {
2264 2264
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
2265 2265
 	}
... ...
@@ -2274,7 +2276,7 @@ func (s *DockerSuite) TestBuildContextCleanupFailedBuild(c *check.C) {
2274 2274
 	testRequires(c, SameHostDaemon)
2275 2275
 
2276 2276
 	name := "testbuildcontextcleanup"
2277
-	entries, err := ioutil.ReadDir("/var/lib/docker/tmp")
2277
+	entries, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
2278 2278
 	if err != nil {
2279 2279
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
2280 2280
 	}
... ...
@@ -2285,7 +2287,7 @@ func (s *DockerSuite) TestBuildContextCleanupFailedBuild(c *check.C) {
2285 2285
 	if err == nil {
2286 2286
 		c.Fatalf("expected build to fail, but it didn't")
2287 2287
 	}
2288
-	entriesFinal, err := ioutil.ReadDir("/var/lib/docker/tmp")
2288
+	entriesFinal, err := ioutil.ReadDir(filepath.Join(dockerBasePath, "tmp"))
2289 2289
 	if err != nil {
2290 2290
 		c.Fatalf("failed to list contents of tmp dir: %s", err)
2291 2291
 	}
... ...
@@ -2683,6 +2685,8 @@ func (s *DockerSuite) TestBuildConditionalCache(c *check.C) {
2683 2683
 }
2684 2684
 
2685 2685
 func (s *DockerSuite) TestBuildAddLocalFileWithCache(c *check.C) {
2686
+	// local files are not owned by the correct user
2687
+	testRequires(c, NotUserNamespace)
2686 2688
 	testRequires(c, DaemonIsLinux)
2687 2689
 	name := "testbuildaddlocalfilewithcache"
2688 2690
 	name2 := "testbuildaddlocalfilewithcache2"
... ...
@@ -2741,6 +2745,8 @@ func (s *DockerSuite) TestBuildAddMultipleLocalFileWithCache(c *check.C) {
2741 2741
 }
2742 2742
 
2743 2743
 func (s *DockerSuite) TestBuildAddLocalFileWithoutCache(c *check.C) {
2744
+	// local files are not owned by the correct user
2745
+	testRequires(c, NotUserNamespace)
2744 2746
 	testRequires(c, DaemonIsLinux)
2745 2747
 	name := "testbuildaddlocalfilewithoutcache"
2746 2748
 	name2 := "testbuildaddlocalfilewithoutcache2"
... ...
@@ -3862,6 +3868,8 @@ RUN [ "$(id -u):$(id -g)/$(id -un):$(id -gn)/$(id -G):$(id -Gn)" = '1042:1043/10
3862 3862
 }
3863 3863
 
3864 3864
 func (s *DockerSuite) TestBuildEnvUsage(c *check.C) {
3865
+	// /docker/world/hello is not owned by the correct user
3866
+	testRequires(c, NotUserNamespace)
3865 3867
 	testRequires(c, DaemonIsLinux)
3866 3868
 	name := "testbuildenvusage"
3867 3869
 	dockerfile := `FROM busybox
... ...
@@ -3898,6 +3906,8 @@ RUN    [ "$ghi" = "def" ]
3898 3898
 }
3899 3899
 
3900 3900
 func (s *DockerSuite) TestBuildEnvUsage2(c *check.C) {
3901
+	// /docker/world/hello is not owned by the correct user
3902
+	testRequires(c, NotUserNamespace)
3901 3903
 	testRequires(c, DaemonIsLinux)
3902 3904
 	name := "testbuildenvusage2"
3903 3905
 	dockerfile := `FROM busybox
... ...
@@ -4024,6 +4034,8 @@ RUN [ "$(cat /testfile)" = 'test!' ]`
4024 4024
 }
4025 4025
 
4026 4026
 func (s *DockerSuite) TestBuildAddTar(c *check.C) {
4027
+	// /test/foo is not owned by the correct user
4028
+	testRequires(c, NotUserNamespace)
4027 4029
 	testRequires(c, DaemonIsLinux)
4028 4030
 	name := "testbuildaddtar"
4029 4031
 
... ...
@@ -4080,7 +4092,8 @@ RUN cat /existing-directory-trailing-slash/test/foo | grep Hi`
4080 4080
 }
4081 4081
 
4082 4082
 func (s *DockerSuite) TestBuildAddTarXz(c *check.C) {
4083
-	testRequires(c, DaemonIsLinux)
4083
+	// /test/foo is not owned by the correct user
4084
+	testRequires(c, NotUserNamespace)
4084 4085
 	testRequires(c, DaemonIsLinux)
4085 4086
 	name := "testbuildaddtarxz"
4086 4087
 
... ...
@@ -4839,6 +4852,8 @@ func (s *DockerSuite) TestBuildSymlinkBreakout(c *check.C) {
4839 4839
 }
4840 4840
 
4841 4841
 func (s *DockerSuite) TestBuildXZHost(c *check.C) {
4842
+	// /usr/local/sbin/xz gets permission denied for the user
4843
+	testRequires(c, NotUserNamespace)
4842 4844
 	testRequires(c, DaemonIsLinux)
4843 4845
 	name := "testbuildxzhost"
4844 4846
 
... ...
@@ -4867,6 +4882,8 @@ RUN [ ! -e /injected ]`,
4867 4867
 }
4868 4868
 
4869 4869
 func (s *DockerSuite) TestBuildVolumesRetainContents(c *check.C) {
4870
+	// /foo/file gets permission denied for the user
4871
+	testRequires(c, NotUserNamespace)
4870 4872
 	testRequires(c, DaemonIsLinux)
4871 4873
 	var (
4872 4874
 		name     = "testbuildvolumescontent"
... ...
@@ -559,7 +559,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
559 559
 	// Copy actual /etc/resolv.conf
560 560
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/resolv.conf", outDir)
561 561
 
562
-	expected, err := ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/resolv.conf")
562
+	expected, err := readContainerFile(cleanedContainerID, "resolv.conf")
563 563
 	actual, err := ioutil.ReadFile(outDir + "/resolv.conf")
564 564
 
565 565
 	if !bytes.Equal(actual, expected) {
... ...
@@ -569,7 +569,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
569 569
 	// Copy actual /etc/hosts
570 570
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/hosts", outDir)
571 571
 
572
-	expected, err = ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/hosts")
572
+	expected, err = readContainerFile(cleanedContainerID, "hosts")
573 573
 	actual, err = ioutil.ReadFile(outDir + "/hosts")
574 574
 
575 575
 	if !bytes.Equal(actual, expected) {
... ...
@@ -579,7 +579,7 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
579 579
 	// Copy actual /etc/resolv.conf
580 580
 	dockerCmd(c, "cp", cleanedContainerID+":/etc/hostname", outDir)
581 581
 
582
-	expected, err = ioutil.ReadFile("/var/lib/docker/containers/" + cleanedContainerID + "/hostname")
582
+	expected, err = readContainerFile(cleanedContainerID, "hostname")
583 583
 	actual, err = ioutil.ReadFile(outDir + "/hostname")
584 584
 
585 585
 	if !bytes.Equal(actual, expected) {
... ...
@@ -589,6 +589,8 @@ func (s *DockerSuite) TestCpSpecialFiles(c *check.C) {
589 589
 }
590 590
 
591 591
 func (s *DockerSuite) TestCpVolumePath(c *check.C) {
592
+	//  stat /tmp/cp-test-volumepath851508420/test gets permission denied for the user
593
+	testRequires(c, NotUserNamespace)
592 594
 	testRequires(c, DaemonIsLinux)
593 595
 	testRequires(c, SameHostDaemon)
594 596
 
... ...
@@ -153,6 +153,8 @@ func (s *DockerSuite) TestCpToErrDstNotDir(c *check.C) {
153 153
 // Check that copying from a local path to a symlink in a container copies to
154 154
 // the symlink target and does not overwrite the container symlink itself.
155 155
 func (s *DockerSuite) TestCpToSymlinkDestination(c *check.C) {
156
+	//  stat /tmp/test-cp-to-symlink-destination-262430901/vol3 gets permission denied for the user
157
+	testRequires(c, NotUserNamespace)
156 158
 	testRequires(c, DaemonIsLinux)
157 159
 	testRequires(c, SameHostDaemon) // Requires local volume mount bind.
158 160
 
... ...
@@ -699,7 +701,8 @@ func (s *DockerSuite) TestCpToCaseJ(c *check.C) {
699 699
 // The `docker cp` command should also ensure that you cannot
700 700
 // write to a container rootfs that is marked as read-only.
701 701
 func (s *DockerSuite) TestCpToErrReadOnlyRootfs(c *check.C) {
702
-	testRequires(c, DaemonIsLinux)
702
+	// --read-only + userns has remount issues
703
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
703 704
 	tmpDir := getTestDir(c, "test-cp-to-err-read-only-rootfs")
704 705
 	defer os.RemoveAll(tmpDir)
705 706
 
... ...
@@ -732,7 +735,8 @@ func (s *DockerSuite) TestCpToErrReadOnlyRootfs(c *check.C) {
732 732
 // The `docker cp` command should also ensure that you
733 733
 // cannot write to a volume that is mounted as read-only.
734 734
 func (s *DockerSuite) TestCpToErrReadOnlyVolume(c *check.C) {
735
-	testRequires(c, DaemonIsLinux)
735
+	// --read-only + userns has remount issues
736
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
736 737
 	tmpDir := getTestDir(c, "test-cp-to-err-read-only-volume")
737 738
 	defer os.RemoveAll(tmpDir)
738 739
 
... ...
@@ -276,7 +276,7 @@ func (s *DockerSuite) TestCreateRM(c *check.C) {
276 276
 
277 277
 func (s *DockerSuite) TestCreateModeIpcContainer(c *check.C) {
278 278
 	testRequires(c, DaemonIsLinux)
279
-	testRequires(c, SameHostDaemon)
279
+	testRequires(c, SameHostDaemon, NotUserNamespace)
280 280
 
281 281
 	out, _ := dockerCmd(c, "create", "busybox")
282 282
 	id := strings.TrimSpace(out)
... ...
@@ -1075,7 +1075,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverDefault(c *check.C) {
1075 1075
 	if out, err := s.d.Cmd("wait", id); err != nil {
1076 1076
 		c.Fatal(out, err)
1077 1077
 	}
1078
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
1078
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
1079 1079
 
1080 1080
 	if _, err := os.Stat(logPath); err != nil {
1081 1081
 		c.Fatal(err)
... ...
@@ -1117,7 +1117,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverDefaultOverride(c *check.C) {
1117 1117
 	if out, err := s.d.Cmd("wait", id); err != nil {
1118 1118
 		c.Fatal(out, err)
1119 1119
 	}
1120
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
1120
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
1121 1121
 
1122 1122
 	if _, err := os.Stat(logPath); err == nil || !os.IsNotExist(err) {
1123 1123
 		c.Fatalf("%s shouldn't exits, error on Stat: %s", logPath, err)
... ...
@@ -1159,7 +1159,7 @@ func (s *DockerDaemonSuite) TestDaemonLoggingDriverNoneOverride(c *check.C) {
1159 1159
 	if out, err := s.d.Cmd("wait", id); err != nil {
1160 1160
 		c.Fatal(out, err)
1161 1161
 	}
1162
-	logPath := filepath.Join(s.d.folder, "graph", "containers", id, id+"-json.log")
1162
+	logPath := filepath.Join(s.d.root, "containers", id, id+"-json.log")
1163 1163
 
1164 1164
 	if _, err := os.Stat(logPath); err != nil {
1165 1165
 		c.Fatal(err)
... ...
@@ -1483,7 +1483,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
1483 1483
 }
1484 1484
 
1485 1485
 func (s *DockerDaemonSuite) TestRunContainerWithBridgeNone(c *check.C) {
1486
-	testRequires(c, NativeExecDriver)
1486
+	testRequires(c, NativeExecDriver, NotUserNamespace)
1487 1487
 	c.Assert(s.d.StartWithBusybox("-b", "none"), check.IsNil)
1488 1488
 
1489 1489
 	out, err := s.d.Cmd("run", "--rm", "busybox", "ip", "l")
... ...
@@ -265,6 +265,7 @@ func (s *DockerSuite) TestExecStopNotHanging(c *check.C) {
265 265
 }
266 266
 
267 267
 func (s *DockerSuite) TestExecCgroup(c *check.C) {
268
+	testRequires(c, NotUserNamespace)
268 269
 	testRequires(c, DaemonIsLinux)
269 270
 	dockerCmd(c, "run", "-d", "--name", "testing", "busybox", "top")
270 271
 
... ...
@@ -547,7 +548,7 @@ func (s *DockerSuite) TestExecWithUser(c *check.C) {
547 547
 }
548 548
 
549 549
 func (s *DockerSuite) TestExecWithPrivileged(c *check.C) {
550
-	testRequires(c, DaemonIsLinux)
550
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
551 551
 	// Start main loop which attempts mknod repeatedly
552 552
 	dockerCmd(c, "run", "-d", "--name", "parent", "--cap-drop=ALL", "busybox", "sh", "-c", `while (true); do if [ -e /exec_priv ]; then cat /exec_priv && mknod /tmp/sda b 8 0 && echo "Success"; else echo "Privileged exec has not run yet"; fi; usleep 10000; done`)
553 553
 
... ...
@@ -605,7 +606,8 @@ func (s *DockerSuite) TestExecWithImageUser(c *check.C) {
605 605
 }
606 606
 
607 607
 func (s *DockerSuite) TestExecOnReadonlyContainer(c *check.C) {
608
-	testRequires(c, DaemonIsLinux)
608
+	// --read-only + userns has remount issues
609
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
609 610
 	dockerCmd(c, "run", "-d", "--read-only", "--name", "parent", "busybox", "top")
610 611
 	if _, status := dockerCmd(c, "exec", "parent", "true"); status != 0 {
611 612
 		c.Fatalf("exec into a read-only container failed with exit status %d", status)
... ...
@@ -3,8 +3,15 @@
3 3
 package main
4 4
 
5 5
 import (
6
+	"fmt"
7
+	"io/ioutil"
8
+	"os"
9
+	"os/exec"
10
+	"path/filepath"
11
+	"strconv"
6 12
 	"strings"
7 13
 
14
+	"github.com/docker/docker/pkg/system"
8 15
 	"github.com/go-check/check"
9 16
 )
10 17
 
... ...
@@ -21,3 +28,57 @@ func (s *DockerSuite) TestExperimentalVersion(c *check.C) {
21 21
 		c.Fatalf("docker version did not contain experimental: %s", out)
22 22
 	}
23 23
 }
24
+
25
+// user namespaces test: run daemon with remapped root setting
26
+// 1. validate uid/gid maps are set properly
27
+// 2. verify that files created are owned by remapped root
28
+func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
29
+	testRequires(c, NativeExecDriver)
30
+	testRequires(c, SameHostDaemon)
31
+
32
+	c.Assert(s.d.StartWithBusybox("--userns-remap", "default"), check.IsNil)
33
+
34
+	tmpDir, err := ioutil.TempDir("", "userns")
35
+	if err != nil {
36
+		c.Fatal(err)
37
+	}
38
+	defer os.RemoveAll(tmpDir)
39
+
40
+	// we need to find the uid and gid of the remapped root from the daemon's root dir info
41
+	uidgid := strings.Split(filepath.Base(s.d.root), ".")
42
+	c.Assert(len(uidgid), check.Equals, 2, check.Commentf("Should have gotten uid/gid strings from root dirname: %s", filepath.Base(s.d.root)))
43
+	uid, err := strconv.Atoi(uidgid[0])
44
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse uid: %v", err))
45
+	gid, err := strconv.Atoi(uidgid[1])
46
+	c.Assert(err, check.IsNil, check.Commentf("Can't parse gid: %v", err))
47
+
48
+	// make the temp dir writable by the remapped root UID/GID pair
49
+	c.Assert(os.Chown(tmpDir, uid, gid), check.IsNil)
50
+
51
+	out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
52
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
53
+
54
+	pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
55
+	if err != nil {
56
+		c.Fatalf("Could not inspect running container: out: %q; err: %v", pid, err)
57
+	}
58
+	// check the uid and gid maps for the PID to ensure root is remapped
59
+	// (cmd = cat /proc/<pid>/uid_map | grep -E '0[[:space:]]+<uid>[[:space:]]+')
60
+	out, rc1, err := runCommandPipelineWithOutput(
61
+		exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/uid_map"),
62
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", uid)))
63
+	c.Assert(rc1, check.Equals, 0, check.Commentf("Didn't match uid_map: output: %s", out))
64
+
65
+	out, rc2, err := runCommandPipelineWithOutput(
66
+		exec.Command("cat", "/proc/"+strings.TrimSpace(pid)+"/gid_map"),
67
+		exec.Command("grep", "-E", fmt.Sprintf("0[[:space:]]+%d[[:space:]]+", gid)))
68
+	c.Assert(rc2, check.Equals, 0, check.Commentf("Didn't match gid_map: output: %s", out))
69
+
70
+	// check that the touched file is owned by remapped uid:gid
71
+	stat, err := system.Stat(filepath.Join(tmpDir, "testfile"))
72
+	if err != nil {
73
+		c.Fatal(err)
74
+	}
75
+	c.Assert(stat.UID(), check.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
76
+	c.Assert(stat.Gid(), check.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
77
+}
... ...
@@ -83,7 +83,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
83 83
 		w.Header().Set("Content-Type", "appplication/vnd.docker.plugins.v1+json")
84 84
 		switch t := data.(type) {
85 85
 		case error:
86
-			fmt.Fprintln(w, fmt.Sprintf(`{"Err": %s}`, t.Error()))
86
+			fmt.Fprintln(w, fmt.Sprintf(`{"Err": %q}`, t.Error()))
87 87
 		case string:
88 88
 			fmt.Fprintln(w, t)
89 89
 		default:
... ...
@@ -91,13 +91,21 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
91 91
 		}
92 92
 	}
93 93
 
94
+	decReq := func(b io.ReadCloser, out interface{}, w http.ResponseWriter) error {
95
+		defer b.Close()
96
+		if err := json.NewDecoder(b).Decode(&out); err != nil {
97
+			http.Error(w, fmt.Sprintf("error decoding json: %s", err.Error()), 500)
98
+		}
99
+		return nil
100
+	}
101
+
94 102
 	base, err := ioutil.TempDir("", "external-graph-test")
95 103
 	c.Assert(err, check.IsNil)
96
-	vfsProto, err := vfs.Init(base, []string{})
104
+	vfsProto, err := vfs.Init(base, []string{}, nil, nil)
97 105
 	if err != nil {
98 106
 		c.Fatalf("error initializing graph driver: %v", err)
99 107
 	}
100
-	driver := graphdriver.NewNaiveDiffDriver(vfsProto)
108
+	driver := graphdriver.NewNaiveDiffDriver(vfsProto, nil, nil)
101 109
 
102 110
 	mux.HandleFunc("/Plugin.Activate", func(w http.ResponseWriter, r *http.Request) {
103 111
 		s.ec.activations++
... ...
@@ -113,8 +121,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
113 113
 		s.ec.creations++
114 114
 
115 115
 		var req graphDriverRequest
116
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
117
-			http.Error(w, err.Error(), 500)
116
+		if err := decReq(r.Body, &req, w); err != nil {
118 117
 			return
119 118
 		}
120 119
 		if err := driver.Create(req.ID, req.Parent); err != nil {
... ...
@@ -128,8 +135,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
128 128
 		s.ec.removals++
129 129
 
130 130
 		var req graphDriverRequest
131
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
132
-			http.Error(w, err.Error(), 500)
131
+		if err := decReq(r.Body, &req, w); err != nil {
133 132
 			return
134 133
 		}
135 134
 
... ...
@@ -144,8 +150,8 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
144 144
 		s.ec.gets++
145 145
 
146 146
 		var req graphDriverRequest
147
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
148
-			http.Error(w, err.Error(), 500)
147
+		if err := decReq(r.Body, &req, w); err != nil {
148
+			return
149 149
 		}
150 150
 
151 151
 		dir, err := driver.Get(req.ID, req.MountLabel)
... ...
@@ -160,8 +166,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
160 160
 		s.ec.puts++
161 161
 
162 162
 		var req graphDriverRequest
163
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
164
-			http.Error(w, err.Error(), 500)
163
+		if err := decReq(r.Body, &req, w); err != nil {
165 164
 			return
166 165
 		}
167 166
 
... ...
@@ -176,8 +181,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
176 176
 		s.ec.exists++
177 177
 
178 178
 		var req graphDriverRequest
179
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
180
-			http.Error(w, err.Error(), 500)
179
+		if err := decReq(r.Body, &req, w); err != nil {
181 180
 			return
182 181
 		}
183 182
 		respond(w, &graphDriverResponse{Exists: driver.Exists(req.ID)})
... ...
@@ -185,7 +189,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
185 185
 
186 186
 	mux.HandleFunc("/GraphDriver.Status", func(w http.ResponseWriter, r *http.Request) {
187 187
 		s.ec.stats++
188
-		respond(w, `{"Status":{}}`)
188
+		respond(w, &graphDriverResponse{Status: driver.Status()})
189 189
 	})
190 190
 
191 191
 	mux.HandleFunc("/GraphDriver.Cleanup", func(w http.ResponseWriter, r *http.Request) {
... ...
@@ -202,8 +206,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
202 202
 		s.ec.metadata++
203 203
 
204 204
 		var req graphDriverRequest
205
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
206
-			http.Error(w, err.Error(), 500)
205
+		if err := decReq(r.Body, &req, w); err != nil {
207 206
 			return
208 207
 		}
209 208
 
... ...
@@ -219,8 +222,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
219 219
 		s.ec.diff++
220 220
 
221 221
 		var req graphDriverRequest
222
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
223
-			http.Error(w, err.Error(), 500)
222
+		if err := decReq(r.Body, &req, w); err != nil {
224 223
 			return
225 224
 		}
226 225
 
... ...
@@ -235,8 +237,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
235 235
 	mux.HandleFunc("/GraphDriver.Changes", func(w http.ResponseWriter, r *http.Request) {
236 236
 		s.ec.changes++
237 237
 		var req graphDriverRequest
238
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
239
-			http.Error(w, err.Error(), 500)
238
+		if err := decReq(r.Body, &req, w); err != nil {
240 239
 			return
241 240
 		}
242 241
 
... ...
@@ -250,10 +251,17 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
250 250
 
251 251
 	mux.HandleFunc("/GraphDriver.ApplyDiff", func(w http.ResponseWriter, r *http.Request) {
252 252
 		s.ec.applydiff++
253
+		var diff archive.Reader = r.Body
254
+		defer r.Body.Close()
255
+
253 256
 		id := r.URL.Query().Get("id")
254 257
 		parent := r.URL.Query().Get("parent")
255 258
 
256
-		size, err := driver.ApplyDiff(id, parent, r.Body)
259
+		if id == "" {
260
+			http.Error(w, fmt.Sprintf("missing id"), 409)
261
+		}
262
+
263
+		size, err := driver.ApplyDiff(id, parent, diff)
257 264
 		if err != nil {
258 265
 			respond(w, err)
259 266
 			return
... ...
@@ -265,8 +273,7 @@ func (s *DockerExternalGraphdriverSuite) SetUpSuite(c *check.C) {
265 265
 		s.ec.diffsize++
266 266
 
267 267
 		var req graphDriverRequest
268
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
269
-			http.Error(w, err.Error(), 500)
268
+		if err := decReq(r.Body, &req, w); err != nil {
270 269
 			return
271 270
 		}
272 271
 
... ...
@@ -296,7 +303,10 @@ func (s *DockerExternalGraphdriverSuite) TearDownSuite(c *check.C) {
296 296
 }
297 297
 
298 298
 func (s *DockerExternalGraphdriverSuite) TestExternalGraphDriver(c *check.C) {
299
-	c.Assert(s.d.StartWithBusybox("-s", "test-external-graph-driver"), check.IsNil)
299
+	if err := s.d.StartWithBusybox("-s", "test-external-graph-driver"); err != nil {
300
+		b, _ := ioutil.ReadFile(s.d.LogfileName())
301
+		c.Assert(err, check.IsNil, check.Commentf("\n%s", string(b)))
302
+	}
300 303
 
301 304
 	out, err := s.d.Cmd("run", "-d", "--name=graphtest", "busybox", "sh", "-c", "echo hello > /hello")
302 305
 	c.Assert(err, check.IsNil, check.Commentf(out))
... ...
@@ -326,7 +336,7 @@ func (s *DockerExternalGraphdriverSuite) TestExternalGraphDriver(c *check.C) {
326 326
 	c.Assert(s.ec.removals >= 1, check.Equals, true)
327 327
 	c.Assert(s.ec.gets >= 1, check.Equals, true)
328 328
 	c.Assert(s.ec.puts >= 1, check.Equals, true)
329
-	c.Assert(s.ec.stats, check.Equals, 1)
329
+	c.Assert(s.ec.stats, check.Equals, 3)
330 330
 	c.Assert(s.ec.cleanups, check.Equals, 2)
331 331
 	c.Assert(s.ec.exists >= 1, check.Equals, true)
332 332
 	c.Assert(s.ec.applydiff >= 1, check.Equals, true)
... ...
@@ -2,10 +2,11 @@ package main
2 2
 
3 3
 import (
4 4
 	"fmt"
5
-	"github.com/go-check/check"
6 5
 	"reflect"
7 6
 	"regexp"
8 7
 	"strings"
8
+
9
+	"github.com/go-check/check"
9 10
 )
10 11
 
11 12
 func (s *DockerSuite) TestLinksPingUnlinkedContainers(c *check.C) {
... ...
@@ -233,7 +234,7 @@ func (s *DockerSuite) TestLinkShortDefinition(c *check.C) {
233 233
 }
234 234
 
235 235
 func (s *DockerSuite) TestLinksNetworkHostContainer(c *check.C) {
236
-	testRequires(c, DaemonIsLinux)
236
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
237 237
 	dockerCmd(c, "run", "-d", "--net", "host", "--name", "host_container", "busybox", "top")
238 238
 	out, _, err := dockerCmdWithError("run", "--name", "should_fail", "--link", "host_container:tester", "busybox", "true")
239 239
 	if err == nil || !strings.Contains(out, "--net=host can't be used with links. This would result in undefined behavior") {
... ...
@@ -242,7 +243,7 @@ func (s *DockerSuite) TestLinksNetworkHostContainer(c *check.C) {
242 242
 }
243 243
 
244 244
 func (s *DockerSuite) TestLinksEtcHostsRegularFile(c *check.C) {
245
-	testRequires(c, DaemonIsLinux)
245
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
246 246
 	out, _ := dockerCmd(c, "run", "--net=host", "busybox", "ls", "-la", "/etc/hosts")
247 247
 	if !strings.HasPrefix(out, "-") {
248 248
 		c.Errorf("/etc/hosts should be a regular file")
... ...
@@ -12,7 +12,7 @@ import (
12 12
 func (s *DockerSuite) TestLinksEtcHostsContentMatch(c *check.C) {
13 13
 	// In a _unix file as using Unix specific files, and must be on the
14 14
 	// same host as the daemon.
15
-	testRequires(c, SameHostDaemon)
15
+	testRequires(c, SameHostDaemon, NotUserNamespace)
16 16
 
17 17
 	out, _ := dockerCmd(c, "run", "--net=host", "busybox", "cat", "/etc/hosts")
18 18
 	hosts, err := ioutil.ReadFile("/etc/hosts")
... ...
@@ -98,7 +98,7 @@ func (s *DockerSuite) TestNetworkLocalhostTCPNat(c *check.C) {
98 98
 
99 99
 func (s *DockerSuite) TestNetworkLoopbackNat(c *check.C) {
100 100
 	testRequires(c, DaemonIsLinux)
101
-	testRequires(c, SameHostDaemon, NativeExecDriver)
101
+	testRequires(c, SameHostDaemon, NativeExecDriver, NotUserNamespace)
102 102
 	msg := "it works"
103 103
 	startServerContainer(c, msg, 8080)
104 104
 	endpoint := getExternalAddress(c)
... ...
@@ -23,7 +23,7 @@ func checkContains(expected string, out string, c *check.C) {
23 23
 }
24 24
 
25 25
 func (s *DockerSuite) TestNetHostname(c *check.C) {
26
-	testRequires(c, DaemonIsLinux)
26
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
27 27
 
28 28
 	var (
29 29
 		out    string
... ...
@@ -81,7 +81,7 @@ func (s *DockerSuite) TestNetHostname(c *check.C) {
81 81
 }
82 82
 
83 83
 func (s *DockerSuite) TestConflictContainerNetworkAndLinks(c *check.C) {
84
-	testRequires(c, DaemonIsLinux)
84
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
85 85
 	var (
86 86
 		out    string
87 87
 		err    error
... ...
@@ -102,7 +102,7 @@ func (s *DockerSuite) TestConflictContainerNetworkAndLinks(c *check.C) {
102 102
 }
103 103
 
104 104
 func (s *DockerSuite) TestConflictNetworkModeAndOptions(c *check.C) {
105
-	testRequires(c, DaemonIsLinux)
105
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
106 106
 	var (
107 107
 		out    string
108 108
 		err    error
... ...
@@ -249,7 +249,7 @@ func (s *DockerSuite) TestUnpublishedPortsInPsOutput(c *check.C) {
249 249
 }
250 250
 
251 251
 func (s *DockerSuite) TestPortHostBinding(c *check.C) {
252
-	testRequires(c, DaemonIsLinux)
252
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
253 253
 	out, _ := dockerCmd(c, "run", "-d", "-p", "9876:80", "busybox",
254 254
 		"nc", "-l", "-p", "80")
255 255
 	firstID := strings.TrimSpace(out)
... ...
@@ -272,7 +272,7 @@ func (s *DockerSuite) TestPortHostBinding(c *check.C) {
272 272
 }
273 273
 
274 274
 func (s *DockerSuite) TestPortExposeHostBinding(c *check.C) {
275
-	testRequires(c, DaemonIsLinux)
275
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
276 276
 	out, _ := dockerCmd(c, "run", "-d", "-P", "--expose", "80", "busybox",
277 277
 		"nc", "-l", "-p", "80")
278 278
 	firstID := strings.TrimSpace(out)
... ...
@@ -707,7 +707,7 @@ func (s *DockerSuite) TestRunContainerNetwork(c *check.C) {
707 707
 func (s *DockerSuite) TestRunNetHostNotAllowedWithLinks(c *check.C) {
708 708
 	// TODO Windows: This is Linux specific as --link is not supported and
709 709
 	// this will be deprecated in favour of container networking model.
710
-	testRequires(c, DaemonIsLinux)
710
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
711 711
 	dockerCmd(c, "run", "--name", "linked", "busybox", "true")
712 712
 
713 713
 	_, _, err := dockerCmdWithError("run", "--net=host", "--link", "linked:linked", "busybox", "true")
... ...
@@ -733,7 +733,7 @@ func (s *DockerSuite) TestRunFullHostnameSet(c *check.C) {
733 733
 func (s *DockerSuite) TestRunPrivilegedCanMknod(c *check.C) {
734 734
 	// Not applicable for Windows as Windows daemon does not support
735 735
 	// the concept of --privileged, and mknod is a Unix concept.
736
-	testRequires(c, DaemonIsLinux)
736
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
737 737
 	out, _ := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
738 738
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
739 739
 		c.Fatalf("expected output ok received %s", actual)
... ...
@@ -743,7 +743,7 @@ func (s *DockerSuite) TestRunPrivilegedCanMknod(c *check.C) {
743 743
 func (s *DockerSuite) TestRunUnprivilegedCanMknod(c *check.C) {
744 744
 	// Not applicable for Windows as Windows daemon does not support
745 745
 	// the concept of --privileged, and mknod is a Unix concept.
746
-	testRequires(c, DaemonIsLinux)
746
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
747 747
 	out, _ := dockerCmd(c, "run", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
748 748
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
749 749
 		c.Fatalf("expected output ok received %s", actual)
... ...
@@ -799,7 +799,7 @@ func (s *DockerSuite) TestRunCapDropALLCannotMknod(c *check.C) {
799 799
 
800 800
 func (s *DockerSuite) TestRunCapDropALLAddMknodCanMknod(c *check.C) {
801 801
 	// Not applicable for Windows as there is no concept of --cap-drop or mknod
802
-	testRequires(c, DaemonIsLinux)
802
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
803 803
 	out, _ := dockerCmd(c, "run", "--cap-drop=ALL", "--cap-add=MKNOD", "--cap-add=SETGID", "busybox", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok")
804 804
 
805 805
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
... ...
@@ -861,7 +861,7 @@ func (s *DockerSuite) TestRunGroupAdd(c *check.C) {
861 861
 
862 862
 func (s *DockerSuite) TestRunPrivilegedCanMount(c *check.C) {
863 863
 	// Not applicable for Windows as there is no concept of --privileged
864
-	testRequires(c, DaemonIsLinux)
864
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
865 865
 	out, _ := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "mount -t tmpfs none /tmp && echo ok")
866 866
 
867 867
 	if actual := strings.Trim(out, "\r\n"); actual != "ok" {
... ...
@@ -892,7 +892,7 @@ func (s *DockerSuite) TestRunSysNotWritableInNonPrivilegedContainers(c *check.C)
892 892
 
893 893
 func (s *DockerSuite) TestRunSysWritableInPrivilegedContainers(c *check.C) {
894 894
 	// Not applicable for Windows as there is no concept of unprivileged
895
-	testRequires(c, DaemonIsLinux)
895
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
896 896
 	if _, code, err := dockerCmdWithError("run", "--privileged", "busybox", "touch", "/sys/kernel/profiling"); err != nil || code != 0 {
897 897
 		c.Fatalf("sys should be writable in privileged container")
898 898
 	}
... ...
@@ -908,7 +908,7 @@ func (s *DockerSuite) TestRunProcNotWritableInNonPrivilegedContainers(c *check.C
908 908
 
909 909
 func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
910 910
 	// Not applicable for Windows as there is no concept of --privileged
911
-	testRequires(c, DaemonIsLinux)
911
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
912 912
 	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger"); code != 0 {
913 913
 		c.Fatalf("proc should be writable in privileged container")
914 914
 	}
... ...
@@ -916,7 +916,8 @@ func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
916 916
 
917 917
 func (s *DockerSuite) TestRunDeviceNumbers(c *check.C) {
918 918
 	// Not applicable on Windows as /dev/ is a Unix specific concept
919
-	testRequires(c, DaemonIsLinux)
919
+	// TODO: NotUserNamespace could be removed here if "root" "root" is replaced w user
920
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
920 921
 	out, _ := dockerCmd(c, "run", "busybox", "sh", "-c", "ls -l /dev/null")
921 922
 	deviceLineFields := strings.Fields(out)
922 923
 	deviceLineFields[6] = ""
... ...
@@ -946,7 +947,7 @@ func (s *DockerSuite) TestRunUnprivilegedWithChroot(c *check.C) {
946 946
 
947 947
 func (s *DockerSuite) TestRunAddingOptionalDevices(c *check.C) {
948 948
 	// Not applicable on Windows as Windows does not support --device
949
-	testRequires(c, DaemonIsLinux)
949
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
950 950
 	out, _ := dockerCmd(c, "run", "--device", "/dev/zero:/dev/nulo", "busybox", "sh", "-c", "ls /dev/nulo")
951 951
 	if actual := strings.Trim(out, "\r\n"); actual != "/dev/nulo" {
952 952
 		c.Fatalf("expected output /dev/nulo, received %s", actual)
... ...
@@ -955,7 +956,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevices(c *check.C) {
955 955
 
956 956
 func (s *DockerSuite) TestRunAddingOptionalDevicesNoSrc(c *check.C) {
957 957
 	// Not applicable on Windows as Windows does not support --device
958
-	testRequires(c, DaemonIsLinux)
958
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
959 959
 	out, _ := dockerCmd(c, "run", "--device", "/dev/zero:rw", "busybox", "sh", "-c", "ls /dev/zero")
960 960
 	if actual := strings.Trim(out, "\r\n"); actual != "/dev/zero" {
961 961
 		c.Fatalf("expected output /dev/zero, received %s", actual)
... ...
@@ -964,7 +965,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevicesNoSrc(c *check.C) {
964 964
 
965 965
 func (s *DockerSuite) TestRunAddingOptionalDevicesInvalidMode(c *check.C) {
966 966
 	// Not applicable on Windows as Windows does not support --device
967
-	testRequires(c, DaemonIsLinux)
967
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
968 968
 	_, _, err := dockerCmdWithError("run", "--device", "/dev/zero:ro", "busybox", "sh", "-c", "ls /dev/zero")
969 969
 	if err == nil {
970 970
 		c.Fatalf("run container with device mode ro should fail")
... ...
@@ -973,7 +974,7 @@ func (s *DockerSuite) TestRunAddingOptionalDevicesInvalidMode(c *check.C) {
973 973
 
974 974
 func (s *DockerSuite) TestRunModeHostname(c *check.C) {
975 975
 	// Not applicable on Windows as Windows does not support -h
976
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
976
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
977 977
 
978 978
 	out, _ := dockerCmd(c, "run", "-h=testhostname", "busybox", "cat", "/etc/hostname")
979 979
 
... ...
@@ -1711,6 +1712,8 @@ func (s *DockerSuite) TestRunEntrypoint(c *check.C) {
1711 1711
 }
1712 1712
 
1713 1713
 func (s *DockerSuite) TestRunBindMounts(c *check.C) {
1714
+	// /tmp gets permission denied
1715
+	testRequires(c, NotUserNamespace)
1714 1716
 	// Cannot run on Windows as Windows does not support volumes
1715 1717
 	testRequires(c, DaemonIsLinux, SameHostDaemon)
1716 1718
 
... ...
@@ -1909,6 +1912,8 @@ func (s *DockerSuite) TestRunAllocatePortInReservedRange(c *check.C) {
1909 1909
 
1910 1910
 // Regression test for #7792
1911 1911
 func (s *DockerSuite) TestRunMountOrdering(c *check.C) {
1912
+	// tmp gets permission denied
1913
+	testRequires(c, NotUserNamespace)
1912 1914
 	// Not applicable on Windows as Windows does not support volumes
1913 1915
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
1914 1916
 
... ...
@@ -1953,6 +1958,8 @@ func (s *DockerSuite) TestRunMountOrdering(c *check.C) {
1953 1953
 
1954 1954
 // Regression test for https://github.com/docker/docker/issues/8259
1955 1955
 func (s *DockerSuite) TestRunReuseBindVolumeThatIsSymlink(c *check.C) {
1956
+	// /tmp gets permission denied
1957
+	testRequires(c, NotUserNamespace)
1956 1958
 	// Not applicable on Windows as Windows does not support volumes
1957 1959
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
1958 1960
 
... ...
@@ -2157,7 +2164,7 @@ func (s *DockerSuite) TestRunUnknownCommand(c *check.C) {
2157 2157
 
2158 2158
 func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
2159 2159
 	// Not applicable on Windows as uses Unix-specific capabilities
2160
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2160
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2161 2161
 
2162 2162
 	hostIpc, err := os.Readlink("/proc/1/ns/ipc")
2163 2163
 	if err != nil {
... ...
@@ -2179,7 +2186,7 @@ func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
2179 2179
 
2180 2180
 func (s *DockerSuite) TestRunModeIpcContainer(c *check.C) {
2181 2181
 	// Not applicable on Windows as uses Unix-specific capabilities
2182
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2182
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2183 2183
 
2184 2184
 	out, _ := dockerCmd(c, "run", "-d", "busybox", "sh", "-c", "echo -n test > /dev/shm/test && top")
2185 2185
 
... ...
@@ -2211,7 +2218,7 @@ func (s *DockerSuite) TestRunModeIpcContainer(c *check.C) {
2211 2211
 
2212 2212
 func (s *DockerSuite) TestRunModeIpcContainerNotExists(c *check.C) {
2213 2213
 	// Not applicable on Windows as uses Unix-specific capabilities
2214
-	testRequires(c, DaemonIsLinux)
2214
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
2215 2215
 	out, _, err := dockerCmdWithError("run", "-d", "--ipc", "container:abcd1234", "busybox", "top")
2216 2216
 	if !strings.Contains(out, "abcd1234") || err == nil {
2217 2217
 		c.Fatalf("run IPC from a non exists container should with correct error out")
... ...
@@ -2220,7 +2227,7 @@ func (s *DockerSuite) TestRunModeIpcContainerNotExists(c *check.C) {
2220 2220
 
2221 2221
 func (s *DockerSuite) TestRunModeIpcContainerNotRunning(c *check.C) {
2222 2222
 	// Not applicable on Windows as uses Unix-specific capabilities
2223
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2223
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2224 2224
 
2225 2225
 	out, _ := dockerCmd(c, "create", "busybox")
2226 2226
 
... ...
@@ -2250,7 +2257,7 @@ func (s *DockerSuite) TestRunMountShmMqueueFromHost(c *check.C) {
2250 2250
 
2251 2251
 func (s *DockerSuite) TestContainerNetworkMode(c *check.C) {
2252 2252
 	// Not applicable on Windows as uses Unix-specific capabilities
2253
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2253
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2254 2254
 
2255 2255
 	out, _ := dockerCmd(c, "run", "-d", "busybox", "top")
2256 2256
 	id := strings.TrimSpace(out)
... ...
@@ -2272,7 +2279,7 @@ func (s *DockerSuite) TestContainerNetworkMode(c *check.C) {
2272 2272
 
2273 2273
 func (s *DockerSuite) TestRunModePidHost(c *check.C) {
2274 2274
 	// Not applicable on Windows as uses Unix-specific capabilities
2275
-	testRequires(c, NativeExecDriver, SameHostDaemon, DaemonIsLinux)
2275
+	testRequires(c, NativeExecDriver, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2276 2276
 
2277 2277
 	hostPid, err := os.Readlink("/proc/1/ns/pid")
2278 2278
 	if err != nil {
... ...
@@ -2413,7 +2420,7 @@ func (s *DockerSuite) TestRunNonLocalMacAddress(c *check.C) {
2413 2413
 
2414 2414
 func (s *DockerSuite) TestRunNetHost(c *check.C) {
2415 2415
 	// Not applicable on Windows as uses Unix-specific capabilities
2416
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2416
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2417 2417
 
2418 2418
 	hostNet, err := os.Readlink("/proc/1/ns/net")
2419 2419
 	if err != nil {
... ...
@@ -2436,7 +2443,7 @@ func (s *DockerSuite) TestRunNetHost(c *check.C) {
2436 2436
 func (s *DockerSuite) TestRunNetHostTwiceSameName(c *check.C) {
2437 2437
 	// TODO Windows. As Windows networking evolves and converges towards
2438 2438
 	// CNM, this test may be possible to enable on Windows.
2439
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2439
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2440 2440
 
2441 2441
 	dockerCmd(c, "run", "--rm", "--name=thost", "--net=host", "busybox", "true")
2442 2442
 	dockerCmd(c, "run", "--rm", "--name=thost", "--net=host", "busybox", "true")
... ...
@@ -2444,7 +2451,7 @@ func (s *DockerSuite) TestRunNetHostTwiceSameName(c *check.C) {
2444 2444
 
2445 2445
 func (s *DockerSuite) TestRunNetContainerWhichHost(c *check.C) {
2446 2446
 	// Not applicable on Windows as uses Unix-specific capabilities
2447
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2447
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2448 2448
 
2449 2449
 	hostNet, err := os.Readlink("/proc/1/ns/net")
2450 2450
 	if err != nil {
... ...
@@ -2534,7 +2541,8 @@ func (s *DockerSuite) TestRunContainerWithReadonlyRootfs(c *check.C) {
2534 2534
 func (s *DockerSuite) TestPermissionsPtsReadonlyRootfs(c *check.C) {
2535 2535
 	// Not applicable on Windows due to use of Unix specific functionality, plus
2536 2536
 	// the use of --read-only which is not supported.
2537
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
2537
+	// --read-only + userns has remount issues
2538
+	testRequires(c, DaemonIsLinux, NativeExecDriver, NotUserNamespace)
2538 2539
 
2539 2540
 	// Ensure we have not broken writing /dev/pts
2540 2541
 	out, status := dockerCmd(c, "run", "--read-only", "--rm", "busybox", "mount")
... ...
@@ -2549,7 +2557,7 @@ func (s *DockerSuite) TestPermissionsPtsReadonlyRootfs(c *check.C) {
2549 2549
 
2550 2550
 func testReadOnlyFile(filename string, c *check.C) {
2551 2551
 	// Not applicable on Windows which does not support --read-only
2552
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
2552
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
2553 2553
 
2554 2554
 	out, _, err := dockerCmdWithError("run", "--read-only", "--rm", "busybox", "touch", filename)
2555 2555
 	if err == nil {
... ...
@@ -2572,7 +2580,8 @@ func testReadOnlyFile(filename string, c *check.C) {
2572 2572
 
2573 2573
 func (s *DockerSuite) TestRunContainerWithReadonlyEtcHostsAndLinkedContainer(c *check.C) {
2574 2574
 	// Not applicable on Windows which does not support --link
2575
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
2575
+	// --read-only + userns has remount issues
2576
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
2576 2577
 
2577 2578
 	dockerCmd(c, "run", "-d", "--name", "test-etc-hosts-ro-linked", "busybox", "top")
2578 2579
 
... ...
@@ -2583,9 +2592,9 @@ func (s *DockerSuite) TestRunContainerWithReadonlyEtcHostsAndLinkedContainer(c *
2583 2583
 }
2584 2584
 
2585 2585
 func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithDnsFlag(c *check.C) {
2586
-	// Not applicable on Windows which does not support either --read-only or
2587
-	// --dns.
2588
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
2586
+	// Not applicable on Windows which does not support either --read-only or --dns.
2587
+	// --read-only + userns has remount issues
2588
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
2589 2589
 
2590 2590
 	out, _ := dockerCmd(c, "run", "--read-only", "--dns", "1.1.1.1", "busybox", "/bin/cat", "/etc/resolv.conf")
2591 2591
 	if !strings.Contains(string(out), "1.1.1.1") {
... ...
@@ -2595,7 +2604,8 @@ func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithDnsFlag(c *check.C)
2595 2595
 
2596 2596
 func (s *DockerSuite) TestRunContainerWithReadonlyRootfsWithAddHostFlag(c *check.C) {
2597 2597
 	// Not applicable on Windows which does not support --read-only
2598
-	testRequires(c, NativeExecDriver, DaemonIsLinux)
2598
+	// --read-only + userns has remount issues
2599
+	testRequires(c, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
2599 2600
 
2600 2601
 	out, _ := dockerCmd(c, "run", "--read-only", "--add-host", "testreadonly:127.0.0.1", "busybox", "/bin/cat", "/etc/hosts")
2601 2602
 	if !strings.Contains(string(out), "testreadonly") {
... ...
@@ -2654,7 +2664,7 @@ func (s *DockerSuite) TestRunContainerWithRmFlagCannotStartContainer(c *check.C)
2654 2654
 
2655 2655
 func (s *DockerSuite) TestRunPidHostWithChildIsKillable(c *check.C) {
2656 2656
 	// Not applicable on Windows as uses Unix specific functionality
2657
-	testRequires(c, DaemonIsLinux)
2657
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
2658 2658
 	name := "ibuildthecloud"
2659 2659
 	dockerCmd(c, "run", "-d", "--pid=host", "--name", name, "busybox", "sh", "-c", "sleep 30; echo hi")
2660 2660
 
... ...
@@ -2734,7 +2744,7 @@ func (s *DockerSuite) TestRunReadProcLatency(c *check.C) {
2734 2734
 
2735 2735
 func (s *DockerSuite) TestRunReadFilteredProc(c *check.C) {
2736 2736
 	// Not applicable on Windows as uses Unix specific functionality
2737
-	testRequires(c, Apparmor, DaemonIsLinux)
2737
+	testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
2738 2738
 
2739 2739
 	testReadPaths := []string{
2740 2740
 		"/proc/latency_stats",
... ...
@@ -2767,7 +2777,8 @@ func (s *DockerSuite) TestMountIntoProc(c *check.C) {
2767 2767
 
2768 2768
 func (s *DockerSuite) TestMountIntoSys(c *check.C) {
2769 2769
 	// Not applicable on Windows as uses Unix specific functionality
2770
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
2770
+	testRequires(c, DaemonIsLinux)
2771
+	testRequires(c, NativeExecDriver, NotUserNamespace)
2771 2772
 	dockerCmd(c, "run", "-v", "/sys/fs/cgroup", "busybox", "true")
2772 2773
 }
2773 2774
 
... ...
@@ -2854,7 +2865,7 @@ func (s *DockerSuite) TestVolumeFromMixedRWOptions(c *check.C) {
2854 2854
 
2855 2855
 func (s *DockerSuite) TestRunWriteFilteredProc(c *check.C) {
2856 2856
 	// Not applicable on Windows as uses Unix specific functionality
2857
-	testRequires(c, Apparmor, NativeExecDriver, DaemonIsLinux)
2857
+	testRequires(c, Apparmor, NativeExecDriver, DaemonIsLinux, NotUserNamespace)
2858 2858
 
2859 2859
 	testWritePaths := []string{
2860 2860
 		/* modprobe and core_pattern should both be denied by generic
... ...
@@ -2917,7 +2928,8 @@ func (s *DockerSuite) TestRunNetworkFilesBindMountRO(c *check.C) {
2917 2917
 
2918 2918
 func (s *DockerSuite) TestRunNetworkFilesBindMountROFilesystem(c *check.C) {
2919 2919
 	// Not applicable on Windows as uses Unix specific functionality
2920
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
2920
+	// --read-only + userns has remount issues
2921
+	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
2921 2922
 
2922 2923
 	filename := createTmpFile(c, "test123")
2923 2924
 	defer os.Remove(filename)
... ...
@@ -3260,7 +3272,8 @@ func (s *DockerSuite) TestRunContainerWithCgroupParentAbsPath(c *check.C) {
3260 3260
 
3261 3261
 func (s *DockerSuite) TestRunContainerWithCgroupMountRO(c *check.C) {
3262 3262
 	// Not applicable on Windows as uses Unix specific functionality
3263
-	testRequires(c, DaemonIsLinux, NativeExecDriver)
3263
+	// --read-only + userns has remount issues
3264
+	testRequires(c, DaemonIsLinux, NativeExecDriver, NotUserNamespace)
3264 3265
 
3265 3266
 	filename := "/sys/fs/cgroup/devices/test123"
3266 3267
 	out, _, err := dockerCmdWithError("run", "busybox", "touch", filename)
... ...
@@ -3275,7 +3288,7 @@ func (s *DockerSuite) TestRunContainerWithCgroupMountRO(c *check.C) {
3275 3275
 
3276 3276
 func (s *DockerSuite) TestRunContainerNetworkModeToSelf(c *check.C) {
3277 3277
 	// Not applicable on Windows which does not support --net=container
3278
-	testRequires(c, DaemonIsLinux)
3278
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3279 3279
 	out, _, err := dockerCmdWithError("run", "--name=me", "--net=container:me", "busybox", "true")
3280 3280
 	if err == nil || !strings.Contains(out, "cannot join own network") {
3281 3281
 		c.Fatalf("using container net mode to self should result in an error\nerr: %q\nout: %s", err, out)
... ...
@@ -3284,7 +3297,7 @@ func (s *DockerSuite) TestRunContainerNetworkModeToSelf(c *check.C) {
3284 3284
 
3285 3285
 func (s *DockerSuite) TestRunContainerNetModeWithDnsMacHosts(c *check.C) {
3286 3286
 	// Not applicable on Windows which does not support --net=container
3287
-	testRequires(c, DaemonIsLinux)
3287
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3288 3288
 	out, _, err := dockerCmdWithError("run", "-d", "--name", "parent", "busybox", "top")
3289 3289
 	if err != nil {
3290 3290
 		c.Fatalf("failed to run container: %v, output: %q", err, out)
... ...
@@ -3308,7 +3321,7 @@ func (s *DockerSuite) TestRunContainerNetModeWithDnsMacHosts(c *check.C) {
3308 3308
 
3309 3309
 func (s *DockerSuite) TestRunContainerNetModeWithExposePort(c *check.C) {
3310 3310
 	// Not applicable on Windows which does not support --net=container
3311
-	testRequires(c, DaemonIsLinux)
3311
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3312 3312
 	dockerCmd(c, "run", "-d", "--name", "parent", "busybox", "top")
3313 3313
 
3314 3314
 	out, _, err := dockerCmdWithError("run", "-p", "5000:5000", "--net=container:parent", "busybox")
... ...
@@ -3329,7 +3342,7 @@ func (s *DockerSuite) TestRunContainerNetModeWithExposePort(c *check.C) {
3329 3329
 
3330 3330
 func (s *DockerSuite) TestRunLinkToContainerNetMode(c *check.C) {
3331 3331
 	// Not applicable on Windows which does not support --net=container or --link
3332
-	testRequires(c, DaemonIsLinux)
3332
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3333 3333
 	dockerCmd(c, "run", "--name", "test", "-d", "busybox", "top")
3334 3334
 	dockerCmd(c, "run", "--name", "parent", "-d", "--net=container:test", "busybox", "top")
3335 3335
 	dockerCmd(c, "run", "-d", "--link=parent:parent", "busybox", "top")
... ...
@@ -3373,7 +3386,7 @@ func (s *DockerSuite) TestRunLoopbackWhenNetworkDisabled(c *check.C) {
3373 3373
 
3374 3374
 func (s *DockerSuite) TestRunModeNetContainerHostname(c *check.C) {
3375 3375
 	// Windows does not support --net=container
3376
-	testRequires(c, DaemonIsLinux, ExecSupport)
3376
+	testRequires(c, DaemonIsLinux, ExecSupport, NotUserNamespace)
3377 3377
 
3378 3378
 	dockerCmd(c, "run", "-i", "-d", "--name", "parent", "busybox", "top")
3379 3379
 	out, _ := dockerCmd(c, "exec", "parent", "cat", "/etc/hostname")
... ...
@@ -3399,7 +3412,7 @@ func (s *DockerSuite) TestRunNetworkNotInitializedNoneMode(c *check.C) {
3399 3399
 
3400 3400
 func (s *DockerSuite) TestTwoContainersInNetHost(c *check.C) {
3401 3401
 	// Not applicable as Windows does not support --net=host
3402
-	testRequires(c, DaemonIsLinux)
3402
+	testRequires(c, DaemonIsLinux, NotUserNamespace, NotUserNamespace)
3403 3403
 	dockerCmd(c, "run", "-d", "--net=host", "--name=first", "busybox", "top")
3404 3404
 	dockerCmd(c, "run", "-d", "--net=host", "--name=second", "busybox", "top")
3405 3405
 	dockerCmd(c, "stop", "first")
... ...
@@ -3407,7 +3420,7 @@ func (s *DockerSuite) TestTwoContainersInNetHost(c *check.C) {
3407 3407
 }
3408 3408
 
3409 3409
 func (s *DockerSuite) TestContainersInUserDefinedNetwork(c *check.C) {
3410
-	testRequires(c, DaemonIsLinux)
3410
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3411 3411
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork")
3412 3412
 	dockerCmd(c, "run", "-d", "--net=testnetwork", "--name=first", "busybox", "top")
3413 3413
 	c.Assert(waitRun("first"), check.IsNil)
... ...
@@ -3418,7 +3431,7 @@ func (s *DockerSuite) TestContainersInUserDefinedNetwork(c *check.C) {
3418 3418
 }
3419 3419
 
3420 3420
 func (s *DockerSuite) TestContainersInMultipleNetworks(c *check.C) {
3421
-	testRequires(c, DaemonIsLinux)
3421
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3422 3422
 	// Create 2 networks using bridge driver
3423 3423
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
3424 3424
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
... ...
@@ -3441,7 +3454,7 @@ func (s *DockerSuite) TestContainersInMultipleNetworks(c *check.C) {
3441 3441
 }
3442 3442
 
3443 3443
 func (s *DockerSuite) TestContainersNetworkIsolation(c *check.C) {
3444
-	testRequires(c, DaemonIsLinux)
3444
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3445 3445
 	// Create 2 networks using bridge driver
3446 3446
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
3447 3447
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
... ...
@@ -3473,7 +3486,7 @@ func (s *DockerSuite) TestContainersNetworkIsolation(c *check.C) {
3473 3473
 }
3474 3474
 
3475 3475
 func (s *DockerSuite) TestNetworkRmWithActiveContainers(c *check.C) {
3476
-	testRequires(c, DaemonIsLinux)
3476
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3477 3477
 	// Create 2 networks using bridge driver
3478 3478
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
3479 3479
 	// Run and connect containers to testnetwork1
... ...
@@ -3495,7 +3508,7 @@ func (s *DockerSuite) TestNetworkRmWithActiveContainers(c *check.C) {
3495 3495
 }
3496 3496
 
3497 3497
 func (s *DockerSuite) TestContainerRestartInMultipleNetworks(c *check.C) {
3498
-	testRequires(c, DaemonIsLinux)
3498
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3499 3499
 	// Create 2 networks using bridge driver
3500 3500
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork1")
3501 3501
 	dockerCmd(c, "network", "create", "-d", "bridge", "testnetwork2")
... ...
@@ -3531,7 +3544,7 @@ func (s *DockerSuite) TestContainerRestartInMultipleNetworks(c *check.C) {
3531 3531
 }
3532 3532
 
3533 3533
 func (s *DockerSuite) TestContainerWithConflictingHostNetworks(c *check.C) {
3534
-	testRequires(c, DaemonIsLinux)
3534
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3535 3535
 	// Run a container with --net=host
3536 3536
 	dockerCmd(c, "run", "-d", "--net=host", "--name=first", "busybox", "top")
3537 3537
 	c.Assert(waitRun("first"), check.IsNil)
... ...
@@ -3547,7 +3560,7 @@ func (s *DockerSuite) TestContainerWithConflictingHostNetworks(c *check.C) {
3547 3547
 }
3548 3548
 
3549 3549
 func (s *DockerSuite) TestContainerWithConflictingSharedNetwork(c *check.C) {
3550
-	testRequires(c, DaemonIsLinux)
3550
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3551 3551
 	dockerCmd(c, "run", "-d", "--name=first", "busybox", "top")
3552 3552
 	c.Assert(waitRun("first"), check.IsNil)
3553 3553
 	// Run second container in first container's network namespace
... ...
@@ -3568,7 +3581,7 @@ func (s *DockerSuite) TestContainerWithConflictingSharedNetwork(c *check.C) {
3568 3568
 }
3569 3569
 
3570 3570
 func (s *DockerSuite) TestContainerWithConflictingNoneNetwork(c *check.C) {
3571
-	testRequires(c, DaemonIsLinux)
3571
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
3572 3572
 	dockerCmd(c, "run", "-d", "--net=none", "--name=first", "busybox", "top")
3573 3573
 	c.Assert(waitRun("first"), check.IsNil)
3574 3574
 
... ...
@@ -57,6 +57,8 @@ func (s *DockerSuite) TestRunRedirectStdout(c *check.C) {
57 57
 
58 58
 // Test recursive bind mount works by default
59 59
 func (s *DockerSuite) TestRunWithVolumesIsRecursive(c *check.C) {
60
+	// /tmp gets permission denied
61
+	testRequires(c, NotUserNamespace)
60 62
 	tmpDir, err := ioutil.TempDir("", "docker_recursive_mount_test")
61 63
 	if err != nil {
62 64
 		c.Fatal(err)
... ...
@@ -90,7 +92,7 @@ func (s *DockerSuite) TestRunWithVolumesIsRecursive(c *check.C) {
90 90
 }
91 91
 
92 92
 func (s *DockerSuite) TestRunDeviceDirectory(c *check.C) {
93
-	testRequires(c, NativeExecDriver)
93
+	testRequires(c, NativeExecDriver, NotUserNamespace)
94 94
 	if _, err := os.Stat("/dev/snd"); err != nil {
95 95
 		c.Skip("Host does not have /dev/snd")
96 96
 	}
... ...
@@ -30,7 +30,7 @@ func (s *DockerSuite) TestTopNonPrivileged(c *check.C) {
30 30
 }
31 31
 
32 32
 func (s *DockerSuite) TestTopPrivileged(c *check.C) {
33
-	testRequires(c, DaemonIsLinux)
33
+	testRequires(c, DaemonIsLinux, NotUserNamespace)
34 34
 	out, _ := dockerCmd(c, "run", "--privileged", "-i", "-d", "busybox", "top")
35 35
 	cleanedContainerID := strings.TrimSpace(out)
36 36
 
... ...
@@ -1,9 +1,12 @@
1 1
 package main
2 2
 
3 3
 import (
4
+	"encoding/json"
4 5
 	"fmt"
5 6
 	"os"
6 7
 	"os/exec"
8
+
9
+	"github.com/docker/docker/pkg/reexec"
7 10
 )
8 11
 
9 12
 var (
... ...
@@ -16,10 +19,6 @@ var (
16 16
 	// the private registry to use for tests
17 17
 	privateRegistryURL = "127.0.0.1:5000"
18 18
 
19
-	dockerBasePath       = "/var/lib/docker"
20
-	volumesConfigPath    = dockerBasePath + "/volumes"
21
-	containerStoragePath = dockerBasePath + "/containers"
22
-
23 19
 	runtimePath    = "/var/run/docker"
24 20
 	execDriverPath = runtimePath + "/execdriver/native"
25 21
 
... ...
@@ -38,6 +37,13 @@ var (
38 38
 	// daemonDefaultImage is the name of the default image to use when running
39 39
 	// tests. This is platform dependent.
40 40
 	daemonDefaultImage string
41
+
42
+	// For a local daemon on Linux, these values will be used for testing
43
+	// user namespace support as the standard graph path(s) will be
44
+	// appended with the root remapped uid.gid prefix
45
+	dockerBasePath       string
46
+	volumesConfigPath    string
47
+	containerStoragePath string
41 48
 )
42 49
 
43 50
 const (
... ...
@@ -50,6 +56,7 @@ const (
50 50
 )
51 51
 
52 52
 func init() {
53
+	reexec.Init()
53 54
 	if dockerBin := os.Getenv("DOCKER_BINARY"); dockerBin != "" {
54 55
 		dockerBinary = dockerBin
55 56
 	}
... ...
@@ -85,4 +92,21 @@ func init() {
85 85
 	} else {
86 86
 		isLocalDaemon = true
87 87
 	}
88
+
89
+	// This is only used for tests where the daemon is local (Linux-only today);
90
+	// default is "/var/lib/docker", but we'll try and ask the
91
+	// /info endpoint for the specific root dir
92
+	dockerBasePath = "/var/lib/docker"
93
+	type Info struct {
94
+		DockerRootDir string
95
+	}
96
+	var i Info
97
+	status, b, err := sockRequest("GET", "/info", nil)
98
+	if err == nil && status == 200 {
99
+		if err = json.Unmarshal(b, &i); err == nil {
100
+			dockerBasePath = i.DockerRootDir
101
+		}
102
+	}
103
+	volumesConfigPath = dockerBasePath + "/volumes"
104
+	containerStoragePath = dockerBasePath + "/containers"
88 105
 }
... ...
@@ -41,6 +41,7 @@ type Daemon struct {
41 41
 	c              *check.C
42 42
 	logFile        *os.File
43 43
 	folder         string
44
+	root           string
44 45
 	stdin          io.WriteCloser
45 46
 	stdout, stderr io.ReadCloser
46 47
 	cmd            *exec.Cmd
... ...
@@ -65,9 +66,10 @@ func NewDaemon(c *check.C) *Daemon {
65 65
 	if err != nil {
66 66
 		c.Fatalf("Could not make %q an absolute path: %v", dir, err)
67 67
 	}
68
+	daemonRoot := filepath.Join(daemonFolder, "root")
68 69
 
69
-	if err := os.MkdirAll(filepath.Join(daemonFolder, "graph"), 0600); err != nil {
70
-		c.Fatalf("Could not create %s/graph directory", daemonFolder)
70
+	if err := os.MkdirAll(daemonRoot, 0755); err != nil {
71
+		c.Fatalf("Could not create daemon root %q: %v", dir, err)
71 72
 	}
72 73
 
73 74
 	userlandProxy := true
... ...
@@ -82,6 +84,7 @@ func NewDaemon(c *check.C) *Daemon {
82 82
 		id:            id,
83 83
 		c:             c,
84 84
 		folder:        daemonFolder,
85
+		root:          daemonRoot,
85 86
 		storageDriver: os.Getenv("DOCKER_GRAPHDRIVER"),
86 87
 		execDriver:    os.Getenv("DOCKER_EXECDRIVER"),
87 88
 		userlandProxy: userlandProxy,
... ...
@@ -99,11 +102,15 @@ func (d *Daemon) Start(arg ...string) error {
99 99
 	args := append(d.GlobalFlags,
100 100
 		d.Command,
101 101
 		"--host", d.sock(),
102
-		"--graph", fmt.Sprintf("%s/graph", d.folder),
102
+		"--graph", d.root,
103 103
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
104 104
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
105 105
 	)
106 106
 
107
+	if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
108
+		args = append(args, []string{"--userns-remap", root}...)
109
+	}
110
+
107 111
 	// If we don't explicitly set the log-level or debug flag(-D) then
108 112
 	// turn on debug mode
109 113
 	foundIt := false
... ...
@@ -181,8 +188,11 @@ func (d *Daemon) Start(arg ...string) error {
181 181
 			if resp.StatusCode != http.StatusOK {
182 182
 				d.c.Logf("[%s] received status != 200 OK: %s", d.id, resp.Status)
183 183
 			}
184
-
185 184
 			d.c.Logf("[%s] daemon started", d.id)
185
+			d.root, err = d.queryRootDir()
186
+			if err != nil {
187
+				return fmt.Errorf("[%s] error querying daemon for root directory: %v", d.id, err)
188
+			}
186 189
 			return nil
187 190
 		}
188 191
 	}
... ...
@@ -278,6 +288,47 @@ func (d *Daemon) Restart(arg ...string) error {
278 278
 	return d.Start(arg...)
279 279
 }
280 280
 
281
+func (d *Daemon) queryRootDir() (string, error) {
282
+	// update daemon root by asking /info endpoint (to support user
283
+	// namespaced daemon with root remapped uid.gid directory)
284
+	conn, err := net.Dial("unix", filepath.Join(d.folder, "docker.sock"))
285
+	if err != nil {
286
+		return "", err
287
+	}
288
+	client := httputil.NewClientConn(conn, nil)
289
+
290
+	req, err := http.NewRequest("GET", "/info", nil)
291
+	if err != nil {
292
+		client.Close()
293
+		return "", err
294
+	}
295
+	req.Header.Set("Content-Type", "application/json")
296
+
297
+	resp, err := client.Do(req)
298
+	if err != nil {
299
+		client.Close()
300
+		return "", err
301
+	}
302
+	body := ioutils.NewReadCloserWrapper(resp.Body, func() error {
303
+		defer client.Close()
304
+		return resp.Body.Close()
305
+	})
306
+
307
+	type Info struct {
308
+		DockerRootDir string
309
+	}
310
+	var b []byte
311
+	var i Info
312
+	b, err = readBody(body)
313
+	if err == nil && resp.StatusCode == 200 {
314
+		// read the docker root dir
315
+		if err = json.Unmarshal(b, &i); err == nil {
316
+			return i.DockerRootDir, nil
317
+		}
318
+	}
319
+	return "", err
320
+}
321
+
281 322
 func (d *Daemon) sock() string {
282 323
 	return fmt.Sprintf("unix://%s/docker.sock", d.folder)
283 324
 }
... ...
@@ -1236,7 +1287,7 @@ func readFile(src string, c *check.C) (content string) {
1236 1236
 }
1237 1237
 
1238 1238
 func containerStorageFile(containerID, basename string) string {
1239
-	return filepath.Join("/var/lib/docker/containers", containerID, basename)
1239
+	return filepath.Join(containerStoragePath, containerID, basename)
1240 1240
 }
1241 1241
 
1242 1242
 // docker commands that use this function must be run with the '-d' switch.
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"io/ioutil"
7 7
 	"log"
8 8
 	"net/http"
9
+	"os"
9 10
 	"os/exec"
10 11
 	"strings"
11 12
 	"time"
... ...
@@ -147,6 +148,16 @@ var (
147 147
 		},
148 148
 		"Test requires native Golang compiler instead of GCCGO",
149 149
 	}
150
+	NotUserNamespace = testRequirement{
151
+		func() bool {
152
+			root := os.Getenv("DOCKER_REMAP_ROOT")
153
+			if root != "" {
154
+				return false
155
+			}
156
+			return true
157
+		},
158
+		"Test cannot be run when remapping root",
159
+	}
150 160
 )
151 161
 
152 162
 // testRequires checks if the environment satisfies the requirements
... ...
@@ -19,6 +19,7 @@ import (
19 19
 
20 20
 	"github.com/Sirupsen/logrus"
21 21
 	"github.com/docker/docker/pkg/fileutils"
22
+	"github.com/docker/docker/pkg/idtools"
22 23
 	"github.com/docker/docker/pkg/pools"
23 24
 	"github.com/docker/docker/pkg/promise"
24 25
 	"github.com/docker/docker/pkg/system"
... ...
@@ -41,6 +42,8 @@ type (
41 41
 		ExcludePatterns  []string
42 42
 		Compression      Compression
43 43
 		NoLchown         bool
44
+		UIDMaps          []idtools.IDMap
45
+		GIDMaps          []idtools.IDMap
44 46
 		ChownOpts        *TarChownOptions
45 47
 		IncludeSourceDir bool
46 48
 		// When unpacking, specifies whether overwriting a directory with a
... ...
@@ -52,9 +55,13 @@ type (
52 52
 	}
53 53
 
54 54
 	// Archiver allows the reuse of most utility functions of this package
55
-	// with a pluggable Untar function.
55
+	// with a pluggable Untar function. Also, to facilitate the passing of
56
+	// specific id mappings for untar, an archiver can be created with maps
57
+	// which will then be passed to Untar operations
56 58
 	Archiver struct {
57
-		Untar func(io.Reader, string, *TarOptions) error
59
+		Untar   func(io.Reader, string, *TarOptions) error
60
+		UIDMaps []idtools.IDMap
61
+		GIDMaps []idtools.IDMap
58 62
 	}
59 63
 
60 64
 	// breakoutError is used to differentiate errors related to breaking out
... ...
@@ -66,7 +73,7 @@ type (
66 66
 var (
67 67
 	// ErrNotImplemented is the error message of function not implemented.
68 68
 	ErrNotImplemented = errors.New("Function not implemented")
69
-	defaultArchiver   = &Archiver{Untar}
69
+	defaultArchiver   = &Archiver{Untar: Untar, UIDMaps: nil, GIDMaps: nil}
70 70
 )
71 71
 
72 72
 const (
... ...
@@ -194,6 +201,8 @@ type tarAppender struct {
194 194
 
195 195
 	// for hardlink mapping
196 196
 	SeenFiles map[uint64]string
197
+	UIDMaps   []idtools.IDMap
198
+	GIDMaps   []idtools.IDMap
197 199
 }
198 200
 
199 201
 // canonicalTarName provides a platform-independent and consistent posix-style
... ...
@@ -261,6 +270,25 @@ func (ta *tarAppender) addTarFile(path, name string) error {
261 261
 		hdr.Xattrs["security.capability"] = string(capability)
262 262
 	}
263 263
 
264
+	// when ID maps are set, convert the file's on-disk uid/gid with
265
+	// idtools.ToContainer before writing tar headers/files
266
+	if ta.UIDMaps != nil || ta.GIDMaps != nil {
267
+		uid, gid, err := getFileUIDGID(fi.Sys())
268
+		if err != nil {
269
+			return err
270
+		}
271
+		xUID, err := idtools.ToContainer(uid, ta.UIDMaps)
272
+		if err != nil {
273
+			return err
274
+		}
275
+		xGID, err := idtools.ToContainer(gid, ta.GIDMaps)
276
+		if err != nil {
277
+			return err
278
+		}
279
+		hdr.Uid = xUID
280
+		hdr.Gid = xGID
281
+	}
282
+
264 283
 	if err := ta.TarWriter.WriteHeader(hdr); err != nil {
265 284
 		return err
266 285
 	}
... ...
@@ -427,6 +455,8 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
427 427
 			TarWriter: tar.NewWriter(compressWriter),
428 428
 			Buffer:    pools.BufioWriter32KPool.Get(nil),
429 429
 			SeenFiles: make(map[uint64]string),
430
+			UIDMaps:   options.UIDMaps,
431
+			GIDMaps:   options.GIDMaps,
430 432
 		}
431 433
 
432 434
 		defer func() {
... ...
@@ -554,6 +584,10 @@ func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) err
554 554
 	defer pools.BufioReader32KPool.Put(trBuf)
555 555
 
556 556
 	var dirs []*tar.Header
557
+	remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
558
+	if err != nil {
559
+		return err
560
+	}
557 561
 
558 562
 	// Iterate through the files in the archive.
559 563
 loop:
... ...
@@ -631,6 +665,28 @@ loop:
631 631
 		}
632 632
 		trBuf.Reset(tr)
633 633
 
634
+		// if the options contain a uid & gid maps, convert header uid/gid
635
+		// entries using the maps such that lchown sets the proper mapped
636
+		// uid/gid after writing the file. We only perform this mapping if
637
+		// the file isn't already owned by the remapped root UID or GID, as
638
+		// that specific uid/gid has no mapping from container -> host, and
639
+		// those files already have the proper ownership for inside the
640
+		// container.
641
+		if hdr.Uid != remappedRootUID {
642
+			xUID, err := idtools.ToHost(hdr.Uid, options.UIDMaps)
643
+			if err != nil {
644
+				return err
645
+			}
646
+			hdr.Uid = xUID
647
+		}
648
+		if hdr.Gid != remappedRootGID {
649
+			xGID, err := idtools.ToHost(hdr.Gid, options.GIDMaps)
650
+			if err != nil {
651
+				return err
652
+			}
653
+			hdr.Gid = xGID
654
+		}
655
+
634 656
 		if err := createTarFile(path, dest, hdr, trBuf, !options.NoLchown, options.ChownOpts); err != nil {
635 657
 			return err
636 658
 		}
... ...
@@ -703,7 +759,15 @@ func (archiver *Archiver) TarUntar(src, dst string) error {
703 703
 		return err
704 704
 	}
705 705
 	defer archive.Close()
706
-	return archiver.Untar(archive, dst, nil)
706
+
707
+	var options *TarOptions
708
+	if archiver.UIDMaps != nil || archiver.GIDMaps != nil {
709
+		options = &TarOptions{
710
+			UIDMaps: archiver.UIDMaps,
711
+			GIDMaps: archiver.GIDMaps,
712
+		}
713
+	}
714
+	return archiver.Untar(archive, dst, options)
707 715
 }
708 716
 
709 717
 // TarUntar is a convenience function which calls Tar and Untar, with the output of one piped into the other.
... ...
@@ -719,7 +783,14 @@ func (archiver *Archiver) UntarPath(src, dst string) error {
719 719
 		return err
720 720
 	}
721 721
 	defer archive.Close()
722
-	if err := archiver.Untar(archive, dst, nil); err != nil {
722
+	var options *TarOptions
723
+	if archiver.UIDMaps != nil || archiver.GIDMaps != nil {
724
+		options = &TarOptions{
725
+			UIDMaps: archiver.UIDMaps,
726
+			GIDMaps: archiver.GIDMaps,
727
+		}
728
+	}
729
+	if err := archiver.Untar(archive, dst, options); err != nil {
723 730
 		return err
724 731
 	}
725 732
 	return nil
... ...
@@ -801,6 +872,28 @@ func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
801 801
 		hdr.Name = filepath.Base(dst)
802 802
 		hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode)))
803 803
 
804
+		remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(archiver.UIDMaps, archiver.GIDMaps)
805
+		if err != nil {
806
+			return err
807
+		}
808
+
809
+		// only perform mapping if the file being copied isn't already owned by the
810
+		// uid or gid of the remapped root in the container
811
+		if remappedRootUID != hdr.Uid {
812
+			xUID, err := idtools.ToHost(hdr.Uid, archiver.UIDMaps)
813
+			if err != nil {
814
+				return err
815
+			}
816
+			hdr.Uid = xUID
817
+		}
818
+		if remappedRootGID != hdr.Gid {
819
+			xGID, err := idtools.ToHost(hdr.Gid, archiver.GIDMaps)
820
+			if err != nil {
821
+				return err
822
+			}
823
+			hdr.Gid = xGID
824
+		}
825
+
804 826
 		tw := tar.NewWriter(w)
805 827
 		defer tw.Close()
806 828
 		if err := tw.WriteHeader(hdr); err != nil {
... ...
@@ -816,6 +909,7 @@ func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
816 816
 			err = er
817 817
 		}
818 818
 	}()
819
+
819 820
 	return archiver.Untar(r, filepath.Dir(dst), nil)
820 821
 }
821 822
 
... ...
@@ -61,6 +61,15 @@ func setHeaderForSpecialDevice(hdr *tar.Header, ta *tarAppender, name string, st
61 61
 	return
62 62
 }
63 63
 
64
// getFileUIDGID extracts the owning uid and gid from stat, which must hold a
// *syscall.Stat_t. Any other dynamic type yields -1, -1 and an error.
func getFileUIDGID(stat interface{}) (int, int, error) {
	if st, isStat := stat.(*syscall.Stat_t); isStat {
		return int(st.Uid), int(st.Gid), nil
	}
	return -1, -1, errors.New("cannot convert stat value to syscall.Stat_t")
}
72
+
64 73
 func major(device uint64) uint64 {
65 74
 	return (device >> 8) & 0xfff
66 75
 }
... ...
@@ -63,3 +63,8 @@ func handleTarTypeBlockCharFifo(hdr *tar.Header, path string) error {
63 63
 func handleLChmod(hdr *tar.Header, path string, hdrInfo os.FileInfo) error {
64 64
 	return nil
65 65
 }
66
+
67
// getFileUIDGID always reports uid 0 and gid 0 with no error, regardless of
// the stat value passed in.
func getFileUIDGID(stat interface{}) (int, int, error) {
	// no notion of file ownership mapping yet on Windows
	const unmappedID = 0
	return unmappedID, unmappedID, nil
}
... ...
@@ -14,6 +14,7 @@ import (
14 14
 	"time"
15 15
 
16 16
 	"github.com/Sirupsen/logrus"
17
+	"github.com/docker/docker/pkg/idtools"
17 18
 	"github.com/docker/docker/pkg/pools"
18 19
 	"github.com/docker/docker/pkg/system"
19 20
 )
... ...
@@ -341,13 +342,15 @@ func ChangesSize(newDir string, changes []Change) int64 {
341 341
 }
342 342
 
343 343
 // ExportChanges produces an Archive from the provided changes, relative to dir.
344
-func ExportChanges(dir string, changes []Change) (Archive, error) {
344
+func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (Archive, error) {
345 345
 	reader, writer := io.Pipe()
346 346
 	go func() {
347 347
 		ta := &tarAppender{
348 348
 			TarWriter: tar.NewWriter(writer),
349 349
 			Buffer:    pools.BufioWriter32KPool.Get(nil),
350 350
 			SeenFiles: make(map[uint64]string),
351
+			UIDMaps:   uidMaps,
352
+			GIDMaps:   gidMaps,
351 353
 		}
352 354
 		// this buffer is needed for the duration of this piped stream
353 355
 		defer pools.BufioWriter32KPool.Put(ta.Buffer)
... ...
@@ -61,7 +61,7 @@ func TestHardLinkOrder(t *testing.T) {
61 61
 	sort.Sort(changesByPath(changes))
62 62
 
63 63
 	// ExportChanges
64
-	ar, err := ExportChanges(dest, changes)
64
+	ar, err := ExportChanges(dest, changes, nil, nil)
65 65
 	if err != nil {
66 66
 		t.Fatal(err)
67 67
 	}
... ...
@@ -73,7 +73,7 @@ func TestHardLinkOrder(t *testing.T) {
73 73
 	// reverse sort
74 74
 	sort.Sort(sort.Reverse(changesByPath(changes)))
75 75
 	// ExportChanges
76
-	arRev, err := ExportChanges(dest, changes)
76
+	arRev, err := ExportChanges(dest, changes, nil, nil)
77 77
 	if err != nil {
78 78
 		t.Fatal(err)
79 79
 	}
... ...
@@ -410,7 +410,7 @@ func TestApplyLayer(t *testing.T) {
410 410
 		t.Fatal(err)
411 411
 	}
412 412
 
413
-	layer, err := ExportChanges(dst, changes)
413
+	layer, err := ExportChanges(dst, changes, nil, nil)
414 414
 	if err != nil {
415 415
 		t.Fatal(err)
416 416
 	}
... ...
@@ -11,6 +11,7 @@ import (
11 11
 	"strings"
12 12
 
13 13
 	"github.com/Sirupsen/logrus"
14
+	"github.com/docker/docker/pkg/idtools"
14 15
 	"github.com/docker/docker/pkg/pools"
15 16
 	"github.com/docker/docker/pkg/system"
16 17
 )
... ...
@@ -18,16 +19,23 @@ import (
18 18
 // UnpackLayer unpack `layer` to a `dest`. The stream `layer` can be
19 19
 // compressed or uncompressed.
20 20
 // Returns the size in bytes of the contents of the layer.
21
-func UnpackLayer(dest string, layer Reader) (size int64, err error) {
21
+func UnpackLayer(dest string, layer Reader, options *TarOptions) (size int64, err error) {
22 22
 	tr := tar.NewReader(layer)
23 23
 	trBuf := pools.BufioReader32KPool.Get(tr)
24 24
 	defer pools.BufioReader32KPool.Put(trBuf)
25 25
 
26 26
 	var dirs []*tar.Header
27
+	remappedRootUID, remappedRootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
28
+	if err != nil {
29
+		return 0, err
30
+	}
27 31
 
28 32
 	aufsTempdir := ""
29 33
 	aufsHardlinks := make(map[string]*tar.Header)
30 34
 
35
+	if options == nil {
36
+		options = &TarOptions{}
37
+	}
31 38
 	// Iterate through the files in the archive.
32 39
 	for {
33 40
 		hdr, err := tr.Next()
... ...
@@ -169,6 +177,27 @@ func UnpackLayer(dest string, layer Reader) (size int64, err error) {
169 169
 				srcData = tmpFile
170 170
 			}
171 171
 
172
+			// if the options contain a uid & gid maps, convert header uid/gid
173
+			// entries using the maps such that lchown sets the proper mapped
174
+			// uid/gid after writing the file. We only perform this mapping if
175
+			// the file isn't already owned by the remapped root UID or GID, as
176
+			// that specific uid/gid has no mapping from container -> host, and
177
+			// those files already have the proper ownership for inside the
178
+			// container.
179
+			if srcHdr.Uid != remappedRootUID {
180
+				xUID, err := idtools.ToHost(srcHdr.Uid, options.UIDMaps)
181
+				if err != nil {
182
+					return 0, err
183
+				}
184
+				srcHdr.Uid = xUID
185
+			}
186
+			if srcHdr.Gid != remappedRootGID {
187
+				xGID, err := idtools.ToHost(srcHdr.Gid, options.GIDMaps)
188
+				if err != nil {
189
+					return 0, err
190
+				}
191
+				srcHdr.Gid = xGID
192
+			}
172 193
 			if err := createTarFile(path, dest, srcHdr, srcData, true, nil); err != nil {
173 194
 				return 0, err
174 195
 			}
... ...
@@ -196,19 +225,19 @@ func UnpackLayer(dest string, layer Reader) (size int64, err error) {
196 196
 // compressed or uncompressed.
197 197
 // Returns the size in bytes of the contents of the layer.
198 198
 func ApplyLayer(dest string, layer Reader) (int64, error) {
199
-	return applyLayerHandler(dest, layer, true)
199
+	return applyLayerHandler(dest, layer, &TarOptions{}, true)
200 200
 }
201 201
 
202 202
 // ApplyUncompressedLayer parses a diff in the standard layer format from
203 203
 // `layer`, and applies it to the directory `dest`. The stream `layer`
204 204
 // can only be uncompressed.
205 205
 // Returns the size in bytes of the contents of the layer.
206
-func ApplyUncompressedLayer(dest string, layer Reader) (int64, error) {
207
-	return applyLayerHandler(dest, layer, false)
206
+func ApplyUncompressedLayer(dest string, layer Reader, options *TarOptions) (int64, error) {
207
+	return applyLayerHandler(dest, layer, options, false)
208 208
 }
209 209
 
210 210
 // do the bulk load of ApplyLayer, but allow for not calling DecompressStream
211
-func applyLayerHandler(dest string, layer Reader, decompress bool) (int64, error) {
211
+func applyLayerHandler(dest string, layer Reader, options *TarOptions, decompress bool) (int64, error) {
212 212
 	dest = filepath.Clean(dest)
213 213
 
214 214
 	// We need to be able to set any perms
... ...
@@ -224,5 +253,5 @@ func applyLayerHandler(dest string, layer Reader, decompress bool) (int64, error
224 224
 			return 0, err
225 225
 		}
226 226
 	}
227
-	return UnpackLayer(dest, layer)
227
+	return UnpackLayer(dest, layer, options)
228 228
 }
... ...
@@ -7,13 +7,13 @@ import "github.com/docker/docker/pkg/archive"
7 7
 // uncompressed.
8 8
 // Returns the size in bytes of the contents of the layer.
9 9
 func ApplyLayer(dest string, layer archive.Reader) (size int64, err error) {
10
-	return applyLayerHandler(dest, layer, true)
10
+	return applyLayerHandler(dest, layer, &archive.TarOptions{}, true)
11 11
 }
12 12
 
13 13
 // ApplyUncompressedLayer parses a diff in the standard layer format from
14 14
 // `layer`, and applies it to the directory `dest`. The stream `layer`
15 15
 // can only be uncompressed.
16 16
 // Returns the size in bytes of the contents of the layer.
17
-func ApplyUncompressedLayer(dest string, layer archive.Reader) (int64, error) {
18
-	return applyLayerHandler(dest, layer, false)
17
+func ApplyUncompressedLayer(dest string, layer archive.Reader, options *archive.TarOptions) (int64, error) {
18
+	return applyLayerHandler(dest, layer, options, false)
19 19
 }
... ...
@@ -27,8 +27,9 @@ type applyLayerResponse struct {
27 27
 func applyLayer() {
28 28
 
29 29
 	var (
30
-		tmpDir = ""
31
-		err    error
30
+		tmpDir  = ""
31
+		err     error
32
+		options *archive.TarOptions
32 33
 	)
33 34
 	runtime.LockOSThread()
34 35
 	flag.Parse()
... ...
@@ -44,12 +45,16 @@ func applyLayer() {
44 44
 		fatal(err)
45 45
 	}
46 46
 
47
+	if err := json.Unmarshal([]byte(os.Getenv("OPT")), &options); err != nil {
48
+		fatal(err)
49
+	}
50
+
47 51
 	if tmpDir, err = ioutil.TempDir("/", "temp-docker-extract"); err != nil {
48 52
 		fatal(err)
49 53
 	}
50 54
 
51 55
 	os.Setenv("TMPDIR", tmpDir)
52
-	size, err := archive.UnpackLayer("/", os.Stdin)
56
+	size, err := archive.UnpackLayer("/", os.Stdin, options)
53 57
 	os.RemoveAll(tmpDir)
54 58
 	if err != nil {
55 59
 		fatal(err)
... ...
@@ -68,7 +73,7 @@ func applyLayer() {
68 68
 // applyLayerHandler parses a diff in the standard layer format from `layer`, and
69 69
 // applies it to the directory `dest`. Returns the size in bytes of the
70 70
 // contents of the layer.
71
-func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size int64, err error) {
71
+func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
72 72
 	dest = filepath.Clean(dest)
73 73
 	if decompress {
74 74
 		decompressed, err := archive.DecompressStream(layer)
... ...
@@ -79,9 +84,21 @@ func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size
79 79
 
80 80
 		layer = decompressed
81 81
 	}
82
+	if options == nil {
83
+		options = &archive.TarOptions{}
84
+	}
85
+	if options.ExcludePatterns == nil {
86
+		options.ExcludePatterns = []string{}
87
+	}
88
+
89
+	data, err := json.Marshal(options)
90
+	if err != nil {
91
+		return 0, fmt.Errorf("ApplyLayer json encode: %v", err)
92
+	}
82 93
 
83 94
 	cmd := reexec.Command("docker-applyLayer", dest)
84 95
 	cmd.Stdin = layer
96
+	cmd.Env = append(cmd.Env, fmt.Sprintf("OPT=%s", data))
85 97
 
86 98
 	outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer)
87 99
 	cmd.Stdout, cmd.Stderr = outBuf, errBuf
... ...
@@ -13,7 +13,7 @@ import (
13 13
 // applyLayerHandler parses a diff in the standard layer format from `layer`, and
14 14
 // applies it to the directory `dest`. Returns the size in bytes of the
15 15
 // contents of the layer.
16
-func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size int64, err error) {
16
+func applyLayerHandler(dest string, layer archive.Reader, options *archive.TarOptions, decompress bool) (size int64, err error) {
17 17
 	dest = filepath.Clean(dest)
18 18
 
19 19
 	// Ensure it is a Windows-style volume path
... ...
@@ -34,7 +34,7 @@ func applyLayerHandler(dest string, layer archive.Reader, decompress bool) (size
34 34
 		return 0, fmt.Errorf("ApplyLayer failed to create temp-docker-extract under %s. %s", dest, err)
35 35
 	}
36 36
 
37
-	s, err := archive.UnpackLayer(dest, layer)
37
+	s, err := archive.UnpackLayer(dest, layer, nil)
38 38
 	os.RemoveAll(tmpDir)
39 39
 	if err != nil {
40 40
 		return 0, fmt.Errorf("ApplyLayer %s failed UnpackLayer to %s", err, dest)
41 41
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+package directory
1
+
2
+import (
3
+	"io/ioutil"
4
+	"os"
5
+	"path/filepath"
6
+)
7
+
8
// MoveToSubdir relocates every entry found directly inside oldpath into
// oldpath/subdir, skipping the entry named subdir itself. It returns the
// first error hit while listing oldpath or renaming an entry; entries already
// moved before that error stay where they were moved to.
func MoveToSubdir(oldpath, subdir string) error {
	entries, err := ioutil.ReadDir(oldpath)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		name := entry.Name()
		if name == subdir {
			// never try to move the destination into itself
			continue
		}
		from := filepath.Join(oldpath, name)
		to := filepath.Join(oldpath, subdir, name)
		if err := os.Rename(from, to); err != nil {
			return err
		}
	}
	return nil
}
... ...
@@ -3,6 +3,9 @@ package directory
3 3
 import (
4 4
 	"io/ioutil"
5 5
 	"os"
6
+	"path/filepath"
7
+	"reflect"
8
+	"sort"
6 9
 	"testing"
7 10
 )
8 11
 
... ...
@@ -135,3 +138,45 @@ func TestSizeFileAndNestedDirectoryNonempty(t *testing.T) {
135 135
 		t.Fatalf("directory with 6-byte file and nested directory with 6-byte file has size: %d", size)
136 136
 	}
137 137
 }
138
+
139
+// Test migration of directory to a subdir underneath itself
140
+func TestMoveToSubdir(t *testing.T) {
141
+	var outerDir, subDir string
142
+	var err error
143
+
144
+	if outerDir, err = ioutil.TempDir(os.TempDir(), "TestMoveToSubdir"); err != nil {
145
+		t.Fatalf("failed to create directory: %v", err)
146
+	}
147
+
148
+	if subDir, err = ioutil.TempDir(outerDir, "testSub"); err != nil {
149
+		t.Fatalf("failed to create subdirectory: %v", err)
150
+	}
151
+
152
+	// write 4 temp files in the outer dir to get moved
153
+	filesList := []string{"a", "b", "c", "d"}
154
+	for _, fName := range filesList {
155
+		if file, err := os.Create(filepath.Join(outerDir, fName)); err != nil {
156
+			t.Fatalf("couldn't create temp file %q: %v", fName, err)
157
+		} else {
158
+			file.WriteString(fName)
159
+			file.Close()
160
+		}
161
+	}
162
+
163
+	if err = MoveToSubdir(outerDir, filepath.Base(subDir)); err != nil {
164
+		t.Fatalf("Error during migration of content to subdirectory: %v", err)
165
+	}
166
+	// validate that the files were moved to the subdirectory
167
+	infos, err := ioutil.ReadDir(subDir)
168
+	if len(infos) != 4 {
169
+		t.Fatalf("Should be four files in the subdir after the migration: actual length: %d", len(infos))
170
+	}
171
+	var results []string
172
+	for _, info := range infos {
173
+		results = append(results, info.Name())
174
+	}
175
+	sort.Sort(sort.StringSlice(results))
176
+	if !reflect.DeepEqual(filesList, results) {
177
+		t.Fatalf("Results after migration do not equal list of files: expected: %v, got: %v", filesList, results)
178
+	}
179
+}
... ...
@@ -5,7 +5,6 @@ package directory
5 5
 import (
6 6
 	"os"
7 7
 	"path/filepath"
8
-	"strings"
9 8
 
10 9
 	"github.com/docker/docker/pkg/longpath"
11 10
 )
12 11
new file mode 100644
... ...
@@ -0,0 +1,207 @@
0
+package idtools
1
+
2
+import (
3
+	"bufio"
4
+	"fmt"
5
+	"os"
6
+	"sort"
7
+	"strconv"
8
+	"strings"
9
+
10
+	"github.com/docker/docker/pkg/system"
11
+)
12
+
13
// IDMap is one entry of a user namespace remapping: a contiguous run of Size
// IDs starting at ContainerID inside the container that corresponds to the
// run starting at HostID on the host. A slice of IDMap entries represents the
// structure that will be provided to the Linux kernel for creating a user
// namespace.
type IDMap struct {
	// ContainerID is the first ID of the range as seen inside the container.
	ContainerID int `json:"container_id"`
	// HostID is the first ID of the range as seen on the host.
	HostID int `json:"host_id"`
	// Size is the number of consecutive IDs covered by this entry.
	Size int `json:"size"`
}
21
+
22
// subIDRange is a single run of IDs: Length consecutive IDs beginning at
// Start.
type subIDRange struct {
	Start  int
	Length int
}

// ranges is a list of subIDRange values that sorts by ascending Start.
type ranges []subIDRange

// Len reports the number of ranges in the list.
func (rs ranges) Len() int {
	return len(rs)
}

// Swap exchanges the ranges at positions a and b.
func (rs ranges) Swap(a, b int) {
	rs[a], rs[b] = rs[b], rs[a]
}

// Less reports whether the range at position a starts before the one at b.
func (rs ranges) Less(a, b int) bool {
	return rs[a].Start < rs[b].Start
}
32
+
33
// subuidFileName is the path of the subordinate uid range file.
const subuidFileName string = "/etc/subuid"

// subgidFileName is the path of the subordinate gid range file.
const subgidFileName string = "/etc/subgid"
37
+
38
+// MkdirAllAs creates a directory (include any along the path) and then modifies
39
+// ownership to the requested uid/gid.  If the directory already exists, this
40
+// function will still change ownership to the requested uid/gid pair.
41
+func MkdirAllAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
42
+	return mkdirAs(path, mode, ownerUID, ownerGID, true)
43
+}
44
+
45
+// MkdirAs creates a directory and then modifies ownership to the requested uid/gid.
46
+// If the directory already exists, this function still changes ownership
47
+func MkdirAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
48
+	return mkdirAs(path, mode, ownerUID, ownerGID, false)
49
+}
50
+
51
+func mkdirAs(path string, mode os.FileMode, ownerUID, ownerGID int, mkAll bool) error {
52
+	if mkAll {
53
+		if err := system.MkdirAll(path, mode); err != nil && !os.IsExist(err) {
54
+			return err
55
+		}
56
+	} else {
57
+		if err := os.Mkdir(path, mode); err != nil && !os.IsExist(err) {
58
+			return err
59
+		}
60
+	}
61
+	// even if it existed, we will chown to change ownership as requested
62
+	if err := os.Chown(path, ownerUID, ownerGID); err != nil {
63
+		return err
64
+	}
65
+	return nil
66
+}
67
+
68
+// GetRootUIDGID retrieves the remapped root uid/gid pair from the set of maps.
69
+// If the maps are empty, then the root uid/gid will default to "real" 0/0
70
+func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
71
+	var uid, gid int
72
+
73
+	if uidMap != nil {
74
+		xUID, err := ToHost(0, uidMap)
75
+		if err != nil {
76
+			return -1, -1, err
77
+		}
78
+		uid = xUID
79
+	}
80
+	if gidMap != nil {
81
+		xGID, err := ToHost(0, gidMap)
82
+		if err != nil {
83
+			return -1, -1, err
84
+		}
85
+		gid = xGID
86
+	}
87
+	return uid, gid, nil
88
+}
89
+
90
+// ToContainer takes an id mapping, and uses it to translate a
91
+// host ID to the remapped ID. If no map is provided, then the translation
92
+// assumes a 1-to-1 mapping and returns the passed in id
93
+func ToContainer(hostID int, idMap []IDMap) (int, error) {
94
+	if idMap == nil {
95
+		return hostID, nil
96
+	}
97
+	for _, m := range idMap {
98
+		if (hostID >= m.HostID) && (hostID <= (m.HostID + m.Size - 1)) {
99
+			contID := m.ContainerID + (hostID - m.HostID)
100
+			return contID, nil
101
+		}
102
+	}
103
+	return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID)
104
+}
105
+
106
+// ToHost takes an id mapping and a remapped ID, and translates the
107
+// ID to the mapped host ID. If no map is provided, then the translation
108
+// assumes a 1-to-1 mapping and returns the passed in id #
109
+func ToHost(contID int, idMap []IDMap) (int, error) {
110
+	if idMap == nil {
111
+		return contID, nil
112
+	}
113
+	for _, m := range idMap {
114
+		if (contID >= m.ContainerID) && (contID <= (m.ContainerID + m.Size - 1)) {
115
+			hostID := m.HostID + (contID - m.ContainerID)
116
+			return hostID, nil
117
+		}
118
+	}
119
+	return -1, fmt.Errorf("Container ID %d cannot be mapped to a host ID", contID)
120
+}
121
+
122
+// CreateIDMappings takes a requested user and group name and
123
+// using the data from /etc/sub{uid,gid} ranges, creates the
124
+// proper uid and gid remapping ranges for that user/group pair
125
+func CreateIDMappings(username, groupname string) ([]IDMap, []IDMap, error) {
126
+	subuidRanges, err := parseSubuid(username)
127
+	if err != nil {
128
+		return nil, nil, err
129
+	}
130
+	subgidRanges, err := parseSubgid(groupname)
131
+	if err != nil {
132
+		return nil, nil, err
133
+	}
134
+	if len(subuidRanges) == 0 {
135
+		return nil, nil, fmt.Errorf("No subuid ranges found for user %q", username)
136
+	}
137
+	if len(subgidRanges) == 0 {
138
+		return nil, nil, fmt.Errorf("No subgid ranges found for group %q", groupname)
139
+	}
140
+
141
+	return createIDMap(subuidRanges), createIDMap(subgidRanges), nil
142
+}
143
+
144
+func createIDMap(subidRanges ranges) []IDMap {
145
+	idMap := []IDMap{}
146
+
147
+	// sort the ranges by lowest ID first
148
+	sort.Sort(subidRanges)
149
+	containerID := 0
150
+	for _, idrange := range subidRanges {
151
+		idMap = append(idMap, IDMap{
152
+			ContainerID: containerID,
153
+			HostID:      idrange.Start,
154
+			Size:        idrange.Length,
155
+		})
156
+		containerID = containerID + idrange.Length
157
+	}
158
+	return idMap
159
+}
160
+
161
+func parseSubuid(username string) (ranges, error) {
162
+	return parseSubidFile(subuidFileName, username)
163
+}
164
+
165
+func parseSubgid(username string) (ranges, error) {
166
+	return parseSubidFile(subgidFileName, username)
167
+}
168
+
169
+func parseSubidFile(path, username string) (ranges, error) {
170
+	var rangeList ranges
171
+
172
+	subidFile, err := os.Open(path)
173
+	if err != nil {
174
+		return rangeList, err
175
+	}
176
+	defer subidFile.Close()
177
+
178
+	s := bufio.NewScanner(subidFile)
179
+	for s.Scan() {
180
+		if err := s.Err(); err != nil {
181
+			return rangeList, err
182
+		}
183
+
184
+		text := strings.TrimSpace(s.Text())
185
+		if text == "" {
186
+			continue
187
+		}
188
+		parts := strings.Split(text, ":")
189
+		if len(parts) != 3 {
190
+			return rangeList, fmt.Errorf("Cannot parse subuid/gid information: Format not correct for %s file", path)
191
+		}
192
+		if parts[0] == username {
193
+			// return the first entry for a user; ignores potential for multiple ranges per user
194
+			startid, err := strconv.Atoi(parts[1])
195
+			if err != nil {
196
+				return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
197
+			}
198
+			length, err := strconv.Atoi(parts[2])
199
+			if err != nil {
200
+				return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
201
+			}
202
+			rangeList = append(rangeList, subIDRange{startid, length})
203
+		}
204
+	}
205
+	return rangeList, nil
206
+}
0 207
new file mode 100644
... ...
@@ -0,0 +1,155 @@
0
+package idtools
1
+
2
+import (
3
+	"fmt"
4
+	"os/exec"
5
+	"path/filepath"
6
+	"strings"
7
+	"syscall"
8
+)
9
+
10
// Users and groups are added to /etc/passwd and /etc/group by running the
// standard Linux distribution commands with the argument templates below:
// adduser --uid <id> --shell /bin/false --no-create-home --disabled-login --ingroup <groupname> <username>
// useradd -M -u <id> -s /bin/false -N -g <groupname> <username>
// addgroup --gid <id> <groupname>
// groupadd -g <id> <groupname>

const (
	// baseUID is the first uid considered when searching for an unused one.
	baseUID int = 10000
	// baseGID is the first gid considered when searching for an unused one.
	baseGID int = 10000
	// idMAX is the largest numeric ID the unused-ID search will try.
	idMAX int = 65534
)

var (
	// userCommand and groupCommand hold the names of the binaries chosen for
	// adding users and groups; they stay empty when none was found.
	userCommand  string
	groupCommand string

	// cmdTemplates maps a command name to the fmt template of its arguments.
	// The user templates take (uid, groupname, username); the group templates
	// take (gid, groupname).
	cmdTemplates = map[string]string{
		"adduser":  "--uid %d --shell /bin/false --no-create-home --disabled-login --ingroup %s %s",
		"useradd":  "-M -u %d -s /bin/false -N -g %s %s",
		"addgroup": "--gid %d %s",
		"groupadd": "-g %d %s",
	}
)
32
+
33
+func init() {
34
+	// set up which commands are used for adding users/groups dependent on distro
35
+	if _, err := resolveBinary("adduser"); err == nil {
36
+		userCommand = "adduser"
37
+	} else if _, err := resolveBinary("useradd"); err == nil {
38
+		userCommand = "useradd"
39
+	}
40
+	if _, err := resolveBinary("addgroup"); err == nil {
41
+		groupCommand = "addgroup"
42
+	} else if _, err := resolveBinary("groupadd"); err == nil {
43
+		groupCommand = "groupadd"
44
+	}
45
+}
46
+
47
// resolveBinary looks binname up in $PATH, follows any symlinks, and returns
// the resolved path. It returns an error if the lookup or symlink resolution
// fails, or if the resolved file's base name differs from binname (for
// example when binname is only a symlink to a differently named binary).
func resolveBinary(binname string) (string, error) {
	located, err := exec.LookPath(binname)
	if err != nil {
		return "", err
	}
	target, err := filepath.EvalSymlinks(located)
	if err != nil {
		return "", err
	}
	// only succeed when the final resolved binary basename matches what was
	// searched for
	if filepath.Base(target) != binname {
		return "", fmt.Errorf("Binary %q does not resolve to a binary of that name in $PATH (%q)", binname, target)
	}
	return target, nil
}
63
+
64
+// AddNamespaceRangesUser takes a name and finds an unused uid, gid pair
65
+// and calls the appropriate helper function to add the group and then
66
+// the user to the group in /etc/group and /etc/passwd respectively.
67
+// This new user's /etc/sub{uid,gid} ranges will be used for user namespace
68
+// mapping ranges in containers.
69
+func AddNamespaceRangesUser(name string) (int, int, error) {
70
+	// Find unused uid, gid pair
71
+	uid, err := findUnusedUID(baseUID)
72
+	if err != nil {
73
+		return -1, -1, fmt.Errorf("Unable to find unused UID: %v", err)
74
+	}
75
+	gid, err := findUnusedGID(baseGID)
76
+	if err != nil {
77
+		return -1, -1, fmt.Errorf("Unable to find unused GID: %v", err)
78
+	}
79
+
80
+	// First add the group that we will use
81
+	if err := addGroup(name, gid); err != nil {
82
+		return -1, -1, fmt.Errorf("Error adding group %q: %v", name, err)
83
+	}
84
+	// Add the user as a member of the group
85
+	if err := addUser(name, uid, name); err != nil {
86
+		return -1, -1, fmt.Errorf("Error adding user %q: %v", name, err)
87
+	}
88
+	return uid, gid, nil
89
+}
90
+
91
+func addUser(userName string, uid int, groupName string) error {
92
+
93
+	if userCommand == "" {
94
+		return fmt.Errorf("Cannot add user; no useradd/adduser binary found")
95
+	}
96
+	args := fmt.Sprintf(cmdTemplates[userCommand], uid, groupName, userName)
97
+	return execAddCmd(userCommand, args)
98
+}
99
+
100
+func addGroup(groupName string, gid int) error {
101
+
102
+	if groupCommand == "" {
103
+		return fmt.Errorf("Cannot add group; no groupadd/addgroup binary found")
104
+	}
105
+	args := fmt.Sprintf(cmdTemplates[groupCommand], gid, groupName)
106
+	// only error out if the error isn't that the group already exists
107
+	// if the group exists then our needs are already met
108
+	if err := execAddCmd(groupCommand, args); err != nil && !strings.Contains(err.Error(), "already exists") {
109
+		return err
110
+	}
111
+	return nil
112
+}
113
+
114
// execAddCmd runs cmd with args split on single spaces into separate
// arguments. On failure the returned error carries both the underlying error
// and the command's combined stdout/stderr output.
func execAddCmd(cmd, args string) error {
	argv := strings.Split(args, " ")
	if out, err := exec.Command(cmd, argv...).CombinedOutput(); err != nil {
		return fmt.Errorf("Failed to add user/group with error: %v; output: %q", err, string(out))
	}
	return nil
}
122
+
123
+func findUnusedUID(startUID int) (int, error) {
124
+	return findUnused("passwd", startUID)
125
+}
126
+
127
+func findUnusedGID(startGID int) (int, error) {
128
+	return findUnused("group", startGID)
129
+}
130
+
131
+func findUnused(file string, id int) (int, error) {
132
+	for {
133
+		cmdStr := fmt.Sprintf("cat /etc/%s | cut -d: -f3 | grep '^%d$'", file, id)
134
+		cmd := exec.Command("sh", "-c", cmdStr)
135
+		if err := cmd.Run(); err != nil {
136
+			// if a non-zero return code occurs, then we know the ID was not found
137
+			// and is usable
138
+			if exiterr, ok := err.(*exec.ExitError); ok {
139
+				// The program has exited with an exit code != 0
140
+				if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
141
+					if status.ExitStatus() == 1 {
142
+						//no match, we can use this ID
143
+						return id, nil
144
+					}
145
+				}
146
+			}
147
+			return -1, fmt.Errorf("Error looking in /etc/%s for unused ID: %v", file, err)
148
+		}
149
+		id++
150
+		if id > idMAX {
151
+			return -1, fmt.Errorf("Maximum id in %q reached with finding unused numeric ID", file)
152
+		}
153
+	}
154
+}
0 155
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+// +build !linux
1
+
2
+package idtools
3
+
4
+import "fmt"
5
+
6
// AddNamespaceRangesUser is the stand-in used on operating systems without
// support for adding users or groups: it ignores name and always returns
// -1, -1 and an error.
func AddNamespaceRangesUser(name string) (int, int, error) {
	unsupported := fmt.Errorf("No support for adding users or groups on this OS")
	return -1, -1, unsupported
}
... ...
@@ -68,7 +68,11 @@ func (c *Client) Call(serviceMethod string, args interface{}, ret interface{}) e
68 68
 		return err
69 69
 	}
70 70
 	defer body.Close()
71
-	return json.NewDecoder(body).Decode(&ret)
71
+	if err := json.NewDecoder(body).Decode(&ret); err != nil {
72
+		logrus.Errorf("%s: error reading plugin resp: %v", serviceMethod, err)
73
+		return err
74
+	}
75
+	return nil
72 76
 }
73 77
 
74 78
 // Stream calls the specified method with the specified arguments for the plugin and returns the response body
... ...
@@ -86,7 +90,11 @@ func (c *Client) SendFile(serviceMethod string, data io.Reader, ret interface{})
86 86
 	if err != nil {
87 87
 		return err
88 88
 	}
89
-	return json.NewDecoder(body).Decode(&ret)
89
+	if err := json.NewDecoder(body).Decode(&ret); err != nil {
90
+		logrus.Errorf("%s: error reading plugin resp: %v", serviceMethod, err)
91
+		return err
92
+	}
93
+	return nil
90 94
 }
91 95
 
92 96
 func (c *Client) callWithRetry(serviceMethod string, data io.Reader, retry bool) (io.ReadCloser, error) {
... ...
@@ -11,6 +11,7 @@ import (
11 11
 	"path/filepath"
12 12
 	"sync"
13 13
 
14
+	"github.com/docker/docker/pkg/idtools"
14 15
 	"github.com/docker/docker/volume"
15 16
 )
16 17
 
... ...
@@ -28,10 +29,10 @@ var ErrNotFound = errors.New("volume not found")
28 28
 // New instantiates a new Root instance with the provided scope. Scope
29 29
 // is the base path that the Root instance uses to store its
30 30
 // volumes. The base path is created here if it does not exist.
31
-func New(scope string) (*Root, error) {
31
+func New(scope string, rootUID, rootGID int) (*Root, error) {
32 32
 	rootDirectory := filepath.Join(scope, volumesPathName)
33 33
 
34
-	if err := os.MkdirAll(rootDirectory, 0700); err != nil {
34
+	if err := idtools.MkdirAllAs(rootDirectory, 0700, rootUID, rootGID); err != nil {
35 35
 		return nil, err
36 36
 	}
37 37
 
... ...
@@ -39,6 +40,8 @@ func New(scope string) (*Root, error) {
39 39
 		scope:   scope,
40 40
 		path:    rootDirectory,
41 41
 		volumes: make(map[string]*localVolume),
42
+		rootUID: rootUID,
43
+		rootGID: rootGID,
42 44
 	}
43 45
 
44 46
 	dirs, err := ioutil.ReadDir(rootDirectory)
... ...
@@ -66,6 +69,8 @@ type Root struct {
66 66
 	scope   string
67 67
 	path    string
68 68
 	volumes map[string]*localVolume
69
+	rootUID int
70
+	rootGID int
69 71
 }
70 72
 
71 73
 // List lists all the volumes
... ...
@@ -100,7 +105,7 @@ func (r *Root) Create(name string, _ map[string]string) (volume.Volume, error) {
100 100
 	}
101 101
 
102 102
 	path := r.DataPath(name)
103
-	if err := os.MkdirAll(path, 0755); err != nil {
103
+	if err := idtools.MkdirAllAs(path, 0755, r.rootUID, r.rootGID); err != nil {
104 104
 		if os.IsExist(err) {
105 105
 			return nil, fmt.Errorf("volume already exists under %s", filepath.Dir(path))
106 106
 		}
... ...
@@ -13,7 +13,7 @@ func TestRemove(t *testing.T) {
13 13
 	}
14 14
 	defer os.RemoveAll(rootDir)
15 15
 
16
-	r, err := New(rootDir)
16
+	r, err := New(rootDir, 0, 0)
17 17
 	if err != nil {
18 18
 		t.Fatal(err)
19 19
 	}
... ...
@@ -55,7 +55,7 @@ func TestInitializeWithVolumes(t *testing.T) {
55 55
 	}
56 56
 	defer os.RemoveAll(rootDir)
57 57
 
58
-	r, err := New(rootDir)
58
+	r, err := New(rootDir, 0, 0)
59 59
 	if err != nil {
60 60
 		t.Fatal(err)
61 61
 	}
... ...
@@ -65,7 +65,7 @@ func TestInitializeWithVolumes(t *testing.T) {
65 65
 		t.Fatal(err)
66 66
 	}
67 67
 
68
-	r, err = New(rootDir)
68
+	r, err = New(rootDir, 0, 0)
69 69
 	if err != nil {
70 70
 		t.Fatal(err)
71 71
 	}