Browse code

Run privileged containers when userns are specified

Following #19995 and #17409 this PR enables skipping userns re-mapping
when creating a container (or when executing a command), thus enabling
privileged containers to run side by side with userns-remapped
containers.

The feature is enabled by specifying ```--userns=host```, which will not
remap the user if userns are applied. If this flag is not specified,
the existing behavior (which blocks specific privileged operation)
remains.

Signed-off-by: Liron Levin <liron@twistlock.com>

Liron Levin authored on 2016/02/08 23:23:24
Showing 12 changed files
... ...
@@ -218,11 +218,14 @@ func (daemon *Daemon) populateCommand(c *container.Container, env []string) erro
218 218
 	processConfig.Env = env
219 219
 
220 220
 	remappedRoot := &execdriver.User{}
221
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
222
-	if rootUID != 0 {
223
-		remappedRoot.UID = rootUID
224
-		remappedRoot.GID = rootGID
221
+	if c.HostConfig.UsernsMode.IsPrivate() {
222
+		rootUID, rootGID := daemon.GetRemappedUIDGID()
223
+		if rootUID != 0 {
224
+			remappedRoot.UID = rootUID
225
+			remappedRoot.GID = rootGID
226
+		}
225 227
 	}
228
+
226 229
 	uidMap, gidMap := daemon.GetUIDGIDMaps()
227 230
 
228 231
 	if !daemon.seccompEnabled {
... ...
@@ -429,7 +429,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
429 429
 		logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
430 430
 	}
431 431
 	// check for various conflicting options with user namespaces
432
-	if daemon.configStore.RemappedRoot != "" {
432
+	if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
433 433
 		if hostConfig.Privileged {
434 434
 			return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
435 435
 		}
... ...
@@ -125,6 +125,7 @@ This section lists each version from latest to oldest.  Each listing includes a
125 125
 * `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported.
126 126
 * `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported.
127 127
 * `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported.
128
+* `POST /containers/create` now allows you to override user namespace remapping and use privileged options for the container.
128 129
 * `POST /auth` now returns an `IdentityToken` when supported by a registry.
129 130
 
130 131
 ### v1.22 API changes
... ...
@@ -431,6 +431,8 @@ Json Parameters:
431 431
             The default is not to restart. (optional)
432 432
             An ever increasing delay (double the previous delay, starting at 100mS)
433 433
             is added before each restart to prevent flooding the server.
434
+    -   **UsernsMode**  - Sets the user namespace mode for the container when the user namespace remapping option is enabled.
435
+           Supported values are: `host`.
434 436
     -   **NetworkMode** - Sets the networking mode for the container. Supported
435 437
           standard values are: `bridge`, `host`, `none`, and `container:<name|id>`. Any other value is taken
436 438
           as a custom network's name to which this container should connect to.
... ...
@@ -83,6 +83,9 @@ Creates a new container.
83 83
       --shm-size=[]                 Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`.  Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
84 84
       -t, --tty                     Allocate a pseudo-TTY
85 85
       -u, --user=""                 Username or UID
86
+      --userns=""                   Container user namespace
87
+                                    'host': Use the Docker host user namespace
88
+                                    '': Use the Docker daemon user namespace specified by `--userns-remap` option.
86 89
       --ulimit=[]                   Ulimit options
87 90
       --uts=""                      UTS namespace to use
88 91
       -v, --volume=[host-src:]container-dest[:<options>]
... ...
@@ -750,6 +750,16 @@ following algorithm to create the mapping ranges:
750 750
 2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user.
751 751
 3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `proc/self/gid_map`.
752 752
 
753
+### Disable user namespace for a container
754
+
755
+If you enable user namespaces on the daemon, all containers are started
756
+with user namespaces enabled. In some situations you might want to disable
757
+this feature for a container, for example, to start a privileged container (see
758
+[user namespace known restrictions](#user-namespace-known-restrictions)).
759
+To enable those advanced features for a specific container use `--userns=host`
760
+in the `run/exec/create` command.
761
+This option will completely disable user namespace mapping for the container's user.
762
+
753 763
 ### User namespace known restrictions
754 764
 
755 765
 The following standard Docker features are currently incompatible when
... ...
@@ -85,6 +85,9 @@ parent = "smn_cli"
85 85
       --stop-signal="SIGTERM"       Signal to stop a container
86 86
       -t, --tty                     Allocate a pseudo-TTY
87 87
       -u, --user=""                 Username or UID (format: <name|uid>[:<group|gid>])
88
+      --userns=""                   Container user namespace
89
+                                    'host': Use the Docker host user namespace
90
+                                    '': Use the Docker daemon user namespace specified by `--userns-remap` option.
88 91
       --ulimit=[]                   Ulimit options
89 92
       --uts=""                      UTS namespace to use
90 93
       -v, --volume=[host-src:]container-dest[:<options>]
... ...
@@ -37,11 +37,13 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
37 37
 	gid, err := strconv.Atoi(uidgid[1])
38 38
 	c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid"))
39 39
 
40
-	//writeable by the remapped root UID/GID pair
40
+	// writable by the remapped root UID/GID pair
41 41
 	c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil)
42 42
 
43 43
 	out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
44 44
 	c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
45
+	user := s.findUser(c, "userns")
46
+	c.Assert(uidgid[0], checker.Equals, user)
45 47
 
46 48
 	pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
47 49
 	c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid))
... ...
@@ -62,4 +64,23 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
62 62
 	c.Assert(err, checker.IsNil)
63 63
 	c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
64 64
 	c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
65
+
66
+	// use host usernamespace
67
+	out, err = s.d.Cmd("run", "-d", "--name", "userns_skip", "--userns", "host", "busybox", "sh", "-c", "touch /goofy/testfile; top")
68
+	c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
69
+	user = s.findUser(c, "userns_skip")
70
+	// userns are skipped, user is root
71
+	c.Assert(user, checker.Equals, "root")
72
+}
73
+
74
+// findUser finds the uid or name of the user of the first process that runs in a container
75
+func (s *DockerDaemonSuite) findUser(c *check.C, container string) string {
76
+	out, err := s.d.Cmd("top", container)
77
+	c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
78
+	rows := strings.Split(out, "\n")
79
+	if len(rows) < 2 {
80
+		// No process rows found
81
+		c.FailNow()
82
+	}
83
+	return strings.Fields(rows[1])[0]
65 84
 }
... ...
@@ -58,6 +58,7 @@ docker-create - Create a new container
58 58
 [**-P**|**--publish-all**]
59 59
 [**-p**|**--publish**[=*[]*]]
60 60
 [**--pid**[=*[]*]]
61
+[**--userns**[=*[]*]]
61 62
 [**--pids-limit**[=*PIDS_LIMIT*]]
62 63
 [**--privileged**]
63 64
 [**--read-only**]
... ...
@@ -291,6 +292,10 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
291 291
      **host**: use the host's PID namespace inside the container.
292 292
      Note: the host mode gives the container full access to local PID and is therefore considered insecure.
293 293
 
294
+**--userns**=""
295
+   Set the user namespace mode for the container when the `--userns-remap` option is enabled.
296
+     **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`).
297
+
294 298
 **--pids-limit**=""
295 299
    Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
296 300
 
... ...
@@ -60,6 +60,7 @@ docker-run - Run a command in a new container
60 60
 [**-P**|**--publish-all**]
61 61
 [**-p**|**--publish**[=*[]*]]
62 62
 [**--pid**[=*[]*]]
63
+[**--userns**[=*[]*]]
63 64
 [**--pids-limit**[=*PIDS_LIMIT*]]
64 65
 [**--privileged**]
65 66
 [**--read-only**]
... ...
@@ -421,6 +422,10 @@ Use `docker port` to see the actual mapping: `docker port CONTAINER $CONTAINERPO
421 421
      **host**: use the host's PID namespace inside the container.
422 422
      Note: the host mode gives the container full access to local PID and is therefore considered insecure.
423 423
 
424
+**--userns**=""
425
+   Set the user namespace mode for the container when the `--userns-remap` option is enabled.
426
+     **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`).
427
+
424 428
 **--pids-limit**=""
425 429
    Tune the container's pids limit. Set `-1` to have unlimited pids for the container.
426 430
 
... ...
@@ -121,6 +121,27 @@ func TestUTSModeTest(t *testing.T) {
121 121
 	}
122 122
 }
123 123
 
124
+func TestUsernsModeTest(t *testing.T) {
125
+	usrensMode := map[container.UsernsMode][]bool{
126
+		// private, host, valid
127
+		"":                {true, false, true},
128
+		"something:weird": {true, false, false},
129
+		"host":            {false, true, true},
130
+		"host:name":       {true, false, true},
131
+	}
132
+	for usernsMode, state := range usrensMode {
133
+		if usernsMode.IsPrivate() != state[0] {
134
+			t.Fatalf("UsernsMode.IsPrivate for %v should have been %v but was %v", usernsMode, state[0], usernsMode.IsPrivate())
135
+		}
136
+		if usernsMode.IsHost() != state[1] {
137
+			t.Fatalf("UsernsMode.IsHost for %v should have been %v but was %v", usernsMode, state[1], usernsMode.IsHost())
138
+		}
139
+		if usernsMode.Valid() != state[2] {
140
+			t.Fatalf("UsernsMode.Valid for %v should have been %v but was %v", usernsMode, state[2], usernsMode.Valid())
141
+		}
142
+	}
143
+}
144
+
124 145
 func TestPidModeTest(t *testing.T) {
125 146
 	pidModes := map[container.PidMode][]bool{
126 147
 		// private, host, valid
... ...
@@ -59,6 +59,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
59 59
 		flPrivileged        = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container")
60 60
 		flPidMode           = cmd.String([]string{"-pid"}, "", "PID namespace to use")
61 61
 		flUTSMode           = cmd.String([]string{"-uts"}, "", "UTS namespace to use")
62
+		flUsernsMode        = cmd.String([]string{"-userns"}, "", "User namespace to use")
62 63
 		flPublishAll        = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports")
63 64
 		flStdin             = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
64 65
 		flTty               = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
... ...
@@ -316,6 +317,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
316 316
 		return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode")
317 317
 	}
318 318
 
319
+	usernsMode := container.UsernsMode(*flUsernsMode)
320
+	if !usernsMode.Valid() {
321
+		return nil, nil, nil, cmd, fmt.Errorf("--userns: invalid USER mode")
322
+	}
323
+
319 324
 	restartPolicy, err := ParseRestartPolicy(*flRestartPolicy)
320 325
 	if err != nil {
321 326
 		return nil, nil, nil, cmd, err
... ...
@@ -404,6 +410,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
404 404
 		IpcMode:        ipcMode,
405 405
 		PidMode:        pidMode,
406 406
 		UTSMode:        utsMode,
407
+		UsernsMode:     usernsMode,
407 408
 		CapAdd:         strslice.StrSlice(flCapAdd.GetAll()),
408 409
 		CapDrop:        strslice.StrSlice(flCapDrop.GetAll()),
409 410
 		GroupAdd:       flGroupAdd.GetAll(),