Following #19995 and #17409 this PR enables skipping userns re-mapping
when creating a container (or when executing a command), thus enabling
privileged containers to run side by side with userns-remapped
containers.
The feature is enabled by specifying `--userns=host`, which skips
remapping of the container's user even when userns remapping is enabled
on the daemon. If this flag is not specified,
the existing behavior (which blocks specific privileged operations)
remains.
Signed-off-by: Liron Levin <liron@twistlock.com>
| ... | ... |
@@ -218,11 +218,14 @@ func (daemon *Daemon) populateCommand(c *container.Container, env []string) erro |
| 218 | 218 |
processConfig.Env = env |
| 219 | 219 |
|
| 220 | 220 |
remappedRoot := &execdriver.User{}
|
| 221 |
- rootUID, rootGID := daemon.GetRemappedUIDGID() |
|
| 222 |
- if rootUID != 0 {
|
|
| 223 |
- remappedRoot.UID = rootUID |
|
| 224 |
- remappedRoot.GID = rootGID |
|
| 221 |
+ if c.HostConfig.UsernsMode.IsPrivate() {
|
|
| 222 |
+ rootUID, rootGID := daemon.GetRemappedUIDGID() |
|
| 223 |
+ if rootUID != 0 {
|
|
| 224 |
+ remappedRoot.UID = rootUID |
|
| 225 |
+ remappedRoot.GID = rootGID |
|
| 226 |
+ } |
|
| 225 | 227 |
} |
| 228 |
+ |
|
| 226 | 229 |
uidMap, gidMap := daemon.GetUIDGIDMaps() |
| 227 | 230 |
|
| 228 | 231 |
if !daemon.seccompEnabled {
|
| ... | ... |
@@ -429,7 +429,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. |
| 429 | 429 |
logrus.Warnf("IPv4 forwarding is disabled. Networking will not work")
|
| 430 | 430 |
} |
| 431 | 431 |
// check for various conflicting options with user namespaces |
| 432 |
- if daemon.configStore.RemappedRoot != "" {
|
|
| 432 |
+ if daemon.configStore.RemappedRoot != "" && hostConfig.UsernsMode.IsPrivate() {
|
|
| 433 | 433 |
if hostConfig.Privileged {
|
| 434 | 434 |
return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces")
|
| 435 | 435 |
} |
| ... | ... |
@@ -125,6 +125,7 @@ This section lists each version from latest to oldest. Each listing includes a |
| 125 | 125 |
* `GET /info` now returns `KernelMemory` field, showing if "kernel memory limit" is supported. |
| 126 | 126 |
* `POST /containers/create` now takes `PidsLimit` field, if the kernel is >= 4.3 and the pids cgroup is supported. |
| 127 | 127 |
* `GET /containers/(id or name)/stats` now returns `pids_stats`, if the kernel is >= 4.3 and the pids cgroup is supported. |
| 128 |
+* `POST /containers/create` now allows you to override user namespace remapping and use privileged options for the container. |
|
| 128 | 129 |
* `POST /auth` now returns an `IdentityToken` when supported by a registry. |
| 129 | 130 |
|
| 130 | 131 |
### v1.22 API changes |
| ... | ... |
@@ -431,6 +431,8 @@ Json Parameters: |
| 431 | 431 |
The default is not to restart. (optional) |
| 432 | 432 |
An ever increasing delay (double the previous delay, starting at 100mS) |
| 433 | 433 |
is added before each restart to prevent flooding the server. |
| 434 |
+ - **UsernsMode** - Sets the user namespace mode for the container when the user namespace remapping option is enabled. |
|
| 435 |
+ Supported values are: `host`. |
|
| 434 | 436 |
- **NetworkMode** - Sets the networking mode for the container. Supported |
| 435 | 437 |
standard values are: `bridge`, `host`, `none`, and `container:<name|id>`. Any other value is taken |
| 436 | 438 |
as a custom network's name to which this container should connect to. |
| ... | ... |
@@ -83,6 +83,9 @@ Creates a new container. |
| 83 | 83 |
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`. |
| 84 | 84 |
-t, --tty Allocate a pseudo-TTY |
| 85 | 85 |
-u, --user="" Username or UID |
| 86 |
+ --userns="" Container user namespace |
|
| 87 |
+ 'host': Use the Docker host user namespace |
|
| 88 |
+ '': Use the Docker daemon user namespace specified by `--userns-remap` option. |
|
| 86 | 89 |
--ulimit=[] Ulimit options |
| 87 | 90 |
--uts="" UTS namespace to use |
| 88 | 91 |
-v, --volume=[host-src:]container-dest[:<options>] |
| ... | ... |
@@ -750,6 +750,16 @@ following algorithm to create the mapping ranges: |
| 750 | 750 |
2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1100 -> 100 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 101 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user. |
| 751 | 751 |
3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `proc/self/gid_map`. |
| 752 | 752 |
|
| 753 |
+### Disable user namespace for a container |
|
| 754 |
+ |
|
| 755 |
+If you enable user namespaces on the daemon, all containers are started |
|
| 756 |
+with user namespaces enabled. In some situations you might want to disable |
|
| 757 |
+this feature for a container, for example, to start a privileged container (see |
|
| 758 |
+[user namespace known restrictions](#user-namespace-known-restrictions)). |
|
| 759 |
+To enable those advanced features for a specific container, use `--userns=host` |
|
| 760 |
+in the `run/exec/create` command. |
|
| 761 |
+This option will completely disable user namespace mapping for the container's user. |
|
| 762 |
+ |
|
| 753 | 763 |
### User namespace known restrictions |
| 754 | 764 |
|
| 755 | 765 |
The following standard Docker features are currently incompatible when |
| ... | ... |
@@ -85,6 +85,9 @@ parent = "smn_cli" |
| 85 | 85 |
--stop-signal="SIGTERM" Signal to stop a container |
| 86 | 86 |
-t, --tty Allocate a pseudo-TTY |
| 87 | 87 |
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>]) |
| 88 |
+ --userns="" Container user namespace |
|
| 89 |
+ 'host': Use the Docker host user namespace |
|
| 90 |
+ '': Use the Docker daemon user namespace specified by `--userns-remap` option. |
|
| 88 | 91 |
--ulimit=[] Ulimit options |
| 89 | 92 |
--uts="" UTS namespace to use |
| 90 | 93 |
-v, --volume=[host-src:]container-dest[:<options>] |
| ... | ... |
@@ -37,11 +37,13 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
|
| 37 | 37 |
gid, err := strconv.Atoi(uidgid[1]) |
| 38 | 38 |
c.Assert(err, checker.IsNil, check.Commentf("Can't parse gid"))
|
| 39 | 39 |
|
| 40 |
- //writeable by the remapped root UID/GID pair |
|
| 40 |
+ // writable by the remapped root UID/GID pair |
|
| 41 | 41 |
c.Assert(os.Chown(tmpDir, uid, gid), checker.IsNil) |
| 42 | 42 |
|
| 43 | 43 |
out, err := s.d.Cmd("run", "-d", "--name", "userns", "-v", tmpDir+":/goofy", "busybox", "sh", "-c", "touch /goofy/testfile; top")
|
| 44 | 44 |
c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
| 45 |
+ user := s.findUser(c, "userns") |
|
| 46 |
+ c.Assert(uidgid[0], checker.Equals, user) |
|
| 45 | 47 |
|
| 46 | 48 |
pid, err := s.d.Cmd("inspect", "--format='{{.State.Pid}}'", "userns")
|
| 47 | 49 |
c.Assert(err, checker.IsNil, check.Commentf("Could not inspect running container: out: %q", pid))
|
| ... | ... |
@@ -62,4 +64,23 @@ func (s *DockerDaemonSuite) TestDaemonUserNamespaceRootSetting(c *check.C) {
|
| 62 | 62 |
c.Assert(err, checker.IsNil) |
| 63 | 63 |
c.Assert(stat.UID(), checker.Equals, uint32(uid), check.Commentf("Touched file not owned by remapped root UID"))
|
| 64 | 64 |
c.Assert(stat.GID(), checker.Equals, uint32(gid), check.Commentf("Touched file not owned by remapped root GID"))
|
| 65 |
+ |
|
| 66 |
+ // use host usernamespace |
|
| 67 |
+ out, err = s.d.Cmd("run", "-d", "--name", "userns_skip", "--userns", "host", "busybox", "sh", "-c", "touch /goofy/testfile; top")
|
|
| 68 |
+ c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
|
| 69 |
+ user = s.findUser(c, "userns_skip") |
|
| 70 |
+ // userns are skipped, user is root |
|
| 71 |
+ c.Assert(user, checker.Equals, "root") |
|
| 72 |
+} |
|
| 73 |
+ |
|
| 74 |
+// findUser finds the uid or name of the user of the first process that runs in a container |
|
| 75 |
+func (s *DockerDaemonSuite) findUser(c *check.C, container string) string {
|
|
| 76 |
+ out, err := s.d.Cmd("top", container)
|
|
| 77 |
+ c.Assert(err, checker.IsNil, check.Commentf("Output: %s", out))
|
|
| 78 |
+ rows := strings.Split(out, "\n") |
|
| 79 |
+ if len(rows) < 2 {
|
|
| 80 |
+ // No process rows founds |
|
| 81 |
+ c.FailNow() |
|
| 82 |
+ } |
|
| 83 |
+ return strings.Fields(rows[1])[0] |
|
| 65 | 84 |
} |
| ... | ... |
@@ -58,6 +58,7 @@ docker-create - Create a new container |
| 58 | 58 |
[**-P**|**--publish-all**] |
| 59 | 59 |
[**-p**|**--publish**[=*[]*]] |
| 60 | 60 |
[**--pid**[=*[]*]] |
| 61 |
+[**--userns**[=*[]*]] |
|
| 61 | 62 |
[**--pids-limit**[=*PIDS_LIMIT*]] |
| 62 | 63 |
[**--privileged**] |
| 63 | 64 |
[**--read-only**] |
| ... | ... |
@@ -291,6 +292,10 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap. |
| 291 | 291 |
**host**: use the host's PID namespace inside the container. |
| 292 | 292 |
Note: the host mode gives the container full access to local PID and is therefore considered insecure. |
| 293 | 293 |
|
| 294 |
+**--userns**="" |
|
| 295 |
+ Set the user namespace mode for the container when the `userns-remap` option is enabled. |
|
| 296 |
+ **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`). |
|
| 297 |
+ |
|
| 294 | 298 |
**--pids-limit**="" |
| 295 | 299 |
Tune the container's pids limit. Set `-1` to have unlimited pids for the container. |
| 296 | 300 |
|
| ... | ... |
@@ -60,6 +60,7 @@ docker-run - Run a command in a new container |
| 60 | 60 |
[**-P**|**--publish-all**] |
| 61 | 61 |
[**-p**|**--publish**[=*[]*]] |
| 62 | 62 |
[**--pid**[=*[]*]] |
| 63 |
+[**--userns**[=*[]*]] |
|
| 63 | 64 |
[**--pids-limit**[=*PIDS_LIMIT*]] |
| 64 | 65 |
[**--privileged**] |
| 65 | 66 |
[**--read-only**] |
| ... | ... |
@@ -421,6 +422,10 @@ Use `docker port` to see the actual mapping: `docker port CONTAINER $CONTAINERPO |
| 421 | 421 |
**host**: use the host's PID namespace inside the container. |
| 422 | 422 |
Note: the host mode gives the container full access to local PID and is therefore considered insecure. |
| 423 | 423 |
|
| 424 |
+**--userns**="" |
|
| 425 |
+ Set the user namespace mode for the container when the `userns-remap` option is enabled. |
|
| 426 |
+ **host**: use the host user namespace and enable all privileged options (e.g., `--pid=host` or `--privileged`). |
|
| 427 |
+ |
|
| 424 | 428 |
**--pids-limit**="" |
| 425 | 429 |
Tune the container's pids limit. Set `-1` to have unlimited pids for the container. |
| 426 | 430 |
|
| ... | ... |
@@ -121,6 +121,27 @@ func TestUTSModeTest(t *testing.T) {
|
| 121 | 121 |
} |
| 122 | 122 |
} |
| 123 | 123 |
|
| 124 |
+func TestUsernsModeTest(t *testing.T) {
|
|
| 125 |
+ usrensMode := map[container.UsernsMode][]bool{
|
|
| 126 |
+ // private, host, valid |
|
| 127 |
+ "": {true, false, true},
|
|
| 128 |
+ "something:weird": {true, false, false},
|
|
| 129 |
+ "host": {false, true, true},
|
|
| 130 |
+ "host:name": {true, false, true},
|
|
| 131 |
+ } |
|
| 132 |
+ for usernsMode, state := range usrensMode {
|
|
| 133 |
+ if usernsMode.IsPrivate() != state[0] {
|
|
| 134 |
+ t.Fatalf("UsernsMode.IsPrivate for %v should have been %v but was %v", usernsMode, state[0], usernsMode.IsPrivate())
|
|
| 135 |
+ } |
|
| 136 |
+ if usernsMode.IsHost() != state[1] {
|
|
| 137 |
+ t.Fatalf("UsernsMode.IsHost for %v should have been %v but was %v", usernsMode, state[1], usernsMode.IsHost())
|
|
| 138 |
+ } |
|
| 139 |
+ if usernsMode.Valid() != state[2] {
|
|
| 140 |
+ t.Fatalf("UsernsMode.Valid for %v should have been %v but was %v", usernsMode, state[2], usernsMode.Valid())
|
|
| 141 |
+ } |
|
| 142 |
+ } |
|
| 143 |
+} |
|
| 144 |
+ |
|
| 124 | 145 |
func TestPidModeTest(t *testing.T) {
|
| 125 | 146 |
pidModes := map[container.PidMode][]bool{
|
| 126 | 147 |
// private, host, valid |
| ... | ... |
@@ -59,6 +59,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
| 59 | 59 |
flPrivileged = cmd.Bool([]string{"-privileged"}, false, "Give extended privileges to this container")
|
| 60 | 60 |
flPidMode = cmd.String([]string{"-pid"}, "", "PID namespace to use")
|
| 61 | 61 |
flUTSMode = cmd.String([]string{"-uts"}, "", "UTS namespace to use")
|
| 62 |
+ flUsernsMode = cmd.String([]string{"-userns"}, "", "User namespace to use")
|
|
| 62 | 63 |
flPublishAll = cmd.Bool([]string{"P", "-publish-all"}, false, "Publish all exposed ports to random ports")
|
| 63 | 64 |
flStdin = cmd.Bool([]string{"i", "-interactive"}, false, "Keep STDIN open even if not attached")
|
| 64 | 65 |
flTty = cmd.Bool([]string{"t", "-tty"}, false, "Allocate a pseudo-TTY")
|
| ... | ... |
@@ -316,6 +317,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
| 316 | 316 |
return nil, nil, nil, cmd, fmt.Errorf("--uts: invalid UTS mode")
|
| 317 | 317 |
} |
| 318 | 318 |
|
| 319 |
+ usernsMode := container.UsernsMode(*flUsernsMode) |
|
| 320 |
+ if !usernsMode.Valid() {
|
|
| 321 |
+ return nil, nil, nil, cmd, fmt.Errorf("--userns: invalid USER mode")
|
|
| 322 |
+ } |
|
| 323 |
+ |
|
| 319 | 324 |
restartPolicy, err := ParseRestartPolicy(*flRestartPolicy) |
| 320 | 325 |
if err != nil {
|
| 321 | 326 |
return nil, nil, nil, cmd, err |
| ... | ... |
@@ -404,6 +410,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
| 404 | 404 |
IpcMode: ipcMode, |
| 405 | 405 |
PidMode: pidMode, |
| 406 | 406 |
UTSMode: utsMode, |
| 407 |
+ UsernsMode: usernsMode, |
|
| 407 | 408 |
CapAdd: strslice.StrSlice(flCapAdd.GetAll()), |
| 408 | 409 |
CapDrop: strslice.StrSlice(flCapDrop.GetAll()), |
| 409 | 410 |
GroupAdd: flGroupAdd.GetAll(), |