Browse code

Fix capability check to allow privileged CLONE_NEWUSER from nested user namespaces

The existing patch that disallows unprivileged CLONE_NEWUSER checks
for the CAP_SYS_ADMIN capability against the 'init_user_ns'
namespace, which is not entirely correct. Consider the following sequence:

1. A process with root privileges calls
clone(child_fn, ..., CLONE_NEWUSER, ...) to create a new user namespace.

2. child_fn, now running in the newly created user namespace, enjoys the
full set of capabilities in the new user namespace, but has lost
its capabilities in the old user namespace (init_user_ns in this
case).

3. child_fn now calls
clone(child_fn2, ..., CLONE_NEWUSER, ...) to create a new (nested)
user namespace.

Step 3 should have succeeded because child_fn has full privileges
(including CAP_SYS_ADMIN) in its user namespace, but this step fails
on our kernel, because the CAP_SYS_ADMIN capability is checked against
init_user_ns, as opposed to child_fn's user namespace.

So fix this by checking for CAP_SYS_ADMIN using ns_capable() on the
current task's user namespace.

This also helps the userns07 testcase from LTP
(testcases/kernel/containers/userns/userns07.c) to pass when running
with root privileges.

Change-Id: I9c179d089309e6e1a18bb3cfb34cb66c872f5bfd
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/3947
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Anish Swaminathan <anishs@vmware.com>

Srivatsa S. Bhat authored on 2017/10/03 10:36:59
Showing 4 changed files
... ...
@@ -12,10 +12,15 @@ issues are found, we have a fail-safe.
12 12
 
13 13
 Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
14 14
 [bwh: Remove unneeded binary sysctl bits]
15
+[Srivatsa: Fix capability checks when running nested user namespaces by
16
+using ns_capable() on the current task's user namespace.]
17
+Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
18
+
19
+diff --git a/kernel/fork.c b/kernel/fork.c
20
+index 9321b1a..34d50a6 100644
15 21
 --- a/kernel/fork.c
16 22
 +++ b/kernel/fork.c
17
-@@ -87,6 +87,11 @@
23
+@@ -88,6 +88,11 @@
18 24
  
19 25
  #define CREATE_TRACE_POINTS
20 26
  #include <trace/events/task.h>
... ...
@@ -27,33 +32,35 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
27 27
  
28 28
  /*
29 29
   * Minimum number of threads to boot the kernel
30
-@@ -1252,6 +1257,10 @@ static struct task_struct *copy_process(
30
+@@ -1476,6 +1481,10 @@ static __latent_entropy struct task_struct *copy_process(
31 31
  	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
32 32
  		return ERR_PTR(-EINVAL);
33 33
  
34 34
 +	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
35
-+		if (!capable(CAP_SYS_ADMIN))
35
++		if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
36 36
 +			return ERR_PTR(-EPERM);
37 37
 +
38 38
  	/*
39 39
  	 * Thread groups must share signals as well, and detached threads
40 40
  	 * can only be started up within the thread group.
41
-@@ -1944,6 +1953,12 @@ SYSCALL_DEFINE1(unshare, unsigned long,
41
+@@ -2216,6 +2225,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
42 42
  	if (unshare_flags & CLONE_NEWNS)
43 43
  		unshare_flags |= CLONE_FS;
44 44
  
45 45
 +	if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
46 46
 +		err = -EPERM;
47
-+		if (!capable(CAP_SYS_ADMIN))
47
++		if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
48 48
 +			goto bad_unshare_out;
49 49
 +	}
50 50
 +
51 51
  	err = check_unshare_flags(unshare_flags);
52 52
  	if (err)
53 53
  		goto bad_unshare_out;
54
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
55
+index 8dc9e80..0d91b8e 100644
54 56
 --- a/kernel/sysctl.c
55 57
 +++ b/kernel/sysctl.c
56
-@@ -102,6 +102,9 @@ extern int core_uses_pid;
58
+@@ -104,6 +104,9 @@ extern int core_uses_pid;
57 59
  extern char core_pattern[];
58 60
  extern unsigned int core_pipe_limit;
59 61
  #endif
... ...
@@ -63,11 +70,10 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
63 63
  extern int pid_max;
64 64
  extern int pid_max_min, pid_max_max;
65 65
  extern int percpu_pagelist_fraction;
66
-@@ -489,6 +492,15 @@ static struct ctl_table kern_table[] = {
67
- 		.mode		= 0644,
66
+@@ -504,6 +507,15 @@ static struct ctl_table kern_table[] = {
68 67
  		.proc_handler	= proc_dointvec,
69 68
  	},
70
-+#endif
69
+ #endif
71 70
 +#ifdef CONFIG_USER_NS
72 71
 +	{
73 72
 +		.procname	= "unprivileged_userns_clone",
... ...
@@ -76,9 +82,12 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
76 76
 +		.mode		= 0644,
77 77
 +		.proc_handler	= proc_dointvec,
78 78
 +	},
79
- #endif
79
++#endif
80 80
  #ifdef CONFIG_PROC_SYSCTL
81 81
  	{
82
+ 		.procname	= "tainted",
83
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
84
+index 86b7854..4e2e171 100644
82 85
 --- a/kernel/user_namespace.c
83 86
 +++ b/kernel/user_namespace.c
84 87
 @@ -23,6 +23,9 @@
... ...
@@ -2,7 +2,7 @@
2 2
 Summary:        Kernel
3 3
 Name:           linux-esx
4 4
 Version:        4.9.52
5
-Release:        2%{?dist}
5
+Release:        3%{?dist}
6 6
 License:        GPLv2
7 7
 URL:            http://www.kernel.org/
8 8
 Group:          System Environment/Kernel
... ...
@@ -193,6 +193,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg
193 193
 /usr/src/linux-headers-%{uname_r}
194 194
 
195 195
 %changelog
196
+*   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3
197
+-   Allow privileged CLONE_NEWUSER from nested user namespaces.
196 198
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2
197 199
 -   Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak)
198 200
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1
... ...
@@ -2,7 +2,7 @@
2 2
 Summary:        Kernel
3 3
 Name:           linux-secure
4 4
 Version:        4.9.52
5
-Release:        2%{?dist}
5
+Release:        3%{?dist}
6 6
 License:        GPLv2
7 7
 URL:            http://www.kernel.org/
8 8
 Group:          System Environment/Kernel
... ...
@@ -261,6 +261,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg
261 261
 /usr/src/linux-headers-%{uname_r}
262 262
 
263 263
 %changelog
264
+*   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3
265
+-   Allow privileged CLONE_NEWUSER from nested user namespaces.
264 266
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2
265 267
 -   Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak)
266 268
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1
... ...
@@ -2,7 +2,7 @@
2 2
 Summary:        Kernel
3 3
 Name:           linux
4 4
 Version:        4.9.52
5
-Release:        2%{?dist}
5
+Release:        3%{?dist}
6 6
 License:    	GPLv2
7 7
 URL:        	http://www.kernel.org/
8 8
 Group:        	System Environment/Kernel
... ...
@@ -301,6 +301,8 @@ ln -sf %{name}-%{uname_r}.cfg /boot/photon.cfg
301 301
 /usr/share/doc/*
302 302
 
303 303
 %changelog
304
+*   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3
305
+-   Allow privileged CLONE_NEWUSER from nested user namespaces.
304 306
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2
305 307
 -   Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak)
306 308
 *   Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1