The existing patch that disallows unprivileged CLONE_NEWUSER checks for
the CAP_SYS_ADMIN capability with capable(), i.e. against the
'init_user_ns' namespace, which is not entirely correct. Consider the following sequence:
1. A process with root privileges calls
clone(child_fn, ..., CLONE_NEWUSER, ...) to create a new user namespace.
2. child_fn, now running in the newly created user namespace, enjoys the
full set of capabilities in the new user namespace, but has lost
its capabilities in the old user namespace (init_user_ns in this
case).
3. child_fn now calls
clone(child_fn2, ..., CLONE_NEWUSER, ...) to create a new (nested)
user namespace.
Step 3 should succeed because child_fn has full privileges
(including CAP_SYS_ADMIN) in its user namespace, but it fails
on our kernel because the CAP_SYS_ADMIN capability is checked against
init_user_ns rather than child_fn's user namespace.
So fix this by checking for CAP_SYS_ADMIN using ns_capable() on the
current task's user namespace.
This also helps the userns07 testcase from LTP
(testcases/kernel/containers/userns/userns07.c) to pass when running
with root privileges.
Change-Id: I9c179d089309e6e1a18bb3cfb34cb66c872f5bfd
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/3947
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Anish Swaminathan <anishs@vmware.com>
... | ... |
@@ -12,10 +12,15 @@ issues are found, we have a fail-safe. |
12 | 12 |
|
13 | 13 |
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
14 | 14 |
[bwh: Remove unneeded binary sysctl bits] |
15 |
+[Srivatsa: Fix capability checks when running nested user namespaces by |
|
16 |
+using ns_capable() on the current task's user namespace.] |
|
17 |
+Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu> |
|
18 |
+ |
|
19 |
+diff --git a/kernel/fork.c b/kernel/fork.c |
|
20 |
+index 9321b1a..34d50a6 100644 |
|
15 | 21 |
--- a/kernel/fork.c |
16 | 22 |
+++ b/kernel/fork.c |
17 |
-@@ -87,6 +87,11 @@ |
|
23 |
+@@ -88,6 +88,11 @@ |
|
18 | 24 |
|
19 | 25 |
#define CREATE_TRACE_POINTS |
20 | 26 |
#include <trace/events/task.h> |
... | ... |
@@ -27,33 +32,35 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
27 | 27 |
|
28 | 28 |
/* |
29 | 29 |
* Minimum number of threads to boot the kernel |
30 |
-@@ -1252,6 +1257,10 @@ static struct task_struct *copy_process( |
|
30 |
+@@ -1476,6 +1481,10 @@ static __latent_entropy struct task_struct *copy_process( |
|
31 | 31 |
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) |
32 | 32 |
return ERR_PTR(-EINVAL); |
33 | 33 |
|
34 | 34 |
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) |
35 |
-+ if (!capable(CAP_SYS_ADMIN)) |
|
35 |
++ if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
|
36 | 36 |
+ return ERR_PTR(-EPERM); |
37 | 37 |
+ |
38 | 38 |
/* |
39 | 39 |
* Thread groups must share signals as well, and detached threads |
40 | 40 |
* can only be started up within the thread group. |
41 |
-@@ -1944,6 +1953,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, |
|
41 |
+@@ -2216,6 +2225,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
|
42 | 42 |
if (unshare_flags & CLONE_NEWNS) |
43 | 43 |
unshare_flags |= CLONE_FS; |
44 | 44 |
|
45 | 45 |
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { |
46 | 46 |
+ err = -EPERM; |
47 |
-+ if (!capable(CAP_SYS_ADMIN)) |
|
47 |
++ if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
|
48 | 48 |
+ goto bad_unshare_out; |
49 | 49 |
+ } |
50 | 50 |
+ |
51 | 51 |
err = check_unshare_flags(unshare_flags); |
52 | 52 |
if (err) |
53 | 53 |
goto bad_unshare_out; |
54 |
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c |
|
55 |
+index 8dc9e80..0d91b8e 100644 |
|
54 | 56 |
--- a/kernel/sysctl.c |
55 | 57 |
+++ b/kernel/sysctl.c |
56 |
-@@ -102,6 +102,9 @@ extern int core_uses_pid; |
|
58 |
+@@ -104,6 +104,9 @@ extern int core_uses_pid; |
|
57 | 59 |
extern char core_pattern[]; |
58 | 60 |
extern unsigned int core_pipe_limit; |
59 | 61 |
#endif |
... | ... |
@@ -63,11 +70,10 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
63 | 63 |
extern int pid_max; |
64 | 64 |
extern int pid_max_min, pid_max_max; |
65 | 65 |
extern int percpu_pagelist_fraction; |
66 |
-@@ -489,6 +492,15 @@ static struct ctl_table kern_table[] = { |
|
67 |
- .mode = 0644, |
|
66 |
+@@ -504,6 +507,15 @@ static struct ctl_table kern_table[] = { |
|
68 | 67 |
.proc_handler = proc_dointvec, |
69 | 68 |
}, |
70 |
-+#endif |
|
69 |
+ #endif |
|
71 | 70 |
+#ifdef CONFIG_USER_NS |
72 | 71 |
+ { |
73 | 72 |
+ .procname = "unprivileged_userns_clone", |
... | ... |
@@ -76,9 +82,12 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
76 | 76 |
+ .mode = 0644, |
77 | 77 |
+ .proc_handler = proc_dointvec, |
78 | 78 |
+ }, |
79 |
- #endif |
|
79 |
++#endif |
|
80 | 80 |
#ifdef CONFIG_PROC_SYSCTL |
81 | 81 |
{ |
82 |
+ .procname = "tainted", |
|
83 |
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c |
|
84 |
+index 86b7854..4e2e171 100644 |
|
82 | 85 |
--- a/kernel/user_namespace.c |
83 | 86 |
+++ b/kernel/user_namespace.c |
84 | 87 |
@@ -23,6 +23,9 @@ |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
Summary: Kernel |
3 | 3 |
Name: linux-esx |
4 | 4 |
Version: 4.9.52 |
5 |
-Release: 2%{?dist} |
|
5 |
+Release: 3%{?dist} |
|
6 | 6 |
License: GPLv2 |
7 | 7 |
URL: http://www.kernel.org/ |
8 | 8 |
Group: System Environment/Kernel |
... | ... |
@@ -193,6 +193,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg |
193 | 193 |
/usr/src/linux-headers-%{uname_r} |
194 | 194 |
|
195 | 195 |
%changelog |
196 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
197 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
196 | 198 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
197 | 199 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
198 | 200 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
Summary: Kernel |
3 | 3 |
Name: linux-secure |
4 | 4 |
Version: 4.9.52 |
5 |
-Release: 2%{?dist} |
|
5 |
+Release: 3%{?dist} |
|
6 | 6 |
License: GPLv2 |
7 | 7 |
URL: http://www.kernel.org/ |
8 | 8 |
Group: System Environment/Kernel |
... | ... |
@@ -261,6 +261,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg |
261 | 261 |
/usr/src/linux-headers-%{uname_r} |
262 | 262 |
|
263 | 263 |
%changelog |
264 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
265 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
264 | 266 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
265 | 267 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
266 | 268 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
Summary: Kernel |
3 | 3 |
Name: linux |
4 | 4 |
Version: 4.9.52 |
5 |
-Release: 2%{?dist} |
|
5 |
+Release: 3%{?dist} |
|
6 | 6 |
License: GPLv2 |
7 | 7 |
URL: http://www.kernel.org/ |
8 | 8 |
Group: System Environment/Kernel |
... | ... |
@@ -301,6 +301,8 @@ ln -sf %{name}-%{uname_r}.cfg /boot/photon.cfg |
301 | 301 |
/usr/share/doc/* |
302 | 302 |
|
303 | 303 |
%changelog |
304 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
305 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
304 | 306 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
305 | 307 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
306 | 308 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |