The existing patch, which disallows unprivileged CLONE_NEWUSER, checks
for the CAP_SYS_ADMIN capability against the 'init_user_ns'
namespace, which is not entirely correct. Consider the following sequence:
1. A process with root privileges calls
clone(child_fn, ..., CLONE_NEWUSER, ...) to create a new user namespace.
2. child_fn, now running in the newly created user namespace, enjoys the
full set of capabilities in the new user namespace, but has lost
its capabilities in the old user namespace (init_user_ns in this
case).
3. child_fn now calls
clone(child_fn2, ..., CLONE_NEWUSER, ...) to create a new (nested)
user namespace.
Step 3 should succeed because child_fn has full privileges
(including CAP_SYS_ADMIN) in its user namespace, but it fails
on our kernel, because the CAP_SYS_ADMIN capability is checked against
init_user_ns, as opposed to child_fn's user namespace.
So fix this by checking for CAP_SYS_ADMIN using ns_capable() on the
current task's user namespace.
This also helps the userns07 testcase from LTP
(testcases/kernel/containers/userns/userns07.c) to pass when running
with root privileges.
Change-Id: I9c179d089309e6e1a18bb3cfb34cb66c872f5bfd
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/3947
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Anish Swaminathan <anishs@vmware.com>
| ... | ... |
@@ -12,10 +12,15 @@ issues are found, we have a fail-safe. |
| 12 | 12 |
|
| 13 | 13 |
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
| 14 | 14 |
[bwh: Remove unneeded binary sysctl bits] |
| 15 |
+[Srivatsa: Fix capability checks when running nested user namespaces by |
|
| 16 |
+using ns_capable() on the current task's user namespace.] |
|
| 17 |
+Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu> |
|
| 18 |
+ |
|
| 19 |
+diff --git a/kernel/fork.c b/kernel/fork.c |
|
| 20 |
+index 9321b1a..34d50a6 100644 |
|
| 15 | 21 |
--- a/kernel/fork.c |
| 16 | 22 |
+++ b/kernel/fork.c |
| 17 |
-@@ -87,6 +87,11 @@ |
|
| 23 |
+@@ -88,6 +88,11 @@ |
|
| 18 | 24 |
|
| 19 | 25 |
#define CREATE_TRACE_POINTS |
| 20 | 26 |
#include <trace/events/task.h> |
| ... | ... |
@@ -27,33 +32,35 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
| 27 | 27 |
|
| 28 | 28 |
/* |
| 29 | 29 |
* Minimum number of threads to boot the kernel |
| 30 |
-@@ -1252,6 +1257,10 @@ static struct task_struct *copy_process( |
|
| 30 |
+@@ -1476,6 +1481,10 @@ static __latent_entropy struct task_struct *copy_process( |
|
| 31 | 31 |
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) |
| 32 | 32 |
return ERR_PTR(-EINVAL); |
| 33 | 33 |
|
| 34 | 34 |
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) |
| 35 |
-+ if (!capable(CAP_SYS_ADMIN)) |
|
| 35 |
++ if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
|
| 36 | 36 |
+ return ERR_PTR(-EPERM); |
| 37 | 37 |
+ |
| 38 | 38 |
/* |
| 39 | 39 |
* Thread groups must share signals as well, and detached threads |
| 40 | 40 |
* can only be started up within the thread group. |
| 41 |
-@@ -1944,6 +1953,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, |
|
| 41 |
+@@ -2216,6 +2225,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
|
| 42 | 42 |
if (unshare_flags & CLONE_NEWNS) |
| 43 | 43 |
unshare_flags |= CLONE_FS; |
| 44 | 44 |
|
| 45 | 45 |
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
| 46 | 46 |
+ err = -EPERM; |
| 47 |
-+ if (!capable(CAP_SYS_ADMIN)) |
|
| 47 |
++ if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
|
| 48 | 48 |
+ goto bad_unshare_out; |
| 49 | 49 |
+ } |
| 50 | 50 |
+ |
| 51 | 51 |
err = check_unshare_flags(unshare_flags); |
| 52 | 52 |
if (err) |
| 53 | 53 |
goto bad_unshare_out; |
| 54 |
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c |
|
| 55 |
+index 8dc9e80..0d91b8e 100644 |
|
| 54 | 56 |
--- a/kernel/sysctl.c |
| 55 | 57 |
+++ b/kernel/sysctl.c |
| 56 |
-@@ -102,6 +102,9 @@ extern int core_uses_pid; |
|
| 58 |
+@@ -104,6 +104,9 @@ extern int core_uses_pid; |
|
| 57 | 59 |
extern char core_pattern[]; |
| 58 | 60 |
extern unsigned int core_pipe_limit; |
| 59 | 61 |
#endif |
| ... | ... |
@@ -63,11 +70,10 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
| 63 | 63 |
extern int pid_max; |
| 64 | 64 |
extern int pid_max_min, pid_max_max; |
| 65 | 65 |
extern int percpu_pagelist_fraction; |
| 66 |
-@@ -489,6 +492,15 @@ static struct ctl_table kern_table[] = {
|
|
| 67 |
- .mode = 0644, |
|
| 66 |
+@@ -504,6 +507,15 @@ static struct ctl_table kern_table[] = {
|
|
| 68 | 67 |
.proc_handler = proc_dointvec, |
| 69 | 68 |
}, |
| 70 |
-+#endif |
|
| 69 |
+ #endif |
|
| 71 | 70 |
+#ifdef CONFIG_USER_NS |
| 72 | 71 |
+ {
|
| 73 | 72 |
+ .procname = "unprivileged_userns_clone", |
| ... | ... |
@@ -76,9 +82,12 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> |
| 76 | 76 |
+ .mode = 0644, |
| 77 | 77 |
+ .proc_handler = proc_dointvec, |
| 78 | 78 |
+ }, |
| 79 |
- #endif |
|
| 79 |
++#endif |
|
| 80 | 80 |
#ifdef CONFIG_PROC_SYSCTL |
| 81 | 81 |
{
|
| 82 |
+ .procname = "tainted", |
|
| 83 |
+diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c |
|
| 84 |
+index 86b7854..4e2e171 100644 |
|
| 82 | 85 |
--- a/kernel/user_namespace.c |
| 83 | 86 |
+++ b/kernel/user_namespace.c |
| 84 | 87 |
@@ -23,6 +23,9 @@ |
| ... | ... |
@@ -2,7 +2,7 @@ |
| 2 | 2 |
Summary: Kernel |
| 3 | 3 |
Name: linux-esx |
| 4 | 4 |
Version: 4.9.52 |
| 5 |
-Release: 2%{?dist}
|
|
| 5 |
+Release: 3%{?dist}
|
|
| 6 | 6 |
License: GPLv2 |
| 7 | 7 |
URL: http://www.kernel.org/ |
| 8 | 8 |
Group: System Environment/Kernel |
| ... | ... |
@@ -193,6 +193,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg
|
| 193 | 193 |
/usr/src/linux-headers-%{uname_r}
|
| 194 | 194 |
|
| 195 | 195 |
%changelog |
| 196 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
| 197 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
| 196 | 198 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
| 197 | 199 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
| 198 | 200 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |
| ... | ... |
@@ -2,7 +2,7 @@ |
| 2 | 2 |
Summary: Kernel |
| 3 | 3 |
Name: linux-secure |
| 4 | 4 |
Version: 4.9.52 |
| 5 |
-Release: 2%{?dist}
|
|
| 5 |
+Release: 3%{?dist}
|
|
| 6 | 6 |
License: GPLv2 |
| 7 | 7 |
URL: http://www.kernel.org/ |
| 8 | 8 |
Group: System Environment/Kernel |
| ... | ... |
@@ -261,6 +261,8 @@ ln -sf linux-%{uname_r}.cfg /boot/photon.cfg
|
| 261 | 261 |
/usr/src/linux-headers-%{uname_r}
|
| 262 | 262 |
|
| 263 | 263 |
%changelog |
| 264 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
| 265 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
| 264 | 266 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
| 265 | 267 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
| 266 | 268 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |
| ... | ... |
@@ -2,7 +2,7 @@ |
| 2 | 2 |
Summary: Kernel |
| 3 | 3 |
Name: linux |
| 4 | 4 |
Version: 4.9.52 |
| 5 |
-Release: 2%{?dist}
|
|
| 5 |
+Release: 3%{?dist}
|
|
| 6 | 6 |
License: GPLv2 |
| 7 | 7 |
URL: http://www.kernel.org/ |
| 8 | 8 |
Group: System Environment/Kernel |
| ... | ... |
@@ -301,6 +301,8 @@ ln -sf %{name}-%{uname_r}.cfg /boot/photon.cfg
|
| 301 | 301 |
/usr/share/doc/* |
| 302 | 302 |
|
| 303 | 303 |
%changelog |
| 304 |
+* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-3 |
|
| 305 |
+- Allow privileged CLONE_NEWUSER from nested user namespaces. |
|
| 304 | 306 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-2 |
| 305 | 307 |
- Fix CVE-2017-11472 (ACPICA: Namespace: fix operand cache leak) |
| 306 | 308 |
* Mon Oct 02 2017 Srivatsa S. Bhat <srivatsa@csail.mit.edu> 4.9.52-1 |