Fixing user namespaces (again) with a vendor update from runc
(specifically, the change that performs the remount() of a bind mount only when non-default mount flags are set).
Other changes are minimal.
Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)
| ... | ... |
@@ -45,7 +45,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce |
| 45 | 45 |
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 |
| 46 | 46 |
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c |
| 47 | 47 |
|
| 48 |
-clone git github.com/opencontainers/runc aac9179bbadbf958054ce97ab368ac178140e5da # libcontainer |
|
| 48 |
+clone git github.com/opencontainers/runc fba07bce72e72ce5b2dd618e4f67dd86ccb49c82 # libcontainer |
|
| 49 | 49 |
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) |
| 50 | 50 |
clone git github.com/coreos/go-systemd v3 |
| 51 | 51 |
clone git github.com/godbus/dbus v2 |
| ... | ... |
@@ -5,57 +5,35 @@ package libcontainer |
| 5 | 5 |
import ( |
| 6 | 6 |
"fmt" |
| 7 | 7 |
"os" |
| 8 |
+ "strings" |
|
| 8 | 9 |
|
| 9 | 10 |
"github.com/syndtr/gocapability/capability" |
| 10 | 11 |
) |
| 11 | 12 |
|
| 12 | 13 |
const allCapabilityTypes = capability.CAPS | capability.BOUNDS |
| 13 | 14 |
|
| 14 |
-var capabilityList = map[string]capability.Cap{
|
|
| 15 |
- "CAP_SETPCAP": capability.CAP_SETPCAP, |
|
| 16 |
- "CAP_SYS_MODULE": capability.CAP_SYS_MODULE, |
|
| 17 |
- "CAP_SYS_RAWIO": capability.CAP_SYS_RAWIO, |
|
| 18 |
- "CAP_SYS_PACCT": capability.CAP_SYS_PACCT, |
|
| 19 |
- "CAP_SYS_ADMIN": capability.CAP_SYS_ADMIN, |
|
| 20 |
- "CAP_SYS_NICE": capability.CAP_SYS_NICE, |
|
| 21 |
- "CAP_SYS_RESOURCE": capability.CAP_SYS_RESOURCE, |
|
| 22 |
- "CAP_SYS_TIME": capability.CAP_SYS_TIME, |
|
| 23 |
- "CAP_SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, |
|
| 24 |
- "CAP_MKNOD": capability.CAP_MKNOD, |
|
| 25 |
- "CAP_AUDIT_WRITE": capability.CAP_AUDIT_WRITE, |
|
| 26 |
- "CAP_AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, |
|
| 27 |
- "CAP_MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE, |
|
| 28 |
- "CAP_MAC_ADMIN": capability.CAP_MAC_ADMIN, |
|
| 29 |
- "CAP_NET_ADMIN": capability.CAP_NET_ADMIN, |
|
| 30 |
- "CAP_SYSLOG": capability.CAP_SYSLOG, |
|
| 31 |
- "CAP_CHOWN": capability.CAP_CHOWN, |
|
| 32 |
- "CAP_NET_RAW": capability.CAP_NET_RAW, |
|
| 33 |
- "CAP_DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, |
|
| 34 |
- "CAP_FOWNER": capability.CAP_FOWNER, |
|
| 35 |
- "CAP_DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, |
|
| 36 |
- "CAP_FSETID": capability.CAP_FSETID, |
|
| 37 |
- "CAP_KILL": capability.CAP_KILL, |
|
| 38 |
- "CAP_SETGID": capability.CAP_SETGID, |
|
| 39 |
- "CAP_SETUID": capability.CAP_SETUID, |
|
| 40 |
- "CAP_LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, |
|
| 41 |
- "CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, |
|
| 42 |
- "CAP_NET_BROADCAST": capability.CAP_NET_BROADCAST, |
|
| 43 |
- "CAP_IPC_LOCK": capability.CAP_IPC_LOCK, |
|
| 44 |
- "CAP_IPC_OWNER": capability.CAP_IPC_OWNER, |
|
| 45 |
- "CAP_SYS_CHROOT": capability.CAP_SYS_CHROOT, |
|
| 46 |
- "CAP_SYS_PTRACE": capability.CAP_SYS_PTRACE, |
|
| 47 |
- "CAP_SYS_BOOT": capability.CAP_SYS_BOOT, |
|
| 48 |
- "CAP_LEASE": capability.CAP_LEASE, |
|
| 49 |
- "CAP_SETFCAP": capability.CAP_SETFCAP, |
|
| 50 |
- "CAP_WAKE_ALARM": capability.CAP_WAKE_ALARM, |
|
| 51 |
- "CAP_BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, |
|
| 52 |
- "CAP_AUDIT_READ": capability.CAP_AUDIT_READ, |
|
| 15 |
+var capabilityMap map[string]capability.Cap |
|
| 16 |
+ |
|
| 17 |
+func init() {
|
|
| 18 |
+ capabilityMap = make(map[string]capability.Cap) |
|
| 19 |
+ last := capability.CAP_LAST_CAP |
|
| 20 |
+ // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap |
|
| 21 |
+ if last == capability.Cap(63) {
|
|
| 22 |
+ last = capability.CAP_BLOCK_SUSPEND |
|
| 23 |
+ } |
|
| 24 |
+ for _, cap := range capability.List() {
|
|
| 25 |
+ if cap > last {
|
|
| 26 |
+ continue |
|
| 27 |
+ } |
|
| 28 |
+ capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
|
| 29 |
+ capabilityMap[capKey] = cap |
|
| 30 |
+ } |
|
| 53 | 31 |
} |
| 54 | 32 |
|
| 55 | 33 |
func newCapWhitelist(caps []string) (*whitelist, error) {
|
| 56 | 34 |
l := []capability.Cap{}
|
| 57 | 35 |
for _, c := range caps {
|
| 58 |
- v, ok := capabilityList[c] |
|
| 36 |
+ v, ok := capabilityMap[c] |
|
| 59 | 37 |
if !ok {
|
| 60 | 38 |
return nil, fmt.Errorf("unknown capability %q", c)
|
| 61 | 39 |
} |
| ... | ... |
@@ -58,7 +58,7 @@ func (p *setnsProcess) signal(sig os.Signal) error {
|
| 58 | 58 |
if !ok {
|
| 59 | 59 |
return errors.New("os: unsupported signal type")
|
| 60 | 60 |
} |
| 61 |
- return syscall.Kill(p.cmd.Process.Pid, s) |
|
| 61 |
+ return syscall.Kill(p.pid(), s) |
|
| 62 | 62 |
} |
| 63 | 63 |
|
| 64 | 64 |
func (p *setnsProcess) start() (err error) {
|
| ... | ... |
@@ -67,7 +67,7 @@ func (p *setnsProcess) start() (err error) {
|
| 67 | 67 |
return newSystemError(err) |
| 68 | 68 |
} |
| 69 | 69 |
if len(p.cgroupPaths) > 0 {
|
| 70 |
- if err := cgroups.EnterPid(p.cgroupPaths, p.cmd.Process.Pid); err != nil {
|
|
| 70 |
+ if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
|
| 71 | 71 |
return newSystemError(err) |
| 72 | 72 |
} |
| 73 | 73 |
} |
| ... | ... |
@@ -290,7 +290,7 @@ func (p *initProcess) signal(sig os.Signal) error {
|
| 290 | 290 |
if !ok {
|
| 291 | 291 |
return errors.New("os: unsupported signal type")
|
| 292 | 292 |
} |
| 293 |
- return syscall.Kill(p.cmd.Process.Pid, s) |
|
| 293 |
+ return syscall.Kill(p.pid(), s) |
|
| 294 | 294 |
} |
| 295 | 295 |
|
| 296 | 296 |
func (p *initProcess) setExternalDescriptors(newFds []string) {
|
| ... | ... |
@@ -106,13 +106,17 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 106 | 106 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 107 | 107 |
return err |
| 108 | 108 |
} |
| 109 |
- return mountPropagate(m, rootfs, mountLabel) |
|
| 109 |
+ // Selinux kernels do not support labeling of /proc or /sys |
|
| 110 |
+ return mountPropagate(m, rootfs, "") |
|
| 110 | 111 |
case "mqueue": |
| 111 | 112 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 112 | 113 |
return err |
| 113 | 114 |
} |
| 114 | 115 |
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
| 115 |
- return err |
|
| 116 |
+ // older kernels do not support labeling of /dev/mqueue |
|
| 117 |
+ if err := mountPropagate(m, rootfs, ""); err != nil {
|
|
| 118 |
+ return err |
|
| 119 |
+ } |
|
| 116 | 120 |
} |
| 117 | 121 |
return label.SetFileLabel(dest, mountLabel) |
| 118 | 122 |
case "tmpfs": |
| ... | ... |
@@ -167,9 +171,14 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 167 | 167 |
return err |
| 168 | 168 |
} |
| 169 | 169 |
// bind mount won't change mount options, we need remount to make mount options effective. |
| 170 |
- if err := remount(m, rootfs); err != nil {
|
|
| 171 |
- return err |
|
| 170 |
+ // first check that we have non-default options required before attempting a remount |
|
| 171 |
+ if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 {
|
|
| 172 |
+ // only remount if unique mount options are set |
|
| 173 |
+ if err := remount(m, rootfs); err != nil {
|
|
| 174 |
+ return err |
|
| 175 |
+ } |
|
| 172 | 176 |
} |
| 177 |
+ |
|
| 173 | 178 |
if m.Relabel != "" {
|
| 174 | 179 |
if err := label.Validate(m.Relabel); err != nil {
|
| 175 | 180 |
return err |