Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
| ... | ... |
@@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce |
| 42 | 42 |
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 |
| 43 | 43 |
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c |
| 44 | 44 |
|
| 45 |
-clone git github.com/opencontainers/runc 08b5415ffa3769ff7c1d2f673f61382d69aabb7d # libcontainer |
|
| 45 |
+clone git github.com/opencontainers/runc aac9179bbadbf958054ce97ab368ac178140e5da # libcontainer |
|
| 46 | 46 |
# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) |
| 47 | 47 |
clone git github.com/coreos/go-systemd v3 |
| 48 | 48 |
clone git github.com/godbus/dbus v2 |
| ... | ... |
@@ -20,8 +20,12 @@ type IDMap struct {
|
| 20 | 20 |
} |
| 21 | 21 |
|
| 22 | 22 |
// Seccomp represents syscall restrictions |
| 23 |
+// By default, only the native architecture of the kernel is allowed to be used |
|
| 24 |
+// for syscalls. Additional architectures can be added by specifying them in |
|
| 25 |
+// Architectures. |
|
| 23 | 26 |
type Seccomp struct {
|
| 24 | 27 |
DefaultAction Action `json:"default_action"` |
| 28 |
+ Architectures []string `json:"architectures"` |
|
| 25 | 29 |
Syscalls []*Syscall `json:"syscalls"` |
| 26 | 30 |
} |
| 27 | 31 |
|
| ... | ... |
@@ -169,6 +173,9 @@ type Config struct {
|
| 169 | 169 |
// Hooks are a collection of actions to perform at various container lifecycle events. |
| 170 | 170 |
// Hooks cannot be marshaled to json, but they do not need to be. |
| 171 | 171 |
Hooks *Hooks `json:"-"` |
| 172 |
+ |
|
| 173 |
+ // Version is the version of the opencontainer specification that is supported. |
|
| 174 |
+ Version string `json:"version"` |
|
| 172 | 175 |
} |
| 173 | 176 |
|
| 174 | 177 |
type Hooks struct {
|
| ... | ... |
@@ -182,9 +189,10 @@ type Hooks struct {
|
| 182 | 182 |
|
| 183 | 183 |
// HookState is the payload provided to a hook on execution. |
| 184 | 184 |
type HookState struct {
|
| 185 |
- ID string `json:"id"` |
|
| 186 |
- Pid int `json:"pid"` |
|
| 187 |
- Root string `json:"root"` |
|
| 185 |
+ Version string `json:"version"` |
|
| 186 |
+ ID string `json:"id"` |
|
| 187 |
+ Pid int `json:"pid"` |
|
| 188 |
+ Root string `json:"root"` |
|
| 188 | 189 |
} |
| 189 | 190 |
|
| 190 | 191 |
type Hook interface {
|
| ... | ... |
@@ -1,13 +1,5 @@ |
| 1 | 1 |
package configs |
| 2 | 2 |
|
| 3 |
-import ( |
|
| 4 |
- "path/filepath" |
|
| 5 |
- "strings" |
|
| 6 |
- "syscall" |
|
| 7 |
- |
|
| 8 |
- "github.com/opencontainers/runc/libcontainer/label" |
|
| 9 |
-) |
|
| 10 |
- |
|
| 11 | 3 |
type Mount struct {
|
| 12 | 4 |
// Source path for the mount. |
| 13 | 5 |
Source string `json:"source"` |
| ... | ... |
@@ -36,40 +28,3 @@ type Mount struct {
|
| 36 | 36 |
// Optional Command to be run after Source is mounted. |
| 37 | 37 |
PostmountCmds []Command `json:"postmount_cmds"` |
| 38 | 38 |
} |
| 39 |
- |
|
| 40 |
-func (m *Mount) Remount(rootfs string) error {
|
|
| 41 |
- var ( |
|
| 42 |
- dest = m.Destination |
|
| 43 |
- ) |
|
| 44 |
- if !strings.HasPrefix(dest, rootfs) {
|
|
| 45 |
- dest = filepath.Join(rootfs, dest) |
|
| 46 |
- } |
|
| 47 |
- |
|
| 48 |
- if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
|
|
| 49 |
- return err |
|
| 50 |
- } |
|
| 51 |
- return nil |
|
| 52 |
-} |
|
| 53 |
- |
|
| 54 |
-// Do the mount operation followed by additional mounts required to take care |
|
| 55 |
-// of propagation flags. |
|
| 56 |
-func (m *Mount) MountPropagate(rootfs string, mountLabel string) error {
|
|
| 57 |
- var ( |
|
| 58 |
- dest = m.Destination |
|
| 59 |
- data = label.FormatMountLabel(m.Data, mountLabel) |
|
| 60 |
- ) |
|
| 61 |
- if !strings.HasPrefix(dest, rootfs) {
|
|
| 62 |
- dest = filepath.Join(rootfs, dest) |
|
| 63 |
- } |
|
| 64 |
- |
|
| 65 |
- if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
|
|
| 66 |
- return err |
|
| 67 |
- } |
|
| 68 |
- |
|
| 69 |
- for _, pflag := range m.PropagationFlags {
|
|
| 70 |
- if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
|
| 71 |
- return err |
|
| 72 |
- } |
|
| 73 |
- } |
|
| 74 |
- return nil |
|
| 75 |
-} |
| ... | ... |
@@ -75,7 +75,7 @@ func (c *linuxConsole) Close() error {
|
| 75 | 75 |
|
| 76 | 76 |
// mount initializes the console inside the rootfs mounting with the specified mount label |
| 77 | 77 |
// and applying the correct ownership of the console. |
| 78 |
-func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
|
|
| 78 |
+func (c *linuxConsole) mount(rootfs, mountLabel string) error {
|
|
| 79 | 79 |
oldMask := syscall.Umask(0000) |
| 80 | 80 |
defer syscall.Umask(oldMask) |
| 81 | 81 |
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
|
| ... | ... |
@@ -250,8 +250,9 @@ func (c *linuxContainer) Destroy() error {
|
| 250 | 250 |
c.initProcess = nil |
| 251 | 251 |
if c.config.Hooks != nil {
|
| 252 | 252 |
s := configs.HookState{
|
| 253 |
- ID: c.id, |
|
| 254 |
- Root: c.config.Rootfs, |
|
| 253 |
+ Version: c.config.Version, |
|
| 254 |
+ ID: c.id, |
|
| 255 |
+ Root: c.config.Rootfs, |
|
| 255 | 256 |
} |
| 256 | 257 |
for _, hook := range c.config.Hooks.Poststop {
|
| 257 | 258 |
if err := hook.Run(s); err != nil {
|
| ... | ... |
@@ -187,16 +187,10 @@ func setupUser(config *initConfig) error {
|
| 187 | 187 |
return err |
| 188 | 188 |
} |
| 189 | 189 |
} |
| 190 |
- // change the permissions on the STDIO of the current process so that when the user |
|
| 191 |
- // is changed for the container, it's STDIO of the process matches the user. |
|
| 192 |
- for _, fd := range []uintptr{
|
|
| 193 |
- os.Stdin.Fd(), |
|
| 194 |
- os.Stderr.Fd(), |
|
| 195 |
- os.Stdout.Fd(), |
|
| 196 |
- } {
|
|
| 197 |
- if err := syscall.Fchown(int(fd), execUser.Uid, execUser.Gid); err != nil {
|
|
| 198 |
- return err |
|
| 199 |
- } |
|
| 190 |
+ // before we change to the container's user make sure that the process's STDIO |
|
| 191 |
+ // is correctly owned by the user that we are switching to. |
|
| 192 |
+ if err := fixStdioPermissions(execUser); err != nil {
|
|
| 193 |
+ return err |
|
| 200 | 194 |
} |
| 201 | 195 |
suppGroups := append(execUser.Sgids, addGroups...) |
| 202 | 196 |
if err := syscall.Setgroups(suppGroups); err != nil {
|
| ... | ... |
@@ -218,6 +212,34 @@ func setupUser(config *initConfig) error {
|
| 218 | 218 |
return nil |
| 219 | 219 |
} |
| 220 | 220 |
|
| 221 |
+// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. |
|
| 222 |
+// The ownership needs to match because it is created outside of the container and needs to be |
|
| 223 |
+// localized. |
|
| 224 |
+func fixStdioPermissions(u *user.ExecUser) error {
|
|
| 225 |
+ var null syscall.Stat_t |
|
| 226 |
+ if err := syscall.Stat("/dev/null", &null); err != nil {
|
|
| 227 |
+ return err |
|
| 228 |
+ } |
|
| 229 |
+ for _, fd := range []uintptr{
|
|
| 230 |
+ os.Stdin.Fd(), |
|
| 231 |
+ os.Stderr.Fd(), |
|
| 232 |
+ os.Stdout.Fd(), |
|
| 233 |
+ } {
|
|
| 234 |
+ var s syscall.Stat_t |
|
| 235 |
+ if err := syscall.Fstat(int(fd), &s); err != nil {
|
|
| 236 |
+ return err |
|
| 237 |
+ } |
|
| 238 |
+ // skip chown of /dev/null if it was used as one of the STDIO fds. |
|
| 239 |
+ if s.Rdev == null.Rdev {
|
|
| 240 |
+ continue |
|
| 241 |
+ } |
|
| 242 |
+ if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
|
|
| 243 |
+ return err |
|
| 244 |
+ } |
|
| 245 |
+ } |
|
| 246 |
+ return nil |
|
| 247 |
+} |
|
| 248 |
+ |
|
| 221 | 249 |
// setupNetwork sets up and initializes any network interface inside the container. |
| 222 | 250 |
func setupNetwork(config *initConfig) error {
|
| 223 | 251 |
for _, config := range config.Networks {
|
| ... | ... |
@@ -65,11 +65,11 @@ static int clone_parent(jmp_buf * env) |
| 65 | 65 |
|
| 66 | 66 |
void nsexec() |
| 67 | 67 |
{
|
| 68 |
- char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
|
|
| 68 |
+ char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
|
|
| 69 | 69 |
const int num = sizeof(namespaces) / sizeof(char *); |
| 70 | 70 |
jmp_buf env; |
| 71 | 71 |
char buf[PATH_MAX], *val; |
| 72 |
- int i, tfd, child, len, pipenum, consolefd = -1; |
|
| 72 |
+ int i, tfd, self_tfd, child, len, pipenum, consolefd = -1; |
|
| 73 | 73 |
pid_t pid; |
| 74 | 74 |
char *console; |
| 75 | 75 |
|
| ... | ... |
@@ -114,17 +114,30 @@ void nsexec() |
| 114 | 114 |
exit(1); |
| 115 | 115 |
} |
| 116 | 116 |
|
| 117 |
+ self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
|
|
| 118 |
+ if (self_tfd == -1) {
|
|
| 119 |
+ pr_perror("Failed to open /proc/self/ns");
|
|
| 120 |
+ exit(1); |
|
| 121 |
+ } |
|
| 122 |
+ |
|
| 117 | 123 |
for (i = 0; i < num; i++) {
|
| 118 | 124 |
struct stat st; |
| 125 |
+ struct stat self_st; |
|
| 119 | 126 |
int fd; |
| 120 | 127 |
|
| 121 | 128 |
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */ |
| 122 |
- if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) {
|
|
| 129 |
+ if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
|
|
| 123 | 130 |
// Ignore nonexistent namespaces. |
| 124 | 131 |
if (errno == ENOENT) |
| 125 | 132 |
continue; |
| 126 | 133 |
} |
| 127 | 134 |
|
| 135 |
+ /* Skip namespaces we're already part of */ |
|
| 136 |
+ if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && |
|
| 137 |
+ st.st_ino == self_st.st_ino) {
|
|
| 138 |
+ continue; |
|
| 139 |
+ } |
|
| 140 |
+ |
|
| 128 | 141 |
fd = openat(tfd, namespaces[i], O_RDONLY); |
| 129 | 142 |
if (fd == -1) {
|
| 130 | 143 |
pr_perror("Failed to open ns file %s for ns %s", buf,
|
| ... | ... |
@@ -139,6 +152,9 @@ void nsexec() |
| 139 | 139 |
close(fd); |
| 140 | 140 |
} |
| 141 | 141 |
|
| 142 |
+ close(self_tfd); |
|
| 143 |
+ close(tfd); |
|
| 144 |
+ |
|
| 142 | 145 |
if (setjmp(env) == 1) {
|
| 143 | 146 |
// Child |
| 144 | 147 |
|
| ... | ... |
@@ -203,9 +203,10 @@ func (p *initProcess) start() (err error) {
|
| 203 | 203 |
}() |
| 204 | 204 |
if p.config.Config.Hooks != nil {
|
| 205 | 205 |
s := configs.HookState{
|
| 206 |
- ID: p.container.id, |
|
| 207 |
- Pid: p.pid(), |
|
| 208 |
- Root: p.config.Config.Rootfs, |
|
| 206 |
+ Version: p.container.config.Version, |
|
| 207 |
+ ID: p.container.id, |
|
| 208 |
+ Pid: p.pid(), |
|
| 209 |
+ Root: p.config.Config.Rootfs, |
|
| 209 | 210 |
} |
| 210 | 211 |
for _, hook := range p.config.Config.Hooks.Prestart {
|
| 211 | 212 |
if err := hook.Run(s); err != nil {
|
| ... | ... |
@@ -68,7 +68,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
|
| 68 | 68 |
return newSystemError(err) |
| 69 | 69 |
} |
| 70 | 70 |
if !setupDev {
|
| 71 |
- if err := reOpenDevNull(config.Rootfs); err != nil {
|
|
| 71 |
+ if err := reOpenDevNull(); err != nil {
|
|
| 72 | 72 |
return newSystemError(err) |
| 73 | 73 |
} |
| 74 | 74 |
} |
| ... | ... |
@@ -106,12 +106,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 106 | 106 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 107 | 107 |
return err |
| 108 | 108 |
} |
| 109 |
- return m.MountPropagate(rootfs, mountLabel) |
|
| 109 |
+ return mountPropagate(m, rootfs, mountLabel) |
|
| 110 | 110 |
case "mqueue": |
| 111 | 111 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 112 | 112 |
return err |
| 113 | 113 |
} |
| 114 |
- if err := m.MountPropagate(rootfs, mountLabel); err != nil {
|
|
| 114 |
+ if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
| 115 | 115 |
return err |
| 116 | 116 |
} |
| 117 | 117 |
return label.SetFileLabel(dest, mountLabel) |
| ... | ... |
@@ -122,7 +122,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 122 | 122 |
return err |
| 123 | 123 |
} |
| 124 | 124 |
} |
| 125 |
- if err := m.MountPropagate(rootfs, mountLabel); err != nil {
|
|
| 125 |
+ if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
| 126 | 126 |
return err |
| 127 | 127 |
} |
| 128 | 128 |
if stat != nil {
|
| ... | ... |
@@ -135,12 +135,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 135 | 135 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 136 | 136 |
return err |
| 137 | 137 |
} |
| 138 |
- return m.MountPropagate(rootfs, mountLabel) |
|
| 138 |
+ return mountPropagate(m, rootfs, mountLabel) |
|
| 139 | 139 |
case "securityfs": |
| 140 | 140 |
if err := os.MkdirAll(dest, 0755); err != nil {
|
| 141 | 141 |
return err |
| 142 | 142 |
} |
| 143 |
- return m.MountPropagate(rootfs, mountLabel) |
|
| 143 |
+ return mountPropagate(m, rootfs, mountLabel) |
|
| 144 | 144 |
case "bind": |
| 145 | 145 |
stat, err := os.Stat(m.Source) |
| 146 | 146 |
if err != nil {
|
| ... | ... |
@@ -158,14 +158,16 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 158 | 158 |
if err := checkMountDestination(rootfs, dest); err != nil {
|
| 159 | 159 |
return err |
| 160 | 160 |
} |
| 161 |
+ // update the mount with the correct dest after symlinks are resolved. |
|
| 162 |
+ m.Destination = dest |
|
| 161 | 163 |
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
| 162 | 164 |
return err |
| 163 | 165 |
} |
| 164 |
- if err := m.MountPropagate(rootfs, mountLabel); err != nil {
|
|
| 166 |
+ if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
| 165 | 167 |
return err |
| 166 | 168 |
} |
| 167 | 169 |
// bind mount won't change mount options, we need remount to make mount options effective. |
| 168 |
- if err := m.Remount(rootfs); err != nil {
|
|
| 170 |
+ if err := remount(m, rootfs); err != nil {
|
|
| 169 | 171 |
return err |
| 170 | 172 |
} |
| 171 | 173 |
if m.Relabel != "" {
|
| ... | ... |
@@ -234,7 +236,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
| 234 | 234 |
Destination: m.Destination, |
| 235 | 235 |
Flags: defaultMountFlags | syscall.MS_RDONLY, |
| 236 | 236 |
} |
| 237 |
- if err := mcgrouproot.Remount(rootfs); err != nil {
|
|
| 237 |
+ if err := remount(mcgrouproot, rootfs); err != nil {
|
|
| 238 | 238 |
return err |
| 239 | 239 |
} |
| 240 | 240 |
} |
| ... | ... |
@@ -328,7 +330,7 @@ func setupDevSymlinks(rootfs string) error {
|
| 328 | 328 |
// this method will make them point to `/dev/null` in this container's rootfs. This |
| 329 | 329 |
// needs to be called after we chroot/pivot into the container's rootfs so that any |
| 330 | 330 |
// symlinks are resolved locally. |
| 331 |
-func reOpenDevNull(rootfs string) error {
|
|
| 331 |
+func reOpenDevNull() error {
|
|
| 332 | 332 |
var stat, devNullStat syscall.Stat_t |
| 333 | 333 |
file, err := os.Open("/dev/null")
|
| 334 | 334 |
if err != nil {
|
| ... | ... |
@@ -433,7 +435,7 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
|
| 433 | 433 |
return fmt.Errorf("symlink dev ptmx %s", err)
|
| 434 | 434 |
} |
| 435 | 435 |
if console != nil {
|
| 436 |
- return console.mount(config.Rootfs, config.MountLabel, 0, 0) |
|
| 436 |
+ return console.mount(config.Rootfs, config.MountLabel) |
|
| 437 | 437 |
} |
| 438 | 438 |
return nil |
| 439 | 439 |
} |
| ... | ... |
@@ -532,3 +534,40 @@ func writeSystemProperty(key, value string) error {
|
| 532 | 532 |
keyPath := strings.Replace(key, ".", "/", -1) |
| 533 | 533 |
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
|
| 534 | 534 |
} |
| 535 |
+ |
|
| 536 |
+func remount(m *configs.Mount, rootfs string) error {
|
|
| 537 |
+ var ( |
|
| 538 |
+ dest = m.Destination |
|
| 539 |
+ ) |
|
| 540 |
+ if !strings.HasPrefix(dest, rootfs) {
|
|
| 541 |
+ dest = filepath.Join(rootfs, dest) |
|
| 542 |
+ } |
|
| 543 |
+ |
|
| 544 |
+ if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
|
|
| 545 |
+ return err |
|
| 546 |
+ } |
|
| 547 |
+ return nil |
|
| 548 |
+} |
|
| 549 |
+ |
|
| 550 |
+// Do the mount operation followed by additional mounts required to take care |
|
| 551 |
+// of propagation flags. |
|
| 552 |
+func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
|
| 553 |
+ var ( |
|
| 554 |
+ dest = m.Destination |
|
| 555 |
+ data = label.FormatMountLabel(m.Data, mountLabel) |
|
| 556 |
+ ) |
|
| 557 |
+ if !strings.HasPrefix(dest, rootfs) {
|
|
| 558 |
+ dest = filepath.Join(rootfs, dest) |
|
| 559 |
+ } |
|
| 560 |
+ |
|
| 561 |
+ if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
|
|
| 562 |
+ return err |
|
| 563 |
+ } |
|
| 564 |
+ |
|
| 565 |
+ for _, pflag := range m.PropagationFlags {
|
|
| 566 |
+ if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
|
| 567 |
+ return err |
|
| 568 |
+ } |
|
| 569 |
+ } |
|
| 570 |
+ return nil |
|
| 571 |
+} |
| ... | ... |
@@ -37,6 +37,18 @@ func InitSeccomp(config *configs.Seccomp) error {
|
| 37 | 37 |
return fmt.Errorf("error creating filter: %s", err)
|
| 38 | 38 |
} |
| 39 | 39 |
|
| 40 |
+ // Add extra architectures |
|
| 41 |
+ for _, arch := range config.Architectures {
|
|
| 42 |
+ scmpArch, err := libseccomp.GetArchFromString(arch) |
|
| 43 |
+ if err != nil {
|
|
| 44 |
+ return err |
|
| 45 |
+ } |
|
| 46 |
+ |
|
| 47 |
+ if err := filter.AddArch(scmpArch); err != nil {
|
|
| 48 |
+ return err |
|
| 49 |
+ } |
|
| 50 |
+ } |
|
| 51 |
+ |
|
| 40 | 52 |
// Unset no new privs bit |
| 41 | 53 |
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
| 42 | 54 |
return fmt.Errorf("error setting no new privileges: %s", err)
|