Browse code

Vendor Microsoft/opengcs@a1096715

Signed-off-by: John Howard <jhoward@microsoft.com>

John Howard authored on 2019/03/05 05:44:25
Showing 4 changed files
... ...
@@ -6,7 +6,7 @@ github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
6 6
 github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
7 7
 github.com/golang/gddo 9b12a26f3fbd7397dee4e20939ddca719d840d2a
8 8
 github.com/gorilla/mux v1.7.0
9
-github.com/Microsoft/opengcs v0.3.9
9
+github.com/Microsoft/opengcs a10967154e143a36014584a6f664344e3bb0aa64
10 10
 github.com/kr/pty 5cf931ef8f
11 11
 github.com/mattn/go-shellwords v1.0.3
12 12
 github.com/sirupsen/logrus v1.0.6
... ...
@@ -18,24 +18,6 @@ import (
18 18
 type Mode uint
19 19
 
20 20
 const (
21
-	// Constants for the actual mode after validation
22
-
23
-	// ModeActualError means an error has occurred during validation
24
-	ModeActualError = iota
25
-	// ModeActualVhdx means that we are going to use VHDX boot after validation
26
-	ModeActualVhdx
27
-	// ModeActualKernelInitrd means that we are going to use kernel+initrd for boot after validation
28
-	ModeActualKernelInitrd
29
-
30
-	// Constants for the requested mode
31
-
32
-	// ModeRequestAuto means auto-select the boot mode for a utility VM
33
-	ModeRequestAuto = iota // VHDX will be priority over kernel+initrd
34
-	// ModeRequestVhdx means request VHDX boot if possible
35
-	ModeRequestVhdx
36
-	// ModeRequestKernelInitrd means request Kernel+initrd boot if possible
37
-	ModeRequestKernelInitrd
38
-
39 21
 	// defaultUvmTimeoutSeconds is the default time to wait for utility VM operations
40 22
 	defaultUvmTimeoutSeconds = 5 * 60
41 23
 
... ...
@@ -54,8 +36,6 @@ const (
54 54
 type Config struct {
55 55
 	Options                                        // Configuration options
56 56
 	Name               string                      // Name of the utility VM
57
-	RequestedMode      Mode                        // What mode is preferred when validating
58
-	ActualMode         Mode                        // What mode was obtained during validation
59 57
 	UvmTimeoutSeconds  int                         // How long to wait for the utility VM to respond in seconds
60 58
 	Uvm                hcsshim.Container           // The actual container
61 59
 	MappedVirtualDisks []hcsshim.MappedVirtualDisk // Data-disks to be attached
... ...
@@ -66,9 +46,8 @@ type Options struct {
66 66
 	KirdPath       string // Path to where kernel/initrd are found (defaults to %PROGRAMFILES%\Linux Containers)
67 67
 	KernelFile     string // Kernel for Utility VM (embedded in a UEFI bootloader) - does NOT include full path, just filename
68 68
 	InitrdFile     string // Initrd image for Utility VM - does NOT include full path, just filename
69
-	Vhdx           string // VHD for booting the utility VM - is a full path
70 69
 	TimeoutSeconds int    // Requested time for the utility VM to respond in seconds (may be over-ridden by environment)
71
-	BootParameters string // Additional boot parameters for initrd booting (not VHDx)
70
+	BootParameters string // Additional boot parameters for initrd booting
72 71
 }
73 72
 
74 73
 // ParseOptions parses a set of K-V pairs into options used by opengcs. Note
... ...
@@ -86,8 +65,6 @@ func ParseOptions(options []string) (Options, error) {
86 86
 				rOpts.KernelFile = opt[1]
87 87
 			case "lcow.initrd":
88 88
 				rOpts.InitrdFile = opt[1]
89
-			case "lcow.vhdx":
90
-				rOpts.Vhdx = opt[1]
91 89
 			case "lcow.bootparameters":
92 90
 				rOpts.BootParameters = opt[1]
93 91
 			case "lcow.timeout":
... ...
@@ -106,9 +83,6 @@ func ParseOptions(options []string) (Options, error) {
106 106
 	if rOpts.KirdPath == "" {
107 107
 		rOpts.KirdPath = filepath.Join(os.Getenv("ProgramFiles"), "Linux Containers")
108 108
 	}
109
-	if rOpts.Vhdx == "" {
110
-		rOpts.Vhdx = filepath.Join(rOpts.KirdPath, `uvm.vhdx`)
111
-	}
112 109
 	if rOpts.KernelFile == "" {
113 110
 		rOpts.KernelFile = `kernel`
114 111
 	}
... ...
@@ -157,47 +131,11 @@ func (config *Config) GenerateDefault(options []string) error {
157 157
 	// Last priority is the default timeout
158 158
 	config.UvmTimeoutSeconds = defaultUvmTimeoutSeconds
159 159
 
160
-	// Set the default requested mode
161
-	config.RequestedMode = ModeRequestAuto
162
-
163 160
 	return nil
164 161
 }
165 162
 
166 163
 // Validate validates a Config structure for starting a utility VM.
167 164
 func (config *Config) Validate() error {
168
-	config.ActualMode = ModeActualError
169
-
170
-	if config.RequestedMode == ModeRequestVhdx && config.Vhdx == "" {
171
-		return fmt.Errorf("VHDx mode must supply a VHDx")
172
-	}
173
-	if config.RequestedMode == ModeRequestKernelInitrd && (config.KernelFile == "" || config.InitrdFile == "") {
174
-		return fmt.Errorf("kernel+initrd mode must supply both kernel and initrd")
175
-	}
176
-
177
-	// Validate that if VHDX requested or auto, it exists.
178
-	if config.RequestedMode == ModeRequestAuto || config.RequestedMode == ModeRequestVhdx {
179
-		if _, err := os.Stat(config.Vhdx); os.IsNotExist(err) {
180
-			if config.RequestedMode == ModeRequestVhdx {
181
-				return fmt.Errorf("VHDx '%s' not found", config.Vhdx)
182
-			}
183
-		} else {
184
-			config.ActualMode = ModeActualVhdx
185
-
186
-			// Can't specify boot parameters with VHDx
187
-			if config.BootParameters != "" {
188
-				return fmt.Errorf("Boot parameters cannot be specified in VHDx mode")
189
-			}
190
-			return nil
191
-		}
192
-	}
193
-
194
-	// So must be kernel+initrd, or auto where we fallback as the VHDX doesn't exist
195
-	if config.InitrdFile == "" || config.KernelFile == "" {
196
-		if config.RequestedMode == ModeRequestKernelInitrd {
197
-			return fmt.Errorf("initrd and kernel options must be supplied")
198
-		}
199
-		return fmt.Errorf("opengcs: configuration is invalid")
200
-	}
201 165
 
202 166
 	if _, err := os.Stat(filepath.Join(config.KirdPath, config.KernelFile)); os.IsNotExist(err) {
203 167
 		return fmt.Errorf("kernel '%s' not found", filepath.Join(config.KirdPath, config.KernelFile))
... ...
@@ -206,8 +144,6 @@ func (config *Config) Validate() error {
206 206
 		return fmt.Errorf("initrd '%s' not found", filepath.Join(config.KirdPath, config.InitrdFile))
207 207
 	}
208 208
 
209
-	config.ActualMode = ModeActualKernelInitrd
210
-
211 209
 	// Ensure all the MappedVirtualDisks exist on the host
212 210
 	for _, mvd := range config.MappedVirtualDisks {
213 211
 		if _, err := os.Stat(mvd.HostPath); err != nil {
... ...
@@ -236,21 +172,12 @@ func (config *Config) StartUtilityVM() error {
236 236
 		ContainerType:               "linux",
237 237
 		TerminateOnLastHandleClosed: true,
238 238
 		MappedVirtualDisks:          config.MappedVirtualDisks,
239
-	}
240
-
241
-	if config.ActualMode == ModeActualVhdx {
242
-		configuration.HvRuntime = &hcsshim.HvRuntime{
243
-			ImagePath:          config.Vhdx,
244
-			BootSource:         "Vhd",
245
-			WritableBootSource: false,
246
-		}
247
-	} else {
248
-		configuration.HvRuntime = &hcsshim.HvRuntime{
239
+		HvRuntime: &hcsshim.HvRuntime{
249 240
 			ImagePath:           config.KirdPath,
250 241
 			LinuxInitrdFile:     config.InitrdFile,
251 242
 			LinuxKernelFile:     config.KernelFile,
252 243
 			LinuxBootParameters: config.BootParameters,
253
-		}
244
+		},
254 245
 	}
255 246
 
256 247
 	configurationS, _ := json.Marshal(configuration)
257 248
deleted file mode 100644
... ...
@@ -1,46 +0,0 @@
1
-// +build windows
2
-
3
-package client
4
-
5
-import (
6
-	"fmt"
7
-	"io"
8
-
9
-	"github.com/sirupsen/logrus"
10
-)
11
-
12
-// TarToVhd streams a tarstream contained in an io.Reader to a fixed vhd file
13
-func (config *Config) TarToVhd(targetVHDFile string, reader io.Reader) (int64, error) {
14
-	logrus.Debugf("opengcs: TarToVhd: %s", targetVHDFile)
15
-
16
-	if config.Uvm == nil {
17
-		return 0, fmt.Errorf("cannot Tar2Vhd as no utility VM is in configuration")
18
-	}
19
-
20
-	defer config.DebugGCS()
21
-
22
-	process, err := config.createUtilsProcess("tar2vhd")
23
-	if err != nil {
24
-		return 0, fmt.Errorf("failed to start tar2vhd for %s: %s", targetVHDFile, err)
25
-	}
26
-	defer process.Process.Close()
27
-
28
-	// Send the tarstream into the `tar2vhd`s stdin
29
-	if _, err = copyWithTimeout(process.Stdin, reader, 0, config.UvmTimeoutSeconds, fmt.Sprintf("stdin of tar2vhd for generating %s", targetVHDFile)); err != nil {
30
-		return 0, fmt.Errorf("failed sending to tar2vhd for %s: %s", targetVHDFile, err)
31
-	}
32
-
33
-	// Don't need stdin now we've sent everything. This signals GCS that we are finished sending data.
34
-	if err := process.Process.CloseStdin(); err != nil {
35
-		return 0, fmt.Errorf("failed closing stdin handle for %s: %s", targetVHDFile, err)
36
-	}
37
-
38
-	// Write stdout contents of `tar2vhd` to the VHD file
39
-	payloadSize, err := writeFileFromReader(targetVHDFile, process.Stdout, config.UvmTimeoutSeconds, fmt.Sprintf("stdout of tar2vhd to %s", targetVHDFile))
40
-	if err != nil {
41
-		return 0, fmt.Errorf("failed to write %s during tar2vhd: %s", targetVHDFile, err)
42
-	}
43
-
44
-	logrus.Debugf("opengcs: TarToVhd: %s created, %d bytes", targetVHDFile, payloadSize)
45
-	return payloadSize, err
46
-}
47 1
new file mode 100644
... ...
@@ -0,0 +1,333 @@
0
+#define _GNU_SOURCE
1
+#include <errno.h>
2
+#include <fcntl.h>
3
+#include <getopt.h>
4
+#include <net/if.h>
5
+#include <netinet/ip.h>
6
+#include <signal.h>
7
+#include <stdio.h>
8
+#include <stdlib.h>
9
+#include <string.h>
10
+#include <sys/mount.h>
11
+#include <sys/socket.h>
12
+#include <sys/stat.h>
13
+#include <sys/sysmacros.h>
14
+#include <sys/types.h>
15
+#include <sys/wait.h>
16
+#include <unistd.h>
17
+
18
// PATH assignment placed in the child environment and also exported via
// putenv() so that execvpe() can resolve the program name.
#define DEFAULT_PATH_ENV "PATH=/sbin:/usr/sbin:/bin:/usr/bin"

// NULL-terminated environment handed to every launched child.
const char *const default_envp[] = { DEFAULT_PATH_ENV, NULL };

// Command line used when init is started without arguments (the LCOWv1
// behavior). Note that this array is NOT NULL-terminated; launch() appends
// the terminator itself.
const char *const default_argv[] = {
    "/bin/gcs",
    "-loglevel",
    "debug",
    "-logfile=/tmp/gcs.log",
};

// Debug shell started alongside the default command line.
const char *const default_shell = "/bin/sh";
28
+
29
// Arguments for a single mount(2) call.
struct Mount {
    const char *source;
    const char *target;
    const char *type;
    unsigned long flags;
    const void *data;
};

// Arguments for a single mkdir(2) call.
struct Mkdir {
    const char *path;
    mode_t mode;
};

// Arguments for a single mknod(2) call; major/minor are combined with
// makedev() by the consumer.
struct Mknod {
    const char *path;
    mode_t mode;
    int major;
    int minor;
};

// Arguments for a single symlink(2) call: linkpath is created pointing at
// target.
struct Symlink {
    const char *linkpath;
    const char *target;
};

// Selects which member of the InitOp union is meaningful.
enum OpType {
    OpMount,
    OpMkdir,
    OpMknod,
    OpSymlink,
};

// One filesystem setup step: a tag plus the arguments for that operation.
struct InitOp {
    enum OpType op;
    union {
        struct Mount mount;
        struct Mkdir mkdir;
        struct Mknod mknod;
        struct Symlink symlink;
    };
};
66
+
67
+const struct InitOp ops[] = {
68
+    // mount /proc (which should already exist)
69
+    { OpMount, .mount = { "proc", "/proc", "proc", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
70
+
71
+    // add symlinks in /dev (which is already mounted)
72
+    { OpSymlink, .symlink = { "/dev/fd", "/proc/self/fd" } },
73
+    { OpSymlink, .symlink = { "/dev/stdin", "/proc/self/fd/0" } },
74
+    { OpSymlink, .symlink = { "/dev/stdout", "/proc/self/fd/1" } },
75
+    { OpSymlink, .symlink = { "/dev/stderr", "/proc/self/fd/2" } },
76
+
77
+    // mount tmpfs on /run and /tmp (which should already exist)
78
+    { OpMount, .mount = { "tmpfs", "/run", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC, "mode=0755" } },
79
+    { OpMount, .mount = { "tmpfs", "/tmp", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
80
+
81
+    // mount shm and devpts
82
+    { OpMkdir, .mkdir = { "/dev/shm", 0755 } },
83
+    { OpMount, .mount = { "shm", "/dev/shm", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
84
+    { OpMkdir, .mkdir = { "/dev/pts", 0755 } },
85
+    { OpMount, .mount = { "devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC } },
86
+
87
+    // mount /sys (which should already exist)
88
+    { OpMount, .mount = { "sysfs", "/sys", "sysfs", MS_NODEV | MS_NOSUID | MS_NOEXEC } },
89
+    { OpMount, .mount = { "cgroup_root", "/sys/fs/cgroup", "tmpfs", MS_NODEV | MS_NOSUID | MS_NOEXEC, "mode=0755" } },
90
+};
91
+
92
// Reports msg together with the description of the current errno through
// perror(), then puts errno back to the value it had on entry so callers can
// still inspect it.
void warn(const char *msg) {
    const int saved_errno = errno;

    perror(msg);
    errno = saved_errno;
}
97
+
98
// Two-part variant of warn(): writes "msg1: " to stderr and then lets warn()
// print msg2 followed by the errno description. errno is restored before the
// call to warn() so the stderr write cannot disturb the reported error.
void warn2(const char *msg1, const char *msg2) {
    const int saved_errno = errno;

    fprintf(stderr, "%s: ", msg1);
    errno = saved_errno;
    warn(msg2);
}
105
+
106
// Terminates the process, using the current errno value as the exit status.
_Noreturn void dien() {
    const int status = errno;

    exit(status);
}
109
+
110
// Reports msg with the current errno description and then terminates the
// process with errno as the exit status.
_Noreturn void die(const char *msg) {
    warn(msg);
    // warn() leaves errno untouched, so it is still the original error here.
    exit(errno);
}
114
+
115
// Two-part variant of die(): reports "msg1: msg2: <errno description>" and
// then terminates the process with errno as the exit status.
_Noreturn void die2(const char *msg1, const char *msg2) {
    warn2(msg1, msg2);
    // warn2() leaves errno untouched, so it is still the original error here.
    exit(errno);
}
119
+
120
// Mounts devtmpfs on /dev. A failure is always reported; it is fatal unless
// the error is EBUSY.
void init_dev() {
    if (mount("dev", "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL) >= 0) {
        return;
    }

    warn2("mount", "/dev");

    // With devtmpfs.mount = 1 on the kernel command line, or with
    // CONFIG_DEVTMPFS_MOUNT set, /dev is already mounted and the call fails
    // with EBUSY. That case is not an error.
    if (errno == EBUSY) {
        return;
    }
    dien();
}
131
+
132
+void init_fs(const struct InitOp *ops, size_t count) {
133
+    for (size_t i = 0; i < count; i++) {
134
+        switch (ops[i].op) {
135
+        case OpMount: {
136
+            const struct Mount *m = &ops[i].mount;
137
+            if (mount(m->source, m->target, m->type, m->flags, m->data) < 0) {
138
+                die2("mount", m->target);
139
+            }
140
+            break;
141
+        }
142
+        case OpMkdir: {
143
+            const struct Mkdir *m = &ops[i].mkdir;
144
+            if (mkdir(m->path, m->mode) < 0) {
145
+                warn2("mkdir", m->path);
146
+                if (errno != EEXIST) {
147
+                    dien();
148
+                }
149
+            }
150
+            break;
151
+        }
152
+        case OpMknod: {
153
+            const struct Mknod *n = &ops[i].mknod;
154
+            if (mknod(n->path, n->mode, makedev(n->major, n->minor)) < 0) {
155
+                warn2("mknod", n->path);
156
+                if (errno != EEXIST) {
157
+                    dien();
158
+                }
159
+            }
160
+            break;
161
+        }
162
+        case OpSymlink: {
163
+            const struct Symlink *sl = &ops[i].symlink;
164
+            if (symlink(sl->target, sl->linkpath) < 0) {
165
+                warn2("symlink", sl->linkpath);
166
+                if (errno != EEXIST) {
167
+                    dien();
168
+                }
169
+            }
170
+            break;
171
+        }
172
+        }
173
+    }
174
+}
175
+
176
// Reads /proc/cgroups and, for every controller whose "enabled" column is
// non-zero, creates /sys/fs/cgroup/<name> and mounts a cgroup filesystem for
// that controller on it. Any failure (open, parse, mkdir, mount) terminates
// the process.
void init_cgroups() {
    const char *fpath = "/proc/cgroups";
    FILE *f = fopen(fpath, "r");
    if (f == NULL) {
        die2("fopen", fpath);
    }

    // Skip the first (header) line. fgetc() returns an int: storing it in a
    // char would make EOF undetectable where char is unsigned and would
    // confuse byte 0xFF with EOF where it is signed.
    for (;;) {
        int c = fgetc(f);
        if (c == EOF || c == '\n') {
            break;
        }
    }

    for (;;) {
        static const char base_path[] = "/sys/fs/cgroup/";
        // Room for the prefix (without its NUL), a controller name of up to
        // 64 characters, and the terminating NUL that "%64s" stores after
        // the name.
        char path[sizeof(base_path) - 1 + 64 + 1];
        // The name is scanned directly into the tail of the path buffer.
        char *name = path + sizeof(base_path) - 1;
        int hier, groups, enabled;

        // Clear errno so that a parse mismatch is reported as EINVAL below
        // rather than as whatever error an earlier call left behind.
        errno = 0;
        int r = fscanf(f, "%64s %d %d %d\n", name, &hier, &groups, &enabled);
        if (r == EOF) {
            break;
        }
        if (r != 4) {
            if (errno == 0) {
                errno = EINVAL;
            }
            die2("fscanf", fpath);
        }
        if (enabled) {
            // Prepend the prefix in front of the already-scanned name.
            memcpy(path, base_path, sizeof(base_path) - 1);
            if (mkdir(path, 0755) < 0) {
                die2("mkdir", path);
            }
            // The controller name doubles as the mount source and as the
            // mount data.
            if (mount(name, path, "cgroup", MS_NODEV | MS_NOSUID | MS_NOEXEC, name) < 0) {
                die2("mount", path);
            }
        }
    }
    fclose(f);
}
214
+
215
+void init_network(const char *iface, int domain) {
216
+    int s = socket(domain, SOCK_DGRAM, IPPROTO_IP);
217
+    if (s < 0) {
218
+        if (errno == EAFNOSUPPORT) {
219
+            return;
220
+        }
221
+        die("socket");
222
+    }
223
+
224
+    struct ifreq request = {0};
225
+    strncpy(request.ifr_name, iface, sizeof(request.ifr_name));
226
+    if (ioctl(s, SIOCGIFFLAGS, &request) < 0) {
227
+        die2("ioctl(SIOCGIFFLAGS)", iface);
228
+    }
229
+
230
+    request.ifr_flags |= IFF_UP | IFF_RUNNING;
231
+    if (ioctl(s, SIOCSIFFLAGS, &request) < 0) {
232
+        die2("ioctl(SIOCSIFFLAGS)", iface);
233
+    }
234
+
235
+    close(s);
236
+}
237
+
238
+pid_t launch(int argc, char **argv) {
239
+    int pid = fork();
240
+    if (pid != 0) {
241
+        if (pid < 0) {
242
+            die("fork");
243
+        }
244
+
245
+        return pid;
246
+    }
247
+
248
+    // Unblock signals before execing.
249
+    sigset_t set;
250
+    sigfillset(&set);
251
+    sigprocmask(SIG_UNBLOCK, &set, 0);
252
+
253
+    // Create a session and process group.
254
+    setsid();
255
+    setpgid(0, 0);
256
+
257
+    // Terminate the arguments and exec.
258
+    char **argvn = alloca(sizeof(argv[0]) * (argc + 1));
259
+    memcpy(argvn, argv, sizeof(argv[0]) * argc);
260
+    argvn[argc] = NULL;
261
+    if (putenv(DEFAULT_PATH_ENV)) { // Specify the PATH used for execvpe
262
+        die("putenv");
263
+    }
264
+    execvpe(argvn[0], argvn, (char**)default_envp);
265
+    die2("execvpe", argvn[0]);
266
+}
267
+
268
// Reaps terminated children until the one identified by until_pid has been
// collected. Returns that child's exit status if it exited normally, or
// 128 + the terminating signal number otherwise. A failing wait() terminates
// the process.
int reap_until(pid_t until_pid) {
    int status;
    pid_t reaped;

    // Keep collecting children until the one we care about shows up.
    do {
        reaped = wait(&status);
        if (reaped < 0) {
            die("wait");
        }
    } while (reaped != until_pid);

    // The initial child process died. Pass through the exit status.
    if (!WIFEXITED(status)) {
        fputs("child exited by signal\n", stderr);
        return 128 + WTERMSIG(status);
    }

    const int code = WEXITSTATUS(status);
    if (code != 0) {
        fputs("child exited with error\n", stderr);
    }
    return code;
}
289
+
290
+int main(int argc, char **argv) {
291
+    char *debug_shell = NULL;
292
+    if (argc <= 1) {
293
+        argv = (char **)default_argv;
294
+        argc = sizeof(default_argv) / sizeof(default_argv[0]);
295
+        optind = 0;
296
+        debug_shell = (char*)default_shell;
297
+    } else {
298
+        for (int opt; (opt = getopt(argc, argv, "+d:")) >= 0; ) {
299
+            switch (opt) {
300
+            case 'd':
301
+                debug_shell = optarg;
302
+                break;
303
+
304
+            default:
305
+                exit(1);
306
+            }
307
+        }
308
+    }
309
+
310
+    char **child_argv = argv + optind;
311
+    int child_argc = argc - optind;
312
+
313
+    // Block all signals in init. SIGCHLD will still cause wait() to return.
314
+    sigset_t set;
315
+    sigfillset(&set);
316
+    sigprocmask(SIG_BLOCK, &set, 0);
317
+
318
+    init_dev();
319
+    init_fs(ops, sizeof(ops) / sizeof(ops[0]));
320
+    init_cgroups();
321
+    init_network("lo", AF_INET);
322
+    init_network("lo", AF_INET6);
323
+
324
+    pid_t pid = launch(child_argc, child_argv);
325
+    if (debug_shell != NULL) {
326
+        // The debug shell takes over as the primary child.
327
+        pid = launch(1, &debug_shell);
328
+    }
329
+
330
+    // Reap until the initial child process dies.
331
+    return reap_until(pid);
332
+}