Browse code

Bump Microsoft/hcsshim to v0.7.3

Signed-off-by: John Howard <jhoward@microsoft.com>

John Howard authored on 2018/09/11 02:16:19
Showing 8 changed files
... ...
@@ -1,6 +1,6 @@
1 1
 # the following lines are in sorted order, FYI
2 2
 github.com/Azure/go-ansiterm d6e3b3328b783f23731bc4d058875b0371ff8109
3
-github.com/Microsoft/hcsshim 44c060121b68e8bdc40b411beba551f3b4ee9e55
3
+github.com/Microsoft/hcsshim v0.7.3
4 4
 github.com/Microsoft/go-winio v0.4.11
5 5
 github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
6 6
 github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
... ...
@@ -2,10 +2,16 @@ package guid
2 2
 
3 3
 import (
4 4
 	"crypto/rand"
5
+	"encoding/json"
5 6
 	"fmt"
6 7
 	"io"
8
+	"strconv"
9
+	"strings"
7 10
 )
8 11
 
12
+var _ = (json.Marshaler)(&GUID{})
13
+var _ = (json.Unmarshaler)(&GUID{})
14
+
9 15
 type GUID [16]byte
10 16
 
11 17
 func New() GUID {
... ...
@@ -20,3 +26,44 @@ func New() GUID {
20 20
 func (g GUID) String() string {
21 21
 	return fmt.Sprintf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x-%02x", g[3], g[2], g[1], g[0], g[5], g[4], g[7], g[6], g[8:10], g[10:])
22 22
 }
23
+
24
+func FromString(s string) GUID {
25
+	if len(s) != 36 {
26
+		panic(fmt.Sprintf("invalid GUID length: %d", len(s)))
27
+	}
28
+	if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' {
29
+		panic("invalid GUID format")
30
+	}
31
+	indexOrder := [16]int{
32
+		0, 2, 4, 6,
33
+		9, 11,
34
+		14, 16,
35
+		19, 21,
36
+		24, 26, 28, 30, 32, 34,
37
+	}
38
+	byteOrder := [16]int{
39
+		3, 2, 1, 0,
40
+		5, 4,
41
+		7, 6,
42
+		8, 9,
43
+		10, 11, 12, 13, 14, 15,
44
+	}
45
+	var g GUID
46
+	for i, x := range indexOrder {
47
+		b, err := strconv.ParseInt(s[x:x+2], 16, 16)
48
+		if err != nil {
49
+			panic(err)
50
+		}
51
+		g[byteOrder[i]] = byte(b)
52
+	}
53
+	return g
54
+}
55
+
56
+func (g GUID) MarshalJSON() ([]byte, error) {
57
+	return json.Marshal(g.String())
58
+}
59
+
60
+func (g *GUID) UnmarshalJSON(data []byte) error {
61
+	*g = FromString(strings.Trim(string(data), "\""))
62
+	return nil
63
+}
... ...
@@ -2,6 +2,7 @@ package hcs
2 2
 
3 3
 import (
4 4
 	"encoding/json"
5
+	"fmt"
5 6
 	"io"
6 7
 	"sync"
7 8
 	"syscall"
... ...
@@ -83,7 +84,10 @@ func (process *Process) Kill() error {
83 83
 	}
84 84
 
85 85
 	var resultp *uint16
86
+	completed := false
87
+	go syscallWatcher(fmt.Sprintf("TerminateProcess %s: %d", process.SystemID(), process.Pid()), &completed)
86 88
 	err := hcsTerminateProcess(process.handle, &resultp)
89
+	completed = true
87 90
 	events := processHcsResult(resultp)
88 91
 	if err != nil {
89 92
 		return makeProcessError(process, operation, err, events)
... ...
@@ -177,7 +181,10 @@ func (process *Process) Properties() (*ProcessStatus, error) {
177 177
 		resultp     *uint16
178 178
 		propertiesp *uint16
179 179
 	)
180
+	completed := false
181
+	go syscallWatcher(fmt.Sprintf("GetProcessProperties %s: %d", process.SystemID(), process.Pid()), &completed)
180 182
 	err := hcsGetProcessProperties(process.handle, &propertiesp, &resultp)
183
+	completed = true
181 184
 	events := processHcsResult(resultp)
182 185
 	if err != nil {
183 186
 		return nil, makeProcessError(process, operation, err, events)
... ...
@@ -2,6 +2,7 @@ package hcs
2 2
 
3 3
 import (
4 4
 	"encoding/json"
5
+	"fmt"
5 6
 	"os"
6 7
 	"strconv"
7 8
 	"sync"
... ...
@@ -63,7 +64,10 @@ func CreateComputeSystem(id string, hcsDocumentInterface interface{}) (*System,
63 63
 		resultp  *uint16
64 64
 		identity syscall.Handle
65 65
 	)
66
+	completed := false
67
+	go syscallWatcher(fmt.Sprintf("CreateCompleteSystem %s: %s", id, hcsDocument), &completed)
66 68
 	createError := hcsCreateComputeSystem(id, hcsDocument, identity, &computeSystem.handle, &resultp)
69
+	completed = true
67 70
 
68 71
 	if createError == nil || IsPending(createError) {
69 72
 		if err := computeSystem.registerCallback(); err != nil {
... ...
@@ -74,7 +78,7 @@ func CreateComputeSystem(id string, hcsDocumentInterface interface{}) (*System,
74 74
 		}
75 75
 	}
76 76
 
77
-	events, err := processAsyncHcsResult(createError, resultp, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.Duration)
77
+	events, err := processAsyncHcsResult(createError, resultp, computeSystem.callbackNumber, hcsNotificationSystemCreateCompleted, &timeout.SystemCreate)
78 78
 	if err != nil {
79 79
 		if err == ErrTimeout {
80 80
 			// Terminate the compute system if it still exists. We're okay to
... ...
@@ -135,7 +139,10 @@ func GetComputeSystems(q schema1.ComputeSystemQuery) ([]schema1.ContainerPropert
135 135
 		resultp         *uint16
136 136
 		computeSystemsp *uint16
137 137
 	)
138
+	completed := false
139
+	go syscallWatcher(fmt.Sprintf("GetComputeSystems %s:", query), &completed)
138 140
 	err = hcsEnumerateComputeSystems(query, &computeSystemsp, &resultp)
141
+	completed = true
139 142
 	events := processHcsResult(resultp)
140 143
 	if err != nil {
141 144
 		return nil, &HcsError{Op: operation, Err: err, Events: events}
... ...
@@ -192,8 +199,11 @@ func (computeSystem *System) Start() error {
192 192
 	}
193 193
 
194 194
 	var resultp *uint16
195
+	completed := false
196
+	go syscallWatcher(fmt.Sprintf("StartComputeSystem %s:", computeSystem.ID()), &completed)
195 197
 	err := hcsStartComputeSystem(computeSystem.handle, "", &resultp)
196
-	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.Duration)
198
+	completed = true
199
+	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemStartCompleted, &timeout.SystemStart)
197 200
 	if err != nil {
198 201
 		return makeSystemError(computeSystem, "Start", "", err, events)
199 202
 	}
... ...
@@ -219,7 +229,10 @@ func (computeSystem *System) Shutdown() error {
219 219
 	}
220 220
 
221 221
 	var resultp *uint16
222
+	completed := false
223
+	go syscallWatcher(fmt.Sprintf("ShutdownComputeSystem %s:", computeSystem.ID()), &completed)
222 224
 	err := hcsShutdownComputeSystem(computeSystem.handle, "", &resultp)
225
+	completed = true
223 226
 	events := processHcsResult(resultp)
224 227
 	if err != nil {
225 228
 		return makeSystemError(computeSystem, "Shutdown", "", err, events)
... ...
@@ -242,7 +255,10 @@ func (computeSystem *System) Terminate() error {
242 242
 	}
243 243
 
244 244
 	var resultp *uint16
245
+	completed := false
246
+	go syscallWatcher(fmt.Sprintf("TerminateComputeSystem %s:", computeSystem.ID()), &completed)
245 247
 	err := hcsTerminateComputeSystem(computeSystem.handle, "", &resultp)
248
+	completed = true
246 249
 	events := processHcsResult(resultp)
247 250
 	if err != nil {
248 251
 		return makeSystemError(computeSystem, "Terminate", "", err, events)
... ...
@@ -291,7 +307,10 @@ func (computeSystem *System) Properties(types ...schema1.PropertyType) (*schema1
291 291
 	}
292 292
 
293 293
 	var resultp, propertiesp *uint16
294
+	completed := false
295
+	go syscallWatcher(fmt.Sprintf("GetComputeSystemProperties %s:", computeSystem.ID()), &completed)
294 296
 	err = hcsGetComputeSystemProperties(computeSystem.handle, string(queryj), &propertiesp, &resultp)
297
+	completed = true
295 298
 	events := processHcsResult(resultp)
296 299
 	if err != nil {
297 300
 		return nil, makeSystemError(computeSystem, "Properties", "", err, events)
... ...
@@ -320,8 +339,11 @@ func (computeSystem *System) Pause() error {
320 320
 	}
321 321
 
322 322
 	var resultp *uint16
323
+	completed := false
324
+	go syscallWatcher(fmt.Sprintf("PauseComputeSystem %s:", computeSystem.ID()), &completed)
323 325
 	err := hcsPauseComputeSystem(computeSystem.handle, "", &resultp)
324
-	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.Duration)
326
+	completed = true
327
+	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemPauseCompleted, &timeout.SystemPause)
325 328
 	if err != nil {
326 329
 		return makeSystemError(computeSystem, "Pause", "", err, events)
327 330
 	}
... ...
@@ -342,8 +364,11 @@ func (computeSystem *System) Resume() error {
342 342
 	}
343 343
 
344 344
 	var resultp *uint16
345
+	completed := false
346
+	go syscallWatcher(fmt.Sprintf("ResumeComputeSystem %s:", computeSystem.ID()), &completed)
345 347
 	err := hcsResumeComputeSystem(computeSystem.handle, "", &resultp)
346
-	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.Duration)
348
+	completed = true
349
+	events, err := processAsyncHcsResult(err, resultp, computeSystem.callbackNumber, hcsNotificationSystemResumeCompleted, &timeout.SystemResume)
347 350
 	if err != nil {
348 351
 		return makeSystemError(computeSystem, "Resume", "", err, events)
349 352
 	}
... ...
@@ -375,7 +400,10 @@ func (computeSystem *System) CreateProcess(c interface{}) (*Process, error) {
375 375
 	configuration := string(configurationb)
376 376
 	logrus.Debugf(title+" config=%s", configuration)
377 377
 
378
+	completed := false
379
+	go syscallWatcher(fmt.Sprintf("CreateProcess %s: %s", computeSystem.ID(), configuration), &completed)
378 380
 	err = hcsCreateProcess(computeSystem.handle, configuration, &processInfo, &processHandle, &resultp)
381
+	completed = true
379 382
 	events := processHcsResult(resultp)
380 383
 	if err != nil {
381 384
 		return nil, makeSystemError(computeSystem, "CreateProcess", configuration, err, events)
... ...
@@ -415,7 +443,10 @@ func (computeSystem *System) OpenProcess(pid int) (*Process, error) {
415 415
 		return nil, makeSystemError(computeSystem, "OpenProcess", "", ErrAlreadyClosed, nil)
416 416
 	}
417 417
 
418
+	completed := false
419
+	go syscallWatcher(fmt.Sprintf("OpenProcess %s: %d", computeSystem.ID(), pid), &completed)
418 420
 	err := hcsOpenProcess(computeSystem.handle, uint32(pid), &processHandle, &resultp)
421
+	completed = true
419 422
 	events := processHcsResult(resultp)
420 423
 	if err != nil {
421 424
 		return nil, makeSystemError(computeSystem, "OpenProcess", "", err, events)
... ...
@@ -451,7 +482,11 @@ func (computeSystem *System) Close() error {
451 451
 		return makeSystemError(computeSystem, "Close", "", err, nil)
452 452
 	}
453 453
 
454
-	if err := hcsCloseComputeSystem(computeSystem.handle); err != nil {
454
+	completed := false
455
+	go syscallWatcher(fmt.Sprintf("CloseComputeSystem %s:", computeSystem.ID()), &completed)
456
+	err := hcsCloseComputeSystem(computeSystem.handle)
457
+	completed = true
458
+	if err != nil {
455 459
 		return makeSystemError(computeSystem, "Close", "", err, nil)
456 460
 	}
457 461
 
... ...
@@ -537,7 +572,10 @@ func (computeSystem *System) Modify(config interface{}) error {
537 537
 	logrus.Debugf(title + " " + requestString)
538 538
 
539 539
 	var resultp *uint16
540
+	completed := false
541
+	go syscallWatcher(fmt.Sprintf("ModifyComputeSystem %s: %s", computeSystem.ID(), requestString), &completed)
540 542
 	err = hcsModifyComputeSystem(computeSystem.handle, requestString, &resultp)
543
+	completed = true
541 544
 	events := processHcsResult(resultp)
542 545
 	if err != nil {
543 546
 		return makeSystemError(computeSystem, "Modify", requestString, err, events)
544 547
new file mode 100644
... ...
@@ -0,0 +1,30 @@
0
+package hcs
1
+
2
+import (
3
+	"time"
4
+
5
+	"github.com/Microsoft/hcsshim/internal/timeout"
6
+	"github.com/sirupsen/logrus"
7
+)
8
+
9
+// syscallWatcher is used as a very simple goroutine around calls into
10
+// the platform. In some cases, we have seen HCS APIs not returning due to
11
+// various bugs, and the goroutine making the syscall ends up not returning,
12
+// prior to its async callback. Spinning up a syscallWatcher allows
13
+// us to at least log a warning if a syscall doesn't complete in a reasonable
14
+// amount of time.
15
+//
16
+// Usage is:
17
+//
18
+// completed := false
19
+// go syscallWatcher("some description", &completed)
20
+// <syscall>
21
+// completed = true
22
+//
23
+func syscallWatcher(description string, syscallCompleted *bool) {
24
+	time.Sleep(timeout.SyscallWatcher)
25
+	if *syscallCompleted {
26
+		return
27
+	}
28
+	logrus.Warnf("%s: Did not complete within %s. This may indicate a platform issue. If it appears to be making no forward progress, obtain the stacks and see is there is a syscall stuck in the platform API for a significant length of time.", description, timeout.SyscallWatcher)
29
+}
... ...
@@ -1,4 +1,4 @@
1
-// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT
1
+// Code generated by 'go generate'; DO NOT EDIT.
2 2
 
3 3
 package interop
4 4
 
... ...
@@ -1,4 +1,4 @@
1
-// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT
1
+// Code generated by 'go generate'; DO NOT EDIT.
2 2
 
3 3
 package safefile
4 4
 
... ...
@@ -6,21 +6,65 @@ import (
6 6
 	"time"
7 7
 )
8 8
 
9
-// Duration is the default time to wait for various operations.
10
-// - Waiting for async notifications from HCS
11
-// - Waiting for processes to launch through
12
-// - Waiting to copy data to/from a launched processes stdio pipes.
13
-//
14
-// This can be overridden through environment variable `HCS_TIMEOUT_SECONDS`
9
+var (
10
+	// defaultTimeout is the timeout applied to most operations unless it is overridden.
11
+	defaultTimeout = 4 * time.Minute
15 12
 
16
-var Duration = 4 * time.Minute
13
+	// defaultTimeoutTestdRetry is the retry loop timeout for testd to respond
14
+	// for a disk to come online in LCOW.
15
+	defaultTimeoutTestdRetry = 5 * time.Second
16
+)
17
+
18
+// External variables for HCSShim consumers to use.
19
+var (
20
+	// SystemCreate is the timeout for creating a compute system
21
+	SystemCreate time.Duration = defaultTimeout
22
+
23
+	// SystemStart is the timeout for starting a compute system
24
+	SystemStart time.Duration = defaultTimeout
25
+
26
+	// SystemPause is the timeout for pausing a compute system
27
+	SystemPause time.Duration = defaultTimeout
28
+
29
+	// SystemResume is the timeout for resuming a compute system
30
+	SystemResume time.Duration = defaultTimeout
31
+
32
+	// SyscallWatcher is the timeout before warning of a potential stuck platform syscall.
33
+	SyscallWatcher time.Duration = defaultTimeout
34
+
35
+	// Tar2VHD is the timeout for the tar2vhd operation to complete
36
+	Tar2VHD time.Duration = defaultTimeout
37
+
38
+	// ExternalCommandToStart is the timeout for external commands to start
39
+	ExternalCommandToStart = defaultTimeout
40
+
41
+	// ExternalCommandToComplete is the timeout for external commands to complete.
42
+	// Generally this means copying data from their stdio pipes.
43
+	ExternalCommandToComplete = defaultTimeout
44
+
45
+	// TestDRetryLoop is the timeout for the testd retry loop when bringing a SCSI disk online in LCOW
46
+	TestDRetryLoop = defaultTimeoutTestdRetry
47
+)
17 48
 
18 49
 func init() {
19
-	envTimeout := os.Getenv("HCSSHIM_TIMEOUT_SECONDS")
50
+	SystemCreate = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMCREATE", SystemCreate)
51
+	SystemStart = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMSTART", SystemStart)
52
+	SystemPause = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMPAUSE", SystemPause)
53
+	SystemResume = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMRESUME", SystemResume)
54
+	SyscallWatcher = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSCALLWATCHER", SyscallWatcher)
55
+	Tar2VHD = durationFromEnvironment("HCSSHIM_TIMEOUT_TAR2VHD", Tar2VHD)
56
+	ExternalCommandToStart = durationFromEnvironment("HCSSHIM_TIMEOUT_EXTERNALCOMMANDSTART", ExternalCommandToStart)
57
+	ExternalCommandToComplete = durationFromEnvironment("HCSSHIM_TIMEOUT_EXTERNALCOMMANDCOMPLETE", ExternalCommandToComplete)
58
+	TestDRetryLoop = durationFromEnvironment("HCSSHIM_TIMEOUT_TESTDRETRYLOOP", TestDRetryLoop)
59
+}
60
+
61
+func durationFromEnvironment(env string, defaultValue time.Duration) time.Duration {
62
+	envTimeout := os.Getenv(env)
20 63
 	if len(envTimeout) > 0 {
21 64
 		e, err := strconv.Atoi(envTimeout)
22 65
 		if err == nil && e > 0 {
23
-			Duration = time.Second * time.Duration(e)
66
+			return time.Second * time.Duration(e)
24 67
 		}
25 68
 	}
69
+	return defaultValue
26 70
 }