Browse code

Fix race on force deleting container created by task

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>

Tonis Tiigi authored on 2016/06/15 03:11:43
Showing 14 changed files
... ...
@@ -539,7 +539,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
539 539
 // ShouldRestart decides whether the daemon should restart the container or not.
540 540
 // This is based on the container's restart policy.
541 541
 func (container *Container) ShouldRestart() bool {
542
-	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
542
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
543 543
 	return shouldRestart
544 544
 }
545 545
 
... ...
@@ -24,8 +24,8 @@ type State struct {
24 24
 	RemovalInProgress bool // Not need for this to be persistent on disk.
25 25
 	Dead              bool
26 26
 	Pid               int
27
-	ExitCode          int
28
-	Error             string // contains last known error when starting the container
27
+	exitCode          int
28
+	error             string // contains last known error when starting the container
29 29
 	StartedAt         time.Time
30 30
 	FinishedAt        time.Time
31 31
 	waitChan          chan struct{}
... ...
@@ -46,7 +46,7 @@ func (s *State) String() string {
46 46
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
47 47
 		}
48 48
 		if s.Restarting {
49
-			return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
49
+			return fmt.Sprintf("Restarting (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
50 50
 		}
51 51
 
52 52
 		if h := s.Health; h != nil {
... ...
@@ -71,7 +71,7 @@ func (s *State) String() string {
71 71
 		return ""
72 72
 	}
73 73
 
74
-	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
74
+	return fmt.Sprintf("Exited (%d) %s ago", s.exitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
75 75
 }
76 76
 
77 77
 // StateString returns a single string to describe state
... ...
@@ -129,7 +129,7 @@ func wait(waitChan <-chan struct{}, timeout time.Duration) error {
129 129
 func (s *State) WaitStop(timeout time.Duration) (int, error) {
130 130
 	s.Lock()
131 131
 	if !s.Running {
132
-		exitCode := s.ExitCode
132
+		exitCode := s.exitCode
133 133
 		s.Unlock()
134 134
 		return exitCode, nil
135 135
 	}
... ...
@@ -138,33 +138,38 @@ func (s *State) WaitStop(timeout time.Duration) (int, error) {
138 138
 	if err := wait(waitChan, timeout); err != nil {
139 139
 		return -1, err
140 140
 	}
141
-	return s.getExitCode(), nil
141
+	s.Lock()
142
+	defer s.Unlock()
143
+	return s.ExitCode(), nil
142 144
 }
143 145
 
144 146
 // WaitWithContext waits for the container to stop. Optional context can be
145 147
 // passed for canceling the request.
146
-func (s *State) WaitWithContext(ctx context.Context) <-chan int {
148
+func (s *State) WaitWithContext(ctx context.Context) error {
147 149
 	// todo(tonistiigi): make other wait functions use this
148
-	c := make(chan int)
149
-	go func() {
150
-		s.Lock()
151
-		if !s.Running {
152
-			exitCode := s.ExitCode
153
-			s.Unlock()
154
-			c <- exitCode
155
-			close(c)
156
-			return
150
+	s.Lock()
151
+	if !s.Running {
152
+		state := *s
153
+		defer s.Unlock()
154
+		if state.exitCode == 0 {
155
+			return nil
157 156
 		}
158
-		waitChan := s.waitChan
157
+		return &state
158
+	}
159
+	waitChan := s.waitChan
160
+	s.Unlock()
161
+	select {
162
+	case <-waitChan:
163
+		s.Lock()
164
+		state := *s
159 165
 		s.Unlock()
160
-		select {
161
-		case <-waitChan:
162
-			c <- s.getExitCode()
163
-		case <-ctx.Done():
166
+		if state.exitCode == 0 {
167
+			return nil
164 168
 		}
165
-		close(c)
166
-	}()
167
-	return c
169
+		return &state
170
+	case <-ctx.Done():
171
+		return ctx.Err()
172
+	}
168 173
 }
169 174
 
170 175
 // IsRunning returns whether the running flag is set. Used by Container to check whether a container is running.
... ...
@@ -183,20 +188,26 @@ func (s *State) GetPID() int {
183 183
 	return res
184 184
 }
185 185
 
186
-func (s *State) getExitCode() int {
187
-	s.Lock()
188
-	res := s.ExitCode
189
-	s.Unlock()
186
+// ExitCode returns current exitcode for the state. Take lock before if state
187
+// may be shared.
188
+func (s *State) ExitCode() int {
189
+	res := s.exitCode
190 190
 	return res
191 191
 }
192 192
 
193
+// SetExitCode set current exitcode for the state. Take lock before if state
194
+// may be shared.
195
+func (s *State) SetExitCode(ec int) {
196
+	s.exitCode = ec
197
+}
198
+
193 199
 // SetRunning sets the state of the container to "running".
194 200
 func (s *State) SetRunning(pid int, initial bool) {
195
-	s.Error = ""
201
+	s.error = ""
196 202
 	s.Running = true
197 203
 	s.Paused = false
198 204
 	s.Restarting = false
199
-	s.ExitCode = 0
205
+	s.exitCode = 0
200 206
 	s.Pid = pid
201 207
 	if initial {
202 208
 		s.StartedAt = time.Now().UTC()
... ...
@@ -248,7 +259,7 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
248 248
 // know the error that occurred when container transits to another state
249 249
 // when inspecting it
250 250
 func (s *State) SetError(err error) {
251
-	s.Error = err.Error()
251
+	s.error = err.Error()
252 252
 }
253 253
 
254 254
 // IsPaused returns whether the container is paused or not.
... ...
@@ -292,3 +303,8 @@ func (s *State) SetDead() {
292 292
 	s.Dead = true
293 293
 	s.Unlock()
294 294
 }
295
+
296
+// Error returns current error for the state.
297
+func (s *State) Error() string {
298
+	return s.error
299
+}
... ...
@@ -3,5 +3,5 @@ package container
3 3
 // setFromExitStatus is a platform specific helper function to set the state
4 4
 // based on the ExitStatus structure.
5 5
 func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
6
-	s.ExitCode = exitStatus.ExitCode
6
+	s.exitCode = exitStatus.ExitCode
7 7
 }
... ...
@@ -19,8 +19,8 @@ func TestStateRunStop(t *testing.T) {
19 19
 		if s.Pid != i+100 {
20 20
 			t.Fatalf("Pid %v, expected %v", s.Pid, i+100)
21 21
 		}
22
-		if s.ExitCode != 0 {
23
-			t.Fatalf("ExitCode %v, expected 0", s.ExitCode)
22
+		if s.ExitCode() != 0 {
23
+			t.Fatalf("ExitCode %v, expected 0", s.ExitCode())
24 24
 		}
25 25
 
26 26
 		stopped := make(chan struct{})
... ...
@@ -34,8 +34,8 @@ func TestStateRunStop(t *testing.T) {
34 34
 		if s.IsRunning() {
35 35
 			t.Fatal("State is running")
36 36
 		}
37
-		if s.ExitCode != i {
38
-			t.Fatalf("ExitCode %v, expected %v", s.ExitCode, i)
37
+		if s.ExitCode() != i {
38
+			t.Fatalf("ExitCode %v, expected %v", s.ExitCode(), i)
39 39
 		}
40 40
 		if s.Pid != 0 {
41 41
 			t.Fatalf("Pid %v, expected 0", s.Pid)
... ...
@@ -5,6 +5,6 @@ package container
5 5
 // setFromExitStatus is a platform specific helper function to set the state
6 6
 // based on the ExitStatus structure.
7 7
 func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
8
-	s.ExitCode = exitStatus.ExitCode
8
+	s.exitCode = exitStatus.ExitCode
9 9
 	s.OOMKilled = exitStatus.OOMKilled
10 10
 }
... ...
@@ -3,5 +3,5 @@ package container
3 3
 // setFromExitStatus is a platform specific helper function to set the state
4 4
 // based on the ExitStatus structure.
5 5
 func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
6
-	s.ExitCode = exitStatus.ExitCode
6
+	s.exitCode = exitStatus.ExitCode
7 7
 }
... ...
@@ -24,7 +24,7 @@ type Backend interface {
24 24
 	ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
25 25
 	UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error
26 26
 	ContainerInspectCurrent(name string, size bool) (*types.ContainerJSON, error)
27
-	ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error)
27
+	ContainerWaitWithContext(ctx context.Context, name string) error
28 28
 	ContainerRm(name string, config *types.ContainerRmConfig) error
29 29
 	ContainerKill(name string, sig uint64) error
30 30
 	SystemInfo() (*types.Info, error)
... ...
@@ -160,7 +160,7 @@ func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, er
160 160
 //
161 161
 // A chan struct{} is returned that will be closed if the event procressing
162 162
 // fails and needs to be restarted.
163
-func (c *containerAdapter) wait(ctx context.Context) (<-chan int, error) {
163
+func (c *containerAdapter) wait(ctx context.Context) error {
164 164
 	return c.backend.ContainerWaitWithContext(ctx, c.container.name())
165 165
 }
166 166
 
... ...
@@ -1,7 +1,6 @@
1 1
 package container
2 2
 
3 3
 import (
4
-	"errors"
5 4
 	"fmt"
6 5
 	"strings"
7 6
 
... ...
@@ -151,33 +150,20 @@ func (r *controller) Wait(pctx context.Context) error {
151 151
 	ctx, cancel := context.WithCancel(pctx)
152 152
 	defer cancel()
153 153
 
154
-	c, err := r.adapter.wait(ctx)
154
+	err := r.adapter.wait(ctx)
155 155
 	if err != nil {
156 156
 		return err
157 157
 	}
158
-
159
-	<-c
160 158
 	if ctx.Err() != nil {
161 159
 		return ctx.Err()
162 160
 	}
163
-	ctnr, err := r.adapter.inspect(ctx)
164 161
 	if err != nil {
165
-		// TODO(stevvooe): Need to handle missing container here. It is likely
166
-		// that a Wait call with a not found error should result in no waiting
167
-		// and no error at all.
168
-		return err
169
-	}
170
-
171
-	if ctnr.State.ExitCode != 0 {
172
-		var cause error
173
-		if ctnr.State.Error != "" {
174
-			cause = errors.New(ctnr.State.Error)
162
+		ee := &exitError{}
163
+		if err.Error() != "" {
164
+			ee.cause = err
175 165
 		}
176
-		cstatus, _ := parseContainerStatus(ctnr)
177
-		return &exitError{
178
-			code:            ctnr.State.ExitCode,
179
-			cause:           cause,
180
-			containerStatus: cstatus,
166
+		if ec, ok := err.(exec.ExitCoder); ok {
167
+			ee.code = ec.ExitCode()
181 168
 		}
182 169
 	}
183 170
 	return nil
... ...
@@ -283,9 +269,8 @@ func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error
283 283
 }
284 284
 
285 285
 type exitError struct {
286
-	code            int
287
-	cause           error
288
-	containerStatus *api.ContainerStatus
286
+	code  int
287
+	cause error
289 288
 }
290 289
 
291 290
 func (e *exitError) Error() string {
... ...
@@ -297,7 +282,7 @@ func (e *exitError) Error() string {
297 297
 }
298 298
 
299 299
 func (e *exitError) ExitCode() int {
300
-	return int(e.containerStatus.ExitCode)
300
+	return int(e.code)
301 301
 }
302 302
 
303 303
 func (e *exitError) Cause() error {
... ...
@@ -127,8 +127,8 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
127 127
 		OOMKilled:  container.State.OOMKilled,
128 128
 		Dead:       container.State.Dead,
129 129
 		Pid:        container.State.Pid,
130
-		ExitCode:   container.State.ExitCode,
131
-		Error:      container.State.Error,
130
+		ExitCode:   container.State.ExitCode(),
131
+		Error:      container.State.Error(),
132 132
 		StartedAt:  container.State.StartedAt.Format(time.RFC3339Nano),
133 133
 		FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano),
134 134
 		Health:     containerHealth,
... ...
@@ -337,7 +337,7 @@ func includeContainerInList(container *container.Container, ctx *listContext) it
337 337
 	if len(ctx.exitAllowed) > 0 {
338 338
 		shouldSkip := true
339 339
 		for _, code := range ctx.exitAllowed {
340
-			if code == container.ExitCode && !container.Running && !container.StartedAt.IsZero() {
340
+			if code == container.ExitCode() && !container.Running && !container.StartedAt.IsZero() {
341 341
 				shouldSkip = false
342 342
 				break
343 343
 			}
... ...
@@ -29,7 +29,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon
29 29
 		// Create a new servicing container, which will start, complete the update, and merge back the
30 30
 		// results if it succeeded, all as part of the below function call.
31 31
 		if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil {
32
-			container.ExitCode = -1
32
+			container.SetExitCode(-1)
33 33
 			return fmt.Errorf("Post-run update servicing failed: %s", err)
34 34
 		}
35 35
 	}
... ...
@@ -107,8 +107,8 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
107 107
 		if err != nil {
108 108
 			container.SetError(err)
109 109
 			// if no one else has set it, make sure we don't leave it at zero
110
-			if container.ExitCode == 0 {
111
-				container.ExitCode = 128
110
+			if container.ExitCode() == 0 {
111
+				container.SetExitCode(128)
112 112
 			}
113 113
 			container.ToDisk()
114 114
 			daemon.Cleanup(container)
... ...
@@ -151,11 +151,11 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
151 151
 			(strings.Contains(errDesc, "executable file not found") ||
152 152
 				strings.Contains(errDesc, "no such file or directory") ||
153 153
 				strings.Contains(errDesc, "system cannot find the file specified")) {
154
-			container.ExitCode = 127
154
+			container.SetExitCode(127)
155 155
 		}
156 156
 		// set to 126 for container cmd can't be invoked errors
157 157
 		if strings.Contains(errDesc, syscall.EACCES.Error()) {
158
-			container.ExitCode = 126
158
+			container.SetExitCode(126)
159 159
 		}
160 160
 
161 161
 		container.Reset(false)
... ...
@@ -22,11 +22,11 @@ func (daemon *Daemon) ContainerWait(name string, timeout time.Duration) (int, er
22 22
 
23 23
 // ContainerWaitWithContext returns a channel where exit code is sent
24 24
 // when container stops. Channel can be cancelled with a context.
25
-func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) (<-chan int, error) {
25
+func (daemon *Daemon) ContainerWaitWithContext(ctx context.Context, name string) error {
26 26
 	container, err := daemon.GetContainer(name)
27 27
 	if err != nil {
28
-		return nil, err
28
+		return err
29 29
 	}
30 30
 
31
-	return container.WaitWithContext(ctx), nil
31
+	return container.WaitWithContext(ctx)
32 32
 }