Browse code

container: Fixed a bunch of race conditions by getting rid of lxc-wait.

Andrea Luzzardi authored on 2013/01/26 08:36:47
Showing 1 changed file
... ...
@@ -121,18 +121,6 @@ func (container *Container) Start() error {
121 121
 	container.State.setRunning(container.cmd.Process.Pid)
122 122
 	container.save()
123 123
 	go container.monitor()
124
-	if err := exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "RUNNING|STOPPED").Run(); err != nil {
125
-		// lxc-wait might return an error if by the time we call it,
126
-		// the container we just started is already STOPPED.
127
-		// This is a rare race condition that happens for short living programs.
128
-		//
129
-		// A workaround is to discard lxc-wait errors if the container is not
130
-		// running anymore.
131
-		if !container.State.Running {
132
-			return nil
133
-		}
134
-		return errors.New("Container failed to start")
135
-	}
136 124
 	return nil
137 125
 }
138 126
 
... ...
@@ -188,20 +176,11 @@ func (container *Container) monitor() {
188 188
 }
189 189
 
190 190
 func (container *Container) kill() error {
191
-	// This will cause the main container process to receive a SIGKILL
192
-	if err := exec.Command("/usr/bin/lxc-stop", "-n", container.Id).Run(); err != nil {
193
-		log.Printf("Failed to lxc-stop %v", container.Id)
191
+	if err := container.cmd.Process.Kill(); err != nil {
194 192
 		return err
195 193
 	}
196
-
197 194
 	// Wait for the container to be actually stopped
198 195
 	container.Wait()
199
-
200
-	// Make sure the underlying LXC thinks it's stopped too
201
-	// LXC Issue: lxc-wait MIGHT say that the container doesn't exist
202
-	// That's probably because it was destroyed and it cannot find it anymore
203
-	// We are going to ignore lxc-wait's error
204
-	exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "STOPPED").Run()
205 196
 	return nil
206 197
 }
207 198
 
... ...
@@ -218,18 +197,20 @@ func (container *Container) Stop() error {
218 218
 	}
219 219
 
220 220
 	// 1. Send a SIGTERM
221
-	if err := exec.Command("/usr/bin/lxc-kill", "-n", container.Id, "15").Run(); err != nil {
222
-		return err
221
+	if output, err := exec.Command("/usr/bin/lxc-kill", "-n", container.Id, "15").CombinedOutput(); err != nil {
222
+		log.Printf(string(output))
223
+		log.Printf("Failed to send SIGTERM to the process, force killing")
224
+		if err := container.Kill(); err != nil {
225
+			return err
226
+		}
223 227
 	}
224 228
 
225 229
 	// 2. Wait for the process to exit on its own
226 230
 	if err := container.WaitTimeout(10 * time.Second); err != nil {
227
-		log.Printf("Container %v failed to exit within 10 seconds of SIGTERM", container.Id)
228
-	}
229
-
230
-	// 3. Force kill
231
-	if err := container.kill(); err != nil {
232
-		return err
231
+		log.Printf("Container %v failed to exit within 10 seconds of SIGTERM - using the force", container.Id)
232
+		if err := container.Kill(); err != nil {
233
+			return err
234
+		}
233 235
 	}
234 236
 	return nil
235 237
 }
... ...
@@ -245,6 +226,7 @@ func (container *Container) Restart() error {
245 245
 }
246 246
 
247 247
 func (container *Container) Wait() {
248
+
248 249
 	for container.State.Running {
249 250
 		container.State.wait()
250 251
 	}