Browse code

Fixed lxc-wait race condition. Added unit test to try running multiple containers in parallel.

Andrea Luzzardi authored on 2013/01/23 10:30:09
Showing 2 changed files
... ...
@@ -121,20 +121,19 @@ func (container *Container) Start() error {
121 121
 	container.State.setRunning(container.cmd.Process.Pid)
122 122
 	container.save()
123 123
 	go container.monitor()
124
-
125
-	// Wait until we are out of the STARTING state before returning
126
-	//
127
-	// Even though lxc-wait blocks until the container reaches a given state,
128
-	// sometimes it returns an error code, which is why we have to retry.
129
-	//
130
-	// This is a rare race condition that happens for short lived programs
131
-	for retries := 0; retries < 3; retries++ {
132
-		err := exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "RUNNING|STOPPED").Run()
133
-		if err == nil {
124
+	if err := exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "RUNNING|STOPPED").Run(); err != nil {
125
+		// lxc-wait might return an error if by the time we call it,
126
+		// the container we just started is already STOPPED.
127
+		// This is a rare race condition that happens for short living programs.
128
+		//
129
+		// A workaround is to discard lxc-wait errors if the container is not
130
+		// running anymore.
131
+		if !container.State.Running {
134 132
 			return nil
135 133
 		}
134
+		return errors.New("Container failed to start")
136 135
 	}
137
-	return errors.New("Container failed to start")
136
+	return nil
138 137
 }
139 138
 
140 139
 func (container *Container) Run() error {
... ...
@@ -174,6 +173,7 @@ func (container *Container) StderrPipe() (io.ReadCloser, error) {
174 174
 func (container *Container) monitor() {
175 175
 	// Wait for the program to exit
176 176
 	container.cmd.Wait()
177
+	exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
177 178
 
178 179
 	// Cleanup
179 180
 	container.stdout.Close()
... ...
@@ -183,7 +183,6 @@ func (container *Container) monitor() {
183 183
 	}
184 184
 
185 185
 	// Report status back
186
-	exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
187 186
 	container.State.setStopped(exitCode)
188 187
 	container.save()
189 188
 }
... ...
@@ -191,13 +190,18 @@ func (container *Container) monitor() {
191 191
 func (container *Container) kill() error {
192 192
 	// This will cause the main container process to receive a SIGKILL
193 193
 	if err := exec.Command("/usr/bin/lxc-stop", "-n", container.Id).Run(); err != nil {
194
+		log.Printf("Failed to lxc-stop %v", container.Id)
194 195
 		return err
195 196
 	}
196 197
 
197 198
 	// Wait for the container to be actually stopped
198
-	if err := exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "STOPPED").Run(); err != nil {
199
-		return err
200
-	}
199
+	container.Wait()
200
+
201
+	// Make sure the underlying LXC thinks it's stopped too
202
+	// LXC Issue: lxc-wait MIGHT say that the container doesn't exist
203
+	// That's probably because it was destroyed and it cannot find it anymore
204
+	// We are going to ignore lxc-wait's error
205
+	exec.Command("/usr/bin/lxc-wait", "-n", container.Id, "-s", "STOPPED").Run()
201 206
 	return nil
202 207
 }
203 208
 
... ...
@@ -184,3 +184,59 @@ func TestExitCode(t *testing.T) {
184 184
 		t.Errorf("Unexpected exit code %v", falseContainer.State.ExitCode)
185 185
 	}
186 186
 }
187
+
188
+func TestMultipleContainers(t *testing.T) {
189
+	docker, err := newTestDocker()
190
+	if err != nil {
191
+		t.Fatal(err)
192
+	}
193
+
194
+	container1, err := docker.Create(
195
+		"container1",
196
+		"cat",
197
+		[]string{"/dev/zero"},
198
+		[]string{"/var/lib/docker/images/ubuntu"},
199
+		&Config{},
200
+	)
201
+	if err != nil {
202
+		t.Fatal(err)
203
+	}
204
+	defer docker.Destroy(container1)
205
+
206
+	container2, err := docker.Create(
207
+		"container2",
208
+		"cat",
209
+		[]string{"/dev/zero"},
210
+		[]string{"/var/lib/docker/images/ubuntu"},
211
+		&Config{},
212
+	)
213
+	if err != nil {
214
+		t.Fatal(err)
215
+	}
216
+	defer docker.Destroy(container2)
217
+
218
+	// Start both containers
219
+	if err := container1.Start(); err != nil {
220
+		t.Fatal(err)
221
+	}
222
+	if err := container2.Start(); err != nil {
223
+		t.Fatal(err)
224
+	}
225
+
226
+	// If we are here, both containers should be running
227
+	if !container1.State.Running {
228
+		t.Fatal("Container not running")
229
+	}
230
+	if !container2.State.Running {
231
+		t.Fatal("Container not running")
232
+	}
233
+
234
+	// Kill them
235
+	if err := container1.Kill(); err != nil {
236
+		t.Fatal(err)
237
+	}
238
+
239
+	if err := container2.Kill(); err != nil {
240
+		t.Fatal(err)
241
+	}
242
+}