Browse code

Merge pull request #9774 from pwaller/cancellation

Add basic build cancellation

Jessie Frazelle authored on 2015/03/23 11:16:23
Showing 11 changed files
... ...
@@ -1087,6 +1087,20 @@ func postBuild(eng *engine.Engine, version version.Version, w http.ResponseWrite
1087 1087
 	job.Setenv("cpusetcpus", r.FormValue("cpusetcpus"))
1088 1088
 	job.Setenv("cpushares", r.FormValue("cpushares"))
1089 1089
 
1090
+	// Job cancellation. Note: not all job types support this.
1091
+	if closeNotifier, ok := w.(http.CloseNotifier); ok {
1092
+		finished := make(chan struct{})
1093
+		defer close(finished)
1094
+		go func() {
1095
+			select {
1096
+			case <-finished:
1097
+			case <-closeNotifier.CloseNotify():
1098
+				log.Infof("Client disconnected, cancelling job: %v", job)
1099
+				job.Cancel()
1100
+			}
1101
+		}()
1102
+	}
1103
+
1090 1104
 	if err := job.Run(); err != nil {
1091 1105
 		if !job.Stdout.Used() {
1092 1106
 			return err
... ...
@@ -131,6 +131,8 @@ type Builder struct {
131 131
 	cpuShares  int64
132 132
 	memory     int64
133 133
 	memorySwap int64
134
+
135
+	cancelled <-chan struct{} // When closed, job was cancelled.
134 136
 }
135 137
 
136 138
 // Run the builder with the context. This is the lynchpin of this package. This
... ...
@@ -166,6 +168,14 @@ func (b *Builder) Run(context io.Reader) (string, error) {
166 166
 	b.TmpContainers = map[string]struct{}{}
167 167
 
168 168
 	for i, n := range b.dockerfile.Children {
169
+		select {
170
+		case <-b.cancelled:
171
+			log.Debug("Builder: build cancelled!")
172
+			fmt.Fprintf(b.OutStream, "Build cancelled")
173
+			return "", fmt.Errorf("Build cancelled")
174
+		default:
175
+			// Not cancelled yet, keep going...
176
+		}
169 177
 		if err := b.dispatch(i, n); err != nil {
170 178
 			if b.ForceRemove {
171 179
 				b.clearTmp()
... ...
@@ -581,6 +581,17 @@ func (b *Builder) run(c *daemon.Container) error {
581 581
 		return err
582 582
 	}
583 583
 
584
+	finished := make(chan struct{})
585
+	defer close(finished)
586
+	go func() {
587
+		select {
588
+		case <-b.cancelled:
589
+			log.Debugln("Build cancelled, killing container:", c.ID)
590
+			c.Kill()
591
+		case <-finished:
592
+		}
593
+	}()
594
+
584 595
 	if b.Verbose {
585 596
 		// Block on reading output from container, stop on err or chan closed
586 597
 		if err := <-errCh; err != nil {
... ...
@@ -153,6 +153,7 @@ func (b *BuilderJob) CmdBuild(job *engine.Job) engine.Status {
153 153
 		cpuSetCpus:      cpuSetCpus,
154 154
 		memory:          memory,
155 155
 		memorySwap:      memorySwap,
156
+		cancelled:       job.WaitCancelled(),
156 157
 	}
157 158
 
158 159
 	id, err := builder.Run(context)
... ...
@@ -76,6 +76,11 @@ Builds can now set resource constraints for all containers created for the build
76 76
 (`CgroupParent`) can be passed in the host config to setup container cgroups under a specific cgroup.
77 77
 
78 78
 
79
+`POST /build`
80
+
81
+**New!**
82
+Closing the HTTP request will now cause the build to be canceled.
83
+
79 84
 ## v1.17
80 85
 
81 86
 ### Full Documentation
... ...
@@ -1144,6 +1144,9 @@ The archive may include any number of other files,
1144 1144
 which will be accessible in the build context (See the [*ADD build
1145 1145
 command*](/reference/builder/#dockerbuilder)).
1146 1146
 
1147
+The build will also be canceled if the client drops the connection by quitting
1148
+or being killed.
1149
+
1147 1150
 Query Parameters:
1148 1151
 
1149 1152
 -   **dockerfile** - path within the build context to the Dockerfile. This is 
... ...
@@ -599,6 +599,12 @@ in cases where the same set of files are used for multiple builds. The path
599 599
 must be to a file within the build context. If a relative path is specified
600 600
 then it must be relative to the current directory.
601 601
 
602
+If the Docker client loses connection to the daemon, the build is canceled.
603
+This happens if you interrupt the Docker client with `ctrl-c` or if the Docker
604
+client is killed for any reason.
605
+
606
+> **Note:** Currently only the "run" phase of the build can be canceled until
607
+> pull cancelation is implemented.
602 608
 
603 609
 See also:
604 610
 
... ...
@@ -124,6 +124,8 @@ func (eng *Engine) Job(name string, args ...string) *Job {
124 124
 		Stderr:  NewOutput(),
125 125
 		env:     &Env{},
126 126
 		closeIO: true,
127
+
128
+		cancelled: make(chan struct{}),
127 129
 	}
128 130
 	if eng.Logging {
129 131
 		job.Stderr.Add(ioutils.NopWriteCloser(eng.Stderr))
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"fmt"
6 6
 	"io"
7 7
 	"strings"
8
+	"sync"
8 9
 	"time"
9 10
 
10 11
 	log "github.com/Sirupsen/logrus"
... ...
@@ -34,6 +35,12 @@ type Job struct {
34 34
 	status  Status
35 35
 	end     time.Time
36 36
 	closeIO bool
37
+
38
+	// When closed, the job has been cancelled.
39
+	// Note: not all jobs implement cancellation.
40
+	// See Job.Cancel() and Job.WaitCancelled()
41
+	cancelled  chan struct{}
42
+	cancelOnce sync.Once
37 43
 }
38 44
 
39 45
 type Status int
... ...
@@ -248,3 +255,15 @@ func (job *Job) StatusCode() int {
248 248
 func (job *Job) SetCloseIO(val bool) {
249 249
 	job.closeIO = val
250 250
 }
251
+
252
+// Cancel closes the job's cancelled channel (the one returned by Job.WaitCancelled), unblocking its receivers; cancelOnce makes repeated calls close it only once.
253
+func (job *Job) Cancel() {
254
+	job.cancelOnce.Do(func() {
255
+		close(job.cancelled)
256
+	})
257
+}
258
+
259
+// WaitCancelled returns the job's cancelled channel as receive-only; it is closed by Job.Cancel, after which receives on it never block.
260
+func (job *Job) WaitCancelled() <-chan struct{} {
261
+	return job.cancelled
262
+}
... ...
@@ -2,6 +2,7 @@ package main
2 2
 
3 3
 import (
4 4
 	"archive/tar"
5
+	"bufio"
5 6
 	"bytes"
6 7
 	"encoding/json"
7 8
 	"fmt"
... ...
@@ -14,6 +15,7 @@ import (
14 14
 	"runtime"
15 15
 	"strconv"
16 16
 	"strings"
17
+	"sync"
17 18
 	"testing"
18 19
 	"text/template"
19 20
 	"time"
... ...
@@ -1924,6 +1926,132 @@ func TestBuildForceRm(t *testing.T) {
1924 1924
 	logDone("build - ensure --force-rm doesn't leave containers behind")
1925 1925
 }
1926 1926
 
1927
+// TestBuildCancelationKillsSleep checks that an infinite sleep during a build is killed if the client disconnects.
1928
+// This test is fairly hairy because there are lots of ways to race.
1929
+// Strategy:
1930
+// * Monitor the output of docker events starting from before the build begins.
1931
+// * Run a 1-year-long sleep from a docker build.
1932
+// * When docker events sees container start, close the "docker build" command
1933
+// * Wait for docker events to emit a dying event.
1934
+func TestBuildCancelationKillsSleep(t *testing.T) {
1935
+	// TODO(jfrazelle): Make this work on Windows.
1936
+	testRequires(t, SameHostDaemon)
1937
+
1938
+	name := "testbuildcancelation"
1939
+	defer deleteImages(name)
1940
+
1941
+	// (Note: one year, will never finish)
1942
+	ctx, err := fakeContext("FROM busybox\nRUN sleep 31536000", nil)
1943
+	if err != nil {
1944
+		t.Fatal(err)
1945
+	}
1946
+	defer ctx.Close()
1947
+
+	// Deferred calls run last-in-first-out: close(finish) below fires before
+	// wg.Wait(), so the events watcher is told to stop before we wait on it.
1948
+	var wg sync.WaitGroup
1949
+	defer wg.Wait()
1950
+
1951
+	finish := make(chan struct{})
1952
+	defer close(finish)
1953
+
1954
+	eventStart := make(chan struct{})
1955
+	eventDie := make(chan struct{})
1956
+
1957
+	// Start one second ago, to avoid rounding problems
1958
+	startEpoch := time.Now().Add(-1 * time.Second)
1959
+
1960
+	// Goroutine responsible for watching start/die events from `docker events`
+	// It reports failures with t.Error/t.Errorf and returns, because
+	// t.Fatal may only be called from the goroutine running the test.
1961
+	wg.Add(1)
1962
+	go func() {
1963
+		defer wg.Done()
1964
+
1965
+		// Watch for events since epoch.
1966
+		eventsCmd := exec.Command(dockerBinary, "events",
1967
+			"-since", fmt.Sprint(startEpoch.Unix()))
1968
+		stdout, err := eventsCmd.StdoutPipe()
+		if err != nil {
+			t.Errorf("failed to get stdout pipe for 'docker events': %s", err)
+			return
+		}
1969
+		err = eventsCmd.Start()
1970
+		if err != nil {
1971
+			t.Errorf("failed to start 'docker events': %s", err)
+			return
1972
+		}
1973
+
+		// Kill `docker events` once the test is finishing so the scan loop
+		// below sees end-of-stream and this goroutine can exit.
1974
+		go func() {
1975
+			<-finish
1976
+			eventsCmd.Process.Kill()
1977
+		}()
1978
+
1979
+		var started, died bool
1980
+		matchStart := regexp.MustCompile(" \\(from busybox\\:latest\\) start$")
1981
+		matchDie := regexp.MustCompile(" \\(from busybox\\:latest\\) die$")
1982
+
1983
+		//
1984
+		// Read lines of `docker events` looking for container start and stop.
1985
+		//
1986
+		scanner := bufio.NewScanner(stdout)
1987
+		for scanner.Scan() {
1988
+			if ok := matchStart.MatchString(scanner.Text()); ok {
1989
+				if started {
1990
+					t.Error("assertion fail: more than one container started")
+					return
1991
+				}
1992
+				close(eventStart)
1993
+				started = true
1994
+			}
1995
+			if ok := matchDie.MatchString(scanner.Text()); ok {
1996
+				if died {
1997
+					t.Error("assertion fail: more than one container died")
+					return
1998
+				}
1999
+				close(eventDie)
2000
+				died = true
2001
+			}
2002
+		}
+		if err := scanner.Err(); err != nil {
+			t.Errorf("error reading 'docker events' output: %s", err)
+		}
2003
+
2004
+		err = eventsCmd.Wait()
2005
+		if err != nil && !IsKilled(err) {
2006
+			t.Errorf("docker events had bad exit status: %s", err)
2007
+		}
2008
+	}()
2009
+
2010
+	buildCmd := exec.Command(dockerBinary, "build", "-t", name, ".")
2011
+	buildCmd.Dir = ctx.Dir
2012
+	buildCmd.Stdout = os.Stdout
2013
+
2014
+	err = buildCmd.Start()
2015
+	if err != nil {
2016
+		t.Fatalf("failed to run build: %s", err)
2017
+	}
2018
+
2019
+	select {
2020
+	case <-time.After(30 * time.Second):
2021
+		t.Fatal("failed to observe build container start in timely fashion")
2022
+	case <-eventStart:
2023
+		// Proceeds from here when we see the container fly past in the
2024
+		// output of "docker events".
2025
+		// Now we know the container is running.
2026
+	}
2027
+
2028
+	// Send a kill to the `docker build` command.
2029
+	// Causes the underlying build to be cancelled due to socket close.
2030
+	err = buildCmd.Process.Kill()
2031
+	if err != nil {
2032
+		t.Fatalf("error killing build command: %s", err)
2033
+	}
2034
+
2035
+	// Get the exit status of `docker build`, check it exited because killed.
2036
+	err = buildCmd.Wait()
2037
+	if err != nil && !IsKilled(err) {
2038
+		t.Fatalf("wait failed during build run: %T %s", err, err)
2039
+	}
2040
+
2041
+	select {
2042
+	case <-time.After(30 * time.Second):
2043
+		// If we don't get here in a timely fashion, it wasn't killed.
2044
+		t.Fatal("container cancel did not succeed")
2045
+	case <-eventDie:
2046
+		// We saw the container shut down in the `docker events` stream,
2047
+		// as expected.
2048
+	}
2049
+
2050
+	logDone("build - ensure canceled job finishes immediately")
2051
+}
2052
+
1927 2053
 func TestBuildRm(t *testing.T) {
1928 2054
 	name := "testbuildrm"
1929 2055
 	defer deleteImages(name)
... ...
@@ -42,6 +42,18 @@ func processExitCode(err error) (exitCode int) {
42 42
 	return
43 43
 }
44 44
 
45
// IsKilled reports whether err is an *exec.ExitError whose process state shows
// termination by the os.Kill signal. It returns false for any other error
// (including nil) and when the platform status is not a syscall.WaitStatus.
+func IsKilled(err error) bool {
46
+	if exitErr, ok := err.(*exec.ExitError); ok {
47
+		sys := exitErr.ProcessState.Sys()
48
+		status, ok := sys.(syscall.WaitStatus)
49
+		if !ok {
50
+			return false
51
+		}
52
+		return status.Signaled() && status.Signal() == os.Kill
53
+	}
54
+	return false
55
+}
56
+
45 57
 func runCommandWithOutput(cmd *exec.Cmd) (output string, exitCode int, err error) {
46 58
 	exitCode = 0
47 59
 	out, err := cmd.CombinedOutput()