Browse code

Decouple daemon and container to stop and kill containers.

Signed-off-by: David Calavera <david.calavera@gmail.com>

David Calavera authored on 2015/11/03 08:25:26
Showing 12 changed files
... ...
@@ -128,6 +128,8 @@ type Docker interface {
128 128
 	// Release releases a list of images that were retained for the time of a build.
129 129
 	// TODO: remove
130 130
 	Release(sessionID string, activeImages []string)
131
+	// Kill stops the container execution abruptly.
132
+	Kill(c *daemon.Container) error
131 133
 }
132 134
 
133 135
 // ImageCache abstracts an image cache store.
... ...
@@ -559,7 +559,7 @@ func (b *Builder) run(c *daemon.Container) error {
559 559
 		select {
560 560
 		case <-b.cancelled:
561 561
 			logrus.Debugln("Build cancelled, killing and removing container:", c.ID)
562
-			c.Kill()
562
+			b.docker.Kill(c)
563 563
 			b.removeContainer(c.ID)
564 564
 		case <-finished:
565 565
 		}
... ...
@@ -337,51 +337,10 @@ func (container *Container) cleanup() {
337 337
 	}
338 338
 }
339 339
 
340
-// killSig sends the container the given signal. This wrapper for the
341
-// host specific kill command prepares the container before attempting
342
-// to send the signal. An error is returned if the container is paused
343
-// or not running, or if there is a problem returned from the
344
-// underlying kill command.
345
-func (container *Container) killSig(sig int) error {
346
-	logrus.Debugf("Sending %d to %s", sig, container.ID)
347
-	container.Lock()
348
-	defer container.Unlock()
349
-
350
-	// We could unpause the container for them rather than returning this error
351
-	if container.Paused {
352
-		return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
353
-	}
354
-
355
-	if !container.Running {
356
-		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
357
-	}
358
-
359
-	// signal to the monitor that it should not restart the container
360
-	// after we send the kill signal
340
+// ExitOnNext signals to the monitor that it should not restart the container
341
+// after we send the kill signal.
342
+func (container *Container) ExitOnNext() {
361 343
 	container.monitor.ExitOnNext()
362
-
363
-	// if the container is currently restarting we do not need to send the signal
364
-	// to the process.  Telling the monitor that it should exit on it's next event
365
-	// loop is enough
366
-	if container.Restarting {
367
-		return nil
368
-	}
369
-
370
-	if err := container.daemon.kill(container, sig); err != nil {
371
-		return err
372
-	}
373
-	container.logEvent("kill")
374
-	return nil
375
-}
376
-
377
-// Wrapper aroung killSig() suppressing "no such process" error.
378
-func (container *Container) killPossiblyDeadProcess(sig int) error {
379
-	err := container.killSig(sig)
380
-	if err == syscall.ESRCH {
381
-		logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.getPID(), sig)
382
-		return nil
383
-	}
384
-	return err
385 344
 }
386 345
 
387 346
 func (container *Container) pause() error {
... ...
@@ -428,98 +387,6 @@ func (container *Container) unpause() error {
428 428
 	return nil
429 429
 }
430 430
 
431
-// Kill forcefully terminates a container.
432
-func (container *Container) Kill() error {
433
-	if !container.IsRunning() {
434
-		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
435
-	}
436
-
437
-	// 1. Send SIGKILL
438
-	if err := container.killPossiblyDeadProcess(int(syscall.SIGKILL)); err != nil {
439
-		// While normally we might "return err" here we're not going to
440
-		// because if we can't stop the container by this point then
441
-		// its probably because its already stopped. Meaning, between
442
-		// the time of the IsRunning() call above and now it stopped.
443
-		// Also, since the err return will be exec driver specific we can't
444
-		// look for any particular (common) error that would indicate
445
-		// that the process is already dead vs something else going wrong.
446
-		// So, instead we'll give it up to 2 more seconds to complete and if
447
-		// by that time the container is still running, then the error
448
-		// we got is probably valid and so we return it to the caller.
449
-
450
-		if container.IsRunning() {
451
-			container.WaitStop(2 * time.Second)
452
-			if container.IsRunning() {
453
-				return err
454
-			}
455
-		}
456
-	}
457
-
458
-	// 2. Wait for the process to die, in last resort, try to kill the process directly
459
-	if err := killProcessDirectly(container); err != nil {
460
-		return err
461
-	}
462
-
463
-	container.WaitStop(-1 * time.Second)
464
-	return nil
465
-}
466
-
467
-// Stop halts a container by sending a stop signal, waiting for the given
468
-// duration in seconds, and then calling SIGKILL and waiting for the
469
-// process to exit. If a negative duration is given, Stop will wait
470
-// for the initial signal forever. If the container is not running Stop returns
471
-// immediately.
472
-func (container *Container) Stop(seconds int) error {
473
-	if !container.IsRunning() {
474
-		return nil
475
-	}
476
-
477
-	// 1. Send a SIGTERM
478
-	if err := container.killPossiblyDeadProcess(container.stopSignal()); err != nil {
479
-		logrus.Infof("Failed to send SIGTERM to the process, force killing")
480
-		if err := container.killPossiblyDeadProcess(9); err != nil {
481
-			return err
482
-		}
483
-	}
484
-
485
-	// 2. Wait for the process to exit on its own
486
-	if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil {
487
-		logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
488
-		// 3. If it doesn't, then send SIGKILL
489
-		if err := container.Kill(); err != nil {
490
-			container.WaitStop(-1 * time.Second)
491
-			logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it
492
-		}
493
-	}
494
-
495
-	container.logEvent("stop")
496
-	return nil
497
-}
498
-
499
-// Restart attempts to gracefully stop and then start the
500
-// container. When stopping, wait for the given duration in seconds to
501
-// gracefully stop, before forcefully terminating the container. If
502
-// given a negative duration, wait forever for a graceful stop.
503
-func (container *Container) Restart(seconds int) error {
504
-	// Avoid unnecessarily unmounting and then directly mounting
505
-	// the container when the container stops and then starts
506
-	// again
507
-	if err := container.Mount(); err == nil {
508
-		defer container.Unmount()
509
-	}
510
-
511
-	if err := container.Stop(seconds); err != nil {
512
-		return err
513
-	}
514
-
515
-	if err := container.Start(); err != nil {
516
-		return err
517
-	}
518
-
519
-	container.logEvent("restart")
520
-	return nil
521
-}
522
-
523 431
 // Resize changes the TTY of the process running inside the container
524 432
 // to the given height and width. The container must be running.
525 433
 func (container *Container) Resize(h, w int) error {
... ...
@@ -64,7 +64,7 @@ type Container struct {
64 64
 func killProcessDirectly(container *Container) error {
65 65
 	if _, err := container.WaitStop(10 * time.Second); err != nil {
66 66
 		// Ensure that we don't kill ourselves
67
-		if pid := container.getPID(); pid != 0 {
67
+		if pid := container.GetPID(); pid != 0 {
68 68
 			logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID))
69 69
 			if err := syscall.Kill(pid, 9); err != nil {
70 70
 				if err != syscall.ESRCH {
... ...
@@ -838,7 +838,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
838 838
 	return d, nil
839 839
 }
840 840
 
841
-func stopContainer(c *Container) error {
841
+func (daemon *Daemon) shutdownContainer(c *Container) error {
842 842
 	// TODO(windows): Handle docker restart with paused containers
843 843
 	if c.isPaused() {
844 844
 		// To terminate a process in freezer cgroup, we should send
... ...
@@ -869,7 +869,7 @@ func stopContainer(c *Container) error {
869 869
 		}
870 870
 	}
871 871
 	// If container failed to exit in 10 seconds of SIGTERM, then using the force
872
-	if err := c.Stop(10); err != nil {
872
+	if err := daemon.containerStop(c, 10); err != nil {
873 873
 		return fmt.Errorf("Stop container %s with error: %v", c.ID, err)
874 874
 	}
875 875
 
... ...
@@ -891,7 +891,7 @@ func (daemon *Daemon) Shutdown() error {
891 891
 			group.Add(1)
892 892
 			go func(c *Container) {
893 893
 				defer group.Done()
894
-				if err := stopContainer(c); err != nil {
894
+				if err := daemon.shutdownContainer(c); err != nil {
895 895
 					logrus.Errorf("Stop container error: %v", err)
896 896
 					return
897 897
 				}
... ...
@@ -205,6 +205,11 @@ func (d Docker) GetCachedImage(imgID string, cfg *runconfig.Config) (string, err
205 205
 	return cache.ID, nil
206 206
 }
207 207
 
208
+// Kill stops the container execution abruptly.
209
+func (d Docker) Kill(container *daemon.Container) error {
210
+	return d.Daemon.Kill(container)
211
+}
212
+
208 213
 // Following is specific to builder contexts
209 214
 
210 215
 // DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used
... ...
@@ -71,7 +71,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
71 71
 		if !forceRemove {
72 72
 			return derr.ErrorCodeRmRunning
73 73
 		}
74
-		if err := container.Kill(); err != nil {
74
+		if err := daemon.Kill(container); err != nil {
75 75
 			return derr.ErrorCodeRmFailed.WithArgs(err)
76 76
 		}
77 77
 	}
... ...
@@ -90,7 +90,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) {
90 90
 	// if stats are currently getting collected.
91 91
 	daemon.statsCollector.stopCollection(container)
92 92
 
93
-	if err = container.Stop(3); err != nil {
93
+	if err = daemon.containerStop(container, 3); err != nil {
94 94
 		return err
95 95
 	}
96 96
 
97 97
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+package daemon
1
+
2
+// logContainerEvent generates an event related to a container.
3
+func (daemon *Daemon) logContainerEvent(container *Container, action string) {
4
+	daemon.EventsService.Log(
5
+		action,
6
+		container.ID,
7
+		container.Config.Image,
8
+	)
9
+}
... ...
@@ -4,7 +4,10 @@ import (
4 4
 	"fmt"
5 5
 	"runtime"
6 6
 	"syscall"
7
+	"time"
7 8
 
9
+	"github.com/Sirupsen/logrus"
10
+	derr "github.com/docker/docker/errors"
8 11
 	"github.com/docker/docker/pkg/signal"
9 12
 )
10 13
 
... ...
@@ -24,14 +27,96 @@ func (daemon *Daemon) ContainerKill(name string, sig uint64) error {
24 24
 
25 25
 	// If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait())
26 26
 	if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL {
27
-		if err := container.Kill(); err != nil {
27
+		if err := daemon.Kill(container); err != nil {
28 28
 			return err
29 29
 		}
30 30
 	} else {
31 31
 		// Otherwise, just send the requested signal
32
-		if err := container.killSig(int(sig)); err != nil {
32
+		if err := daemon.killWithSignal(container, int(sig)); err != nil {
33 33
 			return err
34 34
 		}
35 35
 	}
36 36
 	return nil
37 37
 }
38
+
39
+// killWithSignal sends the container the given signal. This wrapper for the
40
+// host specific kill command prepares the container before attempting
41
+// to send the signal. An error is returned if the container is paused
42
+// or not running, or if there is a problem returned from the
43
+// underlying kill command.
44
+func (daemon *Daemon) killWithSignal(container *Container, sig int) error {
45
+	logrus.Debugf("Sending %d to %s", sig, container.ID)
46
+	container.Lock()
47
+	defer container.Unlock()
48
+
49
+	// We could unpause the container for them rather than returning this error
50
+	if container.Paused {
51
+		return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID)
52
+	}
53
+
54
+	if !container.Running {
55
+		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
56
+	}
57
+
58
+	container.ExitOnNext()
59
+
60
+	// if the container is currently restarting we do not need to send the signal
61
+	// to the process.  Telling the monitor that it should exit on its next event
62
+	// loop is enough
63
+	if container.Restarting {
64
+		return nil
65
+	}
66
+
67
+	if err := daemon.kill(container, sig); err != nil {
68
+		return err
69
+	}
70
+
71
+	daemon.logContainerEvent(container, "kill")
72
+	return nil
73
+}
74
+
75
+// Kill forcefully terminates a container.
76
+func (daemon *Daemon) Kill(container *Container) error {
77
+	if !container.IsRunning() {
78
+		return derr.ErrorCodeNotRunning.WithArgs(container.ID)
79
+	}
80
+
81
+	// 1. Send SIGKILL
82
+	if err := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL)); err != nil {
83
+		// While normally we might "return err" here we're not going to
84
+		// because if we can't stop the container by this point then
85
+		// it's probably because it's already stopped. Meaning, between
86
+		// the time of the IsRunning() call above and now it stopped.
87
+		// Also, since the err return will be exec driver specific we can't
88
+		// look for any particular (common) error that would indicate
89
+		// that the process is already dead vs something else going wrong.
90
+		// So, instead we'll give it up to 2 more seconds to complete and if
91
+		// by that time the container is still running, then the error
92
+		// we got is probably valid and so we return it to the caller.
93
+
94
+		if container.IsRunning() {
95
+			container.WaitStop(2 * time.Second)
96
+			if container.IsRunning() {
97
+				return err
98
+			}
99
+		}
100
+	}
101
+
102
+	// 2. Wait for the process to die; as a last resort, try to kill the process directly
103
+	if err := killProcessDirectly(container); err != nil {
104
+		return err
105
+	}
106
+
107
+	container.WaitStop(-1 * time.Second)
108
+	return nil
109
+}
110
+
111
+// killPossiblyDeadProcess is a wrapper around killWithSignal() suppressing "no such process" error.
112
+func (daemon *Daemon) killPossiblyDeadProcess(container *Container, sig int) error {
113
+	err := daemon.killWithSignal(container, sig)
114
+	if err == syscall.ESRCH {
115
+		logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.GetPID(), sig)
116
+		return nil
117
+	}
118
+	return err
119
+}
... ...
@@ -15,8 +15,32 @@ func (daemon *Daemon) ContainerRestart(name string, seconds int) error {
15 15
 	if err != nil {
16 16
 		return err
17 17
 	}
18
-	if err := container.Restart(seconds); err != nil {
18
+	if err := daemon.containerRestart(container, seconds); err != nil {
19 19
 		return derr.ErrorCodeCantRestart.WithArgs(name, err)
20 20
 	}
21 21
 	return nil
22 22
 }
23
+
24
+// containerRestart attempts to gracefully stop and then start the
25
+// container. When stopping, wait for the given duration in seconds to
26
+// gracefully stop, before forcefully terminating the container. If
27
+// given a negative duration, wait forever for a graceful stop.
28
+func (daemon *Daemon) containerRestart(container *Container, seconds int) error {
29
+	// Avoid unnecessarily unmounting and then directly mounting
30
+	// the container when the container stops and then starts
31
+	// again
32
+	if err := container.Mount(); err == nil {
33
+		defer container.Unmount()
34
+	}
35
+
36
+	if err := daemon.containerStop(container, seconds); err != nil {
37
+		return err
38
+	}
39
+
40
+	if err := container.Start(); err != nil {
41
+		return err
42
+	}
43
+
44
+	daemon.logContainerEvent(container, "restart")
45
+	return nil
46
+}
... ...
@@ -134,7 +134,7 @@ func (s *State) waitRunning(timeout time.Duration) (int, error) {
134 134
 	if err := wait(waitChan, timeout); err != nil {
135 135
 		return -1, err
136 136
 	}
137
-	return s.getPID(), nil
137
+	return s.GetPID(), nil
138 138
 }
139 139
 
140 140
 // WaitStop waits until state is stopped. If state already stopped it returns
... ...
@@ -164,7 +164,7 @@ func (s *State) IsRunning() bool {
164 164
 }
165 165
 
166 166
 // GetPID holds the process id of a container.
167
-func (s *State) getPID() int {
167
+func (s *State) GetPID() int {
168 168
 	s.Lock()
169 169
 	res := s.Pid
170 170
 	s.Unlock()
... ...
@@ -1,6 +1,9 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"time"
5
+
6
+	"github.com/Sirupsen/logrus"
4 7
 	derr "github.com/docker/docker/errors"
5 8
 )
6 9
 
... ...
@@ -18,8 +21,40 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error {
18 18
 	if !container.IsRunning() {
19 19
 		return derr.ErrorCodeStopped
20 20
 	}
21
-	if err := container.Stop(seconds); err != nil {
21
+	if err := daemon.containerStop(container, seconds); err != nil {
22 22
 		return derr.ErrorCodeCantStop.WithArgs(name, err)
23 23
 	}
24 24
 	return nil
25 25
 }
26
+
27
+// containerStop halts a container by sending a stop signal, waiting for the given
28
+// duration in seconds, and then sending SIGKILL and waiting for the
29
+// process to exit. If a negative duration is given, containerStop will wait
30
+// for the initial signal forever. If the container is not running, containerStop returns
31
+// immediately.
32
+func (daemon *Daemon) containerStop(container *Container, seconds int) error {
33
+	if !container.IsRunning() {
34
+		return nil
35
+	}
36
+
37
+	// 1. Send a SIGTERM
38
+	if err := daemon.killPossiblyDeadProcess(container, container.stopSignal()); err != nil {
39
+		logrus.Infof("Failed to send SIGTERM to the process, force killing")
40
+		if err := daemon.killPossiblyDeadProcess(container, 9); err != nil {
41
+			return err
42
+		}
43
+	}
44
+
45
+	// 2. Wait for the process to exit on its own
46
+	if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil {
47
+		logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds)
48
+		// 3. If it doesn't, then send SIGKILL
49
+		if err := daemon.Kill(container); err != nil {
50
+			container.WaitStop(-1 * time.Second)
51
+			logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it
52
+		}
53
+	}
54
+
55
+	daemon.logContainerEvent(container, "stop")
56
+	return nil
57
+}