Browse code

Restart containers based on restart policy

Signed-off-by: Michael Crosby <michael@docker.com>

Michael Crosby authored on 2014/08/05 07:53:10
Showing 3 changed files
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"io"
8 8
 	"io/ioutil"
9 9
 	"os"
10
+	"os/exec"
10 11
 	"path"
11 12
 	"path/filepath"
12 13
 	"strings"
... ...
@@ -75,6 +76,7 @@ type Container struct {
75 75
 
76 76
 	daemon                   *Daemon
77 77
 	MountLabel, ProcessLabel string
78
+	RestartCount             int
78 79
 
79 80
 	Volumes map[string]string
80 81
 	// Store rw/ro in a separate structure to preserve reverse-compatibility on-disk.
... ...
@@ -82,7 +84,8 @@ type Container struct {
82 82
 	VolumesRW  map[string]bool
83 83
 	hostConfig *runconfig.HostConfig
84 84
 
85
-	activeLinks map[string]*links.Link
85
+	activeLinks   map[string]*links.Link
86
+	requestedStop bool
86 87
 }
87 88
 
88 89
 func (container *Container) FromDisk() error {
... ...
@@ -277,6 +280,7 @@ func (container *Container) Start() (err error) {
277 277
 	if container.State.IsRunning() {
278 278
 		return nil
279 279
 	}
280
+
280 281
 	// if we encounter an error during start we need to ensure that any other
281 282
 	// setup has been cleaned up properly
282 283
 	defer func() {
... ...
@@ -312,9 +316,6 @@ func (container *Container) Start() (err error) {
312 312
 	if err := setupMountsForContainer(container); err != nil {
313 313
 		return err
314 314
 	}
315
-	if err := container.startLoggingToDisk(); err != nil {
316
-		return err
317
-	}
318 315
 
319 316
 	return container.waitForStart()
320 317
 }
... ...
@@ -497,35 +498,105 @@ func (container *Container) releaseNetwork() {
497 497
 
498 498
 func (container *Container) monitor(callback execdriver.StartCallback) error {
499 499
 	var (
500
-		err      error
501
-		exitCode int
500
+		err       error
501
+		exitCode  int
502
+		failCount int
502 503
 	)
503 504
 
504
-	pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin)
505
-	exitCode, err = container.daemon.Run(container, pipes, callback)
506
-	if err != nil {
507
-		log.Errorf("Error running container: %s", err)
508
-	}
509
-	container.State.SetStopped(exitCode)
505
+	// reset the restart count; it starts at -1 because the loop below increments it before every run, including the first
506
+	container.RestartCount = -1
507
+	container.requestedStop = false
510 508
 
511
-	// Cleanup
512
-	container.cleanup()
509
+	for {
510
+		container.RestartCount++
513 511
 
514
-	// Re-create a brand new stdin pipe once the container exited
515
-	if container.Config.OpenStdin {
516
-		container.stdin, container.stdinPipe = io.Pipe()
517
-	}
518
-	container.LogEvent("die")
519
-	// If the engine is shutting down, don't save the container state as stopped.
520
-	// This will cause it to be restarted when the engine is restarted.
521
-	if container.daemon != nil && container.daemon.eng != nil && !container.daemon.eng.IsShutdown() {
522
-		if err := container.toDisk(); err != nil {
523
-			log.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err)
512
+		pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin)
513
+		if err := container.startLoggingToDisk(); err != nil {
514
+			return err
515
+		}
516
+
517
+		if exitCode, err = container.daemon.Run(container, pipes, callback); err != nil {
518
+			failCount++
519
+
520
+			if failCount == 100 {
521
+				return err
522
+			}
523
+
524
+			utils.Errorf("Error running container: %s", err)
525
+		}
526
+
527
+		// We still wait to set the state as stopped and ensure that the locks were released
528
+		container.State.SetStopped(exitCode)
529
+
530
+		if container.Config.OpenStdin {
531
+			if err := container.stdin.Close(); err != nil {
532
+				utils.Errorf("%s: Error close stdin: %s", container.ID, err)
533
+			}
534
+		}
535
+
536
+		if err := container.stdout.Clean(); err != nil {
537
+			utils.Errorf("%s: Error close stdout: %s", container.ID, err)
538
+		}
539
+
540
+		if err := container.stderr.Clean(); err != nil {
541
+			utils.Errorf("%s: Error close stderr: %s", container.ID, err)
542
+		}
543
+
544
+		if container.command != nil && container.command.Terminal != nil {
545
+			if err := container.command.Terminal.Close(); err != nil {
546
+				utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
547
+			}
548
+		}
549
+
550
+		// Re-create a brand new stdin pipe once the container exited
551
+		if container.Config.OpenStdin {
552
+			container.stdin, container.stdinPipe = io.Pipe()
553
+		}
554
+
555
+		if container.daemon != nil && container.daemon.srv != nil {
556
+			container.daemon.srv.LogEvent("die", container.ID, container.daemon.repositories.ImageName(container.Image))
557
+		}
558
+
559
+		policy := container.hostConfig.RestartPolicy
560
+
561
+		if (policy == "always" || (policy == "on-failure" && exitCode != 0)) && !container.requestedStop {
562
+			container.command.Cmd = copyCmd(&container.command.Cmd)
563
+			time.Sleep(1 * time.Second)
564
+		} else {
565
+			// do not restart the container, let it die
566
+			// Cleanup networking and mounts
567
+			container.cleanup()
568
+
569
+			if container.daemon != nil && container.daemon.srv != nil && container.daemon.srv.IsRunning() {
570
+				// FIXME: there is a race condition here between two RUN instructions in a Dockerfile
571
+				// because they share the same runconfig and change the image. This must be fixed
572
+				// in builder/builder.go
573
+				if err := container.toDisk(); err != nil {
574
+					utils.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err)
575
+				}
576
+			}
577
+
578
+			return err
524 579
 		}
525 580
 	}
526
-	return err
527 581
 }
528 582
 
583
+func copyCmd(c *exec.Cmd) exec.Cmd {
584
+	return exec.Cmd{
585
+		Stdin:       c.Stdin,
586
+		Stdout:      c.Stdout,
587
+		Stderr:      c.Stderr,
588
+		Path:        c.Path,
589
+		Env:         c.Env,
590
+		ExtraFiles:  c.ExtraFiles,
591
+		Args:        c.Args,
592
+		Dir:         c.Dir,
593
+		SysProcAttr: c.SysProcAttr,
594
+	}
595
+}
596
+
597
+// cleanup releases any network resources allocated to the container along with any rules
598
+// around how containers are linked together.  It also unmounts the container's root filesystem.
529 599
 func (container *Container) cleanup() {
530 600
 	container.releaseNetwork()
531 601
 
... ...
@@ -535,22 +606,6 @@ func (container *Container) cleanup() {
535 535
 			link.Disable()
536 536
 		}
537 537
 	}
538
-	if container.Config.OpenStdin {
539
-		if err := container.stdin.Close(); err != nil {
540
-			log.Errorf("%s: Error close stdin: %s", container.ID, err)
541
-		}
542
-	}
543
-	if err := container.stdout.Clean(); err != nil {
544
-		log.Errorf("%s: Error close stdout: %s", container.ID, err)
545
-	}
546
-	if err := container.stderr.Clean(); err != nil {
547
-		log.Errorf("%s: Error close stderr: %s", container.ID, err)
548
-	}
549
-	if container.command != nil && container.command.Terminal != nil {
550
-		if err := container.command.Terminal.Close(); err != nil {
551
-			log.Errorf("%s: Error closing terminal: %s", container.ID, err)
552
-		}
553
-	}
554 538
 
555 539
 	if err := container.Unmount(); err != nil {
556 540
 		log.Errorf("%v: Failed to umount filesystem: %v", container.ID, err)
... ...
@@ -570,6 +625,8 @@ func (container *Container) KillSig(sig int) error {
570 570
 	if !container.State.IsRunning() {
571 571
 		return nil
572 572
 	}
573
+	container.requestedStop = true
574
+
573 575
 	return container.daemon.Kill(container, sig)
574 576
 }
575 577
 
... ...
@@ -1122,6 +1179,7 @@ func (container *Container) waitForStart() error {
1122 1122
 				c.Close()
1123 1123
 			}
1124 1124
 		}
1125
+
1125 1126
 		container.State.SetRunning(command.Pid())
1126 1127
 		if err := container.toDisk(); err != nil {
1127 1128
 			log.Debugf("%s", err)
... ...
@@ -40,6 +40,7 @@ type HostConfig struct {
40 40
 	NetworkMode     NetworkMode
41 41
 	CapAdd          []string
42 42
 	CapDrop         []string
43
+	RestartPolicy   string
43 44
 }
44 45
 
45 46
 func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
... ...
@@ -48,6 +49,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
48 48
 		Privileged:      job.GetenvBool("Privileged"),
49 49
 		PublishAllPorts: job.GetenvBool("PublishAllPorts"),
50 50
 		NetworkMode:     NetworkMode(job.Getenv("NetworkMode")),
51
+		RestartPolicy:   job.Getenv("RestartPolicy"),
51 52
 	}
52 53
 	job.GetenvJson("LxcConf", &hostConfig.LxcConf)
53 54
 	job.GetenvJson("PortBindings", &hostConfig.PortBindings)
... ...
@@ -71,6 +71,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
71 71
 		flCpuShares       = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
72 72
 		flCpuset          = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
73 73
 		flNetMode         = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
74
+		flRestartPolicy   = cmd.String([]string{"-restart"}, "", "Restart policy to apply when the container dies")
74 75
 		// For documentation purpose
75 76
 		_ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.")
76 77
 		_ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container")
... ...
@@ -271,6 +272,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
271 271
 		Devices:         deviceMappings,
272 272
 		CapAdd:          flCapAdd.GetAll(),
273 273
 		CapDrop:         flCapDrop.GetAll(),
274
+		RestartPolicy:   *flRestartPolicy,
274 275
 	}
275 276
 
276 277
 	if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit {