Browse code

Add --live-restore flag

This flag enables full support of daemonless containers in docker. It
ensures that docker does not stop containers on shutdown or restore and
properly reconnects to the container when restarted.

This is not the default because of backwards compat but should be the
desired outcome for people running containers in prod.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>

Michael Crosby authored on 2016/06/03 03:10:55
Showing 13 changed files
... ...
@@ -71,6 +71,9 @@ func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
71 71
 		args := []string{"--systemd-cgroup=true"}
72 72
 		opts = append(opts, libcontainerd.WithRuntimeArgs(args))
73 73
 	}
74
+	if cli.Config.LiveRestore {
75
+		opts = append(opts, libcontainerd.WithLiveRestore(true))
76
+	}
74 77
 	return opts
75 78
 }
76 79
 
... ...
@@ -90,6 +90,7 @@ type CommonConfig struct {
90 90
 	TrustKeyPath         string              `json:"-"`
91 91
 	CorsHeaders          string              `json:"api-cors-header,omitempty"`
92 92
 	EnableCors           bool                `json:"api-enable-cors,omitempty"`
93
+	LiveRestore          bool                `json:"live-restore,omitempty"`
93 94
 
94 95
 	// ClusterStore is the storage backend used for the cluster information. It is used by both
95 96
 	// multihost networking (to store networks and endpoints information) and by the node discovery
... ...
@@ -82,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
82 82
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
83 83
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
84 84
 	cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerd socket"))
85
+	cmd.BoolVar(&config.LiveRestore, []string{"-live-restore"}, false, usageFn("Enable live restore of docker when containers are still running"))
85 86
 
86 87
 	config.attachExperimentalFlags(cmd, usageFn)
87 88
 }
... ...
@@ -92,6 +92,7 @@ type Daemon struct {
92 92
 	nameIndex                 *registrar.Registrar
93 93
 	linkIndex                 *linkIndex
94 94
 	containerd                libcontainerd.Client
95
+	containerdRemote          libcontainerd.Remote
95 96
 	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
96 97
 }
97 98
 
... ...
@@ -542,6 +543,7 @@ func NewDaemon(config *Config, registryService registry.Service, containerdRemot
542 542
 
543 543
 	d.nameIndex = registrar.NewRegistrar()
544 544
 	d.linkIndex = newLinkIndex()
545
+	d.containerdRemote = containerdRemote
545 546
 
546 547
 	go d.execCommandGC()
547 548
 
... ...
@@ -599,6 +601,11 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
599 599
 // Shutdown stops the daemon.
600 600
 func (daemon *Daemon) Shutdown() error {
601 601
 	daemon.shutdown = true
602
+	// Keep mounts and networking running on daemon shutdown if
603
+	// we are to keep containers running and restore them.
604
+	if daemon.configStore.LiveRestore {
605
+		return nil
606
+	}
602 607
 	if daemon.containers != nil {
603 608
 		logrus.Debug("starting clean shutdown of all containers...")
604 609
 		daemon.containers.ApplyAll(func(c *container.Container) {
... ...
@@ -782,6 +789,7 @@ func (daemon *Daemon) initDiscovery(config *Config) error {
782 782
 // - Daemon max concurrent downloads
783 783
 // - Daemon max concurrent uploads
784 784
 // - Cluster discovery (reconfigure and restart).
785
+// - Daemon live restore
785 786
 func (daemon *Daemon) Reload(config *Config) error {
786 787
 	daemon.configStore.reloadLock.Lock()
787 788
 	defer daemon.configStore.reloadLock.Unlock()
... ...
@@ -796,6 +804,13 @@ func (daemon *Daemon) Reload(config *Config) error {
796 796
 	if config.IsValueSet("debug") {
797 797
 		daemon.configStore.Debug = config.Debug
798 798
 	}
799
+	if config.IsValueSet("live-restore") {
800
+		daemon.configStore.LiveRestore = config.LiveRestore
801
+		if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(config.LiveRestore)); err != nil {
802
+			return err
803
+		}
804
+
805
+	}
799 806
 
800 807
 	// If no value is set for max-concurrent-downloads we assume it is the default value
801 808
 	// We always "reset" as the cost is lightweight and easy to maintain.
... ...
@@ -278,3 +278,16 @@ be viewed using `journalctl -u docker`
278 278
     May 06 00:22:06 localhost.localdomain docker[2495]: time="2015-05-06T00:22:06Z" level="info" msg="-job acceptconnections() = OK (0)"
279 279
 
280 280
 _Note: Using and configuring journal is an advanced topic and is beyond the scope of this article._
281
+
282
+
283
+### Daemonless Containers
284
+
285
+Starting with Docker 1.12, containers can run without Docker or containerd running.  This allows the 
286
+Docker daemon to exit, be upgraded, or recover from a crash without affecting running containers 
287
+on the system.  To enable this functionality you need to add the `--live-restore` flag when
288
+launching `dockerd`.  This will ensure that Docker does not kill containers on graceful shutdown or
289
+on restart, leaving the containers running.
290
+
291
+While the Docker daemon is down, logging will still be captured; however, it is capped at the kernel's pipe buffer size. Once the buffer fills up, the process blocks.
292
+Docker will need to be restarted to flush these buffers.
293
+You can modify the kernel's buffer size by changing `/proc/sys/fs/pipe-max-size`.
... ...
@@ -63,7 +63,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
63 63
 // them now, should remove the mounts.
64 64
 func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
65 65
 	testRequires(c, DaemonIsLinux)
66
-	c.Assert(s.d.StartWithBusybox(), check.IsNil)
66
+	c.Assert(s.d.StartWithBusybox("--live-restore"), check.IsNil)
67 67
 
68 68
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
69 69
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
... ...
@@ -78,7 +78,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
78 78
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
79 79
 
80 80
 	// restart daemon.
81
-	if err := s.d.Restart(); err != nil {
81
+	if err := s.d.Restart("--live-restore"); err != nil {
82 82
 		c.Fatal(err)
83 83
 	}
84 84
 
... ...
@@ -103,7 +103,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
103 103
 
104 104
 // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
105 105
 func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
106
-	if err := s.d.StartWithBusybox(); err != nil {
106
+	if err := s.d.StartWithBusybox("--live-restore"); err != nil {
107 107
 		t.Fatal(err)
108 108
 	}
109 109
 
... ...
@@ -130,7 +130,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check
130 130
 	time.Sleep(3 * time.Second)
131 131
 
132 132
 	// restart the daemon
133
-	if err := s.d.Start(); err != nil {
133
+	if err := s.d.Start("--live-restore"); err != nil {
134 134
 		t.Fatal(err)
135 135
 	}
136 136
 
... ...
@@ -148,7 +148,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check
148 148
 func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
149 149
 	// TODO(mlaventure): Not sure what would the exit code be on windows
150 150
 	testRequires(t, DaemonIsLinux)
151
-	if err := s.d.StartWithBusybox(); err != nil {
151
+	if err := s.d.StartWithBusybox("--live-restore"); err != nil {
152 152
 		t.Fatal(err)
153 153
 	}
154 154
 
... ...
@@ -180,7 +180,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che
180 180
 	time.Sleep(3 * time.Second)
181 181
 
182 182
 	// restart the daemon
183
-	if err := s.d.Start(); err != nil {
183
+	if err := s.d.Start("--live-restore"); err != nil {
184 184
 		t.Fatal(err)
185 185
 	}
186 186
 
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"strings"
9 9
 	"sync"
10 10
 	"syscall"
11
+	"time"
11 12
 
12 13
 	"github.com/Sirupsen/logrus"
13 14
 	containerd "github.com/docker/containerd/api/grpc/types"
... ...
@@ -24,6 +25,7 @@ type client struct {
24 24
 	remote        *remote
25 25
 	q             queue
26 26
 	exitNotifiers map[string]*exitNotifier
27
+	liveRestore   bool
27 28
 }
28 29
 
29 30
 func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error {
... ...
@@ -445,13 +447,48 @@ func (clnt *client) restore(cont *containerd.Container, options ...CreateOption)
445 445
 }
446 446
 
447 447
 func (clnt *client) Restore(containerID string, options ...CreateOption) error {
448
+	if clnt.liveRestore {
449
+		cont, err := clnt.getContainerdContainer(containerID)
450
+		if err == nil && cont.Status != "stopped" {
451
+			if err := clnt.restore(cont, options...); err != nil {
452
+				logrus.Errorf("error restoring %s: %v", containerID, err)
453
+			}
454
+			return nil
455
+		}
456
+		return clnt.setExited(containerID)
457
+	}
458
+
448 459
 	cont, err := clnt.getContainerdContainer(containerID)
449 460
 	if err == nil && cont.Status != "stopped" {
450
-		if err := clnt.restore(cont, options...); err != nil {
451
-			logrus.Errorf("error restoring %s: %v", containerID, err)
461
+		w := clnt.getOrCreateExitNotifier(containerID)
462
+		clnt.lock(cont.Id)
463
+		container := clnt.newContainer(cont.BundlePath)
464
+		container.systemPid = systemPid(cont)
465
+		clnt.appendContainer(container)
466
+		clnt.unlock(cont.Id)
467
+
468
+		container.discardFifos()
469
+
470
+		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
471
+			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
472
+		}
473
+		select {
474
+		case <-time.After(10 * time.Second):
475
+			if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
476
+				logrus.Errorf("error sending sigkill to %v: %v", containerID, err)
477
+			}
478
+			select {
479
+			case <-time.After(2 * time.Second):
480
+			case <-w.wait():
481
+				return nil
482
+			}
483
+		case <-w.wait():
484
+			return nil
452 485
 		}
453
-		return nil
454 486
 	}
487
+
488
+	clnt.deleteContainer(containerID)
489
+
455 490
 	return clnt.setExited(containerID)
456 491
 }
457 492
 
... ...
@@ -2,6 +2,7 @@ package libcontainerd
2 2
 
3 3
 import (
4 4
 	"encoding/json"
5
+	"io"
5 6
 	"io/ioutil"
6 7
 	"os"
7 8
 	"path/filepath"
... ...
@@ -194,3 +195,18 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
194 194
 	}
195 195
 	return nil
196 196
 }
197
+
198
+// discardFifos attempts to fully read the container fifos to unblock processes
199
+// that may be blocked on the writer side.
200
+func (ctr *container) discardFifos() {
201
+	for _, i := range []int{syscall.Stdout, syscall.Stderr} {
202
+		f := ctr.fifo(i)
203
+		c := make(chan struct{})
204
+		go func() {
205
+			close(c) // this channel is used to not close the writer too early, before readonly open has been called.
206
+			io.Copy(ioutil.Discard, openReaderFromFifo(f))
207
+		}()
208
+		<-c
209
+		closeReaderFifo(f) // avoid blocking permanently on open if there is no writer side
210
+	}
211
+}
... ...
@@ -9,6 +9,8 @@ type Remote interface {
9 9
 	// Cleanup stops containerd if it was started by libcontainerd.
10 10
 	// Note this is not used on Windows as there is no remote containerd.
11 11
 	Cleanup()
12
+	// UpdateOptions allows various remote options to be updated at runtime.
13
+	UpdateOptions(...RemoteOption) error
12 14
 }
13 15
 
14 16
 // RemoteOption allows to configure parameters of remotes.
... ...
@@ -52,6 +52,7 @@ type remote struct {
52 52
 	pastEvents    map[string]*containerd.Event
53 53
 	runtimeArgs   []string
54 54
 	daemonWaitCh  chan struct{}
55
+	liveRestore   bool
55 56
 }
56 57
 
57 58
 // New creates a fresh instance of libcontainerd remote.
... ...
@@ -111,6 +112,15 @@ func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
111 111
 	return r, nil
112 112
 }
113 113
 
114
+func (r *remote) UpdateOptions(options ...RemoteOption) error {
115
+	for _, option := range options {
116
+		if err := option.Apply(r); err != nil {
117
+			return err
118
+		}
119
+	}
120
+	return nil
121
+}
122
+
114 123
 func (r *remote) handleConnectionChange() {
115 124
 	var transientFailureCount = 0
116 125
 	state := grpc.Idle
... ...
@@ -184,6 +194,7 @@ func (r *remote) Client(b Backend) (Client, error) {
184 184
 		},
185 185
 		remote:        r,
186 186
 		exitNotifiers: make(map[string]*exitNotifier),
187
+		liveRestore:   r.liveRestore,
187 188
 	}
188 189
 
189 190
 	r.Lock()
... ...
@@ -460,3 +471,21 @@ func (d debugLog) Apply(r Remote) error {
460 460
 	}
461 461
 	return fmt.Errorf("WithDebugLog option not supported for this remote")
462 462
 }
463
+
464
+// WithLiveRestore defines if containers are stopped on shutdown or restored.
465
+func WithLiveRestore(v bool) RemoteOption {
466
+	return liveRestore(v)
467
+}
468
+
469
+type liveRestore bool
470
+
471
+func (l liveRestore) Apply(r Remote) error {
472
+	if remote, ok := r.(*remote); ok {
473
+		remote.liveRestore = bool(l)
474
+		for _, c := range remote.clients {
475
+			c.liveRestore = bool(l)
476
+		}
477
+		return nil
478
+	}
479
+	return fmt.Errorf("WithLiveRestore option not supported for this remote")
480
+}
... ...
@@ -19,7 +19,16 @@ func (r *remote) Client(b Backend) (Client, error) {
19 19
 func (r *remote) Cleanup() {
20 20
 }
21 21
 
22
+func (r *remote) UpdateOptions(opts ...RemoteOption) error {
23
+	return nil
24
+}
25
+
22 26
 // New creates a fresh instance of libcontainerd remote.
23 27
 func New(_ string, _ ...RemoteOption) (Remote, error) {
24 28
 	return &remote{}, nil
25 29
 }
30
+
31
+// WithLiveRestore is a noop on solaris.
32
+func WithLiveRestore(v bool) RemoteOption {
33
+	return nil
34
+}
... ...
@@ -20,8 +20,17 @@ func (r *remote) Client(b Backend) (Client, error) {
20 20
 func (r *remote) Cleanup() {
21 21
 }
22 22
 
23
+func (r *remote) UpdateOptions(opts ...RemoteOption) error {
24
+	return nil
25
+}
26
+
23 27
 // New creates a fresh instance of libcontainerd remote. On Windows,
24 28
 // this is not used as there is no remote containerd process.
25 29
 func New(_ string, _ ...RemoteOption) (Remote, error) {
26 30
 	return &remote{}, nil
27 31
 }
32
+
33
+// WithLiveRestore is a noop on windows.
34
+func WithLiveRestore(v bool) RemoteOption {
35
+	return nil
36
+}
... ...
@@ -42,6 +42,7 @@ dockerd - Enable daemon mode
42 42
 [**--isolation**[=*default*]]
43 43
 [**-l**|**--log-level**[=*info*]]
44 44
 [**--label**[=*[]*]]
45
+[**--live-restore**[=*false*]]
45 46
 [**--log-driver**[=*json-file*]]
46 47
 [**--log-opt**[=*map[]*]]
47 48
 [**--mtu**[=*0*]]
... ...
@@ -195,6 +196,9 @@ is `hyperv`. Linux only supports `default`.
195 195
 **--label**="[]"
196 196
   Set key=value labels to the daemon (displayed in `docker info`)
197 197
 
198
+**--live-restore**=*false*
199
+  Enable live restore of running containers when the daemon starts so that they are not restarted.
200
+
198 201
 **--log-driver**="*json-file*|*syslog*|*journald*|*gelf*|*fluentd*|*awslogs*|*splunk*|*etwlogs*|*gcplogs*|*none*"
199 202
   Default driver for container logs. Default is `json-file`.
200 203
   **Warning**: `docker logs` command works only for `json-file` logging driver.