Browse code

Remove restartmanager from libcontainerd

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>

Tonis Tiigi authored on 2016/10/06 05:29:56
Showing 13 changed files
... ...
@@ -292,9 +292,7 @@ func (container *Container) GetRootResourcePath(path string) (string, error) {
292 292
 // ExitOnNext signals to the monitor that it should not restart the container
293 293
 // after we send the kill signal.
294 294
 func (container *Container) ExitOnNext() {
295
-	if container.restartManager != nil {
296
-		container.restartManager.Cancel()
297
-	}
295
+	container.RestartManager().Cancel()
298 296
 }
299 297
 
300 298
 // HostConfigPath returns the path to the container's JSON hostconfig
... ...
@@ -545,7 +543,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
545 545
 // ShouldRestart decides whether the daemon should restart the container or not.
546 546
 // This is based on the container's restart policy.
547 547
 func (container *Container) ShouldRestart() bool {
548
-	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
548
+	shouldRestart, _, _ := container.RestartManager().ShouldRestart(uint32(container.ExitCode()), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
549 549
 	return shouldRestart
550 550
 }
551 551
 
... ...
@@ -941,7 +939,7 @@ func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPo
941 941
 		SetPolicy(containertypes.RestartPolicy)
942 942
 	}
943 943
 
944
-	if rm, ok := container.RestartManager(false).(policySetter); ok {
944
+	if rm, ok := container.RestartManager().(policySetter); ok {
945 945
 		rm.SetPolicy(restartPolicy)
946 946
 	}
947 947
 }
... ...
@@ -956,18 +954,24 @@ func (container *Container) FullHostname() string {
956 956
 }
957 957
 
958 958
 // RestartManager returns the current restartmanager instance connected to container.
959
-func (container *Container) RestartManager(reset bool) restartmanager.RestartManager {
960
-	if reset {
961
-		container.RestartCount = 0
962
-		container.restartManager = nil
963
-	}
959
+func (container *Container) RestartManager() restartmanager.RestartManager {
964 960
 	if container.restartManager == nil {
965 961
 		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
966 962
 	}
967
-
968 963
 	return container.restartManager
969 964
 }
970 965
 
966
+// ResetRestartManager initializes new restartmanager based on container config
967
+func (container *Container) ResetRestartManager(resetCount bool) {
968
+	if container.restartManager != nil {
969
+		container.restartManager.Cancel()
970
+	}
971
+	if resetCount {
972
+		container.RestartCount = 0
973
+	}
974
+	container.restartManager = nil
975
+}
976
+
971 977
 type attachContext struct {
972 978
 	ctx    context.Context
973 979
 	cancel context.CancelFunc
... ...
@@ -189,12 +189,13 @@ func (daemon *Daemon) restore() error {
189 189
 				logrus.Errorf("Failed to migrate old mounts to use new spec format")
190 190
 			}
191 191
 
192
-			rm := c.RestartManager(false)
193 192
 			if c.IsRunning() || c.IsPaused() {
194
-				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
193
+				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
194
+				if err := daemon.containerd.Restore(c.ID); err != nil {
195 195
 					logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err)
196 196
 					return
197 197
 				}
198
+				c.ResetRestartManager(false)
198 199
 				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
199 200
 					options, err := daemon.buildSandboxOptions(c)
200 201
 					if err != nil {
... ...
@@ -300,7 +301,7 @@ func (daemon *Daemon) restore() error {
300 300
 
301 301
 			// Make sure networks are available before starting
302 302
 			daemon.waitForNetworks(c)
303
-			if err := daemon.containerStart(c, ""); err != nil {
303
+			if err := daemon.containerStart(c, "", true); err != nil {
304 304
 				logrus.Errorf("Failed to start container %s: %s", c.ID, err)
305 305
 			}
306 306
 			close(chNotify)
... ...
@@ -372,7 +373,7 @@ func (daemon *Daemon) RestartSwarmContainers() {
372 372
 				group.Add(1)
373 373
 				go func(c *container.Container) {
374 374
 					defer group.Done()
375
-					if err := daemon.containerStart(c, ""); err != nil {
375
+					if err := daemon.containerStart(c, "", true); err != nil {
376 376
 						logrus.Error(err)
377 377
 					}
378 378
 				}(c)
... ...
@@ -6,11 +6,13 @@ import (
6 6
 	"io"
7 7
 	"runtime"
8 8
 	"strconv"
9
+	"time"
9 10
 
10 11
 	"github.com/Sirupsen/logrus"
11 12
 	"github.com/docker/docker/api/types"
12 13
 	"github.com/docker/docker/daemon/exec"
13 14
 	"github.com/docker/docker/libcontainerd"
15
+	"github.com/docker/docker/restartmanager"
14 16
 	"github.com/docker/docker/runconfig"
15 17
 )
16 18
 
... ...
@@ -31,43 +33,57 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
31 31
 		daemon.LogContainerEvent(c, "oom")
32 32
 	case libcontainerd.StateExit:
33 33
 		// if container's AutoRemove flag is set, remove it after clean up
34
-		if c.HostConfig.AutoRemove {
35
-			defer func() {
34
+		autoRemove := func() {
35
+			if c.HostConfig.AutoRemove {
36 36
 				if err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
37 37
 					logrus.Errorf("can't remove container %s: %v", c.ID, err)
38 38
 				}
39
-			}()
39
+			}
40 40
 		}
41
+
41 42
 		c.Lock()
42
-		defer c.Unlock()
43 43
 		c.Wait()
44 44
 		c.Reset(false)
45
-		c.SetStopped(platformConstructExitStatus(e))
45
+
46
+		restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, false, time.Since(c.StartedAt))
47
+		if err == nil && restart {
48
+			c.RestartCount++
49
+			c.SetRestarting(platformConstructExitStatus(e))
50
+		} else {
51
+			c.SetStopped(platformConstructExitStatus(e))
52
+			defer autoRemove()
53
+		}
54
+
55
+		daemon.updateHealthMonitor(c)
46 56
 		attributes := map[string]string{
47 57
 			"exitCode": strconv.Itoa(int(e.ExitCode)),
48 58
 		}
49
-		daemon.updateHealthMonitor(c)
50 59
 		daemon.LogContainerEventWithAttributes(c, "die", attributes)
51 60
 		daemon.Cleanup(c)
52
-		// FIXME: here is race condition between two RUN instructions in Dockerfile
53
-		// because they share same runconfig and change image. Must be fixed
54
-		// in builder/builder.go
61
+
62
+		if err == nil && restart {
63
+			go func() {
64
+				err := <-wait
65
+				if err == nil {
66
+					if err = daemon.containerStart(c, "", false); err != nil {
67
+						logrus.Debugf("failed to restart contianer: %+v", err)
68
+					}
69
+				}
70
+				if err != nil {
71
+					c.SetStopped(platformConstructExitStatus(e))
72
+					defer autoRemove()
73
+					if err != restartmanager.ErrRestartCanceled {
74
+						logrus.Errorf("restartmanger wait error: %+v", err)
75
+					}
76
+				}
77
+			}()
78
+		}
79
+
80
+		defer c.Unlock()
55 81
 		if err := c.ToDisk(); err != nil {
56 82
 			return err
57 83
 		}
58 84
 		return daemon.postRunProcessing(c, e)
59
-	case libcontainerd.StateRestart:
60
-		c.Lock()
61
-		defer c.Unlock()
62
-		c.Reset(false)
63
-		c.RestartCount++
64
-		c.SetRestarting(platformConstructExitStatus(e))
65
-		attributes := map[string]string{
66
-			"exitCode": strconv.Itoa(int(e.ExitCode)),
67
-		}
68
-		daemon.LogContainerEventWithAttributes(c, "die", attributes)
69
-		daemon.updateHealthMonitor(c)
70
-		return c.ToDisk()
71 85
 	case libcontainerd.StateExitProcess:
72 86
 		c.Lock()
73 87
 		defer c.Unlock()
... ...
@@ -32,7 +32,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon
32 32
 		}
33 33
 
34 34
 		if copts != nil {
35
-			newOpts = append(newOpts, *copts...)
35
+			newOpts = append(newOpts, copts...)
36 36
 		}
37 37
 
38 38
 		// Create a new servicing container, which will start, complete the update, and merge back the
... ...
@@ -56,7 +56,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i
56 56
 		}
57 57
 	}
58 58
 
59
-	if err := daemon.containerStart(container, ""); err != nil {
59
+	if err := daemon.containerStart(container, "", true); err != nil {
60 60
 		return err
61 61
 	}
62 62
 
... ...
@@ -14,7 +14,6 @@ import (
14 14
 	"github.com/docker/docker/api/types"
15 15
 	containertypes "github.com/docker/docker/api/types/container"
16 16
 	"github.com/docker/docker/container"
17
-	"github.com/docker/docker/libcontainerd"
18 17
 	"github.com/docker/docker/runconfig"
19 18
 )
20 19
 
... ...
@@ -78,23 +77,23 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos
78 78
 		return err
79 79
 	}
80 80
 
81
-	return daemon.containerStart(container, checkpoint)
81
+	return daemon.containerStart(container, checkpoint, true)
82 82
 }
83 83
 
84 84
 // Start starts a container
85 85
 func (daemon *Daemon) Start(container *container.Container) error {
86
-	return daemon.containerStart(container, "")
86
+	return daemon.containerStart(container, "", true)
87 87
 }
88 88
 
89 89
 // containerStart prepares the container to run by setting up everything the
90 90
 // container needs, such as storage and networking, as well as links
91 91
 // between containers. The container is left waiting for a signal to
92 92
 // begin running.
93
-func (daemon *Daemon) containerStart(container *container.Container, checkpoint string) (err error) {
93
+func (daemon *Daemon) containerStart(container *container.Container, checkpoint string, resetRestartManager bool) (err error) {
94 94
 	container.Lock()
95 95
 	defer container.Unlock()
96 96
 
97
-	if container.Running {
97
+	if resetRestartManager && container.Running { // skip this check if already in restarting step and resetRestartManager==false
98 98
 		return nil
99 99
 	}
100 100
 
... ...
@@ -141,13 +140,13 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
141 141
 		return err
142 142
 	}
143 143
 
144
-	createOptions := []libcontainerd.CreateOption{libcontainerd.WithRestartManager(container.RestartManager(true))}
145
-	copts, err := daemon.getLibcontainerdCreateOptions(container)
144
+	createOptions, err := daemon.getLibcontainerdCreateOptions(container)
146 145
 	if err != nil {
147 146
 		return err
148 147
 	}
149
-	if copts != nil {
150
-		createOptions = append(createOptions, *copts...)
148
+
149
+	if resetRestartManager {
150
+		container.ResetRestartManager(true)
151 151
 	}
152 152
 
153 153
 	if err := daemon.containerd.Create(container.ID, checkpoint, container.CheckpointDir(), *spec, createOptions...); err != nil {
... ...
@@ -7,7 +7,7 @@ import (
7 7
 	"github.com/docker/docker/libcontainerd"
8 8
 )
9 9
 
10
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (*[]libcontainerd.CreateOption, error) {
10
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
11 11
 	createOptions := []libcontainerd.CreateOption{}
12 12
 
13 13
 	// Ensure a runtime has been assigned to this container
... ...
@@ -25,5 +25,5 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
25 25
 	}
26 26
 	createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args))
27 27
 
28
-	return &createOptions, nil
28
+	return createOptions, nil
29 29
 }
... ...
@@ -17,7 +17,7 @@ const (
17 17
 	credentialSpecFileLocation     = "CredentialSpecs"
18 18
 )
19 19
 
20
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (*[]libcontainerd.CreateOption, error) {
20
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
21 21
 	createOptions := []libcontainerd.CreateOption{}
22 22
 
23 23
 	// Are we going to run as a Hyper-V container?
... ...
@@ -139,7 +139,7 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
139 139
 		createOptions = append(createOptions, &libcontainerd.NetworkEndpointsOption{Endpoints: epList, AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery})
140 140
 	}
141 141
 
142
-	return &createOptions, nil
142
+	return createOptions, nil
143 143
 }
144 144
 
145 145
 // getCredentialSpec is a helper function to get the value of a credential spec supplied
... ...
@@ -138,13 +138,8 @@ func (clnt *client) Create(containerID string, checkpoint string, checkpointDir
138 138
 	clnt.lock(containerID)
139 139
 	defer clnt.unlock(containerID)
140 140
 
141
-	if ctr, err := clnt.getContainer(containerID); err == nil {
142
-		if ctr.restarting {
143
-			ctr.restartManager.Cancel()
144
-			ctr.clean()
145
-		} else {
146
-			return fmt.Errorf("Container %s is already active", containerID)
147
-		}
141
+	if _, err := clnt.getContainer(containerID); err == nil {
142
+		return fmt.Errorf("Container %s is already active", containerID)
148 143
 	}
149 144
 
150 145
 	uid, gid, err := getRootIDs(specs.Spec(spec))
... ...
@@ -1,12 +1,5 @@
1 1
 package libcontainerd
2 2
 
3
-import (
4
-	"fmt"
5
-	"time"
6
-
7
-	"github.com/docker/docker/restartmanager"
8
-)
9
-
10 3
 const (
11 4
 	// InitFriendlyName is the name given in the lookup map of processes
12 5
 	// for the first process started in a container.
... ...
@@ -16,25 +9,5 @@ const (
16 16
 
17 17
 type containerCommon struct {
18 18
 	process
19
-	restartManager restartmanager.RestartManager
20
-	restarting     bool
21
-	processes      map[string]*process
22
-	startedAt      time.Time
23
-}
24
-
25
-// WithRestartManager sets the restartmanager to be used with the container.
26
-func WithRestartManager(rm restartmanager.RestartManager) CreateOption {
27
-	return restartManager{rm}
28
-}
29
-
30
-type restartManager struct {
31
-	rm restartmanager.RestartManager
32
-}
33
-
34
-func (rm restartManager) Apply(p interface{}) error {
35
-	if pr, ok := p.(*container); ok {
36
-		pr.restartManager = rm.rm
37
-		return nil
38
-	}
39
-	return fmt.Errorf("WithRestartManager option not supported for this client")
19
+	processes map[string]*process
40 20
 }
... ...
@@ -7,12 +7,10 @@ import (
7 7
 	"os"
8 8
 	"path/filepath"
9 9
 	"syscall"
10
-	"time"
11 10
 
12 11
 	"github.com/Sirupsen/logrus"
13 12
 	containerd "github.com/docker/containerd/api/grpc/types"
14 13
 	"github.com/docker/docker/pkg/ioutils"
15
-	"github.com/docker/docker/restartmanager"
16 14
 	"github.com/opencontainers/runtime-spec/specs-go"
17 15
 	"golang.org/x/net/context"
18 16
 )
... ...
@@ -137,7 +135,6 @@ func (ctr *container) start(checkpoint string, checkpointDir string) error {
137 137
 		ctr.closeFifos(iopipe)
138 138
 		return err
139 139
 	}
140
-	ctr.startedAt = time.Now()
141 140
 	ctr.systemPid = systemPid(resp.Container)
142 141
 	close(createChan)
143 142
 
... ...
@@ -164,7 +161,6 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
164 164
 	defer ctr.client.unlock(ctr.containerID)
165 165
 	switch e.Type {
166 166
 	case StateExit, StatePause, StateResume, StateOOM:
167
-		var waitRestart chan error
168 167
 		st := StateInfo{
169 168
 			CommonStateInfo: CommonStateInfo{
170 169
 				State:    e.Type,
... ...
@@ -179,20 +175,8 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
179 179
 			st.ProcessID = e.Pid
180 180
 			st.State = StateExitProcess
181 181
 		}
182
-		if st.State == StateExit && ctr.restartManager != nil {
183
-			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false, time.Since(ctr.startedAt))
184
-			if err != nil {
185
-				logrus.Warnf("libcontainerd: container %s %v", ctr.containerID, err)
186
-			} else if restart {
187
-				st.State = StateRestart
188
-				ctr.restarting = true
189
-				ctr.client.deleteContainer(e.Id)
190
-				waitRestart = wait
191
-			}
192
-		}
193 182
 
194 183
 		// Remove process from list if we have exited
195
-		// We need to do so here in case the Message Handler decides to restart it.
196 184
 		switch st.State {
197 185
 		case StateExit:
198 186
 			ctr.clean()
... ...
@@ -204,32 +188,6 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
204 204
 			if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
205 205
 				logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
206 206
 			}
207
-			if st.State == StateRestart {
208
-				go func() {
209
-					err := <-waitRestart
210
-					ctr.client.lock(ctr.containerID)
211
-					defer ctr.client.unlock(ctr.containerID)
212
-					ctr.restarting = false
213
-					if err == nil {
214
-						if err = ctr.start("", ""); err != nil {
215
-							logrus.Errorf("libcontainerd: error restarting %v", err)
216
-						}
217
-					}
218
-					if err != nil {
219
-						st.State = StateExit
220
-						ctr.clean()
221
-						ctr.client.q.append(e.Id, func() {
222
-							if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
223
-								logrus.Errorf("libcontainerd: %v", err)
224
-							}
225
-						})
226
-						if err != restartmanager.ErrRestartCanceled {
227
-							logrus.Errorf("libcontainerd: %v", err)
228
-						}
229
-					}
230
-				}()
231
-			}
232
-
233 207
 			if e.Type == StatePause || e.Type == StateResume {
234 208
 				ctr.pauseMonitor.handle(e.Type)
235 209
 			}
... ...
@@ -91,7 +91,6 @@ func (ctr *container) start() error {
91 91
 		}
92 92
 		return err
93 93
 	}
94
-	ctr.startedAt = time.Now()
95 94
 
96 95
 	pid := newProcess.Pid()
97 96
 
... ...
@@ -194,7 +193,6 @@ func (ctr *container) waitProcessExitCode(process *process) int {
194 194
 // equivalent to (in the linux containerd world) where events come in for
195 195
 // state change notifications from containerd.
196 196
 func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
197
-	var waitRestart chan error
198 197
 	logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
199 198
 
200 199
 	exitCode := ctr.waitProcessExitCode(process)
... ...
@@ -234,20 +232,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
234 234
 			logrus.Error(err)
235 235
 		}
236 236
 
237
-		if !ctr.manualStopRequested && ctr.restartManager != nil {
238
-			restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false, time.Since(ctr.startedAt))
239
-			if err != nil {
240
-				logrus.Error(err)
241
-			} else if restart {
242
-				si.State = StateRestart
243
-				ctr.restarting = true
244
-				ctr.client.deleteContainer(ctr.containerID)
245
-				waitRestart = wait
246
-			}
247
-		}
248
-
249 237
 		// Remove process from list if we have exited
250
-		// We need to do so here in case the Message Handler decides to restart it.
251 238
 		if si.State == StateExit {
252 239
 			ctr.client.deleteContainer(ctr.containerID)
253 240
 		}
... ...
@@ -268,24 +253,6 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
268 268
 
269 269
 	logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
270 270
 
271
-	if si.State == StateRestart {
272
-		go func() {
273
-			err := <-waitRestart
274
-			ctr.restarting = false
275
-			if err == nil {
276
-				if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil {
277
-					logrus.Errorf("libcontainerd: error restarting %v", err)
278
-				}
279
-			}
280
-			if err != nil {
281
-				si.State = StateExit
282
-				if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
283
-					logrus.Error(err)
284
-				}
285
-			}
286
-		}()
287
-	}
288
-
289 271
 	return nil
290 272
 }
291 273
 
... ...
@@ -13,7 +13,6 @@ const (
13 13
 	StatePause        = "pause"
14 14
 	StateResume       = "resume"
15 15
 	StateExit         = "exit"
16
-	StateRestart      = "restart"
17 16
 	StateRestore      = "restore"
18 17
 	StateStartProcess = "start-process"
19 18
 	StateExitProcess  = "exit-process"