Break the big lock into several smaller locks for containerStart
Sebastiaan van Stijn authored on 2016/01/09 08:35:26... | ... |
@@ -80,6 +80,7 @@ type containerMonitor struct { |
80 | 80 |
// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy |
81 | 81 |
// and starts the container's process. |
82 | 82 |
func (container *Container) StartMonitor(s supervisor, policy container.RestartPolicy) error { |
83 |
+ container.Lock() |
|
83 | 84 |
container.monitor = &containerMonitor{ |
84 | 85 |
supervisor: s, |
85 | 86 |
container: container, |
... | ... |
@@ -88,6 +89,7 @@ func (container *Container) StartMonitor(s supervisor, policy container.RestartP |
88 | 88 |
stopChan: make(chan struct{}), |
89 | 89 |
startSignal: make(chan struct{}), |
90 | 90 |
} |
91 |
+ container.Unlock() |
|
91 | 92 |
|
92 | 93 |
return container.monitor.wait() |
93 | 94 |
} |
... | ... |
@@ -157,6 +159,8 @@ func (m *containerMonitor) start() error { |
157 | 157 |
} |
158 | 158 |
m.Close() |
159 | 159 |
}() |
160 |
+ |
|
161 |
+ m.container.Lock() |
|
160 | 162 |
// reset stopped flag |
161 | 163 |
if m.container.HasBeenManuallyStopped { |
162 | 164 |
m.container.HasBeenManuallyStopped = false |
... | ... |
@@ -171,16 +175,20 @@ func (m *containerMonitor) start() error { |
171 | 171 |
if err := m.supervisor.StartLogging(m.container); err != nil { |
172 | 172 |
m.resetContainer(false) |
173 | 173 |
|
174 |
+ m.container.Unlock() |
|
174 | 175 |
return err |
175 | 176 |
} |
176 | 177 |
|
177 | 178 |
pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin) |
179 |
+ m.container.Unlock() |
|
178 | 180 |
|
179 | 181 |
m.logEvent("start") |
180 | 182 |
|
181 | 183 |
m.lastStartTime = time.Now() |
182 | 184 |
|
185 |
+ // don't lock Run because m.callback has its own lock |
|
183 | 186 |
if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil { |
187 |
+ m.container.Lock() |
|
184 | 188 |
// if we receive an internal error from the initial start of a container then lets |
185 | 189 |
// return it instead of entering the restart loop |
186 | 190 |
// set to 127 for container cmd not found/does not exist) |
... | ... |
@@ -190,6 +198,7 @@ func (m *containerMonitor) start() error { |
190 | 190 |
if m.container.RestartCount == 0 { |
191 | 191 |
m.container.ExitCode = 127 |
192 | 192 |
m.resetContainer(false) |
193 |
+ m.container.Unlock() |
|
193 | 194 |
return derr.ErrorCodeCmdNotFound |
194 | 195 |
} |
195 | 196 |
} |
... | ... |
@@ -198,6 +207,7 @@ func (m *containerMonitor) start() error { |
198 | 198 |
if m.container.RestartCount == 0 { |
199 | 199 |
m.container.ExitCode = 126 |
200 | 200 |
m.resetContainer(false) |
201 |
+ m.container.Unlock() |
|
201 | 202 |
return derr.ErrorCodeCmdCouldNotBeInvoked |
202 | 203 |
} |
203 | 204 |
} |
... | ... |
@@ -206,11 +216,13 @@ func (m *containerMonitor) start() error { |
206 | 206 |
m.container.ExitCode = -1 |
207 | 207 |
m.resetContainer(false) |
208 | 208 |
|
209 |
+ m.container.Unlock() |
|
209 | 210 |
return derr.ErrorCodeCantStart.WithArgs(m.container.ID, utils.GetErrorMessage(err)) |
210 | 211 |
} |
211 | 212 |
|
213 |
+ m.container.Unlock() |
|
212 | 214 |
logrus.Errorf("Error running container: %s", err) |
213 |
- } |
|
215 |
+ } // end if |
|
214 | 216 |
|
215 | 217 |
// here container.Lock has already been released |
216 | 218 |
afterRun = true |
... | ... |
@@ -231,13 +243,14 @@ func (m *containerMonitor) start() error { |
231 | 231 |
if m.shouldStop { |
232 | 232 |
return err |
233 | 233 |
} |
234 |
+ m.container.Lock() |
|
234 | 235 |
continue |
235 | 236 |
} |
236 | 237 |
|
237 | 238 |
m.logEvent("die") |
238 | 239 |
m.resetContainer(true) |
239 | 240 |
return err |
240 |
- } |
|
241 |
+ } // end for |
|
241 | 242 |
} |
242 | 243 |
|
243 | 244 |
// resetMonitor resets the stateful fields on the containerMonitor based on the |
... | ... |
@@ -319,7 +332,7 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid |
319 | 319 |
} |
320 | 320 |
} |
321 | 321 |
|
322 |
- m.container.SetRunning(pid) |
|
322 |
+ m.container.SetRunningLocking(pid) |
|
323 | 323 |
|
324 | 324 |
// signal that the process has started |
325 | 325 |
// close channel only if not closed |
... | ... |
@@ -179,6 +179,13 @@ func (s *State) getExitCode() int { |
179 | 179 |
return res |
180 | 180 |
} |
181 | 181 |
|
182 |
+// SetRunningLocking locks container and sets it to "running" |
|
183 |
+func (s *State) SetRunningLocking(pid int) { |
|
184 |
+ s.Lock() |
|
185 |
+ s.SetRunning(pid) |
|
186 |
+ s.Unlock() |
|
187 |
+} |
|
188 |
+ |
|
182 | 189 |
// SetRunning sets the state of the container to "running". |
183 | 190 |
func (s *State) SetRunning(pid int) { |
184 | 191 |
s.Error = "" |
... | ... |
@@ -192,7 +199,7 @@ func (s *State) SetRunning(pid int) { |
192 | 192 |
s.waitChan = make(chan struct{}) |
193 | 193 |
} |
194 | 194 |
|
195 |
-// SetStoppedLocking locks the container state is sets it to "stopped". |
|
195 |
+// SetStoppedLocking locks the container state and sets it to "stopped". |
|
196 | 196 |
func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) { |
197 | 197 |
s.Lock() |
198 | 198 |
s.SetStopped(exitStatus) |
... | ... |
@@ -132,9 +132,15 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) |
132 | 132 |
mounts = append(mounts, container.TmpfsMounts()...) |
133 | 133 |
|
134 | 134 |
container.Command.Mounts = mounts |
135 |
+ container.Unlock() |
|
136 |
+ |
|
137 |
+ // don't lock around waitForStart because it carries a risk of blocking, |
|
138 |
+ // which would lead to a deadlock. |
|
135 | 139 |
if err := daemon.waitForStart(container); err != nil { |
140 |
+ container.Lock() |
|
136 | 141 |
return err |
137 | 142 |
} |
143 |
+ container.Lock() |
|
138 | 144 |
container.HasBeenStartedBefore = true |
139 | 145 |
return nil |
140 | 146 |
} |