Don't involve code waiting for blocking channel in locked critical
section because it has potential risk of hanging forever.
Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
| ... | ... |
@@ -80,6 +80,7 @@ type containerMonitor struct {
|
| 80 | 80 |
// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy |
| 81 | 81 |
// and starts the container's process. |
| 82 | 82 |
func (container *Container) StartMonitor(s supervisor, policy container.RestartPolicy) error {
|
| 83 |
+ container.Lock() |
|
| 83 | 84 |
container.monitor = &containerMonitor{
|
| 84 | 85 |
supervisor: s, |
| 85 | 86 |
container: container, |
| ... | ... |
@@ -88,6 +89,7 @@ func (container *Container) StartMonitor(s supervisor, policy container.RestartP |
| 88 | 88 |
stopChan: make(chan struct{}),
|
| 89 | 89 |
startSignal: make(chan struct{}),
|
| 90 | 90 |
} |
| 91 |
+ container.Unlock() |
|
| 91 | 92 |
|
| 92 | 93 |
return container.monitor.wait() |
| 93 | 94 |
} |
| ... | ... |
@@ -157,6 +159,8 @@ func (m *containerMonitor) start() error {
|
| 157 | 157 |
} |
| 158 | 158 |
m.Close() |
| 159 | 159 |
}() |
| 160 |
+ |
|
| 161 |
+ m.container.Lock() |
|
| 160 | 162 |
// reset stopped flag |
| 161 | 163 |
if m.container.HasBeenManuallyStopped {
|
| 162 | 164 |
m.container.HasBeenManuallyStopped = false |
| ... | ... |
@@ -171,16 +175,20 @@ func (m *containerMonitor) start() error {
|
| 171 | 171 |
if err := m.supervisor.StartLogging(m.container); err != nil {
|
| 172 | 172 |
m.resetContainer(false) |
| 173 | 173 |
|
| 174 |
+ m.container.Unlock() |
|
| 174 | 175 |
return err |
| 175 | 176 |
} |
| 176 | 177 |
|
| 177 | 178 |
pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin) |
| 179 |
+ m.container.Unlock() |
|
| 178 | 180 |
|
| 179 | 181 |
m.logEvent("start")
|
| 180 | 182 |
|
| 181 | 183 |
m.lastStartTime = time.Now() |
| 182 | 184 |
|
| 185 |
+ // don't lock Run because m.callback has own lock |
|
| 183 | 186 |
if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil {
|
| 187 |
+ m.container.Lock() |
|
| 184 | 188 |
// if we receive an internal error from the initial start of a container then lets |
| 185 | 189 |
// return it instead of entering the restart loop |
| 186 | 190 |
// set to 127 for container cmd not found/does not exist) |
| ... | ... |
@@ -190,6 +198,7 @@ func (m *containerMonitor) start() error {
|
| 190 | 190 |
if m.container.RestartCount == 0 {
|
| 191 | 191 |
m.container.ExitCode = 127 |
| 192 | 192 |
m.resetContainer(false) |
| 193 |
+ m.container.Unlock() |
|
| 193 | 194 |
return derr.ErrorCodeCmdNotFound |
| 194 | 195 |
} |
| 195 | 196 |
} |
| ... | ... |
@@ -198,6 +207,7 @@ func (m *containerMonitor) start() error {
|
| 198 | 198 |
if m.container.RestartCount == 0 {
|
| 199 | 199 |
m.container.ExitCode = 126 |
| 200 | 200 |
m.resetContainer(false) |
| 201 |
+ m.container.Unlock() |
|
| 201 | 202 |
return derr.ErrorCodeCmdCouldNotBeInvoked |
| 202 | 203 |
} |
| 203 | 204 |
} |
| ... | ... |
@@ -206,11 +216,13 @@ func (m *containerMonitor) start() error {
|
| 206 | 206 |
m.container.ExitCode = -1 |
| 207 | 207 |
m.resetContainer(false) |
| 208 | 208 |
|
| 209 |
+ m.container.Unlock() |
|
| 209 | 210 |
return derr.ErrorCodeCantStart.WithArgs(m.container.ID, utils.GetErrorMessage(err)) |
| 210 | 211 |
} |
| 211 | 212 |
|
| 213 |
+ m.container.Unlock() |
|
| 212 | 214 |
logrus.Errorf("Error running container: %s", err)
|
| 213 |
- } |
|
| 215 |
+ } // end if |
|
| 214 | 216 |
|
| 215 | 217 |
// here container.Lock is already lost |
| 216 | 218 |
afterRun = true |
| ... | ... |
@@ -231,13 +243,14 @@ func (m *containerMonitor) start() error {
|
| 231 | 231 |
if m.shouldStop {
|
| 232 | 232 |
return err |
| 233 | 233 |
} |
| 234 |
+ m.container.Lock() |
|
| 234 | 235 |
continue |
| 235 | 236 |
} |
| 236 | 237 |
|
| 237 | 238 |
m.logEvent("die")
|
| 238 | 239 |
m.resetContainer(true) |
| 239 | 240 |
return err |
| 240 |
- } |
|
| 241 |
+ } // end for |
|
| 241 | 242 |
} |
| 242 | 243 |
|
| 243 | 244 |
// resetMonitor resets the stateful fields on the containerMonitor based on the |
| ... | ... |
@@ -319,7 +332,7 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid |
| 319 | 319 |
} |
| 320 | 320 |
} |
| 321 | 321 |
|
| 322 |
- m.container.SetRunning(pid) |
|
| 322 |
+ m.container.SetRunningLocking(pid) |
|
| 323 | 323 |
|
| 324 | 324 |
// signal that the process has started |
| 325 | 325 |
// close channel only if not closed |
| ... | ... |
@@ -179,6 +179,13 @@ func (s *State) getExitCode() int {
|
| 179 | 179 |
return res |
| 180 | 180 |
} |
| 181 | 181 |
|
| 182 |
+// SetRunningLocking locks container and sets it to "running" |
|
| 183 |
+func (s *State) SetRunningLocking(pid int) {
|
|
| 184 |
+ s.Lock() |
|
| 185 |
+ s.SetRunning(pid) |
|
| 186 |
+ s.Unlock() |
|
| 187 |
+} |
|
| 188 |
+ |
|
| 182 | 189 |
// SetRunning sets the state of the container to "running". |
| 183 | 190 |
func (s *State) SetRunning(pid int) {
|
| 184 | 191 |
s.Error = "" |
| ... | ... |
@@ -192,7 +199,7 @@ func (s *State) SetRunning(pid int) {
|
| 192 | 192 |
s.waitChan = make(chan struct{})
|
| 193 | 193 |
} |
| 194 | 194 |
|
| 195 |
-// SetStoppedLocking locks the container state is sets it to "stopped". |
|
| 195 |
+// SetStoppedLocking locks the container state and sets it to "stopped". |
|
| 196 | 196 |
func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) {
|
| 197 | 197 |
s.Lock() |
| 198 | 198 |
s.SetStopped(exitStatus) |
| ... | ... |
@@ -129,9 +129,15 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) |
| 129 | 129 |
mounts = append(mounts, container.TmpfsMounts()...) |
| 130 | 130 |
|
| 131 | 131 |
container.Command.Mounts = mounts |
| 132 |
+ container.Unlock() |
|
| 133 |
+ |
|
| 134 |
+ // don't lock waitForStart because it has potential risk of blocking |
|
| 135 |
+ // which will lead to dead lock, forever. |
|
| 132 | 136 |
if err := daemon.waitForStart(container); err != nil {
|
| 137 |
+ container.Lock() |
|
| 133 | 138 |
return err |
| 134 | 139 |
} |
| 140 |
+ container.Lock() |
|
| 135 | 141 |
container.HasBeenStartedBefore = true |
| 136 | 142 |
return nil |
| 137 | 143 |
} |