Fixes #27779
Currently `followLogs` can get into a deadlock if we receive an inotify
IN_MODIFY event while we are trying to close the `fileWatcher`. This is
due to the fact that closing the `fileWatcher` happens in the same block
as consumes events from the `fileWatcher`. We are trying to run
`fileWatcher.Close`, which is waiting for an IN_IGNORE event to come in
over inotify to confirm the watch was been removed. But, because an
IN_MODIFY event has appeared after `Close` was entered but before the
IN_IGNORE, the broadcast never comes. The IN_MODIFY cannot be consumed
as the events channel is unbuffered and the only `select` that reads
from it is busy waiting for the IN_IGNORE event.
In order to try and fix this race condition I've moved the removal of
the `fileWatcher` out to a separate go block that waits for a signal to
close, removes the watcher and then signals to the previous selects on
the close signal.
This has introduced a `fileWatcher.Remove` in the final case, but if we
try and remove a watcher that does not exist it will just return an
error saying so. We are not doing any checking on the return of `Remove`
so this shouldn't cause any side-effects.
Signed-off-by: Tom Booth <tombooth@gmail.com>
... | ... |
@@ -9,6 +9,7 @@ import ( |
9 | 9 |
"os" |
10 | 10 |
"time" |
11 | 11 |
|
12 |
+ "golang.org/x/net/context" |
|
12 | 13 |
"gopkg.in/fsnotify.v1" |
13 | 14 |
|
14 | 15 |
"github.com/Sirupsen/logrus" |
... | ... |
@@ -172,9 +173,22 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
172 | 172 |
} |
173 | 173 |
defer func() { |
174 | 174 |
f.Close() |
175 |
+ fileWatcher.Remove(name) |
|
175 | 176 |
fileWatcher.Close() |
176 | 177 |
}() |
177 | 178 |
|
179 |
+ ctx, cancel := context.WithCancel(context.Background()) |
|
180 |
+ defer cancel() |
|
181 |
+ go func() { |
|
182 |
+ select { |
|
183 |
+ case <-logWatcher.WatchClose(): |
|
184 |
+ fileWatcher.Remove(name) |
|
185 |
+ cancel() |
|
186 |
+ case <-ctx.Done(): |
|
187 |
+ return |
|
188 |
+ } |
|
189 |
+ }() |
|
190 |
+ |
|
178 | 191 |
var retries int |
179 | 192 |
handleRotate := func() error { |
180 | 193 |
f.Close() |
... | ... |
@@ -209,8 +223,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
209 | 209 |
case fsnotify.Rename, fsnotify.Remove: |
210 | 210 |
select { |
211 | 211 |
case <-notifyRotate: |
212 |
- case <-logWatcher.WatchClose(): |
|
213 |
- fileWatcher.Remove(name) |
|
212 |
+ case <-ctx.Done(): |
|
214 | 213 |
return errDone |
215 | 214 |
} |
216 | 215 |
if err := handleRotate(); err != nil { |
... | ... |
@@ -232,8 +245,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
232 | 232 |
return errRetry |
233 | 233 |
} |
234 | 234 |
return err |
235 |
- case <-logWatcher.WatchClose(): |
|
236 |
- fileWatcher.Remove(name) |
|
235 |
+ case <-ctx.Done(): |
|
237 | 236 |
return errDone |
238 | 237 |
} |
239 | 238 |
} |
... | ... |
@@ -290,7 +302,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
290 | 290 |
} |
291 | 291 |
select { |
292 | 292 |
case logWatcher.Msg <- msg: |
293 |
- case <-logWatcher.WatchClose(): |
|
293 |
+ case <-ctx.Done(): |
|
294 | 294 |
logWatcher.Msg <- msg |
295 | 295 |
for { |
296 | 296 |
msg, err := decodeLogLine(dec, l) |