Fixes #27779
Currently `followLogs` can get into a deadlock if we receive an inotify
IN_MODIFY event while we are trying to close the `fileWatcher`. This is
due to the fact that closing the `fileWatcher` happens in the same block
that consumes events from the `fileWatcher`. We are trying to run
`fileWatcher.Close`, which is waiting for an IN_IGNORE event to come in
over inotify to confirm the watch has been removed. But, because an
IN_MODIFY event has appeared after `Close` was entered but before the
IN_IGNORE, the broadcast never comes. The IN_MODIFY cannot be consumed
as the events channel is unbuffered and the only `select` that reads
from it is busy waiting for the IN_IGNORE event.
In order to try to fix this race condition I've moved the removal of
the `fileWatcher` out to a separate goroutine that waits for the close
signal, removes the watcher and then cancels a context, which the
previous selects now wait on instead of the close signal.
This has introduced a `fileWatcher.Remove` in the final (deferred
cleanup) case, but if we try to remove a watcher that does not exist it
will just return an error saying so. We are not checking the return
value of `Remove`, so this shouldn't cause any side effects.
Signed-off-by: Tom Booth <tombooth@gmail.com>
| ... | ... |
@@ -9,6 +9,7 @@ import ( |
| 9 | 9 |
"os" |
| 10 | 10 |
"time" |
| 11 | 11 |
|
| 12 |
+ "golang.org/x/net/context" |
|
| 12 | 13 |
"gopkg.in/fsnotify.v1" |
| 13 | 14 |
|
| 14 | 15 |
"github.com/Sirupsen/logrus" |
| ... | ... |
@@ -172,9 +173,22 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
| 172 | 172 |
} |
| 173 | 173 |
defer func() {
|
| 174 | 174 |
f.Close() |
| 175 |
+ fileWatcher.Remove(name) |
|
| 175 | 176 |
fileWatcher.Close() |
| 176 | 177 |
}() |
| 177 | 178 |
|
| 179 |
+ ctx, cancel := context.WithCancel(context.Background()) |
|
| 180 |
+ defer cancel() |
|
| 181 |
+ go func() {
|
|
| 182 |
+ select {
|
|
| 183 |
+ case <-logWatcher.WatchClose(): |
|
| 184 |
+ fileWatcher.Remove(name) |
|
| 185 |
+ cancel() |
|
| 186 |
+ case <-ctx.Done(): |
|
| 187 |
+ return |
|
| 188 |
+ } |
|
| 189 |
+ }() |
|
| 190 |
+ |
|
| 178 | 191 |
var retries int |
| 179 | 192 |
handleRotate := func() error {
|
| 180 | 193 |
f.Close() |
| ... | ... |
@@ -209,8 +223,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
| 209 | 209 |
case fsnotify.Rename, fsnotify.Remove: |
| 210 | 210 |
select {
|
| 211 | 211 |
case <-notifyRotate: |
| 212 |
- case <-logWatcher.WatchClose(): |
|
| 213 |
- fileWatcher.Remove(name) |
|
| 212 |
+ case <-ctx.Done(): |
|
| 214 | 213 |
return errDone |
| 215 | 214 |
} |
| 216 | 215 |
if err := handleRotate(); err != nil {
|
| ... | ... |
@@ -232,8 +245,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
| 232 | 232 |
return errRetry |
| 233 | 233 |
} |
| 234 | 234 |
return err |
| 235 |
- case <-logWatcher.WatchClose(): |
|
| 236 |
- fileWatcher.Remove(name) |
|
| 235 |
+ case <-ctx.Done(): |
|
| 237 | 236 |
return errDone |
| 238 | 237 |
} |
| 239 | 238 |
} |
| ... | ... |
@@ -290,7 +302,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int |
| 290 | 290 |
} |
| 291 | 291 |
select {
|
| 292 | 292 |
case logWatcher.Msg <- msg: |
| 293 |
- case <-logWatcher.WatchClose(): |
|
| 293 |
+ case <-ctx.Done(): |
|
| 294 | 294 |
logWatcher.Msg <- msg |
| 295 | 295 |
for {
|
| 296 | 296 |
msg, err := decodeLogLine(dec, l) |