Browse code

logfile: Check if log is closed on close error during rotate

This prevents getting into a situation where a container log cannot make
progress because we tried to rotate a file, got an error, and now the
file is closed. The next time we try to write a log entry it will try
and rotate again but error that the file is already closed.

I wonder if there is more we can do to beef up this rotation logic.
Found this issue while investigating missing logs with errors in the
docker daemon logs like:

```
Failed to log message for json-file: error closing file: close <file>:
file already closed
```

I'm not sure why the original rotation failed since the data was no
longer available.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2020/05/07 01:35:13
Showing 2 changed files
... ...
@@ -188,8 +188,11 @@ func (w *LogFile) checkCapacityAndRotate() error {
188 188
 		w.rotateMu.Lock()
189 189
 		fname := w.f.Name()
190 190
 		if err := w.f.Close(); err != nil {
191
-			w.rotateMu.Unlock()
192
-			return errors.Wrap(err, "error closing file")
191
+			// if there was an error during a prior rotate, the file could already be closed
192
+			if !errors.Is(err, os.ErrClosed) {
193
+				w.rotateMu.Unlock()
194
+				return errors.Wrap(err, "error closing file")
195
+			}
193 196
 		}
194 197
 		if err := rotate(fname, w.maxFiles, w.compress); err != nil {
195 198
 			w.rotateMu.Unlock()
... ...
@@ -11,6 +11,7 @@ import (
11 11
 	"time"
12 12
 
13 13
 	"github.com/docker/docker/daemon/logger"
14
+	"github.com/docker/docker/pkg/pubsub"
14 15
 	"github.com/docker/docker/pkg/tailfile"
15 16
 	"gotest.tools/v3/assert"
16 17
 )
... ...
@@ -235,3 +236,66 @@ func TestFollowLogsProducerGone(t *testing.T) {
235 235
 	default:
236 236
 	}
237 237
 }
238
+
239
+func TestCheckCapacityAndRotate(t *testing.T) {
240
+	dir, err := ioutil.TempDir("", t.Name())
241
+	assert.NilError(t, err)
242
+	defer os.RemoveAll(dir)
243
+
244
+	f, err := ioutil.TempFile(dir, "log")
245
+	assert.NilError(t, err)
246
+
247
+	l := &LogFile{
248
+		f:            f,
249
+		capacity:     5,
250
+		maxFiles:     3,
251
+		compress:     true,
252
+		notifyRotate: pubsub.NewPublisher(0, 1),
253
+		perms:        0600,
254
+		marshal: func(msg *logger.Message) ([]byte, error) {
255
+			return msg.Line, nil
256
+		},
257
+	}
258
+	defer l.Close()
259
+
260
+	assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
261
+
262
+	dStringer := dirStringer{dir}
263
+
264
+	_, err = os.Stat(f.Name() + ".1")
265
+	assert.Assert(t, os.IsNotExist(err), dStringer)
266
+
267
+	assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
268
+	_, err = os.Stat(f.Name() + ".1")
269
+	assert.NilError(t, err, dStringer)
270
+
271
+	assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
272
+	_, err = os.Stat(f.Name() + ".1")
273
+	assert.NilError(t, err, dStringer)
274
+	_, err = os.Stat(f.Name() + ".2.gz")
275
+	assert.NilError(t, err, dStringer)
276
+
277
+	// Now let's simulate a failed rotation where the file was able to be closed but something else happened elsewhere
278
+	// down the line.
279
+	// We want to make sure that we can recover in the case that `l.f` was closed while attempting a rotation.
280
+	l.f.Close()
281
+	assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
282
+}
283
+
284
+type dirStringer struct {
285
+	d string
286
+}
287
+
288
+func (d dirStringer) String() string {
289
+	ls, err := ioutil.ReadDir(d.d)
290
+	if err != nil {
291
+		return ""
292
+	}
293
+	var s strings.Builder
294
+	s.WriteString("\n")
295
+
296
+	for _, fi := range ls {
297
+		s.WriteString(fi.Name() + "\n")
298
+	}
299
+	return s.String()
300
+}