Browse code

Add func to get an io.Reader for tail operations

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2018/04/06 01:38:06
Showing 2 changed files
... ...
@@ -3,7 +3,9 @@
3 3
 package tailfile // import "github.com/docker/docker/pkg/tailfile"
4 4
 
5 5
 import (
6
+	"bufio"
6 7
 	"bytes"
8
+	"context"
7 9
 	"errors"
8 10
 	"io"
9 11
 	"os"
... ...
@@ -16,51 +18,205 @@ var eol = []byte("\n")
16 16
 // ErrNonPositiveLinesNumber is the error returned when the requested number of lines is zero or negative.
17 17
 var ErrNonPositiveLinesNumber = errors.New("The number of lines to extract from the file must be positive")
18 18
 
19
-//TailFile returns last n lines of reader f (could be a nil).
20
-func TailFile(f io.ReadSeeker, n int) ([][]byte, error) {
21
-	if n <= 0 {
22
-		return nil, ErrNonPositiveLinesNumber
19
+//TailFile returns last n lines of the passed in file.
20
+func TailFile(f *os.File, n int) ([][]byte, error) {
21
+	size, err := f.Seek(0, io.SeekEnd)
22
+	if err != nil {
23
+		return nil, err
23 24
 	}
24
-	size, err := f.Seek(0, os.SEEK_END)
25
+
26
+	rAt := io.NewSectionReader(f, 0, size)
27
+	r, nLines, err := NewTailReader(context.Background(), rAt, n)
25 28
 	if err != nil {
26 29
 		return nil, err
27 30
 	}
28
-	block := -1
29
-	var data []byte
30
-	var cnt int
31
+
32
+	buf := make([][]byte, 0, nLines)
33
+	scanner := bufio.NewScanner(r)
34
+
35
+	for scanner.Scan() {
36
+		buf = append(buf, scanner.Bytes())
37
+	}
38
+	return buf, nil
39
+}
40
+
41
// SizeReaderAt pairs positional reads (io.ReaderAt) with the total size of
// the data being read.
//
// The size reported by Size must not change while the value is in use.
type SizeReaderAt interface {
	Size() int64
	io.ReaderAt
}
47
+
48
+// NewTailReader scopes the passed in reader to just the last N lines passed in
49
+func NewTailReader(ctx context.Context, r SizeReaderAt, reqLines int) (io.Reader, int, error) {
50
+	return NewTailReaderWithDelimiter(ctx, r, reqLines, eol)
51
+}
52
+
53
+// NewTailReaderWithDelimiter scopes the passed in reader to just the last N lines passed in
54
+// In this case a "line" is defined by the passed in delimiter.
55
+//
56
+// Delimiter lengths should be generally small, no more than 12 bytes
57
+func NewTailReaderWithDelimiter(ctx context.Context, r SizeReaderAt, reqLines int, delimiter []byte) (io.Reader, int, error) {
58
+	if reqLines < 1 {
59
+		return nil, 0, ErrNonPositiveLinesNumber
60
+	}
61
+	if len(delimiter) == 0 {
62
+		return nil, 0, errors.New("must provide a delimiter")
63
+	}
64
+	var (
65
+		size      = r.Size()
66
+		tailStart int64
67
+		tailEnd   = size
68
+		found     int
69
+	)
70
+
71
+	if int64(len(delimiter)) >= size {
72
+		return bytes.NewReader(nil), 0, nil
73
+	}
74
+
75
+	scanner := newScanner(r, delimiter)
76
+	for scanner.Scan(ctx) {
77
+		if err := scanner.Err(); err != nil {
78
+			return nil, 0, scanner.Err()
79
+		}
80
+
81
+		found++
82
+		if found == 1 {
83
+			tailEnd = scanner.End()
84
+		}
85
+		if found == reqLines {
86
+			break
87
+		}
88
+	}
89
+
90
+	tailStart = scanner.Start(ctx)
91
+
92
+	if found == 0 {
93
+		return bytes.NewReader(nil), 0, nil
94
+	}
95
+
96
+	if found < reqLines && tailStart != 0 {
97
+		tailStart = 0
98
+	}
99
+	return io.NewSectionReader(r, tailStart, tailEnd-tailStart), found, nil
100
+}
101
+
102
+func newScanner(r SizeReaderAt, delim []byte) *scanner {
103
+	size := r.Size()
104
+	readSize := blockSize
105
+	if readSize > int(size) {
106
+		readSize = int(size)
107
+	}
108
+	// silly case...
109
+	if len(delim) >= readSize/2 {
110
+		readSize = len(delim)*2 + 2
111
+	}
112
+
113
+	return &scanner{
114
+		r:     r,
115
+		pos:   size,
116
+		buf:   make([]byte, readSize),
117
+		delim: delim,
118
+	}
119
+}
120
+
121
+type scanner struct {
122
+	r     SizeReaderAt
123
+	pos   int64
124
+	buf   []byte
125
+	delim []byte
126
+	err   error
127
+	idx   int
128
+	done  bool
129
+}
130
+
131
+func (s *scanner) Start(ctx context.Context) int64 {
132
+	if s.idx > 0 {
133
+		idx := bytes.LastIndex(s.buf[:s.idx], s.delim)
134
+		if idx >= 0 {
135
+			return s.pos + int64(idx) + int64(len(s.delim))
136
+		}
137
+	}
138
+
139
+	// slow path
140
+	buf := make([]byte, len(s.buf))
141
+	copy(buf, s.buf)
142
+
143
+	readAhead := &scanner{
144
+		r:     s.r,
145
+		pos:   s.pos,
146
+		delim: s.delim,
147
+		idx:   s.idx,
148
+		buf:   buf,
149
+	}
150
+
151
+	if !readAhead.Scan(ctx) {
152
+		return 0
153
+	}
154
+	return readAhead.End()
155
+}
156
+
157
+func (s *scanner) End() int64 {
158
+	return s.pos + int64(s.idx) + int64(len(s.delim))
159
+}
160
+
161
+func (s *scanner) Err() error {
162
+	return s.err
163
+}
164
+
165
+func (s *scanner) Scan(ctx context.Context) bool {
166
+	if s.err != nil {
167
+		return false
168
+	}
169
+
31 170
 	for {
32
-		var b []byte
33
-		step := int64(block * blockSize)
34
-		left := size + step // how many bytes to beginning
35
-		if left < 0 {
36
-			if _, err := f.Seek(0, os.SEEK_SET); err != nil {
37
-				return nil, err
38
-			}
39
-			b = make([]byte, blockSize+left)
40
-			if _, err := f.Read(b); err != nil {
41
-				return nil, err
171
+		select {
172
+		case <-ctx.Done():
173
+			s.err = ctx.Err()
174
+			return false
175
+		default:
176
+		}
177
+
178
+		idx := s.idx - len(s.delim)
179
+		if idx < 0 {
180
+			readSize := int(s.pos)
181
+			if readSize > len(s.buf) {
182
+				readSize = len(s.buf)
42 183
 			}
43
-			data = append(b, data...)
44
-			break
45
-		} else {
46
-			b = make([]byte, blockSize)
47
-			if _, err := f.Seek(left, os.SEEK_SET); err != nil {
48
-				return nil, err
184
+
185
+			if readSize < len(s.delim) {
186
+				return false
49 187
 			}
50
-			if _, err := f.Read(b); err != nil {
51
-				return nil, err
188
+
189
+			offset := s.pos - int64(readSize)
190
+			n, err := s.r.ReadAt(s.buf[:readSize], offset)
191
+			if err != nil && err != io.EOF {
192
+				s.err = err
193
+				return false
52 194
 			}
53
-			data = append(b, data...)
195
+
196
+			s.pos -= int64(n)
197
+			idx = n
54 198
 		}
55
-		cnt += bytes.Count(b, eol)
56
-		if cnt > n {
57
-			break
199
+
200
+		s.idx = bytes.LastIndex(s.buf[:idx], s.delim)
201
+		if s.idx >= 0 {
202
+			return true
58 203
 		}
59
-		block--
60
-	}
61
-	lines := bytes.Split(data, eol)
62
-	if n < len(lines) {
63
-		return lines[len(lines)-n-1 : len(lines)-1], nil
204
+
205
+		if len(s.delim) > 1 && s.pos > 0 {
206
+			// in this case, there may be a partial delimiter at the front of the buffer, so set the position forward
207
+			// up to the maximum size partial that could be there so it can be read again in the next iteration with any
208
+			// potential remainder.
209
+			// An example where delimiter is `####`:
210
+			// [##asdfqwerty]
211
+			//    ^
212
+			// This resets the position to where the arrow is pointing.
213
+			// It could actually check if a partial exists and at the front, but that is pretty similar to the indexing
214
+			// code above though a bit more complex since each byte has to be checked (`len(delimiter)-1`) factorial).
215
+			// It's much simpler and cleaner to just re-read `len(delimiter)-1` bytes again.
216
+			s.pos += int64(len(s.delim)) - 1
217
+		}
218
+
64 219
 	}
65
-	return lines[:len(lines)-1], nil
66 220
 }
... ...
@@ -1,9 +1,17 @@
1 1
 package tailfile // import "github.com/docker/docker/pkg/tailfile"
2 2
 
3 3
 import (
4
+	"bufio"
5
+	"bytes"
6
+	"context"
7
+	"fmt"
8
+	"io"
4 9
 	"io/ioutil"
5 10
 	"os"
11
+	"strings"
6 12
 	"testing"
13
+
14
+	"gotest.tools/assert"
7 15
 )
8 16
 
9 17
 func TestTailFile(t *testing.T) {
... ...
@@ -42,7 +50,7 @@ truncated line`)
42 42
 	if _, err := f.Write(testFile); err != nil {
43 43
 		t.Fatal(err)
44 44
 	}
45
-	if _, err := f.Seek(0, os.SEEK_SET); err != nil {
45
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
46 46
 		t.Fatal(err)
47 47
 	}
48 48
 	expected := []string{"last fourth line", "last fifth line"}
... ...
@@ -50,10 +58,12 @@ truncated line`)
50 50
 	if err != nil {
51 51
 		t.Fatal(err)
52 52
 	}
53
+	if len(res) != len(expected) {
54
+		t.Fatalf("\nexpected:\n%s\n\nactual:\n%s", expected, res)
55
+	}
53 56
 	for i, l := range res {
54
-		t.Logf("%s", l)
55 57
 		if expected[i] != string(l) {
56
-			t.Fatalf("Expected line %s, got %s", expected[i], l)
58
+			t.Fatalf("Expected line %q, got %q", expected[i], l)
57 59
 		}
58 60
 	}
59 61
 }
... ...
@@ -71,7 +81,7 @@ truncated line`)
71 71
 	if _, err := f.Write(testFile); err != nil {
72 72
 		t.Fatal(err)
73 73
 	}
74
-	if _, err := f.Seek(0, os.SEEK_SET); err != nil {
74
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
75 75
 		t.Fatal(err)
76 76
 	}
77 77
 	expected := []string{"first line", "second line"}
... ...
@@ -79,8 +89,10 @@ truncated line`)
79 79
 	if err != nil {
80 80
 		t.Fatal(err)
81 81
 	}
82
+	if len(expected) != len(res) {
83
+		t.Fatalf("\nexpected:\n%s\n\nactual:\n%s", expected, res)
84
+	}
82 85
 	for i, l := range res {
83
-		t.Logf("%s", l)
84 86
 		if expected[i] != string(l) {
85 87
 			t.Fatalf("Expected line %s, got %s", expected[i], l)
86 88
 		}
... ...
@@ -116,11 +128,11 @@ truncated line`)
116 116
 	if _, err := f.Write(testFile); err != nil {
117 117
 		t.Fatal(err)
118 118
 	}
119
-	if _, err := f.Seek(0, os.SEEK_SET); err != nil {
119
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
120 120
 		t.Fatal(err)
121 121
 	}
122 122
 	if _, err := TailFile(f, -1); err != ErrNonPositiveLinesNumber {
123
-		t.Fatalf("Expected ErrNonPositiveLinesNumber, got %s", err)
123
+		t.Fatalf("Expected ErrNonPositiveLinesNumber, got %v", err)
124 124
 	}
125 125
 	if _, err := TailFile(f, 0); err != ErrNonPositiveLinesNumber {
126 126
 		t.Fatalf("Expected ErrNonPositiveLinesNumber, got %s", err)
... ...
@@ -146,3 +158,170 @@ func BenchmarkTail(b *testing.B) {
146 146
 		}
147 147
 	}
148 148
 }
149
+
150
+func TestNewTailReader(t *testing.T) {
151
+	t.Parallel()
152
+	ctx := context.Background()
153
+
154
+	for dName, delim := range map[string][]byte{
155
+		"no delimiter":          {},
156
+		"single byte delimiter": {'\n'},
157
+		"2 byte delimiter":      []byte(";\n"),
158
+		"4 byte delimiter":      []byte("####"),
159
+		"8 byte delimiter":      []byte("########"),
160
+		"12 byte delimiter":     []byte("############"),
161
+	} {
162
+		t.Run(dName, func(t *testing.T) {
163
+			delim := delim
164
+			t.Parallel()
165
+
166
+			s1 := "Hello world."
167
+			s2 := "Today is a fine day."
168
+			s3 := "So long, and thanks for all the fish!"
169
+			s4 := strings.Repeat("a", blockSize/2) // same as block size
170
+			s5 := strings.Repeat("a", blockSize)   // just to make sure
171
+			s6 := strings.Repeat("a", blockSize*2) // bigger than block size
172
+			s7 := strings.Repeat("a", blockSize-1) // single line same as block
173
+
174
+			s8 := `{"log":"Don't panic!\n","stream":"stdout","time":"2018-04-04T20:28:44.7207062Z"}`
175
+			jsonTest := make([]string, 0, 20)
176
+			for i := 0; i < 20; i++ {
177
+				jsonTest = append(jsonTest, s8)
178
+			}
179
+
180
+			for _, test := range []struct {
181
+				desc string
182
+				data []string
183
+			}{
184
+				{desc: "one small entry", data: []string{s1}},
185
+				{desc: "several small entries", data: []string{s1, s2, s3}},
186
+				{desc: "various sizes", data: []string{s1, s2, s3, s4, s5, s1, s2, s3, s7, s6}},
187
+				{desc: "multiple lines with one more than block", data: []string{s5, s5, s5, s5, s5}},
188
+				{desc: "multiple lines much bigger than block", data: []string{s6, s6, s6, s6, s6}},
189
+				{desc: "multiple lines same as block", data: []string{s4, s4, s4, s4, s4}},
190
+				{desc: "single line same as block", data: []string{s7}},
191
+				{desc: "single line half block", data: []string{s4}},
192
+				{desc: "single line twice block", data: []string{s6}},
193
+				{desc: "json encoded values", data: jsonTest},
194
+				{desc: "no lines", data: []string{}},
195
+				{desc: "same length as delimiter", data: []string{strings.Repeat("a", len(delim))}},
196
+			} {
197
+				t.Run(test.desc, func(t *testing.T) {
198
+					test := test
199
+					t.Parallel()
200
+
201
+					max := len(test.data)
202
+					if max > 10 {
203
+						max = 10
204
+					}
205
+
206
+					s := strings.Join(test.data, string(delim))
207
+					if len(test.data) > 0 {
208
+						s += string(delim)
209
+					}
210
+
211
+					for i := 1; i <= max; i++ {
212
+						t.Run(fmt.Sprintf("%d lines", i), func(t *testing.T) {
213
+							i := i
214
+							t.Parallel()
215
+
216
+							r := strings.NewReader(s)
217
+							tr, lines, err := NewTailReaderWithDelimiter(ctx, r, i, delim)
218
+							if len(delim) == 0 {
219
+								assert.Assert(t, err != nil)
220
+								assert.Assert(t, lines == 0)
221
+								return
222
+							}
223
+							assert.Assert(t, err)
224
+							assert.Check(t, lines == i, "%d -- %d", lines, i)
225
+
226
+							b, err := ioutil.ReadAll(tr)
227
+							assert.Assert(t, err)
228
+
229
+							expectLines := test.data[len(test.data)-i:]
230
+							assert.Check(t, len(expectLines) == i)
231
+							expect := strings.Join(expectLines, string(delim)) + string(delim)
232
+							assert.Check(t, string(b) == expect, "\n%v\n%v", b, []byte(expect))
233
+						})
234
+					}
235
+
236
+					t.Run("request more lines than available", func(t *testing.T) {
237
+						t.Parallel()
238
+
239
+						r := strings.NewReader(s)
240
+						tr, lines, err := NewTailReaderWithDelimiter(ctx, r, len(test.data)*2, delim)
241
+						if len(delim) == 0 {
242
+							assert.Assert(t, err != nil)
243
+							assert.Assert(t, lines == 0)
244
+							return
245
+						}
246
+						if len(test.data) == 0 {
247
+							assert.Assert(t, err == ErrNonPositiveLinesNumber, err)
248
+							return
249
+						}
250
+
251
+						assert.Assert(t, err)
252
+						assert.Check(t, lines == len(test.data), "%d -- %d", lines, len(test.data))
253
+						b, err := ioutil.ReadAll(tr)
254
+						assert.Assert(t, err)
255
+						assert.Check(t, bytes.Equal(b, []byte(s)), "\n%v\n%v", b, []byte(s))
256
+					})
257
+				})
258
+			}
259
+		})
260
+	}
261
+	t.Run("truncated last line", func(t *testing.T) {
262
+		t.Run("more than available", func(t *testing.T) {
263
+			tail, nLines, err := NewTailReader(ctx, strings.NewReader("a\nb\nextra"), 3)
264
+			assert.Assert(t, err)
265
+			assert.Check(t, nLines == 2, nLines)
266
+
267
+			rdr := bufio.NewReader(tail)
268
+			data, _, err := rdr.ReadLine()
269
+			assert.Assert(t, err)
270
+			assert.Check(t, string(data) == "a", string(data))
271
+
272
+			data, _, err = rdr.ReadLine()
273
+			assert.Assert(t, err)
274
+			assert.Check(t, string(data) == "b", string(data))
275
+
276
+			_, _, err = rdr.ReadLine()
277
+			assert.Assert(t, err == io.EOF, err)
278
+		})
279
+	})
280
+	t.Run("truncated last line", func(t *testing.T) {
281
+		t.Run("exact", func(t *testing.T) {
282
+			tail, nLines, err := NewTailReader(ctx, strings.NewReader("a\nb\nextra"), 2)
283
+			assert.Assert(t, err)
284
+			assert.Check(t, nLines == 2, nLines)
285
+
286
+			rdr := bufio.NewReader(tail)
287
+			data, _, err := rdr.ReadLine()
288
+			assert.Assert(t, err)
289
+			assert.Check(t, string(data) == "a", string(data))
290
+
291
+			data, _, err = rdr.ReadLine()
292
+			assert.Assert(t, err)
293
+			assert.Check(t, string(data) == "b", string(data))
294
+
295
+			_, _, err = rdr.ReadLine()
296
+			assert.Assert(t, err == io.EOF, err)
297
+		})
298
+	})
299
+
300
+	t.Run("truncated last line", func(t *testing.T) {
301
+		t.Run("one line", func(t *testing.T) {
302
+			tail, nLines, err := NewTailReader(ctx, strings.NewReader("a\nb\nextra"), 1)
303
+			assert.Assert(t, err)
304
+			assert.Check(t, nLines == 1, nLines)
305
+
306
+			rdr := bufio.NewReader(tail)
307
+			data, _, err := rdr.ReadLine()
308
+			assert.Assert(t, err)
309
+			assert.Check(t, string(data) == "b", string(data))
310
+
311
+			_, _, err = rdr.ReadLine()
312
+			assert.Assert(t, err == io.EOF, err)
313
+		})
314
+	})
315
+}