Browse code

Fix inefficient file paths filter

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>

Tonis Tiigi authored on 2017/03/09 07:23:25
Showing 4 changed files
... ...
@@ -2403,7 +2403,7 @@ func (s *DockerSuite) TestBuildDockerignoringBadExclusion(c *check.C) {
2403 2403
 		withFile(".dockerignore", "!\n"),
2404 2404
 	)).Assert(c, icmd.Expected{
2405 2405
 		ExitCode: 1,
2406
-		Err:      "Error checking context: 'Illegal exclusion pattern: !",
2406
+		Err:      "Error checking context: 'illegal exclusion pattern: \"!\"",
2407 2407
 	})
2408 2408
 }
2409 2409
 
... ...
@@ -553,8 +553,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
553 553
 	// on platforms other than Windows.
554 554
 	srcPath = fixVolumePathPrefix(srcPath)
555 555
 
556
-	patterns, patDirs, exceptions, err := fileutils.CleanPatterns(options.ExcludePatterns)
557
-
556
+	pm, err := fileutils.NewPatternMatcher(options.ExcludePatterns)
558 557
 	if err != nil {
559 558
 		return nil, err
560 559
 	}
... ...
@@ -651,7 +650,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
651 651
 				// is asking for that file no matter what - which is true
652 652
 				// for some files, like .dockerignore and Dockerfile (sometimes)
653 653
 				if include != relFilePath {
654
-					skip, err = fileutils.OptimizedMatches(relFilePath, patterns, patDirs)
654
+					skip, err = pm.Matches(relFilePath)
655 655
 					if err != nil {
656 656
 						logrus.Errorf("Error matching %s: %v", relFilePath, err)
657 657
 						return err
... ...
@@ -670,18 +669,17 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
670 670
 					}
671 671
 
672 672
 					// No exceptions (!...) in patterns so just skip dir
673
-					if !exceptions {
673
+					if !pm.Exclusions() {
674 674
 						return filepath.SkipDir
675 675
 					}
676 676
 
677 677
 					dirSlash := relFilePath + string(filepath.Separator)
678 678
 
679
-					for _, pat := range patterns {
680
-						if pat[0] != '!' {
679
+					for _, pat := range pm.Patterns() {
680
+						if !pat.Exclusion() {
681 681
 							continue
682 682
 						}
683
-						pat = pat[1:] + string(filepath.Separator)
684
-						if strings.HasPrefix(pat, dirSlash) {
683
+						if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
685 684
 							// found a match - so can't skip this dir
686 685
 							return nil
687 686
 						}
... ...
@@ -13,98 +13,74 @@ import (
13 13
 	"github.com/Sirupsen/logrus"
14 14
 )
15 15
 
16
-// exclusion returns true if the specified pattern is an exclusion
17
-func exclusion(pattern string) bool {
18
-	return pattern[0] == '!'
16
+// PatternMatcher allows checking paths agaist a list of patterns
17
+type PatternMatcher struct {
18
+	patterns   []*Pattern
19
+	exclusions bool
19 20
 }
20 21
 
21
-// empty returns true if the specified pattern is empty
22
-func empty(pattern string) bool {
23
-	return pattern == ""
24
-}
25
-
26
-// CleanPatterns takes a slice of patterns returns a new
27
-// slice of patterns cleaned with filepath.Clean, stripped
28
-// of any empty patterns and lets the caller know whether the
29
-// slice contains any exception patterns (prefixed with !).
30
-func CleanPatterns(patterns []string) ([]string, [][]string, bool, error) {
31
-	// Loop over exclusion patterns and:
32
-	// 1. Clean them up.
33
-	// 2. Indicate whether we are dealing with any exception rules.
34
-	// 3. Error if we see a single exclusion marker on its own (!).
35
-	cleanedPatterns := []string{}
36
-	patternDirs := [][]string{}
37
-	exceptions := false
38
-	for _, pattern := range patterns {
22
+// NewPatternMatcher creates a new matcher object for specific patterns that can
23
+// be used later to match against patterns against paths
24
+func NewPatternMatcher(patterns []string) (*PatternMatcher, error) {
25
+	pm := &PatternMatcher{
26
+		patterns: make([]*Pattern, 0, len(patterns)),
27
+	}
28
+	for _, p := range patterns {
39 29
 		// Eliminate leading and trailing whitespace.
40
-		pattern = strings.TrimSpace(pattern)
41
-		if empty(pattern) {
30
+		p = strings.TrimSpace(p)
31
+		if p == "" {
42 32
 			continue
43 33
 		}
44
-		if exclusion(pattern) {
45
-			if len(pattern) == 1 {
46
-				return nil, nil, false, errors.New("Illegal exclusion pattern: !")
34
+		p = filepath.Clean(p)
35
+		newp := &Pattern{}
36
+		if p[0] == '!' {
37
+			if len(p) == 1 {
38
+				return nil, errors.New("illegal exclusion pattern: \"!\"")
47 39
 			}
48
-			exceptions = true
40
+			newp.exclusion = true
41
+			p = p[1:]
42
+			pm.exclusions = true
49 43
 		}
50
-		pattern = filepath.Clean(pattern)
51
-		cleanedPatterns = append(cleanedPatterns, pattern)
52
-		if exclusion(pattern) {
53
-			pattern = pattern[1:]
44
+		// Do some syntax checking on the pattern.
45
+		// filepath's Match() has some really weird rules that are inconsistent
46
+		// so instead of trying to dup their logic, just call Match() for its
47
+		// error state and if there is an error in the pattern return it.
48
+		// If this becomes an issue we can remove this since its really only
49
+		// needed in the error (syntax) case - which isn't really critical.
50
+		if _, err := filepath.Match(p, "."); err != nil {
51
+			return nil, err
54 52
 		}
55
-		patternDirs = append(patternDirs, strings.Split(pattern, string(os.PathSeparator)))
56
-	}
57
-
58
-	return cleanedPatterns, patternDirs, exceptions, nil
59
-}
60
-
61
-// Matches returns true if file matches any of the patterns
62
-// and isn't excluded by any of the subsequent patterns.
63
-func Matches(file string, patterns []string) (bool, error) {
64
-	file = filepath.Clean(file)
65
-
66
-	if file == "." {
67
-		// Don't let them exclude everything, kind of silly.
68
-		return false, nil
53
+		newp.cleanedPattern = p
54
+		newp.dirs = strings.Split(p, string(os.PathSeparator))
55
+		pm.patterns = append(pm.patterns, newp)
69 56
 	}
70
-
71
-	patterns, patDirs, _, err := CleanPatterns(patterns)
72
-	if err != nil {
73
-		return false, err
74
-	}
75
-
76
-	return OptimizedMatches(file, patterns, patDirs)
57
+	return pm, nil
77 58
 }
78 59
 
79
-// OptimizedMatches is basically the same as fileutils.Matches() but optimized for archive.go.
80
-// It will assume that the inputs have been preprocessed and therefore the function
81
-// doesn't need to do as much error checking and clean-up. This was done to avoid
82
-// repeating these steps on each file being checked during the archive process.
83
-// The more generic fileutils.Matches() can't make these assumptions.
84
-func OptimizedMatches(file string, patterns []string, patDirs [][]string) (bool, error) {
60
+// Matches matches path against all the patterns. Matches is not safe to be
61
+// called concurrently
62
+func (pm *PatternMatcher) Matches(file string) (bool, error) {
85 63
 	matched := false
86 64
 	file = filepath.FromSlash(file)
87 65
 	parentPath := filepath.Dir(file)
88 66
 	parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
89 67
 
90
-	for i, pattern := range patterns {
68
+	for _, pattern := range pm.patterns {
91 69
 		negative := false
92 70
 
93
-		if exclusion(pattern) {
71
+		if pattern.exclusion {
94 72
 			negative = true
95
-			pattern = pattern[1:]
96 73
 		}
97 74
 
98
-		match, err := regexpMatch(pattern, file)
75
+		match, err := pattern.match(file)
99 76
 		if err != nil {
100
-			return false, fmt.Errorf("Error in pattern (%s): %s", pattern, err)
77
+			return false, err
101 78
 		}
102 79
 
103 80
 		if !match && parentPath != "." {
104 81
 			// Check to see if the pattern matches one of our parent dirs.
105
-			if len(patDirs[i]) <= len(parentPathDirs) {
106
-				match, _ = regexpMatch(strings.Join(patDirs[i], string(os.PathSeparator)),
107
-					strings.Join(parentPathDirs[:len(patDirs[i])], string(os.PathSeparator)))
82
+			if len(pattern.dirs) <= len(parentPathDirs) {
83
+				match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator)))
108 84
 			}
109 85
 		}
110 86
 
... ...
@@ -120,28 +96,49 @@ func OptimizedMatches(file string, patterns []string, patDirs [][]string) (bool,
120 120
 	return matched, nil
121 121
 }
122 122
 
123
-// regexpMatch tries to match the logic of filepath.Match but
124
-// does so using regexp logic. We do this so that we can expand the
125
-// wildcard set to include other things, like "**" to mean any number
126
-// of directories.  This means that we should be backwards compatible
127
-// with filepath.Match(). We'll end up supporting more stuff, due to
128
-// the fact that we're using regexp, but that's ok - it does no harm.
129
-//
130
-// As per the comment in golangs filepath.Match, on Windows, escaping
131
-// is disabled. Instead, '\\' is treated as path separator.
132
-func regexpMatch(pattern, path string) (bool, error) {
133
-	regStr := "^"
123
+// Exclusions returns true if any of the patterns define exclusions
124
+func (pm *PatternMatcher) Exclusions() bool {
125
+	return pm.exclusions
126
+}
134 127
 
135
-	// Do some syntax checking on the pattern.
136
-	// filepath's Match() has some really weird rules that are inconsistent
137
-	// so instead of trying to dup their logic, just call Match() for its
138
-	// error state and if there is an error in the pattern return it.
139
-	// If this becomes an issue we can remove this since its really only
140
-	// needed in the error (syntax) case - which isn't really critical.
141
-	if _, err := filepath.Match(pattern, path); err != nil {
142
-		return false, err
128
+// Patterns returns array of active patterns
129
+func (pm *PatternMatcher) Patterns() []*Pattern {
130
+	return pm.patterns
131
+}
132
+
133
+// Pattern defines a single regexp used used to filter file paths.
134
+type Pattern struct {
135
+	cleanedPattern string
136
+	dirs           []string
137
+	regexp         *regexp.Regexp
138
+	exclusion      bool
139
+}
140
+
141
+func (p *Pattern) String() string {
142
+	return p.cleanedPattern
143
+}
144
+
145
+// Exclusion returns true if this pattern defines exclusion
146
+func (p *Pattern) Exclusion() bool {
147
+	return p.exclusion
148
+}
149
+
150
+func (p *Pattern) match(path string) (bool, error) {
151
+
152
+	if p.regexp == nil {
153
+		if err := p.compile(); err != nil {
154
+			return false, filepath.ErrBadPattern
155
+		}
143 156
 	}
144 157
 
158
+	b := p.regexp.MatchString(path)
159
+
160
+	return b, nil
161
+}
162
+
163
+func (p *Pattern) compile() error {
164
+	regStr := "^"
165
+	pattern := p.cleanedPattern
145 166
 	// Go through the pattern and convert it to a regexp.
146 167
 	// We use a scanner so we can support utf-8 chars.
147 168
 	var scan scanner.Scanner
... ...
@@ -208,14 +205,30 @@ func regexpMatch(pattern, path string) (bool, error) {
208 208
 
209 209
 	regStr += "$"
210 210
 
211
-	res, err := regexp.MatchString(regStr, path)
211
+	re, err := regexp.Compile(regStr)
212
+	if err != nil {
213
+		return err
214
+	}
215
+
216
+	p.regexp = re
217
+	return nil
218
+}
212 219
 
213
-	// Map regexp's error to filepath's so no one knows we're not using filepath
220
+// Matches returns true if file matches any of the patterns
221
+// and isn't excluded by any of the subsequent patterns.
222
+func Matches(file string, patterns []string) (bool, error) {
223
+	pm, err := NewPatternMatcher(patterns)
214 224
 	if err != nil {
215
-		err = filepath.ErrBadPattern
225
+		return false, err
226
+	}
227
+	file = filepath.Clean(file)
228
+
229
+	if file == "." {
230
+		// Don't let them exclude everything, kind of silly.
231
+		return false, nil
216 232
 	}
217 233
 
218
-	return res, err
234
+	return pm.Matches(file)
219 235
 }
220 236
 
221 237
 // CopyFile copies from src to dst until either EOF is reached
... ...
@@ -277,14 +277,6 @@ func TestSingleExclamationError(t *testing.T) {
277 277
 	}
278 278
 }
279 279
 
280
-// A string preceded with a ! should return true from Exclusion.
281
-func TestExclusion(t *testing.T) {
282
-	exclusion := exclusion("!")
283
-	if !exclusion {
284
-		t.Errorf("failed to get true for a single !, got %v", exclusion)
285
-	}
286
-}
287
-
288 280
 // Matches with no patterns
289 281
 func TestMatchesWithNoPatterns(t *testing.T) {
290 282
 	matches, err := Matches("/any/path/there", []string{})
... ...
@@ -335,7 +327,7 @@ func TestMatches(t *testing.T) {
335 335
 		{"dir/**", "dir/dir2/file", true},
336 336
 		{"dir/**", "dir/dir2/file/", true},
337 337
 		{"**/dir2/*", "dir/dir2/file", true},
338
-		{"**/dir2/*", "dir/dir2/file/", false},
338
+		{"**/dir2/*", "dir/dir2/file/", true},
339 339
 		{"**/dir2/**", "dir/dir2/dir3/file", true},
340 340
 		{"**/dir2/**", "dir/dir2/dir3/file/", true},
341 341
 		{"**file", "file", true},
... ...
@@ -384,73 +376,82 @@ func TestMatches(t *testing.T) {
384 384
 	}
385 385
 
386 386
 	for _, test := range tests {
387
-		res, _ := regexpMatch(test.pattern, test.text)
387
+		pm, err := NewPatternMatcher([]string{test.pattern})
388
+		if err != nil {
389
+			t.Fatalf("invalid pattern %s", test.pattern)
390
+		}
391
+		res, _ := pm.Matches(test.text)
388 392
 		if res != test.pass {
389 393
 			t.Fatalf("Failed: %v - res:%v", test, res)
390 394
 		}
391 395
 	}
392 396
 }
393 397
 
394
-// An empty string should return true from Empty.
395
-func TestEmpty(t *testing.T) {
396
-	empty := empty("")
397
-	if !empty {
398
-		t.Errorf("failed to get true for an empty string, got %v", empty)
399
-	}
400
-}
401
-
402 398
 func TestCleanPatterns(t *testing.T) {
403
-	cleaned, _, _, _ := CleanPatterns([]string{"docs", "config"})
399
+	patterns := []string{"docs", "config"}
400
+	pm, err := NewPatternMatcher(patterns)
401
+	if err != nil {
402
+		t.Fatalf("invalid pattern %v", patterns)
403
+	}
404
+	cleaned := pm.Patterns()
404 405
 	if len(cleaned) != 2 {
405 406
 		t.Errorf("expected 2 element slice, got %v", len(cleaned))
406 407
 	}
407 408
 }
408 409
 
409 410
 func TestCleanPatternsStripEmptyPatterns(t *testing.T) {
410
-	cleaned, _, _, _ := CleanPatterns([]string{"docs", "config", ""})
411
+	patterns := []string{"docs", "config", ""}
412
+	pm, err := NewPatternMatcher(patterns)
413
+	if err != nil {
414
+		t.Fatalf("invalid pattern %v", patterns)
415
+	}
416
+	cleaned := pm.Patterns()
411 417
 	if len(cleaned) != 2 {
412 418
 		t.Errorf("expected 2 element slice, got %v", len(cleaned))
413 419
 	}
414 420
 }
415 421
 
416 422
 func TestCleanPatternsExceptionFlag(t *testing.T) {
417
-	_, _, exceptions, _ := CleanPatterns([]string{"docs", "!docs/README.md"})
418
-	if !exceptions {
419
-		t.Errorf("expected exceptions to be true, got %v", exceptions)
423
+	patterns := []string{"docs", "!docs/README.md"}
424
+	pm, err := NewPatternMatcher(patterns)
425
+	if err != nil {
426
+		t.Fatalf("invalid pattern %v", patterns)
427
+	}
428
+	if !pm.Exclusions() {
429
+		t.Errorf("expected exceptions to be true, got %v", pm.Exclusions())
420 430
 	}
421 431
 }
422 432
 
423 433
 func TestCleanPatternsLeadingSpaceTrimmed(t *testing.T) {
424
-	_, _, exceptions, _ := CleanPatterns([]string{"docs", "  !docs/README.md"})
425
-	if !exceptions {
426
-		t.Errorf("expected exceptions to be true, got %v", exceptions)
434
+	patterns := []string{"docs", "  !docs/README.md"}
435
+	pm, err := NewPatternMatcher(patterns)
436
+	if err != nil {
437
+		t.Fatalf("invalid pattern %v", patterns)
438
+	}
439
+	if !pm.Exclusions() {
440
+		t.Errorf("expected exceptions to be true, got %v", pm.Exclusions())
427 441
 	}
428 442
 }
429 443
 
430 444
 func TestCleanPatternsTrailingSpaceTrimmed(t *testing.T) {
431
-	_, _, exceptions, _ := CleanPatterns([]string{"docs", "!docs/README.md  "})
432
-	if !exceptions {
433
-		t.Errorf("expected exceptions to be true, got %v", exceptions)
445
+	patterns := []string{"docs", "!docs/README.md  "}
446
+	pm, err := NewPatternMatcher(patterns)
447
+	if err != nil {
448
+		t.Fatalf("invalid pattern %v", patterns)
449
+	}
450
+	if !pm.Exclusions() {
451
+		t.Errorf("expected exceptions to be true, got %v", pm.Exclusions())
434 452
 	}
435 453
 }
436 454
 
437 455
 func TestCleanPatternsErrorSingleException(t *testing.T) {
438
-	_, _, _, err := CleanPatterns([]string{"!"})
456
+	patterns := []string{"!"}
457
+	_, err := NewPatternMatcher(patterns)
439 458
 	if err == nil {
440 459
 		t.Errorf("expected error on single exclamation point, got %v", err)
441 460
 	}
442 461
 }
443 462
 
444
-func TestCleanPatternsFolderSplit(t *testing.T) {
445
-	_, dirs, _, _ := CleanPatterns([]string{"docs/config/CONFIG.md"})
446
-	if dirs[0][0] != "docs" {
447
-		t.Errorf("expected first element in dirs slice to be docs, got %v", dirs[0][1])
448
-	}
449
-	if dirs[0][1] != "config" {
450
-		t.Errorf("expected first element in dirs slice to be config, got %v", dirs[0][1])
451
-	}
452
-}
453
-
454 463
 func TestCreateIfNotExistsDir(t *testing.T) {
455 464
 	tempFolder, err := ioutil.TempDir("", "docker-fileutils-test")
456 465
 	if err != nil {
... ...
@@ -508,7 +509,7 @@ var matchTests = []matchTest{
508 508
 	{"*c", "abc", true, nil},
509 509
 	{"a*", "a", true, nil},
510 510
 	{"a*", "abc", true, nil},
511
-	{"a*", "ab/c", false, nil},
511
+	{"a*", "ab/c", true, nil},
512 512
 	{"a*/b", "abc/b", true, nil},
513 513
 	{"a*/b", "a/c/b", false, nil},
514 514
 	{"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil},
... ...
@@ -579,7 +580,7 @@ func TestMatch(t *testing.T) {
579 579
 			pattern = filepath.Clean(pattern)
580 580
 			s = filepath.Clean(s)
581 581
 		}
582
-		ok, err := regexpMatch(pattern, s)
582
+		ok, err := Matches(s, []string{pattern})
583 583
 		if ok != tt.match || err != tt.err {
584 584
 			t.Fatalf("Match(%#q, %#q) = %v, %q want %v, %q", pattern, s, ok, errp(err), tt.match, errp(tt.err))
585 585
 		}