Browse code

Merge pull request #22268 from Microsoft/jjh/continuationescape

Support platform semantic file paths through ESCAPE

Arnaud Porterie authored on 2016/05/27 02:00:56
Showing 13 changed files
... ...
@@ -94,12 +94,12 @@ func parseWords(rest string) []string {
94 94
 				blankOK = true
95 95
 				phase = inQuote
96 96
 			}
97
-			if ch == '\\' {
97
+			if ch == tokenEscape {
98 98
 				if pos+1 == len(rest) {
99
-					continue // just skip \ at end
99
+					continue // just skip an escape token at end of line
100 100
 				}
101
-				// If we're not quoted and we see a \, then always just
102
-				// add \ plus the char to the word, even if the char
101
+				// If we're not quoted and we see an escape token, then always just
102
+				// add the escape token plus the char to the word, even if the char
103 103
 				// is a quote.
104 104
 				word += string(ch)
105 105
 				pos++
... ...
@@ -112,11 +112,11 @@ func parseWords(rest string) []string {
112 112
 			if ch == quote {
113 113
 				phase = inWord
114 114
 			}
115
-			// \ is special except for ' quotes - can't escape anything for '
116
-			if ch == '\\' && quote != '\'' {
115
+			// The escape token is special except for ' quotes - can't escape anything for '
116
+			if ch == tokenEscape && quote != '\'' {
117 117
 				if pos+1 == len(rest) {
118 118
 					phase = inWord
119
-					continue // just skip \ at end
119
+					continue // just skip the escape token at end
120 120
 				}
121 121
 				pos++
122 122
 				nextCh := rune(rest[pos])
... ...
@@ -3,6 +3,7 @@ package parser
3 3
 
4 4
 import (
5 5
 	"bufio"
6
+	"fmt"
6 7
 	"io"
7 8
 	"regexp"
8 9
 	"strings"
... ...
@@ -37,10 +38,26 @@ type Node struct {
37 37
 var (
38 38
 	dispatch              map[string]func(string) (*Node, map[string]bool, error)
39 39
 	tokenWhitespace       = regexp.MustCompile(`[\t\v\f\r ]+`)
40
-	tokenLineContinuation = regexp.MustCompile(`\\[ \t]*$`)
40
+	tokenLineContinuation *regexp.Regexp
41
+	tokenEscape           rune
42
+	tokenEscapeCommand    = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
41 43
 	tokenComment          = regexp.MustCompile(`^#.*$`)
44
+	lookingForDirectives  bool
45
+	directiveEscapeSeen   bool
42 46
 )
43 47
 
48
+const defaultTokenEscape = "\\"
49
+
50
+// setTokenEscape sets the default token for escaping characters in a Dockerfile.
51
+func setTokenEscape(s string) error {
52
+	if s != "`" && s != "\\" {
53
+		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
54
+	}
55
+	tokenEscape = rune(s[0])
56
+	tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
57
+	return nil
58
+}
59
+
44 60
 func init() {
45 61
 	// Dispatch Table. see line_parsers.go for the parse functions.
46 62
 	// The command is parsed and mapped to the line parser. The line parser
... ...
@@ -70,6 +87,29 @@ func init() {
70 70
 
71 71
 // ParseLine parses a line and returns the remainder.
72 72
 func ParseLine(line string) (string, *Node, error) {
73
+
74
+	// Handle the parser directive '# escape=<char>'. Parser directives must precede
75
+	// any builder instruction or other comments, and cannot be repeated.
76
+	if lookingForDirectives {
77
+		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
78
+		if len(tecMatch) > 0 {
79
+			if directiveEscapeSeen == true {
80
+				return "", nil, fmt.Errorf("only one escape parser directive can be used")
81
+			}
82
+			for i, n := range tokenEscapeCommand.SubexpNames() {
83
+				if n == "escapechar" {
84
+					if err := setTokenEscape(tecMatch[i]); err != nil {
85
+						return "", nil, err
86
+					}
87
+					directiveEscapeSeen = true
88
+					return "", nil, nil
89
+				}
90
+			}
91
+		}
92
+	}
93
+
94
+	lookingForDirectives = false
95
+
73 96
 	if line = stripComments(line); line == "" {
74 97
 		return "", nil, nil
75 98
 	}
... ...
@@ -103,6 +143,9 @@ func ParseLine(line string) (string, *Node, error) {
103 103
 // Parse is the main parse routine.
104 104
 // It handles an io.Reader and returns the root of the AST.
105 105
 func Parse(rwc io.Reader) (*Node, error) {
106
+	directiveEscapeSeen = false
107
+	lookingForDirectives = true
108
+	setTokenEscape(defaultTokenEscape) // Assume the default token for escape
106 109
 	currentLine := 0
107 110
 	root := &Node{}
108 111
 	root.StartLine = -1
... ...
@@ -131,22 +131,22 @@ func TestLineInformation(t *testing.T) {
131 131
 		t.Fatalf("Error parsing dockerfile %s: %v", testFileLineInfo, err)
132 132
 	}
133 133
 
134
-	if ast.StartLine != 4 || ast.EndLine != 30 {
135
-		fmt.Fprintf(os.Stderr, "Wrong root line information: expected(%d-%d), actual(%d-%d)\n", 4, 30, ast.StartLine, ast.EndLine)
134
+	if ast.StartLine != 5 || ast.EndLine != 31 {
135
+		fmt.Fprintf(os.Stderr, "Wrong root line information: expected(%d-%d), actual(%d-%d)\n", 5, 31, ast.StartLine, ast.EndLine)
136 136
 		t.Fatalf("Root line information doesn't match result.")
137 137
 	}
138 138
 	if len(ast.Children) != 3 {
139 139
 		fmt.Fprintf(os.Stderr, "Wrong number of child: expected(%d), actual(%d)\n", 3, len(ast.Children))
140
-		t.Fatalf("Root line information doesn't match result.")
140
+		t.Fatalf("Root line information doesn't match result for %s", testFileLineInfo)
141 141
 	}
142 142
 	expected := [][]int{
143
-		{4, 4},
144
-		{10, 11},
145
-		{16, 30},
143
+		{5, 5},
144
+		{11, 12},
145
+		{17, 31},
146 146
 	}
147 147
 	for i, child := range ast.Children {
148 148
 		if child.StartLine != expected[i][0] || child.EndLine != expected[i][1] {
149
-			fmt.Fprintf(os.Stderr, "Wrong line information for child %d: expected(%d-%d), actual(%d-%d)\n",
149
+			t.Logf("Wrong line information for child %d: expected(%d-%d), actual(%d-%d)\n",
150 150
 				i, expected[i][0], expected[i][1], child.StartLine, child.EndLine)
151 151
 			t.Fatalf("Root line information doesn't match result.")
152 152
 		}
... ...
@@ -1,3 +1,4 @@
1
+# ESCAPE=\
1 2
 
2 3
 
3 4
 
... ...
@@ -1,3 +1,4 @@
1
+#escape=\
1 2
 FROM brimstone/ubuntu:14.04
2 3
 
3 4
 MAINTAINER brimstone@the.narro.ws
4 5
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+# Comment here. Should not be looking for the following parser directive.
1
+# Hence the following line will be ignored, and the subsequent backslash
2
+# continuation will be the default.
3
+# escape = `
4
+
5
+FROM image
6
+MAINTAINER foo@bar.com
7
+ENV GOPATH \
8
+\go
0 9
\ No newline at end of file
1 10
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+(from "image")
1
+(maintainer "foo@bar.com")
2
+(env "GOPATH" "\\go")
0 3
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+# escape = ``
1
+# There is no white space line after the directives. This still succeeds, but goes
2
+# against best practices.
3
+FROM image
4
+MAINTAINER foo@bar.com
5
+ENV GOPATH `
6
+\go
0 7
\ No newline at end of file
1 8
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+(from "image")
1
+(maintainer "foo@bar.com")
2
+(env "GOPATH" "\\go")
0 3
new file mode 100644
... ...
@@ -0,0 +1,6 @@
0
+#escape = `
1
+
2
+FROM image
3
+MAINTAINER foo@bar.com
4
+ENV GOPATH `
5
+\go
0 6
\ No newline at end of file
1 7
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+(from "image")
1
+(maintainer "foo@bar.com")
2
+(env "GOPATH" "\\go")
... ...
@@ -106,27 +106,197 @@ repository to its registry*](../userguide/containers/dockerrepos.md#contributing
106 106
 
107 107
 Here is the format of the `Dockerfile`:
108 108
 
109
-    # Comment
110
-    INSTRUCTION arguments
109
+```Dockerfile
110
+# Comment
111
+INSTRUCTION arguments
112
+```
113
+
114
+Instructions are not case-sensitive. However, convention is for them to
115
+be UPPERCASE to distinguish them from arguments more easily.
116
+
117
+
118
+Docker runs instructions in a `Dockerfile` in order. **The first 
119
+instruction must be \`FROM\`** in order to specify the [*Base
120
+Image*](glossary.md#base-image) from which you are building. 
121
+
122
+Docker treats lines that *begin* with `#` as a comment, unless the line is 
123
+a valid [parser directive](builder.md#parser-directives). A `#` marker anywhere
124
+else in a line is treated as an argument. This allows statements like:
125
+
126
+```Dockerfile
127
+# Comment
128
+RUN echo 'we are running some # of cool things'
129
+```
130
+
131
+Line continuation characters are not supported in comments.
132
+
133
+## Parser directives
134
+
135
+Parser directives are optional, and affect the way in which subsequent lines 
136
+in a `Dockerfile` are handled. Parser directives do not add layers to the build,
137
+and will not be shown as a build step. Parser directives are written as a
138
+special type of comment in the form `# directive=value`. A single directive
139
+may only be used once.
140
+
141
+Once a comment, empty line or builder instruction has been processed, Docker 
142
+no longer looks for parser directives. Instead it treats anything formatted
143
+as a parser directive as a comment and does not attempt to validate if it might
144
+be a parser directive. Therefore, all parser directives must be at the very
145
+top of a `Dockerfile`. 
146
+
147
+Parser directives are not case-sensitive. However, convention is for them to
148
+be lowercase. Convention is also to include a blank line following any 
149
+parser directives. Line continuation characters are not supported in parser
150
+directives.
151
+
152
+Due to these rules, the following examples are all invalid:
153
+
154
+Invalid due to line continuation:
155
+
156
+```Dockerfile
157
+# direc \
158
+tive=value
159
+```
160
+
161
+Invalid due to appearing twice:
162
+
163
+```Dockerfile
164
+# directive=value1
165
+# directive=value2
166
+
167
+FROM ImageName
168
+```
169
+    
170
+Treated as a comment due to appearing after a builder instruction:
111 171
 
112
-The instruction is not case-sensitive, however convention is for them to
113
-be UPPERCASE in order to distinguish them from arguments more easily.
172
+```Dockerfile
173
+FROM ImageName
174
+# directive=value
175
+```
114 176
 
115
-Docker runs the instructions in a `Dockerfile` in order. **The
116
-first instruction must be \`FROM\`** in order to specify the [*Base
117
-Image*](glossary.md#base-image) from which you are building.
177
+Treated as a comment due to appearing after a comment which is not a parser
178
+directive:
118 179
 
119
-Docker will treat lines that *begin* with `#` as a
120
-comment. A `#` marker anywhere else in the line will
121
-be treated as an argument. This allows statements like:
180
+```Dockerfile
181
+# About my dockerfile
182
+FROM ImageName
183
+# directive=value
184
+```
122 185
 
123
-    # Comment
124
-    RUN echo 'we are running some # of cool things'
186
+The unknown directive is treated as a comment due to not being recognized. In
187
+addition, the known directive is treated as a comment due to appearing after
188
+a comment which is not a parser directive.
189
+
190
+```Dockerfile
191
+# unknowndirective=value
192
+# knowndirective=value
193
+```    
194
+    
195
+Non line-breaking whitespace is permitted in a parser directive. Hence, the
196
+following lines are all treated identically: 
197
+
198
+```Dockerfile
199
+#directive=value
200
+# directive =value
201
+#	directive= value
202
+# directive = value
203
+#	  dIrEcTiVe=value
204
+```
125 205
 
126
-Here is the set of instructions you can use in a `Dockerfile` for building
127
-images.
206
+The following parser directive is supported:
207
+
208
+* `escape`
209
+
210
+## escape
211
+
212
+    # escape=\ (backslash)
213
+
214
+Or
215
+
216
+    # escape=` (backtick)
217
+
218
+The `escape` directive sets the character used to escape characters in a 
219
+`Dockerfile`. If not specified, the default escape character is `\`. 
220
+
221
+The escape character is used both to escape characters in a line, and to
222
+escape a newline. This allows a `Dockerfile` instruction to
223
+span multiple lines. Note that regardless of whether the `escape` parser
224
+directive is included in a `Dockerfile`, *escaping is not performed in 
225
+a `RUN` command, except at the end of a line.* 
226
+
227
+Setting the escape character to `` ` `` is especially useful on 
228
+`Windows`, where `\` is the directory path separator. `` ` `` is consistent 
229
+with [Windows PowerShell](https://technet.microsoft.com/en-us/library/hh847755.aspx).
230
+
231
+Consider the following example which would fail in a non-obvious way on 
232
+`Windows`. The second `\` at the end of the second line would be interpreted as an
233
+escape for the newline, instead of a target of the escape from the first `\`. 
234
+Similarly, the `\` at the end of the third line would, assuming it was actually
235
+handled as an instruction, cause it to be treated as a line continuation. The result
236
+of this dockerfile is that the second and third lines are considered a single
237
+instruction: 
238
+
239
+```Dockerfile
240
+FROM windowsservercore
241
+COPY testfile.txt c:\\
242
+RUN dir c:\
243
+```
128 244
 
129
-### Environment replacement
245
+Results in:
246
+
247
+    PS C:\John> docker build -t cmd .
248
+    Sending build context to Docker daemon 3.072 kB
249
+    Step 1 : FROM windowsservercore
250
+     ---> dbfee88ee9fd
251
+    Step 2 : COPY testfile.txt c:RUN dir c:
252
+    GetFileAttributesEx c:RUN: The system cannot find the file specified.
253
+    PS C:\John> 
254
+
255
+One solution to the above would be to use `/` as the target of both the `COPY`
256
+instruction, and `dir`. However, this syntax is, at best, confusing as it is not
257
+natural for paths on `Windows`, and at worst, error prone as not all commands on
258
+`Windows` support `/` as the path separator.
259
+
260
+By adding the `escape` parser directive, the following `Dockerfile` succeeds as 
261
+expected with the use of natural platform semantics for file paths on `Windows`:
262
+
263
+    # escape=`
264
+    
265
+    FROM windowsservercore
266
+    COPY testfile.txt c:\
267
+    RUN dir c:\
268
+
269
+Results in:
270
+
271
+    PS C:\John> docker build -t succeeds --no-cache=true .
272
+    Sending build context to Docker daemon 3.072 kB
273
+    Step 1 : FROM windowsservercore
274
+     ---> dbfee88ee9fd
275
+    Step 2 : COPY testfile.txt c:\
276
+     ---> 99ceb62e90df
277
+    Removing intermediate container 62afbe726221
278
+    Step 3 : RUN dir c:\
279
+     ---> Running in a5ff53ad6323
280
+     Volume in drive C has no label.
281
+     Volume Serial Number is 1440-27FA
282
+    
283
+     Directory of c:\
284
+    
285
+    03/25/2016  05:28 AM    <DIR>          inetpub
286
+    03/25/2016  04:22 AM    <DIR>          PerfLogs
287
+    04/22/2016  10:59 PM    <DIR>          Program Files
288
+    03/25/2016  04:22 AM    <DIR>          Program Files (x86)
289
+    04/18/2016  09:26 AM                 4 testfile.txt
290
+    04/22/2016  10:59 PM    <DIR>          Users
291
+    04/22/2016  10:59 PM    <DIR>          Windows
292
+                   1 File(s)              4 bytes
293
+                   6 Dir(s)  21,252,689,920 bytes free
294
+     ---> 2569aa19abef
295
+    Removing intermediate container a5ff53ad6323
296
+    Successfully built 2569aa19abef
297
+    PS C:\John>
298
+
299
+## Environment replacement
130 300
 
131 301
 Environment variables (declared with [the `ENV` statement](#env)) can also be
132 302
 used in certain instructions as variables to be interpreted by the
... ...
@@ -192,7 +362,7 @@ will result in `def` having a value of `hello`, not `bye`. However,
192 192
 `ghi` will have a value of `bye` because it is not part of the same command
193 193
 that set `abc` to `bye`.
194 194
 
195
-### .dockerignore file
195
+## .dockerignore file
196 196
 
197 197
 Before the docker CLI sends the context to the docker daemon, it looks
198 198
 for a file named `.dockerignore` in the root directory of the context.
... ...
@@ -3125,9 +3125,10 @@ func (s *DockerSuite) TestBuildAddToSymlinkDest(c *check.C) {
3125 3125
 }
3126 3126
 
3127 3127
 func (s *DockerSuite) TestBuildEscapeWhitespace(c *check.C) {
3128
-	name := "testbuildescaping"
3128
+	name := "testbuildescapewhitespace"
3129 3129
 
3130 3130
 	_, err := buildImage(name, `
3131
+  # ESCAPE=\
3131 3132
   FROM busybox
3132 3133
   MAINTAINER "Docker \
3133 3134
 IO <io@\