Signed-off-by: Daehyeok Mun <daehyeok@gmail.com>
| ... | ... |
@@ -9,13 +9,15 @@ package dockerfile |
| 9 | 9 |
import ( |
| 10 | 10 |
"fmt" |
| 11 | 11 |
"strings" |
| 12 |
+ "text/scanner" |
|
| 12 | 13 |
"unicode" |
| 13 | 14 |
) |
| 14 | 15 |
|
| 15 | 16 |
type shellWord struct {
|
| 16 |
- word string |
|
| 17 |
- envs []string |
|
| 18 |
- pos int |
|
| 17 |
+ word string |
|
| 18 |
+ scanner scanner.Scanner |
|
| 19 |
+ envs []string |
|
| 20 |
+ pos int |
|
| 19 | 21 |
} |
| 20 | 22 |
|
| 21 | 23 |
// ProcessWord will use the 'env' list of environment variables, |
| ... | ... |
@@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) {
|
| 26 | 26 |
envs: env, |
| 27 | 27 |
pos: 0, |
| 28 | 28 |
} |
| 29 |
+ sw.scanner.Init(strings.NewReader(word)) |
|
| 29 | 30 |
return sw.process() |
| 30 | 31 |
} |
| 31 | 32 |
|
| 32 | 33 |
func (sw *shellWord) process() (string, error) {
|
| 33 |
- return sw.processStopOn('\000')
|
|
| 34 |
+ return sw.processStopOn(scanner.EOF) |
|
| 34 | 35 |
} |
| 35 | 36 |
|
| 36 | 37 |
// Process the word, starting at 'pos', and stop when we get to the |
| ... | ... |
@@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
| 43 | 43 |
'$': sw.processDollar, |
| 44 | 44 |
} |
| 45 | 45 |
|
| 46 |
- for sw.pos < len(sw.word) {
|
|
| 47 |
- ch := sw.peek() |
|
| 48 |
- if stopChar != '\000' && ch == stopChar {
|
|
| 49 |
- sw.next() |
|
| 46 |
+ for sw.scanner.Peek() != scanner.EOF {
|
|
| 47 |
+ ch := sw.scanner.Peek() |
|
| 48 |
+ |
|
| 49 |
+ if stopChar != scanner.EOF && ch == stopChar {
|
|
| 50 |
+ sw.scanner.Next() |
|
| 50 | 51 |
break |
| 51 | 52 |
} |
| 52 | 53 |
if fn, ok := charFuncMapping[ch]; ok {
|
| ... | ... |
@@ -58,14 +62,19 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
| 58 | 58 |
result += tmp |
| 59 | 59 |
} else {
|
| 60 | 60 |
// Not special, just add it to the result |
| 61 |
- ch = sw.next() |
|
| 61 |
+ ch = sw.scanner.Next() |
|
| 62 |
+ |
|
| 62 | 63 |
if ch == '\\' {
|
| 63 | 64 |
// '\' escapes, except end of line |
| 64 |
- ch = sw.next() |
|
| 65 |
- if ch == '\000' {
|
|
| 66 |
- continue |
|
| 65 |
+ |
|
| 66 |
+ ch = sw.scanner.Next() |
|
| 67 |
+ |
|
| 68 |
+ if ch == scanner.EOF {
|
|
| 69 |
+ break |
|
| 67 | 70 |
} |
| 71 |
+ |
|
| 68 | 72 |
} |
| 73 |
+ |
|
| 69 | 74 |
result += string(ch) |
| 70 | 75 |
} |
| 71 | 76 |
} |
| ... | ... |
@@ -73,36 +82,21 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
| 73 | 73 |
return result, nil |
| 74 | 74 |
} |
| 75 | 75 |
|
| 76 |
-func (sw *shellWord) peek() rune {
|
|
| 77 |
- if sw.pos == len(sw.word) {
|
|
| 78 |
- return '\000' |
|
| 79 |
- } |
|
| 80 |
- return rune(sw.word[sw.pos]) |
|
| 81 |
-} |
|
| 82 |
- |
|
| 83 |
-func (sw *shellWord) next() rune {
|
|
| 84 |
- if sw.pos == len(sw.word) {
|
|
| 85 |
- return '\000' |
|
| 86 |
- } |
|
| 87 |
- ch := rune(sw.word[sw.pos]) |
|
| 88 |
- sw.pos++ |
|
| 89 |
- return ch |
|
| 90 |
-} |
|
| 91 |
- |
|
| 92 | 76 |
func (sw *shellWord) processSingleQuote() (string, error) {
|
| 93 | 77 |
// All chars between single quotes are taken as-is |
| 94 | 78 |
// Note, you can't escape ' |
| 95 | 79 |
var result string |
| 96 | 80 |
|
| 97 |
- sw.next() |
|
| 81 |
+ sw.scanner.Next() |
|
| 98 | 82 |
|
| 99 | 83 |
for {
|
| 100 |
- ch := sw.next() |
|
| 101 |
- if ch == '\000' || ch == '\'' {
|
|
| 84 |
+ ch := sw.scanner.Next() |
|
| 85 |
+ if ch == '\'' || ch == scanner.EOF {
|
|
| 102 | 86 |
break |
| 103 | 87 |
} |
| 104 | 88 |
result += string(ch) |
| 105 | 89 |
} |
| 90 |
+ |
|
| 106 | 91 |
return result, nil |
| 107 | 92 |
} |
| 108 | 93 |
|
| ... | ... |
@@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
| 111 | 111 |
// But you can escape " with a \ |
| 112 | 112 |
var result string |
| 113 | 113 |
|
| 114 |
- sw.next() |
|
| 114 |
+ sw.scanner.Next() |
|
| 115 | 115 |
|
| 116 |
- for sw.pos < len(sw.word) {
|
|
| 117 |
- ch := sw.peek() |
|
| 116 |
+ for sw.scanner.Peek() != scanner.EOF {
|
|
| 117 |
+ ch := sw.scanner.Peek() |
|
| 118 | 118 |
if ch == '"' {
|
| 119 |
- sw.next() |
|
| 119 |
+ sw.scanner.Next() |
|
| 120 | 120 |
break |
| 121 | 121 |
} |
| 122 | 122 |
if ch == '$' {
|
| ... | ... |
@@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
| 126 | 126 |
} |
| 127 | 127 |
result += tmp |
| 128 | 128 |
} else {
|
| 129 |
- ch = sw.next() |
|
| 129 |
+ ch = sw.scanner.Next() |
|
| 130 | 130 |
if ch == '\\' {
|
| 131 |
- chNext := sw.peek() |
|
| 131 |
+ chNext := sw.scanner.Peek() |
|
| 132 | 132 |
|
| 133 |
- if chNext == '\000' {
|
|
| 133 |
+ if chNext == scanner.EOF {
|
|
| 134 | 134 |
// Ignore \ at end of word |
| 135 | 135 |
continue |
| 136 | 136 |
} |
| 137 | 137 |
|
| 138 | 138 |
if chNext == '"' || chNext == '$' {
|
| 139 | 139 |
// \" and \$ can be escaped, all other \'s are left as-is |
| 140 |
- ch = sw.next() |
|
| 140 |
+ ch = sw.scanner.Next() |
|
| 141 | 141 |
} |
| 142 | 142 |
} |
| 143 | 143 |
result += string(ch) |
| ... | ... |
@@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
| 148 | 148 |
} |
| 149 | 149 |
|
| 150 | 150 |
func (sw *shellWord) processDollar() (string, error) {
|
| 151 |
- sw.next() |
|
| 152 |
- ch := sw.peek() |
|
| 151 |
+ sw.scanner.Next() |
|
| 152 |
+ ch := sw.scanner.Peek() |
|
| 153 | 153 |
if ch == '{' {
|
| 154 |
- sw.next() |
|
| 154 |
+ sw.scanner.Next() |
|
| 155 | 155 |
name := sw.processName() |
| 156 |
- ch = sw.peek() |
|
| 156 |
+ ch = sw.scanner.Peek() |
|
| 157 | 157 |
if ch == '}' {
|
| 158 | 158 |
// Normal ${xx} case
|
| 159 |
- sw.next() |
|
| 159 |
+ sw.scanner.Next() |
|
| 160 | 160 |
return sw.getEnv(name), nil |
| 161 | 161 |
} |
| 162 | 162 |
if ch == ':' {
|
| 163 | 163 |
// Special ${xx:...} format processing
|
| 164 | 164 |
// Yes it allows for recursive $'s in the ... spot |
| 165 | 165 |
|
| 166 |
- sw.next() // skip over : |
|
| 167 |
- modifier := sw.next() |
|
| 166 |
+ sw.scanner.Next() // skip over : |
|
| 167 |
+ modifier := sw.scanner.Next() |
|
| 168 | 168 |
|
| 169 | 169 |
word, err := sw.processStopOn('}')
|
| 170 | 170 |
if err != nil {
|
| ... | ... |
@@ -207,16 +201,16 @@ func (sw *shellWord) processName() string {
|
| 207 | 207 |
// If it starts with a numeric then just return $# |
| 208 | 208 |
var name string |
| 209 | 209 |
|
| 210 |
- for sw.pos < len(sw.word) {
|
|
| 211 |
- ch := sw.peek() |
|
| 210 |
+ for sw.scanner.Peek() != scanner.EOF {
|
|
| 211 |
+ ch := sw.scanner.Peek() |
|
| 212 | 212 |
if len(name) == 0 && unicode.IsDigit(ch) {
|
| 213 |
- ch = sw.next() |
|
| 213 |
+ ch = sw.scanner.Next() |
|
| 214 | 214 |
return string(ch) |
| 215 | 215 |
} |
| 216 | 216 |
if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
|
| 217 | 217 |
break |
| 218 | 218 |
} |
| 219 |
- ch = sw.next() |
|
| 219 |
+ ch = sw.scanner.Next() |
|
| 220 | 220 |
name += string(ch) |
| 221 | 221 |
} |
| 222 | 222 |
|
| ... | ... |
@@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) {
|
| 15 | 15 |
defer file.Close() |
| 16 | 16 |
|
| 17 | 17 |
scanner := bufio.NewScanner(file) |
| 18 |
- envs := []string{"PWD=/home", "SHELL=bash"}
|
|
| 18 |
+ envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"}
|
|
| 19 | 19 |
for scanner.Scan() {
|
| 20 | 20 |
line := scanner.Text() |
| 21 | 21 |
|
| ... | ... |
@@ -56,3 +56,57 @@ he${PWD:=000}xx | error
|
| 56 | 56 |
he${PWD:+${PWD}:}xx | he/home:xx
|
| 57 | 57 |
he${XXX:-\$PWD:}xx | he$PWD:xx
|
| 58 | 58 |
he${XXX:-\${PWD}z}xx | he${PWDz}xx
|
| 59 |
+안녕하세요 | 안녕하세요 |
|
| 60 |
+안'녕'하세요 | 안녕하세요 |
|
| 61 |
+안'녕하세요 | 안녕하세요 |
|
| 62 |
+안녕\'하세요 | 안녕'하세요 |
|
| 63 |
+안\\'녕하세요 | 안\녕하세요 |
|
| 64 |
+안녕\t하세요 | 안녕t하세요 |
|
| 65 |
+"안녕\t하세요" | 안녕\t하세요 |
|
| 66 |
+'안녕\t하세요 | 안녕\t하세요 |
|
| 67 |
+안녕하세요\ | 안녕하세요 |
|
| 68 |
+안녕하세요\\ | 안녕하세요\ |
|
| 69 |
+"안녕하세요 | 안녕하세요 |
|
| 70 |
+"안녕하세요\" | 안녕하세요" |
|
| 71 |
+"안녕'하세요" | 안녕'하세요 |
|
| 72 |
+'안녕하세요 | 안녕하세요 |
|
| 73 |
+'안녕하세요\' | 안녕하세요\ |
|
| 74 |
+안녕$1x | 안녕x |
|
| 75 |
+안녕$.x | 안녕$.x |
|
| 76 |
+안녕$pwd. | 안녕. |
|
| 77 |
+안녕$PWD | 안녕/home |
|
| 78 |
+안녕\$PWD | 안녕$PWD |
|
| 79 |
+안녕\\$PWD | 안녕\/home |
|
| 80 |
+안녕\${} | 안녕${}
|
|
| 81 |
+안녕\${}xx | 안녕${}xx
|
|
| 82 |
+안녕${} | 안녕
|
|
| 83 |
+안녕${}xx | 안녕xx
|
|
| 84 |
+안녕${hi} | 안녕
|
|
| 85 |
+안녕${hi}xx | 안녕xx
|
|
| 86 |
+안녕${PWD} | 안녕/home
|
|
| 87 |
+안녕${.} | error
|
|
| 88 |
+안녕${XXX:-000}xx | 안녕000xx
|
|
| 89 |
+안녕${PWD:-000}xx | 안녕/homexx
|
|
| 90 |
+안녕${XXX:-$PWD}xx | 안녕/homexx
|
|
| 91 |
+안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx
|
|
| 92 |
+안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx
|
|
| 93 |
+안녕${XXX:YYY} | error
|
|
| 94 |
+안녕${XXX:+${PWD}}xx | 안녕xx
|
|
| 95 |
+안녕${PWD:+${XXX}}xx | 안녕xx
|
|
| 96 |
+안녕${PWD:+${SHELL}}xx | 안녕bashxx
|
|
| 97 |
+안녕${XXX:+000}xx | 안녕xx
|
|
| 98 |
+안녕${PWD:+000}xx | 안녕000xx
|
|
| 99 |
+'안녕${XX}' | 안녕${XX}
|
|
| 100 |
+"안녕${PWD}" | 안녕/home
|
|
| 101 |
+"안녕'$PWD'" | 안녕'/home' |
|
| 102 |
+'"안녕"' | "안녕" |
|
| 103 |
+안녕\$PWD | 안녕$PWD |
|
| 104 |
+"안녕\$PWD" | 안녕$PWD |
|
| 105 |
+'안녕\$PWD' | 안녕\$PWD |
|
| 106 |
+안녕${PWD | error
|
|
| 107 |
+안녕${PWD:=000}xx | error
|
|
| 108 |
+안녕${PWD:+${PWD}:}xx | 안녕/home:xx
|
|
| 109 |
+안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx
|
|
| 110 |
+안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx
|
|
| 111 |
+$KOREAN | 한국어 |
|
| 112 |
+안녕$KOREAN | 안녕한국어 |