Browse code

Support Unicode characters in parseWords

Signed-off-by: Jonathan Stoppani <jonathan.stoppani@divio.com>

Jonathan Stoppani authored on 2016/06/08 20:55:26
Showing 2 changed files
... ...
@@ -12,6 +12,7 @@ import (
12 12
 	"fmt"
13 13
 	"strings"
14 14
 	"unicode"
15
+	"unicode/utf8"
15 16
 )
16 17
 
17 18
 var (
... ...
@@ -58,10 +59,11 @@ func parseWords(rest string) []string {
58 58
 	quote := '\000'
59 59
 	blankOK := false
60 60
 	var ch rune
61
+	var chWidth int
61 62
 
62
-	for pos := 0; pos <= len(rest); pos++ {
63
+	for pos := 0; pos <= len(rest); pos += chWidth {
63 64
 		if pos != len(rest) {
64
-			ch = rune(rest[pos])
65
+			ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
65 66
 		}
66 67
 
67 68
 		if phase == inSpaces { // Looking for start of word
... ...
@@ -95,15 +97,15 @@ func parseWords(rest string) []string {
95 95
 				phase = inQuote
96 96
 			}
97 97
 			if ch == tokenEscape {
98
-				if pos+1 == len(rest) {
98
+				if pos+chWidth == len(rest) {
99 99
 					continue // just skip an escape token at end of line
100 100
 				}
101 101
 				// If we're not quoted and we see an escape token, then always just
102 102
 				// add the escape token plus the char to the word, even if the char
103 103
 				// is a quote.
104 104
 				word += string(ch)
105
-				pos++
106
-				ch = rune(rest[pos])
105
+				pos += chWidth
106
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
107 107
 			}
108 108
 			word += string(ch)
109 109
 			continue
... ...
@@ -114,14 +116,13 @@ func parseWords(rest string) []string {
114 114
 			}
115 115
 			// The escape token is special except for ' quotes - can't escape anything for '
116 116
 			if ch == tokenEscape && quote != '\'' {
117
-				if pos+1 == len(rest) {
117
+				if pos+chWidth == len(rest) {
118 118
 					phase = inWord
119 119
 					continue // just skip the escape token at end
120 120
 				}
121
-				pos++
122
-				nextCh := rune(rest[pos])
121
+				pos += chWidth
123 122
 				word += string(ch)
124
-				ch = nextCh
123
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
125 124
 			}
126 125
 			word += string(ch)
127 126
 		}
... ...
@@ -93,6 +93,10 @@ func TestParseWords(t *testing.T) {
93 93
 			"expect": {"foo", "bar"},
94 94
 		},
95 95
 		{
96
+			"input":  {"foo\\ bar"},
97
+			"expect": {"foo\\ bar"},
98
+		},
99
+		{
96 100
 			"input":  {"foo=bar"},
97 101
 			"expect": {"foo=bar"},
98 102
 		},
... ...
@@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) {
104 104
 			"input":  {`foo bar "abc xyz"`},
105 105
 			"expect": {"foo", "bar", `"abc xyz"`},
106 106
 		},
107
+		{
108
+			"input":  {"àöû"},
109
+			"expect": {"àöû"},
110
+		},
111
+		{
112
+			"input":  {`föo bàr "âbc xÿz"`},
113
+			"expect": {"föo", "bàr", `"âbc xÿz"`},
114
+		},
107 115
 	}
108 116
 
109 117
 	for _, test := range tests {