Browse code

Return error if env file contains non-UTF-8 bytes (for Windows)

This fix tries to address the issue raised in 26179, where an env file
containing a UTF-8 BOM or non-UTF-8 bytes causes a crash on Windows.

The issue is two-fold:
- Windows adds a BOM mark at the beginning of the file when Notepad is used as the editor
- Non-UTF-8 bytes cannot be handled by the env file parser.

This fix removes the UTF-8 BOM marker, if present, so that a UTF-8 encoded
env file can be processed.
This fix also returns an error (instead of a runtime CreateProcess crash)
if the env file contains non-UTF-8 bytes, giving users a better experience.

Additional test cases have been added to the unit tests.

This fix fixes 26179.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

Yong Tang authored on 2016/09/04 07:06:42
Showing 5 changed files
... ...
@@ -2,9 +2,12 @@ package opts
2 2
 
3 3
 import (
4 4
 	"bufio"
5
+	"bytes"
5 6
 	"fmt"
6 7
 	"os"
7 8
 	"strings"
9
+	"unicode"
10
+	"unicode/utf8"
8 11
 )
9 12
 
10 13
 // ParseEnvFile reads a file with environment variables enumerated by lines
... ...
@@ -29,9 +32,20 @@ func ParseEnvFile(filename string) ([]string, error) {
29 29
 
30 30
 	lines := []string{}
31 31
 	scanner := bufio.NewScanner(fh)
32
+	currentLine := 0
33
+	utf8bom := []byte{0xEF, 0xBB, 0xBF}
32 34
 	for scanner.Scan() {
35
+		scannedBytes := scanner.Bytes()
36
+		if !utf8.Valid(scannedBytes) {
37
+			return []string{}, fmt.Errorf("env file %s contains invalid utf8 bytes at line %d: %v", filename, currentLine+1, scannedBytes)
38
+		}
39
+		// We trim UTF8 BOM
40
+		if currentLine == 0 {
41
+			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
42
+		}
33 43
 		// trim the line from all leading whitespace first
34
-		line := strings.TrimLeft(scanner.Text(), whiteSpaces)
44
+		line := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
45
+		currentLine++
35 46
 		// line is not empty, and not starting with '#'
36 47
 		if len(line) > 0 && !strings.HasPrefix(line, "#") {
37 48
 			data := strings.SplitN(line, "=", 2)
38 49
new file mode 100755
39 50
Binary files /dev/null and b/runconfig/opts/fixtures/utf16.env differ
40 51
new file mode 100755
41 52
Binary files /dev/null and b/runconfig/opts/fixtures/utf16be.env differ
42 53
new file mode 100755
... ...
@@ -0,0 +1,3 @@
0
+FOO=BAR
1
+HELLO=您好
2
+BAR=FOO
0 3
\ No newline at end of file
... ...
@@ -666,6 +666,33 @@ func TestParseEnvfileVariables(t *testing.T) {
666 666
 	}
667 667
 }
668 668
 
669
+func TestParseEnvfileVariablesWithBOMUnicode(t *testing.T) {
670
+	// UTF8 with BOM
671
+	config, _, _, err := parseRun([]string{"--env-file=fixtures/utf8.env", "img", "cmd"})
672
+	if err != nil {
673
+		t.Fatal(err)
674
+	}
675
+	env := []string{"FOO=BAR", "HELLO=" + string([]byte{0xe6, 0x82, 0xa8, 0xe5, 0xa5, 0xbd}), "BAR=FOO"}
676
+	if len(config.Env) != len(env) {
677
+		t.Fatalf("Expected a config with %d env variables, got %v: %v", len(env), len(config.Env), config.Env)
678
+	}
679
+	for i, v := range env {
680
+		if config.Env[i] != v {
681
+			t.Fatalf("Expected a config with [%s], got %v", v, []byte(config.Env[i]))
682
+		}
683
+	}
684
+
685
+	// UTF16 with BOM
686
+	e := "contains invalid utf8 bytes at line"
687
+	if _, _, _, err := parseRun([]string{"--env-file=fixtures/utf16.env", "img", "cmd"}); err == nil || !strings.Contains(err.Error(), e) {
688
+		t.Fatalf("Expected an error with message '%s', got %v", e, err)
689
+	}
690
+	// UTF16BE with BOM
691
+	if _, _, _, err := parseRun([]string{"--env-file=fixtures/utf16be.env", "img", "cmd"}); err == nil || !strings.Contains(err.Error(), e) {
692
+		t.Fatalf("Expected an error with message '%s', got %v", e, err)
693
+	}
694
+}
695
+
669 696
 func TestParseLabelfileVariables(t *testing.T) {
670 697
 	e := "open nonexistent: no such file or directory"
671 698
 	if runtime.GOOS == "windows" {