Browse code

Fixed #922 - getenv returns unicode in py3 and bytes in py2

This patch consistently converts getenv values to unicode when needed.

Florent Viard authored on 2017/10/05 00:06:35
Showing 5 changed files
... ...
@@ -31,6 +31,19 @@ except NameError:
31 31
     # In python 3, unicode -> str, and str -> bytes
32 32
     unicode = str
33 33
 
34
def config_unicodise(string, encoding="utf-8", errors="replace"):
    """
    Convert 'string' to Unicode or raise an exception.

    Config can't use toolbox from Utils that is itself using Config,
    hence this local copy of the conversion helper.

    Args:
        string: value to convert; unicode text is returned unchanged,
            anything else is handed to the unicode() constructor for
            decoding.
        encoding: codec used to decode byte strings.
        errors: codec error handler. With the default "replace",
            undecodable bytes become U+FFFD and no decode error is raised.

    Returns:
        The unicode representation of 'string'.

    Raises:
        UnicodeDecodeError: decoding failed (only possible with a strict
            error handler). The 'reason' attribute carries the
            "Conversion to unicode failed" message.
        TypeError: 'string' is neither text nor bytes (e.g. None); raised
            by the unicode() constructor itself.
    """
    if isinstance(string, unicode):
        return string

    try:
        return unicode(string, encoding, errors)
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError takes exactly five arguments; building it from a
        # single message string raises TypeError and hides the real failure.
        raise UnicodeDecodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion to unicode failed: %r" % string)
46
+
34 47
 class Config(object):
35 48
     _instance = None
36 49
     _parsed_files = []
... ...
@@ -168,7 +181,6 @@ class Config(object):
168 168
             if access_key and secret_key:
169 169
                 self.access_key = access_key
170 170
                 self.secret_key = secret_key
171
-                
172 171
             if access_token:
173 172
                 self.access_token = access_token
174 173
                 # Do not refresh the IAM role when an access token is provided.
... ...
@@ -179,12 +191,13 @@ class Config(object):
179 179
                 env_secret_key = os.getenv('AWS_SECRET_KEY') or os.getenv('AWS_SECRET_ACCESS_KEY')
180 180
                 env_access_token = os.getenv('AWS_SESSION_TOKEN') or os.getenv('AWS_SECURITY_TOKEN')
181 181
                 if env_access_key:
182
-                    self.access_key = env_access_key
183
-                    self.secret_key = env_secret_key
182
+                    # py3 getenv returns unicode and py2 returns bytes.
183
+                    self.access_key = config_unicodise(env_access_key)
184
+                    self.secret_key = config_unicodise(env_secret_key)
184 185
                     if env_access_token:
185 186
                         # Do not refresh the IAM role when an access token is provided.
186 187
                         self._access_token_refresh = False
187
-                        self.access_token = env_access_token
188
+                        self.access_token = config_unicodise(env_access_token)
188 189
                 else:
189 190
                     self.role_config()
190 191
 
... ...
@@ -544,8 +544,6 @@ def calculateChecksum(buffer, mfile, offset, chunk_size, send_chunk):
544 544
         md5_hash.update(buffer)
545 545
 
546 546
     return md5_hash.hexdigest()
547
-
548
-
549 547
 __all__.append("calculateChecksum")
550 548
 
551 549
 
... ...
@@ -39,6 +39,33 @@ exclude_tests = []
39 39
 
40 40
 verbose = False
41 41
 
42
+encoding = locale.getpreferredencoding()
43
+if not encoding:
44
+    print("Guessing current system encoding failed. Consider setting $LANG variable.")
45
+    sys.exit(1)
46
+else:
47
+    print("System encoding: " + encoding)
48
+
49
+try:
50
+    unicode
51
+except NameError:
52
+    # python 3 support
53
+    # In python 3, unicode -> str, and str -> bytes
54
+    unicode = str
55
+
56
def unicodise(string, encoding="utf-8", errors="replace"):
    """
    Convert 'string' to Unicode or raise an exception.

    Args:
        string: value to convert; unicode text is returned unchanged,
            anything else is handed to the unicode() constructor for
            decoding.
        encoding: codec used to decode byte strings.
        errors: codec error handler. With the default "replace",
            undecodable bytes become U+FFFD and no decode error is raised.

    Returns:
        The unicode representation of 'string'.

    Raises:
        UnicodeDecodeError: decoding failed (only possible with a strict
            error handler). The 'reason' attribute carries the
            "Conversion to unicode failed" message.
        TypeError: 'string' is neither text nor bytes (e.g. None); raised
            by the unicode() constructor itself.
    """
    if isinstance(string, unicode):
        return string

    try:
        return unicode(string, encoding, errors)
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError takes exactly five arguments; building it from a
        # single message string raises TypeError and hides the real failure.
        raise UnicodeDecodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion to unicode failed: %r" % string)
68
+
42 69
 # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python/377028#377028
43 70
 def which(program):
44 71
     def is_exe(fpath):
... ...
@@ -64,9 +91,12 @@ else:
64 64
 
65 65
 config_file = None
66 66
 if os.getenv("HOME"):
67
-    config_file = os.path.join(os.getenv("HOME"), ".s3cfg")
67
+    config_file = os.path.join(unicodise(os.getenv("HOME"), encoding), ".s3cfg")
68 68
 elif os.name == "nt" and os.getenv("USERPROFILE"):
69
-    config_file = os.path.join(os.getenv("USERPROFILE").decode('mbcs'), os.getenv("APPDATA").decode('mbcs') or 'Application Data', "s3cmd.ini")
69
+    config_file = os.path.join(unicodise(os.getenv("USERPROFILE"), encoding),
70
+                               os.getenv("APPDATA") and unicodise(os.getenv("APPDATA"), encoding)
71
+                               or 'Application Data',
72
+                               "s3cmd.ini")
70 73
 
71 74
 
72 75
 ## Unpack testsuite/ directory
... ...
@@ -90,13 +120,6 @@ patterns = {}
90 90
 patterns['UTF-8'] = u"ŪņЇЌœđЗ/☺ unicode € rocks ™"
91 91
 patterns['GBK'] = u"12月31日/1-特色條目"
92 92
 
93
-encoding = locale.getpreferredencoding()
94
-if not encoding:
95
-    print("Guessing current system encoding failed. Consider setting $LANG variable.")
96
-    sys.exit(1)
97
-else:
98
-    print("System encoding: " + encoding)
99
-
100 93
 have_encoding = os.path.isdir('testsuite/encodings/' + encoding)
101 94
 if not have_encoding and os.path.isfile('testsuite/encodings/%s.tar.gz' % encoding):
102 95
     os.system("tar xvz -C testsuite/encodings -f testsuite/encodings/%s.tar.gz" % encoding)
... ...
@@ -39,6 +39,33 @@ exclude_tests = []
39 39
 
40 40
 verbose = False
41 41
 
42
+encoding = locale.getpreferredencoding()
43
+if not encoding:
44
+    print("Guessing current system encoding failed. Consider setting $LANG variable.")
45
+    sys.exit(1)
46
+else:
47
+    print("System encoding: " + encoding)
48
+
49
+try:
50
+    unicode
51
+except NameError:
52
+    # python 3 support
53
+    # In python 3, unicode -> str, and str -> bytes
54
+    unicode = str
55
+
56
def unicodise(string, encoding="utf-8", errors="replace"):
    """
    Convert 'string' to Unicode or raise an exception.

    Args:
        string: value to convert; unicode text is returned unchanged,
            anything else is handed to the unicode() constructor for
            decoding.
        encoding: codec used to decode byte strings.
        errors: codec error handler. With the default "replace",
            undecodable bytes become U+FFFD and no decode error is raised.

    Returns:
        The unicode representation of 'string'.

    Raises:
        UnicodeDecodeError: decoding failed (only possible with a strict
            error handler). The 'reason' attribute carries the
            "Conversion to unicode failed" message.
        TypeError: 'string' is neither text nor bytes (e.g. None); raised
            by the unicode() constructor itself.
    """
    if isinstance(string, unicode):
        return string

    try:
        return unicode(string, encoding, errors)
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError takes exactly five arguments; building it from a
        # single message string raises TypeError and hides the real failure.
        raise UnicodeDecodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion to unicode failed: %r" % string)
68
+
42 69
 # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python/377028#377028
43 70
 def which(program):
44 71
     def is_exe(fpath):
... ...
@@ -64,9 +91,12 @@ else:
64 64
 
65 65
 config_file = None
66 66
 if os.getenv("HOME"):
67
-    config_file = os.path.join(os.getenv("HOME"), ".s3cfg")
67
+    config_file = os.path.join(unicodise(os.getenv("HOME"), encoding), ".s3cfg")
68 68
 elif os.name == "nt" and os.getenv("USERPROFILE"):
69
-    config_file = os.path.join(os.getenv("USERPROFILE").decode('mbcs'), os.getenv("APPDATA").decode('mbcs') or 'Application Data', "s3cmd.ini")
69
+    config_file = os.path.join(unicodise(os.getenv("USERPROFILE"), encoding),
70
+                               os.getenv("APPDATA") and unicodise(os.getenv("APPDATA"), encoding)
71
+                               or 'Application Data',
72
+                               "s3cmd.ini")
70 73
 
71 74
 
72 75
 ## Unpack testsuite/ directory
... ...
@@ -90,13 +120,6 @@ patterns = {}
90 90
 patterns['UTF-8'] = u"ŪņЇЌœđЗ/☺ unicode € rocks ™"
91 91
 patterns['GBK'] = u"12月31日/1-特色條目"
92 92
 
93
-encoding = locale.getpreferredencoding()
94
-if not encoding:
95
-    print("Guessing current system encoding failed. Consider setting $LANG variable.")
96
-    sys.exit(1)
97
-else:
98
-    print("System encoding: " + encoding)
99
-
100 93
 have_encoding = os.path.isdir('testsuite/encodings/' + encoding)
101 94
 if not have_encoding and os.path.isfile('testsuite/encodings/%s.tar.gz' % encoding):
102 95
     os.system("tar xvz -C testsuite/encodings -f testsuite/encodings/%s.tar.gz" % encoding)
... ...
@@ -2270,7 +2270,9 @@ def run_configure(config_file, args):
2270 2270
         setattr(cfg, "gpg_command", find_executable("gpg"))
2271 2271
 
2272 2272
     if getattr(cfg, "proxy_host") == "" and os.getenv("http_proxy"):
2273
-        re_match=re.match("(http://)?([^:]+):(\d+)", os.getenv("http_proxy"))
2273
+        autodetected_encoding = locale.getpreferredencoding() or "UTF-8"
2274
+        re_match=re.match("(http://)?([^:]+):(\d+)",
2275
+                          unicodise_s(os.getenv("http_proxy"), autodetected_encoding))
2274 2276
         if re_match:
2275 2277
             setattr(cfg, "proxy_host", re_match.groups()[1])
2276 2278
             setattr(cfg, "proxy_port", re_match.groups()[2])
... ...
@@ -2605,17 +2607,20 @@ def main():
2605 2605
     optparser = OptionParser(option_class=OptionAll, formatter=MyHelpFormatter())
2606 2606
     #optparser.disable_interspersed_args()
2607 2607
 
2608
+    autodetected_encoding = locale.getpreferredencoding() or "UTF-8"
2609
+
2608 2610
     config_file = None
2609 2611
     if os.getenv("S3CMD_CONFIG"):
2610
-        config_file = os.getenv("S3CMD_CONFIG")
2612
+        config_file = unicodise_s(os.getenv("S3CMD_CONFIG"), autodetected_encoding)
2611 2613
     elif os.name == "nt" and os.getenv("USERPROFILE"):
2612
-        config_file = os.path.join(os.getenv("USERPROFILE").decode('mbcs'), os.getenv("APPDATA").decode('mbcs') or 'Application Data', "s3cmd.ini")
2614
+        config_file = os.path.join(unicodise_s(os.getenv("USERPROFILE"), autodetected_encoding),
2615
+                                   os.getenv("APPDATA") and unicodise_s(os.getenv("APPDATA"), autodetected_encoding)
2616
+                                   or 'Application Data',
2617
+                                   "s3cmd.ini")
2613 2618
     else:
2614 2619
         from os.path import expanduser
2615 2620
         config_file = os.path.join(expanduser("~"), ".s3cfg")
2616 2621
 
2617
-    autodetected_encoding = locale.getpreferredencoding() or "UTF-8"
2618
-
2619 2622
     optparser.set_defaults(config = config_file)
2620 2623
 
2621 2624
     optparser.add_option(      "--configure", dest="run_configure", action="store_true", help="Invoke interactive (re)configuration tool. Optionally use as '--configure s3://some-bucket' to test access to a specific bucket instead of attempting to list them all.")
... ...
@@ -3019,7 +3024,7 @@ def report_exception(e, msg=u''):
3019 3019
         except NameError:
3020 3020
             sys.stderr.write(u"S3cmd:   unknown version. Module import problem?\n")
3021 3021
         sys.stderr.write(u"python:   %s\n" % sys.version)
3022
-        sys.stderr.write(u"environment LANG=%s\n" % os.getenv("LANG"))
3022
+        sys.stderr.write(u"environment LANG=%s\n" % unicodise_s(os.getenv("LANG"), 'ascii'))
3023 3023
         sys.stderr.write(u"\n")
3024 3024
         if type(tb) == unicode:
3025 3025
             sys.stderr.write(tb)
... ...
@@ -3131,7 +3136,7 @@ The solutions to this are:
3131 3131
         sys.exit(EX_OSERR)
3132 3132
 
3133 3133
     except UnicodeEncodeError as e:
3134
-        lang = os.getenv("LANG")
3134
+        lang = unicodise_s(os.getenv("LANG"), 'ascii')
3135 3135
         msg = """
3136 3136
 You have encountered a UnicodeEncodeError.  Your environment
3137 3137
 variable LANG=%s may not specify a Unicode encoding (e.g. UTF-8).