Browse code

* s3cmd, S3/Utils.py, S3/Exceptions.py, S3/Progress.py, S3/Config.py, S3/S3.py: Added --encoding switch and Config.encoding variable. Don't assume utf-8 for filesystem and terminal output anymore.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@308 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2008/12/31 11:42:33
Showing 7 changed files
... ...
@@ -1,5 +1,9 @@
1 1
 2008-12-31  Michal Ludvig  <michal@logix.cz>
2 2
 
3
+	* s3cmd, S3/Utils.py, S3/Exceptions.py, S3/Progress.py,
4
+	  S3/Config.py, S3/S3.py: Added --encoding switch and 
5
+	  Config.encoding variable. Don't assume utf-8 for filesystem
6
+	  and terminal output anymore.
3 7
 	* s3cmd: Avoid ZeroDivisionError on fast links.
4 8
 	* s3cmd: Unicodised all info() output.
5 9
 
... ...
@@ -59,6 +59,7 @@ class Config(object):
59 59
 	exclude = []
60 60
 	# Dict mapping compiled REGEXPs back to their textual form
61 61
 	debug_exclude = {}
62
+	encoding = "utf-8"
62 63
 
63 64
 	## Creating a singleton
64 65
 	def __new__(self, configfile = None):
... ...
@@ -13,11 +13,10 @@ except ImportError:
13 13
 
14 14
 class S3Exception(Exception):
15 15
 	def __init__(self, message = ""):
16
-		self.message = message
16
+		self.message = unicodise(message)
17 17
 
18 18
 	def __str__(self):
19
-		## Is this legal?
20
-		return unicode(self)
19
+		return deunicodise(self.message)
21 20
 
22 21
 	def __unicode__(self):
23 22
 		return self.message
... ...
@@ -11,6 +11,7 @@ class Progress(object):
11 11
 	_stdout = sys.stdout
12 12
 
13 13
 	def __init__(self, labels, total_size):
14
+		self._stdout = sys.stdout
14 15
 		self.new_file(labels, total_size)
15 16
 	
16 17
 	def new_file(self, labels, total_size):
... ...
@@ -174,12 +174,12 @@ class S3(object):
174 174
 			raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
175 175
 
176 176
 		if not os.path.isfile(filename):
177
-			raise InvalidFileError("%s is not a regular file" % filename)
177
+			raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
178 178
 		try:
179 179
 			file = open(filename, "rb")
180 180
 			size = os.stat(filename)[ST_SIZE]
181 181
 		except IOError, e:
182
-			raise InvalidFileError("%s: %s" % (filename, e.strerror))
182
+			raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))
183 183
 		headers = SortedDict()
184 184
 		if extra_headers:
185 185
 			headers.update(extra_headers)
... ...
@@ -194,7 +194,7 @@ class S3(object):
194 194
 		if self.config.acl_public:
195 195
 			headers["x-amz-acl"] = "public-read"
196 196
 		request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
197
-		labels = { 'source' : file.name, 'destination' : uri, 'extra' : extra_label }
197
+		labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
198 198
 		response = self.send_file(request, file, labels)
199 199
 		return response
200 200
 
... ...
@@ -13,6 +13,8 @@ import errno
13 13
 
14 14
 from logging import debug, info, warning, error
15 15
 
16
+import Config
17
+
16 18
 try:
17 19
 	import xml.etree.ElementTree as ET
18 20
 except ImportError:
... ...
@@ -181,21 +183,44 @@ def mkdir_with_parents(dir_name):
181 181
 			return False
182 182
 	return True
183 183
 
184
-def unicodise(string):
184
+def unicodise(string, encoding = None, errors = "replace"):
185 185
 	"""
186 186
 	Convert 'string' to Unicode or raise an exception.
187 187
 	"""
188
-	debug("Unicodising %r" % string)
188
+
189
+	if not encoding:
190
+		encoding = Config.Config().encoding
191
+
192
+	debug("Unicodising %r using %s" % (string, encoding))
189 193
 	if type(string) == unicode:
190 194
 		return string
191 195
 	try:
192
-		return string.decode("utf-8")
196
+		return string.decode(encoding, errors)
193 197
 	except UnicodeDecodeError:
194 198
 		raise UnicodeDecodeError("Conversion to unicode failed: %r" % string)
195 199
 
196
-def try_unicodise(string):
200
+def deunicodise(string, encoding = None, errors = "replace"):
201
+	"""
202
+	Convert unicode 'string' to <type str>, by default replacing
203
+	all invalid characters with '?' or raise an exception.
204
+	"""
205
+
206
+	if not encoding:
207
+		encoding = Config.Config().encoding
208
+
209
+	debug("DeUnicodising %r using %s" % (string, encoding))
210
+	if type(string) != unicode:
211
+		return str(string)
197 212
 	try:
198
-		return unicodise(string)
199
-	except UnicodeDecodeError:
200
-		return string
213
+		return string.encode(encoding, errors)
214
+	except UnicodeEncodeError:
215
+		raise UnicodeEncodeError("Conversion from unicode failed: %r" % string)
216
+
217
+def unicodise_safe(string, encoding = None):
218
+	"""
219
+	Convert 'string' to Unicode according to current encoding 
220
+	and replace all invalid characters with '?'
221
+	"""
222
+
223
+	return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?')
201 224
 
... ...
@@ -21,13 +21,8 @@ from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatt
21 21
 from logging import debug, info, warning, error
22 22
 from distutils.spawn import find_executable
23 23
 
24
-## Output UTF-8 in all cases
25
-encoding = locale.getpreferredencoding() or "utf-8"
26
-_stdout = codecs.getwriter(encoding)(sys.stdout, "replace")
27
-_stderr = codecs.getwriter(encoding)(sys.stderr, "replace")
28
-
29 24
 def output(message):
30
-	_stdout.write(message + "\n")
25
+	sys.stdout.write(message + "\n")
31 26
 
32 27
 def check_args_type(args, type, verbose_type):
33 28
 	for arg in args:
... ...
@@ -353,7 +348,7 @@ def cmd_object_get(args):
353 353
 
354 354
 		if destination == "-":
355 355
 			## stdout
356
-			dst_stream = sys.stdout
356
+			dst_stream = sys.__stdout__
357 357
 		else:
358 358
 			## File
359 359
 			try:
... ...
@@ -1121,7 +1116,10 @@ def main():
1121 1121
 	elif os.name == "nt" and os.getenv("USERPROFILE"):
1122 1122
 		config_file = os.path.join(os.getenv("USERPROFILE"), "Application Data", "s3cmd.ini")
1123 1123
 
1124
-	optparser.set_defaults(config=config_file)
1124
+	preferred_encoding = locale.getpreferredencoding() or "UTF-8"
1125
+
1126
+	optparser.set_defaults(encoding = preferred_encoding)
1127
+	optparser.set_defaults(config = config_file)
1125 1128
 	optparser.set_defaults(verbosity = default_verbosity)
1126 1129
 
1127 1130
 	optparser.add_option(      "--configure", dest="run_configure", action="store_true", help="Invoke interactive (re)configuration tool.")
... ...
@@ -1153,6 +1151,8 @@ def main():
1153 1153
 	optparser.add_option("-m", "--mime-type", dest="default_mime_type", type="mimetype", metavar="MIME/TYPE", help="Default MIME-type to be set for objects stored.")
1154 1154
 	optparser.add_option("-M", "--guess-mime-type", dest="guess_mime_type", action="store_true", help="Guess MIME-type of files by their extension. Falls back to default MIME-Type as specified by --mime-type option")
1155 1155
 
1156
+	optparser.add_option(      "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding)
1157
+
1156 1158
 	optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form.")
1157 1159
 
1158 1160
 	optparser.add_option(      "--progress", dest="progress_meter", action="store_true", help="Display progress meter (default on TTY).")
... ...
@@ -1175,7 +1175,7 @@ def main():
1175 1175
 	## debugging/verbose output for config file parser on request
1176 1176
 	logging.basicConfig(level=options.verbosity,
1177 1177
 	                    format='%(levelname)s: %(message)s',
1178
-	                    stream = _stderr)
1178
+	                    stream = sys.stderr)
1179 1179
 	
1180 1180
 	if options.show_version:
1181 1181
 		output(u"s3cmd version %s" % PkgInfo.version)
... ...
@@ -1207,9 +1207,6 @@ def main():
1207 1207
 	## Can be overridden by actual --(no-)progress parameter
1208 1208
 	cfg.update_option('progress_meter', sys.stdout.isatty())
1209 1209
 
1210
-	## We may need a way to display progress meter on STDERR or somewhere else
1211
-	Progress._stdout = _stdout
1212
-
1213 1210
 	## Unsupported features on Win32 platform
1214 1211
 	if os.name == "nt":
1215 1212
 		if cfg.preserve_attrs:
... ...
@@ -1229,6 +1226,10 @@ def main():
1229 1229
 			## Some Config() options are not settable from command line
1230 1230
 			pass
1231 1231
 
1232
+	## Set output and filesystem encoding for printing out filenames.
1233
+	sys.stdout = codecs.getwriter(cfg.encoding)(sys.stdout, "replace")
1234
+	sys.stderr = codecs.getwriter(cfg.encoding)(sys.stderr, "replace")
1235
+
1232 1236
 	## Process GLOB (shell wildcard style) excludes
1233 1237
 	if options.exclude is None:
1234 1238
 		options.exclude = []