Browse code

* S3/Utils.py: New function replace_nonprintables() * s3cmd: Filter local filenames through the above function to avoid problems with uploaded filenames containing invalid XML entities, eg &#08; * S3/S3.py: Warn if a non-printable char is passed to urlencode_string() - they should have been replaced earlier in the processing. * run-tests.py, TODO, NEWS: Updated. * testsuite/crappy-file-name.tar.gz: Tarball with a crappy-named file. Untar for the testsuite.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@394 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2009/05/28 22:59:03
Showing 8 changed files
... ...
@@ -1,5 +1,18 @@
1 1
 2009-05-29  Michal Ludvig  <michal@logix.cz>
2 2
 
3
+	* S3/Utils.py: New function replace_nonprintables()
4
+	* s3cmd: Filter local filenames through the above function
5
+	  to avoid problems with uploaded filenames containing invalid 
6
+	  XML entities, eg &#08; 
7
+	* S3/S3.py: Warn if a non-printable char is passed to
8
+	  urlencode_string() - they should have been replaced earlier 
9
+	  in the processing.
10
+	* run-tests.py, TODO, NEWS: Updated.
11
+	* testsuite/crappy-file-name.tar.gz: Tarball with a crappy-named
12
+	  file. Untar for the testsuite.
13
+
14
+2009-05-29  Michal Ludvig  <michal@logix.cz>
15
+
3 16
 	* testsuite/blahBlah/*: Added files needed for run-tests.py
4 17
 
5 18
 2009-05-28  Michal Ludvig  <michal@logix.cz>
... ...
@@ -8,6 +8,8 @@ s3cmd 1.0.0
8 8
 * Added --recursive support for [cp] and [mv], including
9 9
   multiple-source arguments, --include/--exclude, --dry-run, etc.
10 10
 * Added --exclude/--include and --dry-run for [del], [setacl].
11
+* Neutralise characters that are invalid in XML to avoid ExpatErrors.
12
+  http://boodebr.org/main/python/all-about-python-and-unicode
11 13
 
12 14
 s3cmd 0.9.9   -   2009-02-17
13 15
 ===========
... ...
@@ -344,19 +344,19 @@ class S3(object):
344 344
 					# systems.
345 345
 					#           [hope that sounds reassuring ;-)]
346 346
 			o = ord(c)
347
-			if (o <= 32 or		# Space and below
347
+			if (o < 0x20 or o == 0x7f):
348
+				error(u"Non-printable character 0x%02x in: %s" % (o, string))
349
+				error(u"Please report it to s3tools-bugs@lists.sourceforge.net")
350
+				encoded += replace_nonprintables(c)
351
+			elif (o == 0x20 or	# Space
348 352
 			    o == 0x22 or	# "
349 353
 			    o == 0x23 or	# #
350
-			    o == 0x25 or	# %
354
+			    o == 0x25 or	# % (escape character)
351 355
 			    o == 0x26 or	# &
352 356
 			    o == 0x2B or	# + (or it would become <space>)
353 357
 			    o == 0x3C or	# <
354 358
 			    o == 0x3E or	# >
355 359
 			    o == 0x3F or	# ?
356
-			    o == 0x5B or	# [
357
-			    o == 0x5C or	# \
358
-			    o == 0x5D or	# ]
359
-			    o == 0x5E or	# ^
360 360
 			    o == 0x60 or	# `
361 361
 			    o >= 123):   	# { and above, including >= 128 for UTF-8
362 362
 				encoded += "%%%02X" % o
... ...
@@ -256,6 +256,29 @@ def unicodise_safe(string, encoding = None):
256 256
 
257 257
 	return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?')
258 258
 
259
+def replace_nonprintables(string):
260
+	"""
261
+	replace_nonprintables(string)
262
+
263
+	Replaces all non-printable characters 'ch' in 'string'
264
+	where ord(ch) <= 31 with ^@, ^A, ... ^_ and ord(ch) == 127 with ^?
265
+	"""
266
+	new_string = ""
267
+	modified = 0
268
+	for c in string:
269
+		o = ord(c)
270
+		if (o <= 31):
271
+			new_string += "^" + chr(ord('@') + o)
272
+			modified += 1
273
+		elif (o == 127):
274
+			new_string += "^?"
275
+			modified += 1
276
+		else:
277
+			new_string += c
278
+	if modified:
279
+		warning("%d non-printable characters replaced in: %s" % (modified, new_string))
280
+	return new_string
281
+
259 282
 def sign_string(string_to_sign):
260 283
 	#debug("string_to_sign: %s" % string_to_sign)
261 284
 	signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip()
... ...
@@ -12,8 +12,6 @@ TODO list for s3cmd project
12 12
   - Option --mime-type should set mime type with 'cp' and 'mv'. 
13 13
     If possible --guess-mime-type should do as well.
14 14
   - Skip files that disappear during upload. Now it fails.
15
-  - Neutralise characters that are invalid in XML to avoid ExpatErrors.
16
-    http://boodebr.org/main/python/all-about-python-and-unicode
17 15
 
18 16
 - For 1.0.0
19 17
   - Add 'geturl' command, both Unicode and urlencoded output.
... ...
@@ -223,6 +223,8 @@ test_s3cmd("Buckets list", ["ls"],
223 223
 
224 224
 ## ====== Sync to S3
225 225
 test_s3cmd("Sync to S3", ['sync', 'testsuite/', 's3://s3cmd-autotest-1/xyz/', '--exclude', '.svn/*', '--exclude', '*.png', '--no-encrypt', '--exclude-from', 'testsuite/exclude.encodings' ],
226
+	must_find = [ "WARNING: 32 non-printable characters replaced in: crappy-file-name/non-printables ^A^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^^^_^? +-[\]^<>%%\"'#{}`&?.end",
227
+	              "stored as 's3://s3cmd-autotest-1/xyz/crappy-file-name/non-printables ^A^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^^^_^? +-[\\]^<>%%\"'#{}`&?.end'" ],
226 228
 	must_not_find_re = [ "\.svn/", "\.png$" ])
227 229
 
228 230
 if have_encoding:
... ...
@@ -638,6 +638,8 @@ def _get_filelist_local(local_uri):
638 638
 				## for now skip over
639 639
 				continue
640 640
 			relative_file = unicodise(os.path.join(rel_root, f))
641
+			if not cfg.verbatim:
642
+				relative_file = replace_nonprintables(relative_file)
641 643
 			if relative_file.startswith('./'):
642 644
 				relative_file = relative_file[2:]
643 645
 			sr = os.stat_result(os.lstat(full_name))
644 646
new file mode 100644
645 647
Binary files /dev/null and b/testsuite/crappy-file-name.tar.gz differ