git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@394 830e0280-6d2a-0410-9c65-932aecc39d9d
Michal Ludvig authored on 2009/05/28 22:59:03... | ... |
@@ -1,5 +1,18 @@ |
1 | 1 |
2009-05-29 Michal Ludvig <michal@logix.cz> |
2 | 2 |
|
3 |
+ * S3/Utils.py: New function replace_nonprintables() |
|
4 |
+ * s3cmd: Filter local filenames through the above function |
|
5 |
+ to avoid problems with uploaded filenames containing invalid |
|
6 |
+ XML entities, eg &#08; |
|
7 |
+ * S3/S3.py: Warn if a non-printables char is passed to |
|
8 |
+ urlencode_string() - they should have been replaced earlier |
|
9 |
+ in the processing. |
|
10 |
+ * run-tests.py, TODO, NEWS: Updated. |
|
11 |
+ * testsuite/crappy-file-name.tar.gz: Tarball with a crappy-named |
|
12 |
+ file. Untar for the testsuite. |
|
13 |
+ |
|
14 |
+2009-05-29 Michal Ludvig <michal@logix.cz> |
|
15 |
+ |
|
3 | 16 |
* testsuite/blahBlah/*: Added files needed for run-tests.py |
4 | 17 |
|
5 | 18 |
2009-05-28 Michal Ludvig <michal@logix.cz> |
... | ... |
@@ -8,6 +8,8 @@ s3cmd 1.0.0 |
8 | 8 |
* Added --recursive support for [cp] and [mv], including |
9 | 9 |
multiple-source arguments, --include/--exclude, --dry-run, etc. |
10 | 10 |
* Added --exclude/--include and --dry-run for [del], [setacl]. |
11 |
+* Neutralise characters that are invalid in XML to avoid ExpatErrors. |
|
12 |
+ http://boodebr.org/main/python/all-about-python-and-unicode |
|
11 | 13 |
|
12 | 14 |
s3cmd 0.9.9 - 2009-02-17 |
13 | 15 |
=========== |
... | ... |
@@ -344,19 +344,19 @@ class S3(object): |
344 | 344 |
# systems. |
345 | 345 |
# [hope that sounds reassuring ;-)] |
346 | 346 |
o = ord(c) |
347 |
- if (o <= 32 or # Space and below |
|
347 |
+ if (o < 0x20 or o == 0x7f): |
|
348 |
+ error(u"Non-printable character 0x%02x in: %s" % (o, string)) |
|
349 |
+ error(u"Please report it to s3tools-bugs@lists.sourceforge.net") |
|
350 |
+ encoded += replace_nonprintables(c) |
|
351 |
+ elif (o == 0x20 or # Space |
|
348 | 352 |
o == 0x22 or # " |
349 | 353 |
o == 0x23 or # # |
350 |
- o == 0x25 or # % |
|
354 |
+ o == 0x25 or # % (escape character) |
|
351 | 355 |
o == 0x26 or # & |
352 | 356 |
o == 0x2B or # + (or it would become <space>) |
353 | 357 |
o == 0x3C or # < |
354 | 358 |
o == 0x3E or # > |
355 | 359 |
o == 0x3F or # ? |
356 |
- o == 0x5B or # [ |
|
357 |
- o == 0x5C or # \ |
|
358 |
- o == 0x5D or # ] |
|
359 |
- o == 0x5E or # ^ |
|
360 | 360 |
o == 0x60 or # ` |
361 | 361 |
o >= 123): # { and above, including >= 128 for UTF-8 |
362 | 362 |
encoded += "%%%02X" % o |
... | ... |
@@ -256,6 +256,29 @@ def unicodise_safe(string, encoding = None): |
256 | 256 |
|
257 | 257 |
return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?') |
258 | 258 |
|
259 |
+def replace_nonprintables(string): |
|
260 |
+ """ |
|
261 |
+ replace_nonprintables(string) |
|
262 |
+ |
|
263 |
+ Replaces all non-printable characters 'ch' in 'string' |
|
264 |
+ where ord(ch) <= 31 with ^@, ^A, ... ^_ and ord(ch) == 127 with ^? |
|
265 |
+ """ |
|
266 |
+ new_string = "" |
|
267 |
+ modified = 0 |
|
268 |
+ for c in string: |
|
269 |
+ o = ord(c) |
|
270 |
+ if (o <= 31): |
|
271 |
+ new_string += "^" + chr(ord('@') + o) |
|
272 |
+ modified += 1 |
|
273 |
+ elif (o == 127): |
|
274 |
+ new_string += "^?" |
|
275 |
+ modified += 1 |
|
276 |
+ else: |
|
277 |
+ new_string += c |
|
278 |
+ if modified: |
|
279 |
+ warning("%d non-printable characters replaced in: %s" % (modified, new_string)) |
|
280 |
+ return new_string |
|
281 |
+ |
|
259 | 282 |
def sign_string(string_to_sign): |
260 | 283 |
#debug("string_to_sign: %s" % string_to_sign) |
261 | 284 |
signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip() |
... | ... |
@@ -12,8 +12,6 @@ TODO list for s3cmd project |
12 | 12 |
- Option --mime-type should set mime type with 'cp' and 'mv'. |
13 | 13 |
If possible --guess-mime-type should do as well. |
14 | 14 |
- Skip files that disappear during upload. Now it fails. |
15 |
- - Neutralise characters that are invalid in XML to avoid ExpatErrors. |
|
16 |
- http://boodebr.org/main/python/all-about-python-and-unicode |
|
17 | 15 |
|
18 | 16 |
- For 1.0.0 |
19 | 17 |
- Add 'geturl' command, both Unicode and urlencoded output. |
... | ... |
@@ -223,6 +223,8 @@ test_s3cmd("Buckets list", ["ls"], |
223 | 223 |
|
224 | 224 |
## ====== Sync to S3 |
225 | 225 |
test_s3cmd("Sync to S3", ['sync', 'testsuite/', 's3://s3cmd-autotest-1/xyz/', '--exclude', '.svn/*', '--exclude', '*.png', '--no-encrypt', '--exclude-from', 'testsuite/exclude.encodings' ], |
226 |
+ must_find = [ "WARNING: 32 non-printable characters replaced in: crappy-file-name/non-printables ^A^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^^^_^? +-[\]^<>%%\"'#{}`&?.end", |
|
227 |
+ "stored as 's3://s3cmd-autotest-1/xyz/crappy-file-name/non-printables ^A^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^^^_^? +-[\\]^<>%%\"'#{}`&?.end'" ], |
|
226 | 228 |
must_not_find_re = [ "\.svn/", "\.png$" ]) |
227 | 229 |
|
228 | 230 |
if have_encoding: |
... | ... |
@@ -638,6 +638,8 @@ def _get_filelist_local(local_uri): |
638 | 638 |
## for now skip over |
639 | 639 |
continue |
640 | 640 |
relative_file = unicodise(os.path.join(rel_root, f)) |
641 |
+ if not cfg.verbatim: |
|
642 |
+ relative_file = replace_nonprintables(relative_file) |
|
641 | 643 |
if relative_file.startswith('./'): |
642 | 644 |
relative_file = relative_file[2:] |
643 | 645 |
sr = os.stat_result(os.lstat(full_name)) |