Browse code

2007-08-13 Michal Ludvig <michal@logix.cz>

* S3/S3.py: Added function urlencode_string() that encodes
non-ascii characters in object name before sending it to S3.



git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@134 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2007/08/13 20:41:05
Showing 2 changed files
... ...
@@ -1,5 +1,10 @@
1 1
 2007-08-13  Michal Ludvig  <michal@logix.cz>
2 2
 
3
+	* S3/S3.py: Added function urlencode_string() that encodes
4
+	  non-ascii characters in object name before sending it to S3.
5
+
6
+2007-08-13  Michal Ludvig  <michal@logix.cz>
7
+
3 8
 	* README: Updated Amazon S3 pricing overview
4 9
 
5 10
 2007-08-13  Michal Ludvig  <michal@logix.cz>
... ...
@@ -180,12 +180,49 @@ class S3(object):
180 180
 		return self.object_delete(uri.bucket(), uri.object())
181 181
 
182 182
 	## Low level methods
183
+	def urlencode_string(self, string):
184
+		encoded = ""
185
+		## List of characters that must be escaped for S3
186
+		## Haven't found this in any official docs
187
+		## but my tests show it's more less correct.
188
+		## If you start getting InvalidSignature errors
189
+		## from S3 check the error headers returned
190
+		## from S3 to see whether the list hasn't
191
+		## changed.
192
+		for c in string:	# I'm not sure how to know in what encoding 
193
+					# 'object' is. Apparently "type(object)==str"
194
+					# but the contents is a string of unicode
195
+					# bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'
196
+					# Don't know what it will do on non-utf8 
197
+					# systems.
198
+					#           [hope that sounds reassuring ;-)]
199
+			o = ord(c)
200
+			if (o <= 32 or		# Space and below
201
+			    o == 0x22 or	# "
202
+			    o == 0x23 or	# #
203
+			    o == 0x25 or	# %
204
+			    o == 0x2B or	# + (or it would become <space>)
205
+			    o == 0x3C or	# <
206
+			    o == 0x3E or	# >
207
+			    o == 0x3F or	# ?
208
+			    o == 0x5B or	# [
209
+			    o == 0x5C or	# \
210
+			    o == 0x5D or	# ]
211
+			    o == 0x5E or	# ^
212
+			    o == 0x60 or	# `
213
+			    o >= 123):   	# { and above, including >= 128 for UTF-8
214
+				encoded += "%%%02X" % o
215
+			else:
216
+				encoded += c
217
+		debug("String '%s' encoded to '%s'" % (string, encoded))
218
+		return encoded
219
+
183 220
 	def create_request(self, operation, bucket = None, object = None, headers = None, **params):
184 221
 		resource = "/"
185 222
 		if bucket:
186 223
 			resource += str(bucket)
187 224
 			if object:
188
-				resource += "/"+str(object)
225
+				resource += "/" + self.urlencode_string(object)
189 226
 
190 227
 		if not headers:
191 228
 			headers = SortedDict()