Browse code

* s3cmd: Support for multiple sources in 'get' command.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@285 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2008/12/11 15:17:53
Showing 3 changed files
... ...
@@ -1,3 +1,7 @@
1
+2008-12-11  Michal Ludvig  <michal@logix.cz>
2
+
3
+	* s3cmd: Support for multiple sources in 'get' command.
4
+
1 5
 2008-12-10  Michal Ludvig  <michal@logix.cz>
2 6
 
3 7
 	* TODO: Updated list.
... ...
@@ -42,6 +42,9 @@ class S3Uri(object):
42 42
 	def public_url(self):
43 43
 		raise ValueError("This S3 URI does not have Anonymous URL representation")
44 44
 
45
+	def basename(self):
46
+		return self.__unicode__().split("/")[-1]
47
+
45 48
 class S3UriS3(S3Uri):
46 49
 	type = "s3"
47 50
 	_re = re.compile("^s3://([^/]+)/?(.*)", re.IGNORECASE)
... ...
@@ -231,43 +231,87 @@ def cmd_object_put(args):
231 231
 			os.remove(real_filename)
232 232
 
233 233
 def cmd_object_get(args):
234
-	s3 = S3(Config())
234
+	cfg = Config()
235
+	s3 = S3(cfg)
235 236
 
236
-	if not S3Uri(args[0]).type == 's3':
237
-		raise ParameterError("Expecting S3 URI instead of '%s'" % args[0])
237
+	## Check arguments:
238
+	## if not --recursive:
239
+	##   - first N arguments must be S3Uri
240
+	##   - if the last one is S3 make current dir the destination_base
241
+	##   - if the last one is a directory:
242
+	##       - take all 'basenames' of the remote objects and
243
+	##         make the destination name be 'destination_base'+'basename'
244
+	##   - if the last one is a file or does not exist:
245
+	##       - if the number of sources (N, above) == 1 treat it
246
+	##         as a filename and save the object there.
247
+	##       - if there are more sources -> Error
248
+	## if --recursive:
249
+	##   - first N arguments must be S3Uri
250
+	##       - for each Uri get a list of remote objects with that Uri as a prefix
251
+	##       - apply exclude/include rules
252
+	##       - each list item will have MD5sum, Timestamp and pointer to S3Uri
253
+	##         used as a prefix.
254
+	##   - the last arg may be a local directory - destination_base
255
+	##   - if the last one is S3 make current dir the destination_base
256
+	##   - if the last one doesn't exist check remote list:
257
+	##       - if there is only one item and its_prefix==its_name 
258
+	##         download that item to the name given in last arg.
259
+	##       - if there are more remote items use the last arg as a destination_base
260
+	##         and try to create the directory (incl. all parents).
261
+	##
262
+	## In both cases we end up with a list mapping remote object names (keys) to local file names.
263
+
264
+	## Each item will contain the following attributes
265
+	# 'remote_uri' 'local_filename' 'remote_label' 'local_label'
266
+	download_list = [ ]
267
+
268
+	remote_uris = []
269
+
270
+	if len(args) == 0:
271
+		raise ParameterError("Nothing to download. Expecting S3 URI.")
272
+
273
+	if S3Uri(args[-1]).type != 's3':
274
+		destination_base = args.pop()
275
+	else:
276
+		destination_base = "."
238 277
 
239
-	destination_dir = None
240
-	destination_file = None
241
-	if len(args) > 1:
242
-		if S3Uri(args[-1]).type == 's3':
243
-			# all S3, use object names to local dir
244
-			check_args_type(args, type="s3", verbose_type="S3 URI")	# May raise ParameterError
245
-		else:
246
-			if (len(args) > 2):
247
-				# last must be dir, all preceding S3
248
-				if not os.path.isdir(args[-1]):
249
-					raise ParameterError("Last parameter must be a directory")
250
-				destination_dir = args.pop()
251
-				check_args_type(args, type="s3", verbose_type="S3 URI")	# May raise ParameterError
252
-			else:
253
-				# last must be a dir or a filename
254
-				if os.path.isdir(args[-1]):
255
-					destination_dir = args.pop()
256
-				else:
257
-					destination_file = args.pop()
278
+	if len(args) == 0:
279
+		raise ParameterError("Nothing to download. Expecting S3 URI.")
258 280
 
259
-	while (len(args)):
260
-		uri_arg = args.pop(0)
261
-		uri = S3Uri(uri_arg)
281
+	for arg in args:
282
+		uri = S3Uri(arg)
283
+		if not uri.type == 's3':
284
+			raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
285
+		remote_uris.append(uri)
262 286
 
263
-		start_position = 0
264
-		if destination_file:
265
-			destination = destination_file
266
-		elif destination_dir:
267
-			destination = destination_dir + "/" + uri.object()
287
+	if cfg.recursive:
288
+		raise NotImplementedError("Recursive get is not yet implemented")
289
+	else:
290
+		remote_keys = []
291
+		if not os.path.isdir(destination_base) or destination_base == '-':
292
+			if len(remote_uris) > 1:
293
+				raise ParameterError("Destination must be a directory when downloading multiple sources.")
294
+			download_item = {
295
+				'remote_uri' : remote_uris[0],
296
+				'local_filename' : destination_base
297
+			}
298
+			remote_keys.append(download_item)
268 299
 		else:
269
-			# By default the destination filename is the object name
270
-			destination = uri.object()
300
+			if os.path.isdir(destination_base) and destination_base[-1] != os.path.sep:
301
+				destination_base += os.path.sep
302
+			for uri in remote_uris:
303
+				download_item = {
304
+					'remote_uri' : uri,
305
+					'local_filename' : destination_base + uri.basename(),
306
+				}
307
+				remote_keys.append(download_item)
308
+
309
+	for item in remote_keys:
310
+		uri = item['remote_uri']
311
+		destination = item['local_filename']
312
+
313
+		start_position = 0
314
+
271 315
 		if destination == "-":
272 316
 			## stdout
273 317
 			dst_stream = sys.stdout
... ...
@@ -418,7 +462,7 @@ def _get_filelist_local(local_uri):
418 418
 	return loc_list
419 419
 
420 420
 def _get_filelist_remote(remote_uri):
421
-	info("Retrieving list of remote files...")
421
+	info("Retrieving list of remote files for %s ..." % remote_uri)
422 422
 
423 423
 	s3 = S3(Config())
424 424
 	response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = True)
... ...
@@ -430,12 +474,12 @@ def _get_filelist_remote(remote_uri):
430 430
 		key = object['Key'][rem_base_len:]
431 431
 		rem_list[key] = { 
432 432
 			'size' : int(object['Size']),
433
-			# 'mtime' : dateS3toUnix(object['LastModified']), ## That's upload time, not our lastmod time :-(
433
+			'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
434 434
 			'md5' : object['ETag'][1:-1],
435
-			'object_key' : object['Key']
435
+			'object_key' : object['Key'],
436 436
 		}
437 437
 	return rem_list
438
-	
438
+
439 439
 def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
440 440
 	info("Verifying checksums...")
441 441
 	cfg = Config()