Browse code

* s3cmd: Refactored cmd_sync() in preparation for remote->local sync.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@180 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2008/06/03 22:40:11
Showing 2 changed files
... ...
@@ -1,3 +1,8 @@
1
+2008-06-04  Michal Ludvig  <michal@logix.cz>
2
+
3
+	* s3cmd: Refactored cmd_sync() in preparation 
4
+	  for remote->local sync.
5
+
1 6
 2008-04-30  Michal Ludvig  <michal@logix.cz>
2 7
 
3 8
 	* s3db, S3/SimpleDB.py: Implemented almost full SimpleDB API.
... ...
@@ -298,52 +298,15 @@ def cmd_info(args):
298 298
 			else:
299 299
 				raise
300 300
 
301
-def cmd_sync(args):
302
-	def _build_attr_header(src):
303
-		attrs = {}
304
-		st = os.stat_result(os.stat(src))
305
-		for attr in cfg.preserve_attrs_list:
306
-			if attr == 'uname':
307
-				try:
308
-					val = pwd.getpwuid(st.st_uid).pw_name
309
-				except KeyError:
310
-					attr = "uid"
311
-					val = st.st_uid
312
-					warning("%s: Owner username not known. Storing UID=%d instead." % (src, val))
313
-			elif attr == 'gname':
314
-				try:
315
-					val = grp.getgrgid(st.st_gid).gr_name
316
-				except KeyError:
317
-					attr = "gid"
318
-					val = st.st_gid
319
-					warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
320
-			else:
321
-				val = getattr(st, 'st_' + attr)
322
-			attrs[attr] = val
323
-		result = ""
324
-		for k in attrs: result += "%s:%s/" % (k, attrs[k])
325
-		return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
326
-	src = args.pop(0)
327
-	if S3Uri(src).type != "file":
328
-		raise ParameterError("Source must be a local path instead of: %s" % src)
329
-	dst = args.pop(0)
330
-	if not dst.endswith('/'):
331
-		dst += "/"
332
-	dst_uri = S3Uri(dst)
333
-	if dst_uri.type != "s3":
334
-		raise ParameterError("Destination must be a S3 URI instead of: %s" % dst)
335
-	if (len(args)):
336
-		raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])
337
-
338
-	s3 = S3(Config())
339
-
301
+def _get_filelist_local(local_uri):
340 302
 	output("Compiling list of local files...")
341
-	if os.path.isdir(src):
342
-		loc_base = os.path.join(src, "")
343
-		filelist = os.walk(src)
303
+	local_path = local_uri.path()
304
+	if os.path.isdir(local_path):
305
+		loc_base = os.path.join(local_path, "")
306
+		filelist = os.walk(local_path)
344 307
 	else:
345 308
 		loc_base = "./"
346
-		filelist = [( '.', [], [src] )]
309
+		filelist = [( '.', [], [local_path] )]
347 310
 	loc_base_len = len(loc_base)
348 311
 	loc_list = {}
349 312
 	for root, dirs, files in filelist:
... ...
@@ -364,15 +327,17 @@ def cmd_sync(args):
364 364
 				'mtime' : sr.st_mtime,
365 365
 				## TODO: Possibly more to save here...
366 366
 			}
367
-	loc_count = len(loc_list)
368
-	
367
+	return loc_list
368
+
369
+def _get_filelist_remote(remote_uri):
369 370
 	output("Retrieving list of remote files...")
370
-	response = s3.bucket_list(dst_uri.bucket(), prefix = dst_uri.object())
371 371
 
372
-	rem_base = dst_uri.object()
372
+	s3 = S3(Config())
373
+	response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object())
374
+
375
+	rem_base = remote_uri.object()
373 376
 	rem_base_len = len(rem_base)
374 377
 	rem_list = {}
375
-	rem_count = len(response['list'])
376 378
 	for object in response['list']:
377 379
 		key = object['Key'][rem_base_len:].encode('utf-8')
378 380
 		rem_list[key] = { 
... ...
@@ -381,32 +346,85 @@ def cmd_sync(args):
381 381
 			'md5' : object['ETag'][1:-1],
382 382
 			'object_key' : object['Key'].encode('utf-8'),
383 383
 		}
384
-	output("Found %d local files, %d remote files" % (loc_count, rem_count))
385
-
384
+	return rem_list
385
+	
386
+def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
386 387
 	output("Verifying checksums...")
387
-	for file in loc_list.keys():
388
+	exists_list = {}
389
+	for file in src_list.keys():
388 390
 		debug("Checking %s ..." % file)
389
-		if rem_list.has_key(file):
391
+		if dst_list.has_key(file):
390 392
 			debug("%s exists in remote list" % file)
391 393
 			## Check size first
392
-			if rem_list[file]['size'] == loc_list[file]['size']:
393
-				debug("%s same size: %s" % (file, rem_list[file]['size']))
394
+			if dst_list[file]['size'] == src_list[file]['size']:
395
+				debug("%s same size: %s" % (file, dst_list[file]['size']))
394 396
 				## ... same size, check MD5
395
-				loc_md5 = Utils.hash_file_md5(loc_list[file]['full_name'])
396
-				if loc_md5 == rem_list[file]['md5']:
397
-					debug("%s md5 matches: %s" % (file, rem_list[file]['md5']))
397
+				if src_is_local_and_dst_is_remote:
398
+					src_md5 = Utils.hash_file_md5(src_list[file]['full_name'])
399
+					dst_md5 = dst_list[file]['md5']
400
+				else:
401
+					src_md5 = src_list[file]['md5']
402
+					dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name'])
403
+				if src_md5 == dst_md5:
404
+					debug("%s md5 matches: %s" % (file, dst_md5))
398 405
 					## Checksums are the same.
399
-					## Remove from local-list, all that is left there will be uploaded
400
-					debug("%s removed from local list - upload not needed" % file)
401
-					del(loc_list[file])
406
+					## Remove from source-list, all that is left there will be transferred
407
+					debug("%s removed from source list - transfer not needed" % file)
408
+					exists_list[file] = src_list[file]
409
+					del(src_list[file])
402 410
 				else:
403
-					debug("! %s md5 mismatch: local=%s remote=%s" % (file, loc_md5, rem_list[file]['md5']))
411
+					debug("! %s md5 mismatch: src=%s dst=%s" % (file, src_md5, dst_md5))
404 412
 			else:
405
-				debug("! %s size mismatch: local=%s remote=%s" % (file, loc_list[file]['size'], rem_list[file]['size']))
413
+				debug("! %s size mismatch: src=%s dst=%s" % (file, src_list[file]['size'], dst_list[file]['size']))
406 414
                         
407
-			## Remove from remote-list, all that is left there will be deleted
408
-			debug("%s removed from remote list" % file)
409
-			del(rem_list[file])
415
+			## Remove from destination-list, all that is left there will be deleted
416
+			debug("%s removed from destination list" % file)
417
+			del(dst_list[file])
418
+	return src_list, dst_list, exists_list
419
+
420
+def cmd_sync_remote2local(src, dst):
421
+	raise NotImplementedError("Remote->Local sync is not yet implemented.") 
422
+
423
+def cmd_sync_local2remote(src, dst):
424
+	def _build_attr_header(src):
425
+		attrs = {}
426
+		st = os.stat_result(os.stat(src))
427
+		for attr in cfg.preserve_attrs_list:
428
+			if attr == 'uname':
429
+				try:
430
+					val = pwd.getpwuid(st.st_uid).pw_name
431
+				except KeyError:
432
+					attr = "uid"
433
+					val = st.st_uid
434
+					warning("%s: Owner username not known. Storing UID=%d instead." % (src, val))
435
+			elif attr == 'gname':
436
+				try:
437
+					val = grp.getgrgid(st.st_gid).gr_name
438
+				except KeyError:
439
+					attr = "gid"
440
+					val = st.st_gid
441
+					warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
442
+			else:
443
+				val = getattr(st, 'st_' + attr)
444
+			attrs[attr] = val
445
+		result = ""
446
+		for k in attrs: result += "%s:%s/" % (k, attrs[k])
447
+		return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
448
+
449
+	s3 = S3(Config())
450
+
451
+	src_uri = S3Uri(src)
452
+	dst_uri = S3Uri(dst)
453
+
454
+	loc_list = _get_filelist_local(src_uri)
455
+	loc_count = len(loc_list)
456
+	
457
+	rem_list = _get_filelist_remote(dst_uri)
458
+	rem_count = len(rem_list)
459
+
460
+	output("Found %d local files, %d remote files" % (loc_count, rem_count))
461
+
462
+	_compare_filelists(loc_list, rem_list, True)
410 463
 
411 464
 	output("Summary: %d local files to upload, %d remote files to delete" % (len(loc_list), len(rem_list)))
412 465
 	for file in rem_list:
... ...
@@ -449,6 +467,20 @@ def cmd_sync(args):
449 449
 	output("Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s" % 
450 450
 	       (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]))
451 451
 
452
+def cmd_sync(args):
453
+	src = args.pop(0)
454
+	dst = args.pop(0)
455
+	if (len(args)):
456
+		raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])
457
+
458
+	if not dst.endswith('/'):
459
+		dst += "/"
460
+
461
+	if S3Uri(src).type == "file" and S3Uri(dst).type == "s3":
462
+		return cmd_sync_local2remote(src, dst)
463
+	if S3Uri(src).type == "s3" and S3Uri(dst).type == "file":
464
+		return cmd_sync_remote2local(src, dst)
465
+	
452 466
 def resolve_list(lst, args):
453 467
 	retval = []
454 468
 	for item in lst: