Browse code

* s3cmd: Added support for remote-to-remote sync. (Based on patch from Sundar Raman - thanks!) * run-tests.py: Testsuite for the above.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@453 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2010/11/12 20:04:10
Showing 3 changed files
... ...
@@ -1,3 +1,9 @@
1
+2010-11-13  Michal Ludvig  <mludvig@logix.net.nz>
2
+
3
+	* s3cmd: Added support for remote-to-remote sync.
4
+	  (Based on patch from Sundar Raman - thanks!)
5
+	* run-tests.py: Testsuite for the above.
6
+
1 7
 2010-11-12  Michal Ludvig  <mludvig@logix.net.nz>
2 8
 
3 9
 	* s3cmd: Fixed typo in "s3cmd du" error path.
... ...
@@ -423,11 +423,28 @@ test_s3cmd("Copy between buckets", ['cp', '%s/xyz/etc2/Logo.PNG' % pbucket(1), '
423 423
 	must_find = [ "File %s/xyz/etc2/Logo.PNG copied to %s/xyz/etc2/logo.png" % (pbucket(1), pbucket(3)) ])
424 424
 
425 425
 ## ====== Recursive copy
426
-test_s3cmd("Recursive copy, set ACL", ['cp', '-r', '--acl-public', '%s/xyz/' % pbucket(1), '%s/copy' % pbucket(2), '--exclude', 'demo/*', '--exclude', 'non-printables*'],
426
+test_s3cmd("Recursive copy, set ACL", ['cp', '-r', '--acl-public', '%s/xyz/' % pbucket(1), '%s/copy' % pbucket(2), '--exclude', 'demo/dir?/*.txt', '--exclude', 'non-printables*'],
427 427
 	must_find = [ "File %s/xyz/etc2/Logo.PNG copied to %s/copy/etc2/Logo.PNG" % (pbucket(1), pbucket(2)),
428 428
 	              "File %s/xyz/blahBlah/Blah.txt copied to %s/copy/blahBlah/Blah.txt" % (pbucket(1), pbucket(2)),
429 429
 	              "File %s/xyz/blahBlah/blah.txt copied to %s/copy/blahBlah/blah.txt" % (pbucket(1), pbucket(2)) ],
430
-	must_not_find = [ "demo/" ])
430
+	must_not_find = [ "demo/dir1/file1-1.txt" ])
431
+
432
+## ====== Verify ACL and MIME type
433
+test_s3cmd("Verify ACL and MIME type", ['info', '%s/copy/etc2/Logo.PNG' % pbucket(2) ],
434
+	must_find_re = [ "MIME type:.*image/png", 
435
+	                 "ACL:.*\*anon\*: READ",
436
+					 "URL:.*http://%s.s3.amazonaws.com/copy/etc2/Logo.PNG" % bucket(2) ])
437
+
438
+## ====== Rename within S3
439
+test_s3cmd("Rename within S3", ['mv', '%s/copy/etc2/Logo.PNG' % pbucket(2), '%s/copy/etc/logo.png' % pbucket(2)],
440
+	must_find = [ 'File %s/copy/etc2/Logo.PNG moved to %s/copy/etc/logo.png' % (pbucket(2), pbucket(2))])
441
+
442
+## ====== Sync between buckets
443
+test_s3cmd("Sync remote2remote", ['sync', '%s/xyz/' % pbucket(1), '%s/copy/' % pbucket(2), '--delete-removed', '--exclude', 'non-printables*'],
444
+	must_find = [ "File %s/xyz/demo/dir1/file1-1.txt copied to %s/copy/demo/dir1/file1-1.txt" % (pbucket(1), pbucket(2)),
445
+	              "File %s/xyz/etc2/Logo.PNG copied to %s/copy/etc2/Logo.PNG" % (pbucket(1), pbucket(2)),
446
+	              "deleted: '%s/copy/etc/logo.png'" % pbucket(2) ],
447
+	must_not_find = [ "blah.txt" ])
431 448
 
432 449
 ## ====== Don't Put symbolic link
433 450
 test_s3cmd("Don't put symbolic links", ['put', 'testsuite/etc/linked1.png', 's3://%s/xyz/' % bucket(1),],
... ...
@@ -445,12 +462,6 @@ test_s3cmd("Sync symbolic links", ['sync', 'testsuite/', 's3://%s/xyz/' % bucket
445 445
                                "etc/brokenlink.png"],
446 446
            )
447 447
 
448
-## ====== Verify ACL and MIME type
449
-test_s3cmd("Verify ACL and MIME type", ['info', '%s/copy/etc2/Logo.PNG' % pbucket(2) ],
450
-	must_find_re = [ "MIME type:.*image/png", 
451
-	                 "ACL:.*\*anon\*: READ",
452
-					 "URL:.*http://%s.s3.amazonaws.com/copy/etc2/Logo.PNG" % bucket(2) ])
453
-
454 448
 ## ====== Multi source move
455 449
 test_s3cmd("Multi-source move", ['mv', '-r', '%s/copy/blahBlah/Blah.txt' % pbucket(2), '%s/copy/etc/' % pbucket(2), '%s/moved/' % pbucket(2)],
456 450
 	must_find = [ "File %s/copy/blahBlah/Blah.txt moved to %s/moved/Blah.txt" % (pbucket(2), pbucket(2)),
... ...
@@ -770,12 +770,18 @@ def _filelist_filter_exclude_include(src_list):
770 770
 			debug(u"PASS: %s" % (file))
771 771
 	return src_list, exclude_list
772 772
 
773
-def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
773
+def _compare_filelists(src_list, dst_list, src_remote, dst_remote):
774
+	def __direction_str(is_remote):
775
+		return is_remote and "remote" or "local"
776
+
777
+	# We don't support local->local sync, use 'rsync' or something like that instead ;-)
778
+	assert(not(src_remote == False and dst_remote == False))
779
+
774 780
 	info(u"Verifying attributes...")
775 781
 	cfg = Config()
776 782
 	exists_list = SortedDict(ignore_case = False)
777 783
 
778
-	debug("Comparing filelists (src_is_local_and_dst_is_remote=%s)" % src_is_local_and_dst_is_remote)
784
+	debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
779 785
 	debug("src_list.keys: %s" % src_list.keys())
780 786
 	debug("dst_list.keys: %s" % dst_list.keys())
781 787
 
... ...
@@ -799,12 +805,16 @@ def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
799 799
 			
800 800
 			if attribs_match and 'md5' in cfg.sync_checks:
801 801
 				## ... same size, check MD5
802
-				if src_is_local_and_dst_is_remote:
802
+				if src_remote == False and dst_remote == True:
803 803
 					src_md5 = Utils.hash_file_md5(src_list[file]['full_name'])
804 804
 					dst_md5 = dst_list[file]['md5']
805
-				else:
805
+				elif src_remote == True and dst_remote == False:
806 806
 					src_md5 = src_list[file]['md5']
807 807
 					dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name'])
808
+				elif src_remote == True and dst_remote == True:
809
+					src_md5 = src_list[file]['md5']
810
+					dst_md5 = dst_list[file]['md5']
811
+
808 812
 				if src_md5 != dst_md5:
809 813
 					## Checksums are different.
810 814
 					attribs_match = False
... ...
@@ -821,6 +831,80 @@ def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
821 821
 
822 822
 	return src_list, dst_list, exists_list
823 823
 
824
+def cmd_sync_remote2remote(args):
825
+	s3 = S3(Config())
826
+
827
+	# Normalise s3://uri (e.g. assert trailing slash)
828
+	destination_base = unicode(S3Uri(args[-1]))
829
+
830
+	src_list = fetch_remote_list(args[:-1], recursive = True, require_attribs = True)
831
+	dst_list = fetch_remote_list(destination_base, recursive = True, require_attribs = True)
832
+	 
833
+	src_count = len(src_list)
834
+	dst_count = len(dst_list)
835
+
836
+	info(u"Found %d source files, %d destination files" % (src_count, dst_count))
837
+
838
+	src_list, exclude_list = _filelist_filter_exclude_include(src_list)
839
+
840
+	src_list, dst_list, existing_list = _compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True)
841
+
842
+	src_count = len(src_list)
843
+	dst_count = len(dst_list)
844
+
845
+	print(u"Summary: %d source files to copy, %d files at destination to delete" % (src_count, dst_count))
846
+
847
+	if src_count > 0:
848
+		### Populate 'target_uri' only if we've got something to sync from src to dst
849
+		for key in src_list:
850
+			src_list[key]['target_uri'] = destination_base + key
851
+
852
+	if cfg.dry_run:
853
+		for key in exclude_list:
854
+			output(u"exclude: %s" % unicodise(key))
855
+		if cfg.delete_removed:
856
+			for key in dst_list:
857
+				output(u"delete: %s" % dst_list[key]['object_uri_str'])
858
+		for key in src_list:
859
+			output(u"Sync: %s -> %s" % (src_list[key]['object_uri_str'], src_list[key]['target_uri']))
860
+		warning(u"Exitting now because of --dry-run")
861
+		return
862
+
863
+	# Delete items in destination that are not in source
864
+	if cfg.delete_removed:
865
+		if cfg.dry_run:
866
+			for key in dst_list:
867
+				output(u"delete: %s" % dst_list[key]['object_uri_str'])
868
+		else:
869
+			for key in dst_list:
870
+				uri = S3Uri(dst_list[key]['object_uri_str'])
871
+				s3.object_delete(uri)
872
+				output(u"deleted: '%s'" % uri)
873
+	
874
+	# Perform the synchronization of files
875
+	timestamp_start = time.time()
876
+	seq = 0
877
+	file_list = src_list.keys()
878
+	file_list.sort()
879
+	for file in file_list:
880
+		seq += 1
881
+		item = src_list[file]
882
+		src_uri = S3Uri(item['object_uri_str'])
883
+		dst_uri = S3Uri(item['target_uri'])
884
+		seq_label = "[%d of %d]" % (seq, src_count)
885
+		extra_headers = copy(cfg.extra_headers)
886
+		try:
887
+			response = s3.object_copy(src_uri, dst_uri, extra_headers)
888
+			output("File %(src)s copied to %(dst)s" % { "src" : src_uri, "dst" : dst_uri })
889
+		except S3Error, e:
890
+			error("File %(src)s could not be copied: %(e)s" % { "src" : src_uri, "e" : e })
891
+	total_elapsed = time.time() - timestamp_start
892
+	outstr = "Done. Copied %d files in %0.1f seconds, %0.2f files/s" % (seq, total_elapsed, seq/total_elapsed)
893
+	if seq > 0:
894
+		output(outstr)
895
+	else:
896
+		info(outstr)
897
+
824 898
 def cmd_sync_remote2local(args):
825 899
 	def _parse_attrs_header(attrs_header):
826 900
 		attrs = {}
... ...
@@ -842,7 +926,7 @@ def cmd_sync_remote2local(args):
842 842
 
843 843
 	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
844 844
 
845
-	remote_list, local_list, existing_list = _compare_filelists(remote_list, local_list, False)
845
+	remote_list, local_list, existing_list = _compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False)
846 846
 
847 847
 	local_count = len(local_list)
848 848
 	remote_count = len(remote_list)
... ...
@@ -1031,7 +1115,7 @@ def cmd_sync_local2remote(args):
1031 1031
 		# Flush remote_list, by the way
1032 1032
 		remote_list = { local_list.keys()[0] : remote_list_entry }
1033 1033
 
1034
-	local_list, remote_list, existing_list = _compare_filelists(local_list, remote_list, True)
1034
+	local_list, remote_list, existing_list = _compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True)
1035 1035
 
1036 1036
 	local_count = len(local_list)
1037 1037
 	remote_count = len(remote_list)
... ...
@@ -1118,6 +1202,8 @@ def cmd_sync(args):
1118 1118
 		return cmd_sync_local2remote(args)
1119 1119
 	if S3Uri(args[0]).type == "s3" and S3Uri(args[-1]).type == "file":
1120 1120
 		return cmd_sync_remote2local(args)
1121
+	if S3Uri(args[0]).type == "s3" and S3Uri(args[-1]).type == "s3":
1122
+		return cmd_sync_remote2remote(args)
1121 1123
 	raise ParameterError("Invalid source/destination: '%s'" % "' '".join(args))
1122 1124
 
1123 1125
 def cmd_setacl(args):