Browse code

Handle local hardlink and copy failures by downloading the objects

During a remote-to-local sync, we try to hardlink destination
files that already have a local copy somewhere. If local
hardlinking fails, we fall back to doing a local file copy. If
the local copy also fails, we need to retrieve the object from S3
again.

Matt Domsch authored on 2012/12/10 02:01:46
Showing 2 changed files
... ...
@@ -463,7 +463,7 @@ def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates
463 463
                     # Found one, we want to copy
464 464
                     dst1 = list(dst_list.by_md5[md5])[0]
465 465
                     debug(u"REMOTE COPY src: %s -> %s" % (dst1, relative_file))
466
-		    copy_pairs.append((dst1, relative_file))
466
+		    copy_pairs.append((src_list[relative_file], dst1, relative_file))
467 467
 		    del(src_list[relative_file])
468 468
 		    del(dst_list[relative_file])
469 469
                 else:
... ...
@@ -485,7 +485,7 @@ def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates
485 485
 	    if dst1 is not None:
486 486
                 # Found one, we want to copy
487 487
                 debug(u"REMOTE COPY dst: %s -> %s" % (dst1, relative_file))
488
-		copy_pairs.append((dst1, relative_file))
488
+		copy_pairs.append((src_list[relative_file], dst1, relative_file))
489 489
 		del(src_list[relative_file])
490 490
 	    else:
491 491
                 # we don't have this file, and we don't have a copy of this file elsewhere.  Get it.
... ...
@@ -857,8 +857,10 @@ def cmd_sync_remote2local(args):
857 857
     seq = 0
858 858
     seq, total_size = _download(remote_list, seq, remote_count + update_count, total_size, dir_cache)
859 859
     seq, total_size = _download(update_list, seq, remote_count + update_count, total_size, dir_cache)
860
-    failed_pairs = local_hardlink(copy_pairs, destination_base)
861
-    # fixme: do something about failed pairs - we couldn't hardlink or copy them, need to retrieve them
860
+
861
+    failed_hardlink_list = local_hardlink(copy_pairs, destination_base)
862
+    _set_local_filename(failed_hardlink_list, destination_base)
863
+    seq, total_size = _download(failed_hardlink_list, seq, len(failed_hardlink_list) + remote_count + update_count, total_size, dir_cache)
862 864
     
863 865
     total_elapsed = time.time() - timestamp_start
864 866
     speed_fmt = formatSize(total_size/total_elapsed, human_readable = True, floating_point = True)
... ...
@@ -875,23 +877,23 @@ def cmd_sync_remote2local(args):
875 875
         _do_deletes(local_list)
876 876
 
877 877
 def local_hardlink(copy_pairs, destination_base):
878
-    failed_pairs = []
879
-    for (dst1, dst2) in copy_pairs:
878
+    failed_hardlink_list = SortedDict()
879
+    for (src_obj, dst1, relative_file) in copy_pairs:
880 880
         try:
881
-            os.link(destination_base + dst1, destination_base + dst2)
882
-            debug(u"Hardlinking %s to %s" % (destination_base + dst1, destination_base + dst2))
881
+            os.link(destination_base + dst1, destination_base + relative_file)
882
+            debug(u"Hardlinking %s to %s" % (destination_base + dst1, destination_base + relative_file))
883 883
         except (IOError, OSError):
884 884
             try:
885
-                shutil.copy2(destination_base + dst1, destination_base + dst2)
886
-                debug(u"Hardlinking unavailable, copying %s to %s" % (destination_base + dst1, destination_base + dst2))
885
+                shutil.copy2(destination_base + dst1, destination_base + relative_file)
886
+                debug(u"Hardlinking unavailable, copying %s to %s" % (destination_base + dst1, destination_base + relative_file))
887 887
             except IOError, e:
888
-                error(u'Unable to hardlink or copy files %s -> %s: %s' % (destination_base+dst1, destination_base+dst2, e))
889
-                failed_pairs.append((dst1, dst2))
890
-    return failed_pairs
888
+                warning(u'Unable to hardlink or copy files %s -> %s: %s' % (destination_base + dst1, destination_base + relative_file, e))
889
+                failed_hardlink_list[relative_file] = src_obj
890
+    return failed_hardlink_list
891 891
 
892 892
 def remote_copy(s3, copy_pairs, destination_base):
893 893
     saved_bytes = 0
894
-    for (dst1, dst2) in copy_pairs:
894
+    for (src_obj, dst1, dst2) in copy_pairs:
895 895
         debug(u"Remote Copying from %s to %s" % (dst1, dst2))
896 896
         dst1_uri = S3Uri(destination_base + dst1)
897 897
         dst2_uri = S3Uri(destination_base + dst2)
... ...
@@ -1058,7 +1060,7 @@ def cmd_sync_local2remote(args):
1058 1058
                 output(u"upload: %s -> %s" % (local_list[key]['full_name_unicode'], local_list[key]['remote_uri']))
1059 1059
             for key in update_list:
1060 1060
                 output(u"upload: %s -> %s" % (update_list[key]['full_name_unicode'], update_list[key]['remote_uri']))
1061
-            for (dst1, dst2) in copy_pairs:
1061
+            for (src_obj, dst1, dst2) in copy_pairs:
1062 1062
                 output(u"remote copy: %s -> %s" % (dst1['object_key'], remote_list[dst2]['object_key']))
1063 1063
             if cfg.delete_removed:
1064 1064
                 for key in remote_list: