Browse code

Fix remote_copy to ensure that meta-s3cmd-attrs will be set based on the real source and not on the copy source

Florent Viard authored on 2020/04/19 09:36:20
Showing 2 changed files
... ...
@@ -621,7 +621,7 @@ def compare_filelists(src_list, dst_list, src_remote, dst_remote):
621 621
             try:
622 622
                 md5 = src_list.get_md5(relative_file)
623 623
             except IOError:
624
-               md5 = None
624
+                md5 = None
625 625
             dst1 = dst_list.find_md5_one(md5)
626 626
             if dst1 is not None:
627 627
                 # Found one, we want to copy
... ...
@@ -417,6 +417,7 @@ def cmd_object_put(args):
417 417
 
418 418
     seq = 0
419 419
     ret = EX_OK
420
+    local_list_get_md5_func = local_list.get_md5
420 421
     for key in local_list:
421 422
         seq += 1
422 423
 
... ...
@@ -428,7 +429,9 @@ def cmd_object_put(args):
428 428
         seq_label = "[%d of %d]" % (seq, local_count)
429 429
         if Config().encrypt:
430 430
             gpg_exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
431
-        attr_header = _build_attr_header(local_list, key)
431
+        attr_header = _build_attr_header(local_list[key],
432
+                                         key,
433
+                                         local_list_get_md5_func)
432 434
         debug(u"attr_header: %s" % attr_header)
433 435
         extra_headers.update(attr_header)
434 436
         try:
... ...
@@ -1159,8 +1162,9 @@ def cmd_sync_remote2remote(args):
1159 1159
     total_files_copied += nb_files
1160 1160
     total_size_copied += size
1161 1161
 
1162
-
1163
-    n_copied, bytes_saved, failed_copy_files = remote_copy(s3, copy_pairs, destination_base, None)
1162
+    src_list_get_md5_func = src_list.get_md5
1163
+    n_copied, bytes_saved, failed_copy_files = remote_copy(
1164
+        s3, copy_pairs, destination_base, None, src_list_get_md5_func)
1164 1165
     total_files_copied += n_copied
1165 1166
     total_size_copied += bytes_saved
1166 1167
 
... ...
@@ -1568,7 +1572,8 @@ def local_copy(copy_pairs, destination_base):
1568 1568
             failed_copy_list[relative_file] = src_obj
1569 1569
     return len(copy_pairs), saved_bytes, failed_copy_list
1570 1570
 
1571
-def remote_copy(s3, copy_pairs, destination_base, uploaded_objects_list=None):
1571
+def remote_copy(s3, copy_pairs, destination_base, uploaded_objects_list=None,
1572
+                metadata_update=False, local_list_get_md5_func=None):
1572 1573
     cfg = Config()
1573 1574
     saved_bytes = 0
1574 1575
     failed_copy_list = FileDict()
... ...
@@ -1582,6 +1587,11 @@ def remote_copy(s3, copy_pairs, destination_base, uploaded_objects_list=None):
1582 1582
         src_obj_size = src_obj.get(u'size', 0)
1583 1583
         seq_label = "[%d of %d]" % (seq, src_count)
1584 1584
         extra_headers = copy(cfg.extra_headers)
1585
+        if metadata_update:
1586
+            # source is a real local file with its own personal metadata
1587
+            attr_header = _build_attr_header(src_obj, dst2, local_list_get_md5_func)
1588
+            debug(u"attr_header: %s" % attr_header)
1589
+            extra_headers.update(attr_header)
1585 1590
         try:
1586 1591
             s3.object_copy(dst1_uri, dst2_uri, extra_headers,
1587 1592
                            src_size=src_obj_size,
... ...
@@ -1595,38 +1605,39 @@ def remote_copy(s3, copy_pairs, destination_base, uploaded_objects_list=None):
1595 1595
             failed_copy_list[dst2] = src_obj
1596 1596
     return (len(copy_pairs), saved_bytes, failed_copy_list)
1597 1597
 
1598
-def _build_attr_header(local_list, src):
1598
+def _build_attr_header(src_obj, src_relative_name,
1599
+                       local_list_get_md5_func=None):
1599 1600
     cfg = Config()
1600 1601
     attrs = {}
1601 1602
     if cfg.preserve_attrs:
1602 1603
         for attr in cfg.preserve_attrs_list:
1603 1604
             if attr == 'uname':
1604 1605
                 try:
1605
-                    val = Utils.urlencode_string(Utils.getpwuid_username(local_list[src]['uid']), unicode_output=True)
1606
+                    val = Utils.urlencode_string(Utils.getpwuid_username(src_obj['uid']), unicode_output=True)
1606 1607
                 except (KeyError, TypeError):
1607 1608
                     attr = "uid"
1608
-                    val = local_list[src].get('uid')
1609
+                    val = src_obj.get('uid')
1609 1610
                     if val:
1610
-                        warning(u"%s: Owner username not known. Storing UID=%d instead." % (src, val))
1611
+                        warning(u"%s: Owner username not known. Storing UID=%d instead." % (src_relative_name, val))
1611 1612
             elif attr == 'gname':
1612 1613
                 try:
1613
-                    val = Utils.urlencode_string(Utils.getgrgid_grpname(local_list[src].get('gid')), unicode_output=True)
1614
+                    val = Utils.urlencode_string(Utils.getgrgid_grpname(src_obj.get('gid')), unicode_output=True)
1614 1615
                 except (KeyError, TypeError):
1615 1616
                     attr = "gid"
1616
-                    val = local_list[src].get('gid')
1617
+                    val = src_obj.get('gid')
1617 1618
                     if val:
1618
-                        warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
1619
+                        warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (src_relative_name, val))
1619 1620
             elif attr != "md5":
1620 1621
                 try:
1621
-                    val = getattr(local_list[src]['sr'], 'st_' + attr)
1622
+                    val = getattr(src_obj['sr'], 'st_' + attr)
1622 1623
                 except Exception:
1623 1624
                     val = None
1624 1625
             if val is not None:
1625 1626
                 attrs[attr] = val
1626 1627
 
1627
-    if 'md5' in cfg.preserve_attrs_list:
1628
+    if 'md5' in cfg.preserve_attrs_list and local_list_get_md5_func:
1628 1629
         try:
1629
-            val = local_list.get_md5(src)
1630
+            val = local_list_get_md5_func(src_relative_name)
1630 1631
             if val is not None:
1631 1632
                 attrs['md5'] = val
1632 1633
         except IOError:
... ...
@@ -1697,6 +1708,7 @@ def cmd_sync_local2remote(args):
1697 1697
         def _upload(local_list, seq, total, total_size):
1698 1698
             file_list = local_list.keys()
1699 1699
             file_list.sort()
1700
+            local_list_get_md5_func = local_list.get_md5
1700 1701
             ret = EX_OK
1701 1702
             for file in file_list:
1702 1703
                 seq += 1
... ...
@@ -1706,7 +1718,9 @@ def cmd_sync_local2remote(args):
1706 1706
                 seq_label = "[%d of %d]" % (seq, total)
1707 1707
                 extra_headers = copy(cfg.extra_headers)
1708 1708
                 try:
1709
-                    attr_header = _build_attr_header(local_list, file)
1709
+                    attr_header = _build_attr_header(local_list[file],
1710
+                                                     file,
1711
+                                                     local_list_get_md5_func)
1710 1712
                     debug(u"attr_header: %s" % attr_header)
1711 1713
                     extra_headers.update(attr_header)
1712 1714
                     response = s3.object_put(src, uri, extra_headers, extra_label = seq_label)
... ...
@@ -1824,9 +1838,10 @@ def cmd_sync_local2remote(args):
1824 1824
         # uploaded_objects_list reference is passed so it can be filled with
1825 1825
         # destination object of successful copies so that they can be
1826 1826
         # invalidated by cf
1827
-        n_copies, saved_bytes, failed_copy_files  = remote_copy(s3, copy_pairs,
1828
-                                                                destination_base,
1829
-                                                                uploaded_objects_list)
1827
+        local_list_get_md5_func = local_list.get_md5
1828
+        n_copies, saved_bytes, failed_copy_files  = remote_copy(
1829
+            s3, copy_pairs, destination_base, uploaded_objects_list,
1830
+            local_list_get_md5_func)
1830 1831
 
1831 1832
         #upload file that could not be copied
1832 1833
         debug("Process files that were not remotely copied")