Browse code

use metadata-stored md5 in bucket list, put, and info

Now that we can reasonably expect response['s3cmd-attrs'] to contain
valid md5 data, use that in the bucket list and info commands instead
of ETags if available.

Furthermore, be sure to store these attributes
in the put command, either when --preserve is given or when we will be
uploading a multi-chunk file. We were already storing these with the sync
--preserve command; be sure to do it in put also.

Matt Domsch authored on 2013/05/21 13:31:50
Showing 1 changed file
... ...
@@ -153,12 +153,22 @@ def subcmd_bucket_list(s3, uri):
153 153
             "uri": uri.compose_uri(bucket, prefix["Prefix"])})
154 154
 
155 155
     for object in response["list"]:
156
+        md5 = object['ETag'].strip('"')
157
+        if cfg.list_md5:
158
+            if md5.find('-') >= 0: # need to get md5 from the object
159
+                object_uri = uri.compose_uri(bucket, object["Key"])
160
+                info_response = s3.object_info(S3Uri(object_uri))
161
+                try:
162
+                    md5 = info_response['s3cmd-attrs']['md5']
163
+                except KeyError:
164
+                    pass
165
+
156 166
         size, size_coeff = formatSize(object["Size"], Config().human_readable_sizes)
157 167
         output(format_string % {
158 168
             "timestamp": formatDateTime(object["LastModified"]),
159 169
             "size" : str(size),
160 170
             "coeff": size_coeff,
161
-            "md5" : object['ETag'].strip('"'),
171
+            "md5" : md5,
162 172
             "uri": uri.compose_uri(bucket, object["Key"]),
163 173
             })
164 174
 
... ...
@@ -317,6 +327,10 @@ def cmd_object_put(args):
317 317
         seq_label = "[%d of %d]" % (seq, local_count)
318 318
         if Config().encrypt:
319 319
             exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
320
+        if cfg.preserve_attrs or local_list[key]['size'] > (cfg.multipart_chunk_size_mb * 1024 * 1024):
321
+            attr_header = _build_attr_header(local_list, key)
322
+            debug(u"attr_header: %s" % attr_header)
323
+            extra_headers.update(attr_header)
320 324
         try:
321 325
             response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label)
322 326
         except S3UploadError, e:
... ...
@@ -595,7 +609,12 @@ def cmd_info(args):
595 595
                 output(u"   File size: %s" % info['headers']['content-length'])
596 596
                 output(u"   Last mod:  %s" % info['headers']['last-modified'])
597 597
                 output(u"   MIME type: %s" % info['headers']['content-type'])
598
-                output(u"   MD5 sum:   %s" % info['headers']['etag'].strip('"'))
598
+                md5 = info['headers']['etag'].strip('"')
599
+                try:
600
+                    md5 = info['s3cmd-attrs']['md5']
601
+                except KeyError:
602
+                    pass
603
+                output(u"   MD5 sum:   %s" % md5)
599 604
             else:
600 605
                 info = s3.bucket_info(uri)
601 606
                 output(u"%s (bucket):" % uri.uri())
... ...
@@ -824,8 +843,8 @@ def cmd_sync_remote2local(args):
824 824
                     os.chmod(dst_file, mode);
825 825
                     
826 826
                     debug(u"renamed chkptfname=%s to dst_file=%s" % (unicodise(chkptfname), unicodise(dst_file)))
827
-                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs:
828
-                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
827
+                    if response.has_key('s3cmd-attrs') and cfg.preserve_attrs:
828
+                        attrs = response['s3cmd-attrs']
829 829
                         if attrs.has_key('mode'):
830 830
                             os.chmod(dst_file, int(attrs['mode']))
831 831
                         if attrs.has_key('mtime') or attrs.has_key('atime'):
... ...
@@ -945,41 +964,42 @@ def remote_copy(s3, copy_pairs, destination_base):
945 945
             raise
946 946
     return (len(copy_pairs), saved_bytes)
947 947
 
948
+def _build_attr_header(local_list, src):
949
+    import pwd, grp
950
+    attrs = {}
951
+    for attr in cfg.preserve_attrs_list:
952
+        if attr == 'uname':
953
+            try:
954
+                val = pwd.getpwuid(local_list[src]['uid']).pw_name
955
+            except KeyError:
956
+                attr = "uid"
957
+                val = local_list[src].get('uid')
958
+                warning(u"%s: Owner username not known. Storing UID=%d instead." % (src, val))
959
+        elif attr == 'gname':
960
+            try:
961
+                val = grp.getgrgid(local_list[src].get('gid')).gr_name
962
+            except KeyError:
963
+                attr = "gid"
964
+                val = local_list[src].get('gid')
965
+                warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
966
+        elif attr == 'md5':
967
+            try:
968
+                val = local_list.get_md5(src)
969
+            except IOError:
970
+                val = None
971
+        else:
972
+            val = getattr(local_list[src]['sr'], 'st_' + attr)
973
+        attrs[attr] = val
974
+
975
+    if 'md5' in attrs and attrs['md5'] is None:
976
+        del attrs['md5']
948 977
 
949
-def cmd_sync_local2remote(args):
950
-    def _build_attr_header(local_list, src):
951
-        import pwd, grp
952
-        attrs = {}
953
-        for attr in cfg.preserve_attrs_list:
954
-            if attr == 'uname':
955
-                try:
956
-                    val = pwd.getpwuid(local_list[src]['uid']).pw_name
957
-                except KeyError:
958
-                    attr = "uid"
959
-                    val = local_list[src].get('uid')
960
-                    warning(u"%s: Owner username not known. Storing UID=%d instead." % (src, val))
961
-            elif attr == 'gname':
962
-                try:
963
-                    val = grp.getgrgid(local_list[src].get('gid')).gr_name
964
-                except KeyError:
965
-                    attr = "gid"
966
-                    val = local_list[src].get('gid')
967
-                    warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
968
-            elif attr == 'md5':
969
-                try:
970
-                    val = local_list.get_md5(src)
971
-                except IOError:
972
-                    val = None
973
-            else:
974
-                val = getattr(local_list[src]['sr'], 'st_' + attr)
975
-            attrs[attr] = val
978
+    result = ""
979
+    for k in attrs: result += "%s:%s/" % (k, attrs[k])
980
+    return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
976 981
 
977
-        if 'md5' in attrs and attrs['md5'] is None:
978
-            del attrs['md5']
979 982
 
980
-        result = ""
981
-        for k in attrs: result += "%s:%s/" % (k, attrs[k])
982
-        return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
983
+def cmd_sync_local2remote(args):
983 984
 
984 985
     def _do_deletes(s3, remote_list):
985 986
         for key in remote_list: