Browse code

batch_delete fixed, subcmd_batch_del() added and replaced _do_deletes()

Vasileios Mitrousis authored on 2014/03/04 03:52:37
Showing 2 changed files
... ...
@@ -12,6 +12,7 @@ import httplib
12 12
 import logging
13 13
 import mimetypes
14 14
 import re
15
+from xml.sax import saxutils
15 16
 from logging import debug, info, warning, error
16 17
 from stat import ST_SIZE
17 18
 
... ...
@@ -488,7 +489,7 @@ class S3(object):
488 488
 
489 489
     def object_batch_delete(self, remote_list):
490 490
         def compose_batch_del_xml(bucket, key_list):
491
-            body = "<Delete>"
491
+            body = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Delete>"
492 492
             for key in key_list:
493 493
                 uri = S3Uri(key)
494 494
                 if uri.type != "s3":
... ...
@@ -497,9 +498,10 @@ class S3(object):
497 497
                     raise ValueError("URI '%s' has no object" % key)
498 498
                 if uri.bucket() != bucket:
499 499
                     raise ValueError("The batch should contain keys from the same bucket")
500
-                object = self.urlencode_string(uri.object())
501
-                body += "<Object><Key>%s</Key></Object>" % object
502
-            body += "</Delete>"
500
+                object = saxutils.escape(uri.object())
501
+                body += u"<Object><Key>%s</Key></Object>" % object
502
+            body += u"</Delete>"
503
+            body = body.encode('utf-8')
503 504
             return body
504 505
 
505 506
         batch = [remote_list[item]['object_uri_str'] for item in remote_list]
... ...
@@ -244,15 +244,16 @@ def cmd_website_delete(args):
244 244
 def cmd_bucket_delete(args):
245 245
     def _bucket_delete_one(uri, args = None):
246 246
         try:
247
-            marker = args.get('marker', '') if args else ''
248 247
             response = s3.bucket_delete(uri.bucket())
248
+            output(u"Bucket '%s' removed" % uri.uri())
249 249
         except S3Error, e:
250 250
             if e.info['Code'] == 'BucketNotEmpty' and (cfg.force or cfg.recursive):
251 251
                 warning(u"Bucket is not empty. Removing all the objects from it first. This may take some time...")
252
-                batch_uri_args = {'marker': marker}
253
-                marker = subcmd_object_del_uri(uri.uri(), recursive = True, 
254
-                                               batch_mode = True, batch_uri_args = batch_uri_args)
255
-                return _bucket_delete_one(uri, args = {'marker': marker})
252
+                success = subcmd_batch_del(uri.uri())
253
+                if success:
254
+                    return _bucket_delete_one(uri)
255
+                else:
256
+                    output(u"Bucket was not removed")
256 257
             elif S3.codes.has_key(e.info["Code"]):
257 258
                 error(S3.codes[e.info["Code"]] % uri.bucket())
258 259
                 return
... ...
@@ -265,7 +266,6 @@ def cmd_bucket_delete(args):
265 265
         if not uri.type == "s3" or not uri.has_bucket() or uri.has_object():
266 266
             raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg)
267 267
         _bucket_delete_one(uri)
268
-        output(u"Bucket '%s' removed" % uri.uri())
269 268
 
270 269
 def cmd_object_put(args):
271 270
     cfg = Config()
... ...
@@ -511,14 +511,55 @@ def cmd_object_del(args):
511 511
                 raise ParameterError("File name required, not only the bucket name. Alternatively use --recursive")
512 512
         subcmd_object_del_uri(uri_str)
513 513
 
514
-def subcmd_object_del_uri(uri_str, recursive = None, batch_mode = False, batch_uri_args = {}):
514
def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
    """Delete remote objects using batch-delete requests.

    Exactly one source of keys is expected:

    uri_str     -- S3 URI whose listing is fetched here and deleted
    bucket      -- bucket name; turned into "s3://<bucket>" and then
                   handled like uri_str
    remote_list -- ready-made mapping whose values carry an
                   'object_uri_str' item

    Returns True when the delete requests were issued.  Returns False
    when there was nothing to delete, when cfg.max_delete would be
    exceeded, or when running with --dry-run.

    Raises ValueError when more than one source is given, or none at all.
    """
    s3 = S3(cfg)

    given = [item for item in (uri_str, bucket, remote_list) if item]
    # An explicitly passed but empty remote_list (e.g. a sync run with
    # nothing to remove) is a valid "nothing to do" request, not a
    # usage error, so only a completely missing source is rejected.
    if len(given) > 1 or (not given and remote_list is None):
        raise ValueError("One and only one of 'uri_str', 'bucket', 'remote_list' can be specified.")

    if bucket:
        uri_str = "s3://%s" % bucket

    # batch_mode is set when the listing is fetched here; only then are
    # follow-up pages requested in the loop below.
    batch_mode = False
    if not remote_list and uri_str:
        batch_mode = True
        remote_list = fetch_remote_list(uri_str, require_attribs = False, batch_mode = True)
    if not remote_list:
        warning(u"Remote list is empty.")
        return False

    if cfg.max_delete > 0 and len(remote_list) > cfg.max_delete:
        warning(u"delete: maximum requested number of deletes would be exceeded, none performed.")
        return False

    while True:
        if cfg.dry_run:
            for key in remote_list:
                output(u"delete: %s" % remote_list[key]['object_uri_str'])
        else:
            s3.object_batch_delete(remote_list)
            output('\n'.join((u"File %s deleted" % remote_list[p]['object_uri_str']) for p in remote_list))

        # Fewer than 1000 entries is treated as the last page of the
        # listing; a caller-supplied list is never paged.
        if len(remote_list) < 1000 or not batch_mode:
            break

        # NOTE(review): assumes the last key of remote_list is the last
        # key of the listing page -- confirm the mapping keeps order.
        last_key = S3Uri(remote_list[list(remote_list.keys())[-1]]['object_uri_str'])
        # has_object is a method: it must be called, otherwise the bound
        # method object is always truthy and this guard never triggers.
        if not last_key.has_object():
            break
        marker = last_key.object()
        remote_list = fetch_remote_list(uri_str, require_attribs = False, batch_mode = True,
                                        uri_params = {"marker": marker})
        # The previous page may have been the final one; do not send an
        # empty batch-delete request.
        if not remote_list:
            break

    if cfg.dry_run:
        warning(u"Exiting now because of --dry-run")
        return False
    return True
555
+
556
+def subcmd_object_del_uri(uri_str, recursive = None):
515 557
     s3 = S3(cfg)
516 558
 
517 559
     if recursive is None:
518 560
         recursive = cfg.recursive
519 561
 
520
-    remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive, 
521
-                                    batch_mode = batch_mode, uri_params = batch_uri_args)
562
+    remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive)
522 563
     remote_list, exclude_list = filter_exclude_include(remote_list)
523 564
 
524 565
     remote_count = len(remote_list)
... ...
@@ -537,28 +578,11 @@ def subcmd_object_del_uri(uri_str, recursive = None, batch_mode = False, batch_u
537 537
         warning(u"Exiting now because of --dry-run")
538 538
         return
539 539
 
540
-    if batch_mode:
541
-        response = s3.object_batch_delete(remote_list)
542
-        keys_count = len(remote_list) if remote_list else 0
543
-        first_key = S3Uri(remote_list[remote_list.keys()[0]]['object_uri_str']) if remote_list else "None"
544
-        last_key = S3Uri(remote_list[remote_list.keys()[-1]]['object_uri_str']) if remote_list else "None"
545
-        output(u"Number of deleted keys: %d" % keys_count)
546
-        output(u"From: \t%s" % first_key)
547
-        output(u"To: \t%s" % last_key)
548
-        remote_list = []
549
-        if keys_count == 1000:
550
-            # reached limit of keys, probably there are more..
551
-            # worst case scenario we do a useless listing but that's ok
552
-            marker = last_key.object() if last_key != "None" and last_key.has_object() else ''
553
-            return marker 
554
-
555 540
     for key in remote_list:
556 541
         item = remote_list[key]
557 542
         response = s3.object_delete(S3Uri(item['object_uri_str']))
558 543
         output(u"File %s deleted" % item['object_uri_str'])
559 544
 
560
-    return ''
561
-
562 545
 def cmd_object_restore(args):
563 546
     s3 = S3(cfg)
564 547
 
... ...
@@ -718,20 +742,6 @@ def filedicts_to_keys(*args):
718 718
     return keys
719 719
 
720 720
 def cmd_sync_remote2remote(args):
721
-    def _do_deletes(s3, dst_list):
722
-        if cfg.max_delete > 0 and len(dst_list) > cfg.max_delete:
723
-            warning(u"delete: maximum requested number of deletes would be exceeded, none performed.")
724
-            return
725
-        # Delete items in destination that are not in source
726
-        if cfg.dry_run:
727
-            for key in dst_list:
728
-                output(u"delete: %s" % dst_list[key]['object_uri_str'])
729
-        else:
730
-            for key in dst_list:
731
-                uri = S3Uri(dst_list[key]['object_uri_str'])
732
-                s3.object_delete(uri)
733
-                output(u"deleted: '%s'" % uri)
734
-
735 721
     s3 = S3(Config())
736 722
 
737 723
     # Normalise s3://uri (e.g. assert trailing slash)
... ...
@@ -786,7 +796,7 @@ def cmd_sync_remote2remote(args):
786 786
 
787 787
     # Delete items in destination that are not in source
788 788
     if cfg.delete_removed and not cfg.delete_after:
789
-        _do_deletes(s3, dst_list)
789
+        subcmd_batch_del(remote_list = dst_list)
790 790
 
791 791
     def _upload(src_list, seq, src_count):
792 792
         file_list = src_list.keys()
... ...
@@ -830,7 +840,7 @@ def cmd_sync_remote2remote(args):
830 830
 
831 831
     # Delete items in destination that are not in source
832 832
     if cfg.delete_removed and cfg.delete_after:
833
-        _do_deletes(s3, dst_list)
833
+        subcmd_batch_del(remote_list = dst_list)
834 834
 
835 835
 def cmd_sync_remote2local(args):
836 836
     def _do_deletes(local_list):
... ...
@@ -1136,16 +1146,6 @@ def _build_attr_header(local_list, src):
1136 1136
 
1137 1137
 
1138 1138
 def cmd_sync_local2remote(args):
1139
-
1140
-    def _do_deletes(s3, remote_list):
1141
-        if cfg.max_delete > 0 and len(remote_list) > cfg.max_delete:
1142
-            warning(u"delete: maximum requested number of deletes would be exceeded, none performed.")
1143
-            return
1144
-        for key in remote_list:
1145
-            uri = S3Uri(remote_list[key]['object_uri_str'])
1146
-            s3.object_delete(uri)
1147
-            output(u"deleted: '%s'" % uri)
1148
-
1149 1139
     def _single_process(local_list):
1150 1140
         for dest in destinations:
1151 1141
             ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash)
... ...
@@ -1281,7 +1281,7 @@ def cmd_sync_local2remote(args):
1281 1281
             cfg.delete_removed = False
1282 1282
 
1283 1283
         if cfg.delete_removed and not cfg.delete_after:
1284
-            _do_deletes(s3, remote_list)
1284
+            subcmd_batch_del(remote_list = remote_list)
1285 1285
 
1286 1286
         total_size = 0
1287 1287
         total_elapsed = 0.0
... ...
@@ -1297,7 +1297,7 @@ def cmd_sync_local2remote(args):
1297 1297
         n, total_size = _upload(failed_copy_files, n, failed_copy_count, total_size)
1298 1298
 
1299 1299
         if cfg.delete_removed and cfg.delete_after:
1300
-            _do_deletes(s3, remote_list)
1300
+            subcmd_batch_del(remote_list = remote_list)
1301 1301
         total_elapsed = time.time() - timestamp_start
1302 1302
         total_speed = total_elapsed and total_size/total_elapsed or 0.0
1303 1303
         speed_fmt = formatSize(total_speed, human_readable = True, floating_point = True)