
fix subcmd_batch_del(), using SortedDict slices

subcmd_batch_del() was sending the entire remote_list as a single
batch delete operation to S3. That fails for more than 1000 objects,
though we were silently ignoring the failure. It can also time out
while uploading a huge list (one example: deleting 40k objects meant
uploading a 7MB deletion-list XML) and churning through it.

The old marker-seeking logic was also poor. We already have the
remote_list; we just couldn't slice it up. The previous commit adds the
getslice operator, so now we can. This greatly simplifies the delete
operation: we iterate over slices of 1000 keys until the list is empty.
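For illustration only, here is a minimal sketch of what slice support on a sorted, dict-like container can look like. This is not the SortedDict code added by the previous commit (which is not shown here); s3cmd at the time targeted Python 2 and used __getslice__, while this sketch routes slices through __getitem__ so it runs on modern Python:

class SortedDict(dict):
    """Dict whose keys() come back sorted and which supports slicing."""

    def keys(self):
        # Deterministic key order makes remote_list[:1000] meaningful.
        return sorted(dict.keys(self))

    def __iter__(self):
        return iter(self.keys())

    def __getitem__(self, key):
        if isinstance(key, slice):
            # Return a new SortedDict holding the selected key range.
            chunk = SortedDict()
            for k in self.keys()[key]:
                chunk[k] = dict.__getitem__(self, k)
            return chunk
        return dict.__getitem__(self, key)

# Usage: peel off the first 1000 entries and keep the rest.
remote_list = SortedDict()
for i in range(2500):
    remote_list["key-%04d" % i] = {"object_uri_str": "s3://bucket/key-%04d" % i}
to_delete = remote_list[:1000]      # first 1000 entries by key order
remote_list = remote_list[1000:]    # everything after them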

Matt Domsch authored on 2014/04/11 09:53:45
Showing 1 changed file
@@ -538,7 +538,17 @@ def cmd_object_del(args):
         subcmd_batch_del(uri_str = uri_str)
 
 def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
-    s3 = S3(cfg)
+    def _batch_del(remote_list):
+        s3 = S3(cfg)
+        to_delete = remote_list[:1000]
+        remote_list = remote_list[1000:]
+        while len(to_delete):
+            debug(u"Batch delete %d, remaining %d" % (len(to_delete), len(remote_list)))
+            if not cfg.dry_run:
+                response = s3.object_batch_delete(to_delete)
+            output('\n'.join((u"File %s deleted" % to_delete[p]['object_uri_str']) for p in to_delete))
+            to_delete = remote_list[:1000]
+            remote_list = remote_list[1000:]
 
     if len([item for item in [uri_str, bucket, remote_list] if item]) != 1:
         raise ValueError("One and only one of 'uri_str', 'bucket', 'remote_list' can be specified.")
@@ -558,30 +568,8 @@ def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
         warning(u"delete: maximum requested number of deletes would be exceeded, none performed.")
         return False
 
-    while True:
-        if cfg.dry_run:
-            output('\n'.join((u"File %s deleted" % remote_list[p]['object_uri_str']) for p in remote_list))
-        else:
-            response = s3.object_batch_delete(remote_list)
-            output('\n'.join((u"File %s deleted" % remote_list[p]['object_uri_str']) for p in remote_list))
-
-        if len(remote_list):
-            keys_count = len(remote_list)
-        else:
-            keys_count = 0
-        if keys_count < 1000 or not batch_mode:
-            break
-
-        if len(remote_list):
-            last_key = S3Uri(remote_list[remote_list.keys()[-1]]['object_uri_str'])
-        else:
-            last_key = None
+    _batch_del(remote_list)
 
-        if not last_key or not last_key.has_object:
-            break
-        marker = last_key.object()
-        remote_list, exclude_list = fetch_remote_list(uri_str, require_attribs = False, batch_mode = True,
-                                                      uri_params = {"marker": marker})
     if cfg.dry_run:
         warning(u"Exiting now because of --dry-run")
         return False
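The underlying constraint is S3's multi-object delete API, which accepts at most 1000 keys per request. A rough, self-contained sketch of the batching pattern the new _batch_del() helper follows; the delete_batch callable is a hypothetical stand-in for s3.object_batch_delete(), not s3cmd's actual API:

def delete_in_batches(keys, delete_batch, batch_size=1000):
    # Consume the key list in slices of at most batch_size, issuing one
    # delete request per slice, until nothing is left.
    remaining = list(keys)
    while remaining:
        batch, remaining = remaining[:batch_size], remaining[batch_size:]
        delete_batch(batch)

# Example: 2500 keys turn into three requests of 1000, 1000 and 500 keys.
sent = []
delete_in_batches(["key-%d" % i for i in range(2500)], sent.append)
print([len(b) for b in sent])   # [1000, 1000, 500]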