subcmd_batch_del() was sending the entire remote_list() as a single
batch delete operation to S3. That fails for >1000 objects, though we
were ignoring the failure. It can also time out while uploading a huge
deletion list (one example was deleting 40k objects, a 7MB deletion
list XML) and churning through it.

The whole looking-for-a-marker business was poor. We have the
remote_list; we just couldn't slice it up. The previous commit adds
the getslice operator, so now we can. This greatly simplifies the
delete operation, as we can iterate over slices of 1000 until the
list is empty.
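
For reference, the chunking pattern boils down to the sketch below. It
is illustrative only: delete_in_batches and delete_batch are
hypothetical names, and remote_list is simply assumed to support
slicing the way the previous commit's getslice operator allows.

    # Illustrative sketch, not s3cmd code: walk the list in slices of at
    # most 1000 keys, issuing one batch-delete request per slice until
    # nothing remains. 'delete_batch' stands in for the real S3 call.
    def delete_in_batches(remote_list, delete_batch, batch_size=1000):
        to_delete = remote_list[:batch_size]
        remote_list = remote_list[batch_size:]
        while len(to_delete):
            delete_batch(to_delete)
            to_delete = remote_list[:batch_size]
            remote_list = remote_list[batch_size:]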
@@ -538,7 +538,17 @@ def cmd_object_del(args):
         subcmd_batch_del(uri_str = uri_str)
 
 def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
-    s3 = S3(cfg)
+    def _batch_del(remote_list):
+        s3 = S3(cfg)
+        to_delete = remote_list[:1000]
+        remote_list = remote_list[1000:]
+        while len(to_delete):
+            debug(u"Batch delete %d, remaining %d" % (len(to_delete), len(remote_list)))
+            if not cfg.dry_run:
+                response = s3.object_batch_delete(to_delete)
+            output('\n'.join((u"File %s deleted" % to_delete[p]['object_uri_str']) for p in to_delete))
+            to_delete = remote_list[:1000]
+            remote_list = remote_list[1000:]
 
     if len([item for item in [uri_str, bucket, remote_list] if item]) != 1:
         raise ValueError("One and only one of 'uri_str', 'bucket', 'remote_list' can be specified.")
@@ -558,30 +568,8 @@ def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
         warning(u"delete: maximum requested number of deletes would be exceeded, none performed.")
         return False
 
-    while True:
-        if cfg.dry_run:
-            output('\n'.join((u"File %s deleted" % remote_list[p]['object_uri_str']) for p in remote_list))
-        else:
-            response = s3.object_batch_delete(remote_list)
-            output('\n'.join((u"File %s deleted" % remote_list[p]['object_uri_str']) for p in remote_list))
-
-        if len(remote_list):
-            keys_count = len(remote_list)
-        else:
-            keys_count = 0
-        if keys_count < 1000 or not batch_mode:
-            break
-
-        if len(remote_list):
-            last_key = S3Uri(remote_list[remote_list.keys()[-1]]['object_uri_str'])
-        else:
-            last_key = None
+    _batch_del(remote_list)
 
-        if not last_key or not last_key.has_object:
-            break
-        marker = last_key.object()
-        remote_list, exclude_list = fetch_remote_list(uri_str, require_attribs = False, batch_mode = True,
-                                                      uri_params = {"marker": marker})
     if cfg.dry_run:
         warning(u"Exiting now because of --dry-run")
         return False