Imagine you have a versioned S3 bucket that contains thousands of files. If you try to delete it, you are warned because of the versions it still holds. You cannot select all the files at once in the AWS Console, so one way to remove them is from the command line:
aws s3 rm s3://YOUR_BUCKET --recursive
But what about the object versions, and possibly delete markers, that remain? We can remove them with the following Python script:
Imagine you have a versioned S3 bucket that contains thousands of files. If you try to delete it, you are warned because of the versions it still holds. You cannot select all the files at once in the AWS Console, so one way to remove them is from the command line:
aws s3 rm s3://YOUR_BUCKET --recursive
But what about the object versions, and possibly delete markers, that remain? We can remove them with the following Python script:
#!/usr/bin/env python3
import boto3
from itertools import islice
from typing import Iterable, Dict
# Name of the bucket to purge; 'YOUR_BUCKET' is a placeholder to replace before running.
bucket = 'YOUR_BUCKET'
# S3 client used by the listing and deletion functions in this script.
s3_client = boto3.client('s3')
def batched(iterable: Iterable, n: int) -> Iterable[list]:
    """Yield successive lists of at most ``n`` items taken from ``iterable``.

    Every yielded list holds exactly ``n`` items except possibly the last one,
    which holds whatever remains. An empty ``iterable`` yields nothing.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        n: Maximum number of items per batch. Must be at least 1.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator, the
            error surfaces when iteration starts, not when it is called.
    """
    if n < 1:
        # With n == 0 the first slice is empty, so the loop below would stop
        # immediately and silently drop every item; fail loudly instead.
        raise ValueError(f"n must be at least 1, got {n}")
    it = iter(iterable)
    # An empty slice means the underlying iterator is exhausted.
    while batch := list(islice(it, n)):
        yield batch
def iter_all_object_versions(bucket: str, prefix: str | None = None) -> Iterable[Dict[str, str]]:
    """Yield one record per object version and per delete marker in a bucket.

    Walks every page produced by the ``list_object_versions`` paginator of the
    module-level ``s3_client``. Within each page, the entries under
    ``Versions`` are yielded first, followed by those under ``DeleteMarkers``.

    Args:
        bucket: Name of the bucket to list.
        prefix: When truthy, sent as the ``Prefix`` request parameter; ``None``
            or an empty string sends no prefix.

    Yields:
        Dicts with the keys ``Key``, ``VersionId`` and ``IsDeleteMarker``; the
        last one is ``True`` for delete markers and ``False`` for versions.
    """
    request = {'Bucket': bucket, 'Prefix': prefix} if prefix else {'Bucket': bucket}
    pages = s3_client.get_paginator('list_object_versions').paginate(**request)
    # (response field, value reported as IsDeleteMarker), in yield order.
    sections = (('Versions', False), ('DeleteMarkers', True))
    for page in pages:
        for field, is_marker in sections:
            for item in page.get(field, []):
                yield {
                    'Key': item['Key'],
                    'VersionId': item['VersionId'],
                    'IsDeleteMarker': is_marker,
                }
def remove_s3_object_versions(bucket: str, prefix: str | None = None, dry_run: bool = False, batch_size: int = 1000):
    """Permanently delete every object version and delete marker in a bucket.

    The bucket (optionally only the keys under ``prefix``) is listed in full
    first so the totals can be reported, then the entries are removed with
    batched ``delete_objects`` calls on the module-level ``s3_client``.
    Per-key failures returned by S3 are reported and excluded from the
    deleted count instead of being silently counted as successes.

    Args:
        bucket: Name of the bucket to purge.
        prefix: Optional key prefix, forwarded to ``iter_all_object_versions``.
        dry_run: When true, only report what was found and delete nothing.
        batch_size: Number of entries per ``delete_objects`` request; must be
            between 1 and 1000 (the DeleteObjects per-request key limit).

    Raises:
        ValueError: If ``batch_size`` is outside the range 1..1000.
    """
    max_batch = 1000       # DeleteObjects accepts at most 1000 keys per request.
    progress_every = 5000  # Print a progress line roughly every this many deletions.
    if not 1 <= batch_size <= max_batch:
        raise ValueError(f"batch_size must be between 1 and {max_batch}, got {batch_size}")

    total_versions = 0
    total_delete_markers = 0
    to_delete = []
    for entry in iter_all_object_versions(bucket, prefix):
        if entry['IsDeleteMarker']:
            total_delete_markers += 1
        else:
            total_versions += 1
        to_delete.append({'Key': entry['Key'], 'VersionId': entry['VersionId']})

    # Built separately so the message needs no nested quotes inside an f-string.
    scope = f" with prefix '{prefix}'" if prefix else ''
    print(
        f'[*] Discovered {total_versions} versions and {total_delete_markers} delete markers '
        f'(total {total_versions + total_delete_markers}) in bucket "{bucket}"{scope}.'
    )

    if dry_run:
        print("[!] Dry run enabled. No deletions performed.")
        return

    deleted_count = 0
    failed_count = 0
    next_report = progress_every
    for batch in batched(to_delete, batch_size):
        response = s3_client.delete_objects(Bucket=bucket, Delete={'Objects': batch, 'Quiet': True})
        # In quiet mode the response lists only the keys that failed.
        errors = response.get('Errors', [])
        if errors:
            failed_count += len(errors)
            sample = errors[0]
            key = sample.get('Key')
            version_id = sample.get('VersionId')
            code = sample.get('Code')
            message = sample.get('Message')
            print(
                f"[!] {len(errors)} of {len(batch)} deletions failed in this batch, "
                f"e.g. {key!r} (version {version_id}): {code} - {message}"
            )
        deleted_count += len(batch) - len(errors)
        # Threshold-based so progress still prints when the running count
        # never lands exactly on a multiple of progress_every.
        if deleted_count >= next_report:
            print(f"[*]Progress: deleted {deleted_count} items.")
            next_report = (deleted_count // progress_every + 1) * progress_every

    print(f'[*] Deletion complete. Removed {deleted_count} versioned entries from bucket "{bucket}".')
    if failed_count:
        print(f"[!] {failed_count} entries could not be deleted; see the errors above.")
if __name__ == '__main__':
    # Runs a real deletion (dry_run=False) against the module-level `bucket`.
    remove_s3_object_versions(bucket=bucket, dry_run=False)