poniedziałek, 10 listopada 2025

Remove a versioned S3 bucket

Let's imagine you have an S3 bucket that is versioned and contains thousands of files. If you try to delete it, you are warned because of the existing versions. You cannot select all files at once in the AWS Console, so one way to remove them is from your command line:
aws s3 rm s3://YOUR_BUCKET --recursive

But what about the old versions, and possibly delete markers, that remain in the bucket? To remove those as well, we can use the following Python script:

Let's imagine you have an S3 bucket that is versioned and contains thousands of files. If you try to delete it, you are warned because of the existing versions. You cannot select all files at once in the AWS Console, so one way to remove them is from your command line:
aws s3 rm s3://YOUR_BUCKET --recursive

But what about the old versions, and possibly delete markers, that remain in the bucket? To remove those as well, we can use the following Python script:

#!/usr/bin/env python3

import boto3
from itertools import islice
from typing import Iterable, Dict

# Name of the bucket to clean up. 'YOUR_BUCKET' is a placeholder: replace it
# with the real bucket name before running the script.
bucket = 'YOUR_BUCKET'
# Shared S3 client used by the functions in this script. It is created at
# import time with no explicit region or credentials passed in.
s3_client = boto3.client('s3')

def batched(iterable: Iterable, n: int) -> Iterable[list]:
    """Yield successive lists of at most ``n`` items from ``iterable``.

    Every yielded list holds exactly ``n`` items, except possibly the last
    one, which holds whatever remains. An empty ``iterable`` yields nothing.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        n: Maximum number of items per batch. Must be at least 1.

    Yields:
        Lists of consecutive items, in their original order.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator
            function, the error surfaces when iteration starts, not when the
            function is called.
    """
    if n < 1:
        # With n == 0, islice() returns an empty batch, which would end the
        # loop immediately and silently drop every item of the input.
        raise ValueError(f"n must be at least 1, got {n}")

    it = iter(iterable)

    # An empty list means the underlying iterator is exhausted.
    while batch := list(islice(it, n)):
        yield batch

def iter_all_object_versions(bucket: str, prefix: str | None = None) -> Iterable[Dict[str, str]]:
    """Lazily walk every object version and delete marker stored in a bucket.

    Results come from the module-level ``s3_client`` through its
    ``list_object_versions`` paginator. For each page, all entries under
    ``Versions`` are yielded first, followed by all entries under
    ``DeleteMarkers``.

    Args:
        bucket: Name of the bucket to list.
        prefix: When truthy, sent as the ``Prefix`` parameter of the listing
            request. ``None`` or an empty string sends no prefix.

    Yields:
        One dict per entry with the keys ``'Key'``, ``'VersionId'`` and
        ``'IsDeleteMarker'`` (a bool: ``True`` only for delete markers).
    """
    request = {'Bucket': bucket, 'Prefix': prefix} if prefix else {'Bucket': bucket}
    pages = s3_client.get_paginator('list_object_versions').paginate(**request)

    # Page section name -> flag recorded on every entry found in that section.
    sections = (('Versions', False), ('DeleteMarkers', True))

    for page in pages:
        for section, is_marker in sections:
            # A page may lack either section entirely, hence the default.
            for item in page.get(section, []):
                yield {'Key': item['Key'], 'VersionId': item['VersionId'], 'IsDeleteMarker': is_marker}

def remove_s3_object_versions(bucket: str, prefix: str | None = None, dry_run: bool = False, batch_size: int = 1000):
    """Permanently delete every object version and delete marker in a bucket.

    The function first lists everything (via ``iter_all_object_versions``) and
    prints a summary of what was found. Unless ``dry_run`` is set, it then
    removes the entries in batches with ``delete_objects`` on the module-level
    ``s3_client``. Entries that S3 reports as failed are printed and are not
    counted as removed.

    Args:
        bucket: Name of the bucket to clean up.
        prefix: Optional key prefix; when truthy, only matching entries are
            listed and deleted.
        dry_run: When ``True``, only print the discovery summary and return
            without deleting anything.
        batch_size: Number of entries sent per ``delete_objects`` request.
            Must be between 1 and 1000 (the per-request limit of the S3
            DeleteObjects API).

    Raises:
        ValueError: If ``batch_size`` is outside the range 1..1000.
    """
    if not 1 <= batch_size <= 1000:
        # Fail before the (potentially long) listing instead of on the first
        # delete request.
        raise ValueError(f"batch_size must be between 1 and 1000, got {batch_size}")

    total_versions = 0
    total_delete_markers = 0
    to_delete = []

    for entry in iter_all_object_versions(bucket, prefix):
        if entry['IsDeleteMarker']:
            total_delete_markers += 1
        else:
            total_versions += 1
        to_delete.append({'Key': entry['Key'], 'VersionId': entry['VersionId']})

    # Built separately so the message below needs no nested f-string quoting.
    prefix_note = f" with prefix '{prefix}'" if prefix else ''
    print(
        f'[*] Discovered {total_versions} versions and {total_delete_markers} delete markers '
        f'(total {total_versions + total_delete_markers}) in bucket "{bucket}"{prefix_note}.'
    )

    if dry_run:
        print("[!] Dry run enabled. No deletions performed.")
        return

    progress_step = 5000
    next_progress_at = progress_step
    deleted_count = 0
    failed_count = 0

    for batch in batched(to_delete, batch_size):
        response = s3_client.delete_objects(Bucket=bucket, Delete={'Objects': batch, 'Quiet': True})

        # In quiet mode the response lists only the entries that failed.
        errors = response.get('Errors', [])
        for error in errors:
            print(
                f"[!] Failed to delete {error.get('Key')} (version {error.get('VersionId')}): "
                f"{error.get('Code')} - {error.get('Message')}"
            )

        failed_count += len(errors)
        deleted_count += len(batch) - len(errors)

        # Threshold-based so progress is still reported when the running count
        # never lands exactly on a multiple of progress_step.
        if deleted_count >= next_progress_at:
            print(f"[*]Progress: deleted {deleted_count} items.")
            next_progress_at = (deleted_count // progress_step + 1) * progress_step

    print(f'[*] Deletion complete. Removed {deleted_count} versioned entries from bucket "{bucket}".')

    if failed_count:
        print(f"[!] {failed_count} entries could not be deleted; see the errors above.")

if __name__ == "__main__":
    # Script entry point: process the bucket named by the module-level
    # `bucket` variable. dry_run=False means entries are really deleted;
    # pass dry_run=True to only print what was discovered.
    remove_s3_object_versions(bucket, dry_run=False)