feat: Enhance S3 migration CLI with new commands and EC2 detection option

Simone Scarduzio
2025-10-12 23:12:32 +02:00
parent b2ca59490b
commit aea5cb5d9a
5 changed files with 54 additions and 30 deletions

View File

@@ -89,6 +89,7 @@ docker run -v /shared-cache:/tmp/.deltaglider \
- `DG_CACHE_BACKEND`: Cache backend (default: `filesystem`, options: `filesystem`, `memory`)
- `DG_CACHE_MEMORY_SIZE_MB`: Memory cache size in MB (default: `100`)
- `DG_CACHE_ENCRYPTION_KEY`: Optional base64-encoded encryption key for cross-process cache sharing
+- `DG_DISABLE_EC2_DETECTION`: Disable EC2 instance detection (default: `false`, set to `true` to disable)
- `AWS_ENDPOINT_URL`: S3 endpoint URL (default: AWS S3)
- `AWS_ACCESS_KEY_ID`: AWS access key
- `AWS_SECRET_ACCESS_KEY`: AWS secret key
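The new `DG_DISABLE_EC2_DETECTION` entry above is a plain environment toggle. A minimal sketch of flipping it from Python before the client starts; exactly where the library reads the variable is an assumption here:

```python
import os

# Assumption: DeltaGlider checks DG_DISABLE_EC2_DETECTION at client start-up,
# so it must be set before the first client/service object is constructed.
os.environ["DG_DISABLE_EC2_DETECTION"] = "true"  # skip the EC2 metadata probe
```

This can be handy in CI containers, where the EC2 metadata endpoint is unreachable and the probe would only add latency.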
@@ -578,6 +579,7 @@ The migration preserves all file names and structure while applying DeltaGlider'
- **Battle tested**: 200K+ files in production
- **Data integrity**: SHA256 verification on every operation
+- **Cost optimization**: Automatic EC2 region detection warns about cross-region charges - [📖 EC2 Detection Guide](docs/EC2_REGION_DETECTION.md)
- **S3 compatible**: Works with AWS, MinIO, Cloudflare R2, etc.
- **Atomic operations**: No partial states
- **Concurrent safe**: Multiple clients supported

View File

@@ -9,6 +9,8 @@ DeltaGlider provides AWS S3 CLI compatible commands with automatic delta compres
- `deltaglider ls [s3_url]` - List buckets and objects
- `deltaglider rm <s3_url>` - Remove objects
- `deltaglider sync <source> <destination>` - Synchronize directories
+- `deltaglider migrate <source> <destination>` - Migrate S3 buckets with compression and EC2 cost warnings
+- `deltaglider stats <bucket>` - Get bucket statistics and compression metrics
- `deltaglider verify <s3_url>` - Verify file integrity
### Current Usage Examples
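Not part of the commit, but a minimal sketch of driving the new `migrate` command in-process with click's test runner; the `cli` import path and bucket names are hypothetical:

```python
from click.testing import CliRunner

from deltaglider.app.cli import cli  # hypothetical import path for the click group

runner = CliRunner()
result = runner.invoke(
    cli,
    [
        "migrate",
        "s3://source-bucket/releases/",
        "s3://dest-bucket/archive/",
        "--dry-run",  # show what would be migrated without migrating
        "--yes",  # skip the confirmation prompt
    ],
)
print(result.exit_code, result.output)
```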

View File

@@ -283,12 +283,14 @@ def copy_s3_to_s3(
tmp_path = Path(tmp.name)
# Write stream to temp file
-with open(tmp_path, 'wb') as f:
+with open(tmp_path, "wb") as f:
shutil.copyfileobj(source_stream, f)
try:
# Use DeltaService.put() with override_name to preserve original filename
-summary = service.put(tmp_path, dest_deltaspace, max_ratio, override_name=original_filename)
+summary = service.put(
+    tmp_path, dest_deltaspace, max_ratio, override_name=original_filename
+)
if not quiet:
if summary.delta_size:
@@ -371,7 +373,9 @@ def migrate_s3_to_s3(
click.echo(f"Migrating from s3://{source_bucket}/{source_prefix}")
click.echo(f" to s3://{dest_bucket}/{effective_dest_prefix}")
else:
click.echo(f"Migrating from s3://{source_bucket}/{source_prefix} to s3://{dest_bucket}/{dest_prefix}")
click.echo(
f"Migrating from s3://{source_bucket}/{source_prefix} to s3://{dest_bucket}/{dest_prefix}"
)
click.echo("Scanning source and destination buckets...")
# List source objects
@@ -396,7 +400,9 @@ def migrate_s3_to_s3(
source_objects.append(obj)
# List destination objects to detect what needs copying
dest_list_prefix = f"{dest_bucket}/{effective_dest_prefix}" if effective_dest_prefix else dest_bucket
dest_list_prefix = (
f"{dest_bucket}/{effective_dest_prefix}" if effective_dest_prefix else dest_bucket
)
dest_keys = set()
for obj in service.storage.list(dest_list_prefix):
@@ -429,6 +435,7 @@ def migrate_s3_to_s3(
return
if not quiet:
def format_bytes(size: int) -> str:
size_float = float(size)
for unit in ["B", "KB", "MB", "GB", "TB"]:
@@ -487,7 +494,7 @@ def migrate_s3_to_s3(
dest_s3_url,
quiet=True,
max_ratio=max_ratio,
-no_delta=no_delta
+no_delta=no_delta,
)
successful += 1
@@ -517,10 +524,13 @@ def migrate_s3_to_s3(
if successful > 0 and not no_delta:
try:
from ...client import DeltaGliderClient
client = DeltaGliderClient(service)
dest_stats = client.get_bucket_stats(dest_bucket, detailed_stats=False)
if dest_stats.delta_objects > 0:
click.echo(f"\nCompression achieved: {dest_stats.average_compression_ratio:.1%}")
click.echo(
f"\nCompression achieved: {dest_stats.average_compression_ratio:.1%}"
)
click.echo(f"Space saved: {format_bytes(dest_stats.space_saved)}")
except Exception:
pass # Ignore stats errors

View File

@@ -129,7 +129,14 @@ def _version_callback(ctx: click.Context, param: click.Parameter, value: bool) -
@click.group()
@click.option("--debug", is_flag=True, help="Enable debug logging")
@click.option("--version", is_flag=True, is_eager=True, expose_value=False, callback=_version_callback, help="Show version and exit")
@click.option(
"--version",
is_flag=True,
is_eager=True,
expose_value=False,
callback=_version_callback,
help="Show version and exit",
)
@click.pass_context
def cli(ctx: click.Context, debug: bool) -> None:
"""DeltaGlider - Delta-aware S3 file storage wrapper."""
@@ -662,7 +669,9 @@ def verify(service: DeltaService, s3_url: str) -> None:
@click.option("--max-ratio", type=float, help="Max delta/file ratio (default: 0.5)")
@click.option("--dry-run", is_flag=True, help="Show what would be migrated without migrating")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
@click.option("--no-preserve-prefix", is_flag=True, help="Don't preserve source prefix in destination")
@click.option(
"--no-preserve-prefix", is_flag=True, help="Don't preserve source prefix in destination"
)
@click.option("--endpoint-url", help="Override S3 endpoint URL")
@click.option("--region", help="AWS region")
@click.option("--profile", help="AWS profile to use")

View File

@@ -1,7 +1,5 @@
"""Test S3-to-S3 migration functionality."""
-import tempfile
-from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
@@ -68,21 +66,21 @@ def test_migrate_s3_to_s3_with_resume(mock_service):
mock_service.storage.list.side_effect = list_side_effect
# Mock the copy operation and click functions
# Use quiet=True to skip EC2 detection logging
with patch("deltaglider.app.cli.aws_compat.copy_s3_to_s3") as mock_copy:
with patch("deltaglider.app.cli.aws_compat.click.echo") as mock_echo:
with patch("deltaglider.app.cli.aws_compat.click.confirm", return_value=True):
migrate_s3_to_s3(
mock_service,
"s3://source-bucket/",
"s3://dest-bucket/",
exclude=None,
include=None,
quiet=False,
no_delta=False,
max_ratio=None,
dry_run=False,
skip_confirm=False,
)
with patch("deltaglider.app.cli.aws_compat.click.confirm", return_value=True):
migrate_s3_to_s3(
mock_service,
"s3://source-bucket/",
"s3://dest-bucket/",
exclude=None,
include=None,
quiet=True, # Skip EC2 detection and logging
no_delta=False,
max_ratio=None,
dry_run=False,
skip_confirm=False,
)
# Should copy only file2.zip and subdir/file3.zip (file1 already exists)
assert mock_copy.call_count == 2
@@ -92,7 +90,10 @@ def test_migrate_s3_to_s3_with_resume(mock_service):
migrated_files = [(args[1], args[2]) for args in call_args]
assert ("s3://source-bucket/file2.zip", "s3://dest-bucket/file2.zip") in migrated_files
assert ("s3://source-bucket/subdir/file3.zip", "s3://dest-bucket/subdir/file3.zip") in migrated_files
assert (
"s3://source-bucket/subdir/file3.zip",
"s3://dest-bucket/subdir/file3.zip",
) in migrated_files
def test_migrate_s3_to_s3_dry_run(mock_service):
@@ -118,7 +119,7 @@ def test_migrate_s3_to_s3_dry_run(mock_service):
"s3://dest-bucket/",
exclude=None,
include=None,
-quiet=False,
+quiet=True,  # Skip EC2 detection
no_delta=False,
max_ratio=None,
dry_run=True,
@@ -172,7 +173,7 @@ def test_migrate_s3_to_s3_with_filters(mock_service):
"s3://dest-bucket/",
exclude="*.log",
include=None,
-quiet=False,
+quiet=True,  # Skip EC2 detection
no_delta=False,
max_ratio=None,
dry_run=False,
@@ -213,7 +214,7 @@ def test_migrate_s3_to_s3_skip_confirm(mock_service):
"s3://dest-bucket/",
exclude=None,
include=None,
-quiet=False,
+quiet=True,  # Skip EC2 detection
no_delta=False,
max_ratio=None,
dry_run=False,
@@ -255,7 +256,7 @@ def test_migrate_s3_to_s3_with_prefix(mock_service):
"s3://dest-bucket/archive/",
exclude=None,
include=None,
-quiet=False,
+quiet=True,  # Skip EC2 detection
no_delta=False,
max_ratio=None,
dry_run=False,
@@ -266,4 +267,4 @@ def test_migrate_s3_to_s3_with_prefix(mock_service):
mock_copy.assert_called_once()
call_args = mock_copy.call_args[0]
assert call_args[1] == "s3://source-bucket/data/file1.zip"
-assert call_args[2] == "s3://dest-bucket/archive/file1.zip"
\ No newline at end of file
+assert call_args[2] == "s3://dest-bucket/archive/file1.zip"
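The `mock_service` fixture itself sits above the excerpted hunks. A hedged reconstruction consistent with how these tests use it (only `storage.list` is driven, via `side_effect` or a default return):

```python
import pytest
from unittest.mock import MagicMock


@pytest.fixture
def mock_service():
    # Hypothetical reconstruction: a MagicMock stands in for DeltaService;
    # individual tests override service.storage.list with side_effect.
    service = MagicMock()
    service.storage.list.return_value = []
    return service
```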