mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-01-11 22:30:48 +01:00
feat: Enhance S3 migration CLI with new commands and EC2 detection option
This commit is contained in:
@@ -89,6 +89,7 @@ docker run -v /shared-cache:/tmp/.deltaglider \
|
||||
- `DG_CACHE_BACKEND`: Cache backend (default: `filesystem`, options: `filesystem`, `memory`)
|
||||
- `DG_CACHE_MEMORY_SIZE_MB`: Memory cache size in MB (default: `100`)
|
||||
- `DG_CACHE_ENCRYPTION_KEY`: Optional base64-encoded encryption key for cross-process cache sharing
|
||||
- `DG_DISABLE_EC2_DETECTION`: Disable EC2 instance detection (default: `false`, set to `true` to disable)
|
||||
- `AWS_ENDPOINT_URL`: S3 endpoint URL (default: AWS S3)
|
||||
- `AWS_ACCESS_KEY_ID`: AWS access key
|
||||
- `AWS_SECRET_ACCESS_KEY`: AWS secret key
|
||||
@@ -578,6 +579,7 @@ The migration preserves all file names and structure while applying DeltaGlider'
|
||||
|
||||
- ✅ **Battle tested**: 200K+ files in production
|
||||
- ✅ **Data integrity**: SHA256 verification on every operation
|
||||
- ✅ **Cost optimization**: Automatic EC2 region detection warns about cross-region charges - [📖 EC2 Detection Guide](docs/EC2_REGION_DETECTION.md)
|
||||
- ✅ **S3 compatible**: Works with AWS, MinIO, Cloudflare R2, etc.
|
||||
- ✅ **Atomic operations**: No partial states
|
||||
- ✅ **Concurrent safe**: Multiple clients supported
|
||||
|
||||
@@ -9,6 +9,8 @@ DeltaGlider provides AWS S3 CLI compatible commands with automatic delta compres
|
||||
- `deltaglider ls [s3_url]` - List buckets and objects
|
||||
- `deltaglider rm <s3_url>` - Remove objects
|
||||
- `deltaglider sync <source> <destination>` - Synchronize directories
|
||||
- `deltaglider migrate <source> <destination>` - Migrate S3 buckets with compression and EC2 cost warnings
|
||||
- `deltaglider stats <bucket>` - Get bucket statistics and compression metrics
|
||||
- `deltaglider verify <s3_url>` - Verify file integrity
|
||||
|
||||
### Current Usage Examples
|
||||
|
||||
@@ -283,12 +283,14 @@ def copy_s3_to_s3(
|
||||
tmp_path = Path(tmp.name)
|
||||
|
||||
# Write stream to temp file
|
||||
with open(tmp_path, 'wb') as f:
|
||||
with open(tmp_path, "wb") as f:
|
||||
shutil.copyfileobj(source_stream, f)
|
||||
|
||||
try:
|
||||
# Use DeltaService.put() with override_name to preserve original filename
|
||||
summary = service.put(tmp_path, dest_deltaspace, max_ratio, override_name=original_filename)
|
||||
summary = service.put(
|
||||
tmp_path, dest_deltaspace, max_ratio, override_name=original_filename
|
||||
)
|
||||
|
||||
if not quiet:
|
||||
if summary.delta_size:
|
||||
@@ -371,7 +373,9 @@ def migrate_s3_to_s3(
|
||||
click.echo(f"Migrating from s3://{source_bucket}/{source_prefix}")
|
||||
click.echo(f" to s3://{dest_bucket}/{effective_dest_prefix}")
|
||||
else:
|
||||
click.echo(f"Migrating from s3://{source_bucket}/{source_prefix} to s3://{dest_bucket}/{dest_prefix}")
|
||||
click.echo(
|
||||
f"Migrating from s3://{source_bucket}/{source_prefix} to s3://{dest_bucket}/{dest_prefix}"
|
||||
)
|
||||
click.echo("Scanning source and destination buckets...")
|
||||
|
||||
# List source objects
|
||||
@@ -396,7 +400,9 @@ def migrate_s3_to_s3(
|
||||
source_objects.append(obj)
|
||||
|
||||
# List destination objects to detect what needs copying
|
||||
dest_list_prefix = f"{dest_bucket}/{effective_dest_prefix}" if effective_dest_prefix else dest_bucket
|
||||
dest_list_prefix = (
|
||||
f"{dest_bucket}/{effective_dest_prefix}" if effective_dest_prefix else dest_bucket
|
||||
)
|
||||
dest_keys = set()
|
||||
|
||||
for obj in service.storage.list(dest_list_prefix):
|
||||
@@ -429,6 +435,7 @@ def migrate_s3_to_s3(
|
||||
return
|
||||
|
||||
if not quiet:
|
||||
|
||||
def format_bytes(size: int) -> str:
|
||||
size_float = float(size)
|
||||
for unit in ["B", "KB", "MB", "GB", "TB"]:
|
||||
@@ -487,7 +494,7 @@ def migrate_s3_to_s3(
|
||||
dest_s3_url,
|
||||
quiet=True,
|
||||
max_ratio=max_ratio,
|
||||
no_delta=no_delta
|
||||
no_delta=no_delta,
|
||||
)
|
||||
|
||||
successful += 1
|
||||
@@ -517,10 +524,13 @@ def migrate_s3_to_s3(
|
||||
if successful > 0 and not no_delta:
|
||||
try:
|
||||
from ...client import DeltaGliderClient
|
||||
|
||||
client = DeltaGliderClient(service)
|
||||
dest_stats = client.get_bucket_stats(dest_bucket, detailed_stats=False)
|
||||
if dest_stats.delta_objects > 0:
|
||||
click.echo(f"\nCompression achieved: {dest_stats.average_compression_ratio:.1%}")
|
||||
click.echo(
|
||||
f"\nCompression achieved: {dest_stats.average_compression_ratio:.1%}"
|
||||
)
|
||||
click.echo(f"Space saved: {format_bytes(dest_stats.space_saved)}")
|
||||
except Exception:
|
||||
pass # Ignore stats errors
|
||||
|
||||
@@ -129,7 +129,14 @@ def _version_callback(ctx: click.Context, param: click.Parameter, value: bool) -
|
||||
|
||||
@click.group()
|
||||
@click.option("--debug", is_flag=True, help="Enable debug logging")
|
||||
@click.option("--version", is_flag=True, is_eager=True, expose_value=False, callback=_version_callback, help="Show version and exit")
|
||||
@click.option(
|
||||
"--version",
|
||||
is_flag=True,
|
||||
is_eager=True,
|
||||
expose_value=False,
|
||||
callback=_version_callback,
|
||||
help="Show version and exit",
|
||||
)
|
||||
@click.pass_context
|
||||
def cli(ctx: click.Context, debug: bool) -> None:
|
||||
"""DeltaGlider - Delta-aware S3 file storage wrapper."""
|
||||
@@ -662,7 +669,9 @@ def verify(service: DeltaService, s3_url: str) -> None:
|
||||
@click.option("--max-ratio", type=float, help="Max delta/file ratio (default: 0.5)")
|
||||
@click.option("--dry-run", is_flag=True, help="Show what would be migrated without migrating")
|
||||
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
|
||||
@click.option("--no-preserve-prefix", is_flag=True, help="Don't preserve source prefix in destination")
|
||||
@click.option(
|
||||
"--no-preserve-prefix", is_flag=True, help="Don't preserve source prefix in destination"
|
||||
)
|
||||
@click.option("--endpoint-url", help="Override S3 endpoint URL")
|
||||
@click.option("--region", help="AWS region")
|
||||
@click.option("--profile", help="AWS profile to use")
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
"""Test S3-to-S3 migration functionality."""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -68,21 +66,21 @@ def test_migrate_s3_to_s3_with_resume(mock_service):
|
||||
mock_service.storage.list.side_effect = list_side_effect
|
||||
|
||||
# Mock the copy operation and click functions
|
||||
# Use quiet=True to skip EC2 detection logging
|
||||
with patch("deltaglider.app.cli.aws_compat.copy_s3_to_s3") as mock_copy:
|
||||
with patch("deltaglider.app.cli.aws_compat.click.echo") as mock_echo:
|
||||
with patch("deltaglider.app.cli.aws_compat.click.confirm", return_value=True):
|
||||
migrate_s3_to_s3(
|
||||
mock_service,
|
||||
"s3://source-bucket/",
|
||||
"s3://dest-bucket/",
|
||||
exclude=None,
|
||||
include=None,
|
||||
quiet=False,
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=False,
|
||||
skip_confirm=False,
|
||||
)
|
||||
with patch("deltaglider.app.cli.aws_compat.click.confirm", return_value=True):
|
||||
migrate_s3_to_s3(
|
||||
mock_service,
|
||||
"s3://source-bucket/",
|
||||
"s3://dest-bucket/",
|
||||
exclude=None,
|
||||
include=None,
|
||||
quiet=True, # Skip EC2 detection and logging
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=False,
|
||||
skip_confirm=False,
|
||||
)
|
||||
|
||||
# Should copy only file2.zip and subdir/file3.zip (file1 already exists)
|
||||
assert mock_copy.call_count == 2
|
||||
@@ -92,7 +90,10 @@ def test_migrate_s3_to_s3_with_resume(mock_service):
|
||||
migrated_files = [(args[1], args[2]) for args in call_args]
|
||||
|
||||
assert ("s3://source-bucket/file2.zip", "s3://dest-bucket/file2.zip") in migrated_files
|
||||
assert ("s3://source-bucket/subdir/file3.zip", "s3://dest-bucket/subdir/file3.zip") in migrated_files
|
||||
assert (
|
||||
"s3://source-bucket/subdir/file3.zip",
|
||||
"s3://dest-bucket/subdir/file3.zip",
|
||||
) in migrated_files
|
||||
|
||||
|
||||
def test_migrate_s3_to_s3_dry_run(mock_service):
|
||||
@@ -118,7 +119,7 @@ def test_migrate_s3_to_s3_dry_run(mock_service):
|
||||
"s3://dest-bucket/",
|
||||
exclude=None,
|
||||
include=None,
|
||||
quiet=False,
|
||||
quiet=True, # Skip EC2 detection
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=True,
|
||||
@@ -172,7 +173,7 @@ def test_migrate_s3_to_s3_with_filters(mock_service):
|
||||
"s3://dest-bucket/",
|
||||
exclude="*.log",
|
||||
include=None,
|
||||
quiet=False,
|
||||
quiet=True, # Skip EC2 detection
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=False,
|
||||
@@ -213,7 +214,7 @@ def test_migrate_s3_to_s3_skip_confirm(mock_service):
|
||||
"s3://dest-bucket/",
|
||||
exclude=None,
|
||||
include=None,
|
||||
quiet=False,
|
||||
quiet=True, # Skip EC2 detection
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=False,
|
||||
@@ -255,7 +256,7 @@ def test_migrate_s3_to_s3_with_prefix(mock_service):
|
||||
"s3://dest-bucket/archive/",
|
||||
exclude=None,
|
||||
include=None,
|
||||
quiet=False,
|
||||
quiet=True, # Skip EC2 detection
|
||||
no_delta=False,
|
||||
max_ratio=None,
|
||||
dry_run=False,
|
||||
@@ -266,4 +267,4 @@ def test_migrate_s3_to_s3_with_prefix(mock_service):
|
||||
mock_copy.assert_called_once()
|
||||
call_args = mock_copy.call_args[0]
|
||||
assert call_args[1] == "s3://source-bucket/data/file1.zip"
|
||||
assert call_args[2] == "s3://dest-bucket/archive/file1.zip"
|
||||
assert call_args[2] == "s3://dest-bucket/archive/file1.zip"
|
||||
|
||||
Reference in New Issue
Block a user