fix: Preserve original filenames during S3-to-S3 migration

This commit is contained in:
Simone Scarduzio
2025-10-12 18:10:04 +02:00
parent 14c6af0f35
commit 4f56c4b600
3 changed files with 77 additions and 14 deletions

View File

@@ -1,5 +1,6 @@
"""AWS S3 CLI compatible commands."""
import shutil
import sys
from pathlib import Path
@@ -163,27 +164,67 @@ def copy_s3_to_s3(
max_ratio: float | None = None,
no_delta: bool = False,
) -> None:
"""Copy object between S3 locations with optional delta compression."""
"""Copy object between S3 locations with optional delta compression.
This performs a direct S3-to-S3 transfer using streaming to preserve
the original file content and apply delta compression at the destination.
"""
source_bucket, source_key = parse_s3_url(source_url)
dest_bucket, dest_key = parse_s3_url(dest_url)
if not quiet:
click.echo(f"copy: 's3://{source_bucket}/{source_key}' to 's3://{dest_bucket}/{dest_key}'")
# Use temporary file
import tempfile
try:
# Get the source object as a stream
source_stream = service.storage.get(f"{source_bucket}/{source_key}")
with tempfile.NamedTemporaryFile(suffix=Path(source_key).suffix) as tmp:
tmp_path = Path(tmp.name)
# Determine the destination deltaspace
dest_key_parts = dest_key.split("/")
if len(dest_key_parts) > 1:
dest_prefix = "/".join(dest_key_parts[:-1])
else:
dest_prefix = ""
# Download from source
download_file(service, source_url, tmp_path, quiet=True)
dest_deltaspace = DeltaSpace(bucket=dest_bucket, prefix=dest_prefix)
# Upload to destination with optional delta compression
upload_file(service, tmp_path, dest_url, max_ratio, no_delta, quiet=True)
# If delta compression is disabled (no_delta), use a direct storage put; max_ratio only applies to the delta path below
if no_delta:
# Direct storage put without delta compression
service.storage.put(f"{dest_bucket}/{dest_key}", source_stream, {})
if not quiet:
click.echo("Copy completed (no delta compression)")
else:
# Write to a temporary file and use override_name to preserve original filename
import tempfile
if not quiet:
click.echo("Copy completed")
# Extract original filename from source
original_filename = Path(source_key).name
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(source_key).suffix) as tmp:
tmp_path = Path(tmp.name)
# Write stream to temp file
with open(tmp_path, 'wb') as f:
shutil.copyfileobj(source_stream, f)
try:
# Use DeltaService.put() with override_name to preserve original filename
summary = service.put(tmp_path, dest_deltaspace, max_ratio, override_name=original_filename)
if not quiet:
if summary.delta_size:
ratio = round((summary.delta_size / summary.file_size) * 100, 1)
click.echo(f"Copy completed with delta compression ({ratio}% of original)")
else:
click.echo("Copy completed (stored as reference)")
finally:
# Clean up temp file
tmp_path.unlink(missing_ok=True)
except Exception as e:
click.echo(f"S3-to-S3 copy failed: {e}", err=True)
raise
def migrate_s3_to_s3(

View File

@@ -93,16 +93,27 @@ class DeltaService:
return any(name_lower.endswith(ext) for ext in self.delta_extensions)
def put(
self, local_file: Path, delta_space: DeltaSpace, max_ratio: float | None = None
self,
local_file: Path,
delta_space: DeltaSpace,
max_ratio: float | None = None,
override_name: str | None = None,
) -> PutSummary:
"""Upload file as reference or delta (for archive files) or directly (for other files)."""
"""Upload file as reference or delta (for archive files) or directly (for other files).
Args:
local_file: Path to the local file to upload
delta_space: DeltaSpace (bucket + prefix) for the upload
max_ratio: Maximum acceptable delta/file ratio (default: service max_ratio)
override_name: Optional name to use instead of local_file.name (useful for S3-to-S3 copies)
"""
if max_ratio is None:
max_ratio = self.max_ratio
start_time = self.clock.now()
file_size = local_file.stat().st_size
file_sha256 = self.hasher.sha256(local_file)
original_name = local_file.name
original_name = override_name if override_name else local_file.name
self.logger.info(
"Starting put operation",