diff --git a/CLAUDE.md b/CLAUDE.md index 0a7ffb0..2397b34 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -129,7 +129,6 @@ src/deltaglider/ 4. **AWS S3 CLI Compatibility**: - Commands (`cp`, `ls`, `rm`, `sync`) mirror AWS CLI syntax exactly - Located in `app/cli/main.py` with helpers in `aws_compat.py` - - Maintains backward compatibility with original `put`/`get` commands ### Key Algorithms diff --git a/README.md b/README.md index de88ea9..0dd69e6 100644 --- a/README.md +++ b/README.md @@ -91,15 +91,6 @@ deltaglider sync --exclude "*.log" ./src/ s3://backup/ # Exclude patterns deltaglider cp file.zip s3://bucket/ --endpoint-url http://localhost:9000 ``` -### Legacy Commands (still supported) - -```bash -# Original DeltaGlider commands -deltaglider put my-app-v1.0.0.zip s3://releases/ -deltaglider get s3://releases/my-app-v1.0.1.zip -deltaglider verify s3://releases/my-app-v1.0.1.zip.delta -``` - ## Why xdelta3 Excels at Archive Compression Traditional diff algorithms (like `diff` or `git diff`) work line-by-line on text files. Binary diff tools like `bsdiff` or `courgette` are optimized for executables. 
But **xdelta3** is uniquely suited for compressed archives because: @@ -495,7 +486,7 @@ uv run pytest # Run with local MinIO docker-compose up -d export AWS_ENDPOINT_URL=http://localhost:9000 -deltaglider put test.zip s3://test/ +deltaglider cp test.zip s3://test/ ``` ## FAQ diff --git a/docs/aws-s3-cli-compatibility.md b/docs/aws-s3-cli-compatibility.md index fe08415..899d5d4 100644 --- a/docs/aws-s3-cli-compatibility.md +++ b/docs/aws-s3-cli-compatibility.md @@ -1,21 +1,23 @@ -# AWS S3 CLI Compatibility Plan for DeltaGlider +# AWS S3 CLI Compatibility for DeltaGlider ## Current State -DeltaGlider currently provides a custom CLI with the following commands: +DeltaGlider provides AWS S3 CLI compatible commands with automatic delta compression: -### Existing Commands -- `deltaglider put <file> <s3_url>` - Upload file with delta compression -- `deltaglider get <s3_url> [-o output]` - Download and reconstruct file +### Commands +- `deltaglider cp <source> <destination>` - Copy files with delta compression +- `deltaglider ls [s3_url]` - List buckets and objects +- `deltaglider rm <s3_url>` - Remove objects +- `deltaglider sync <source> <destination>` - Synchronize directories - `deltaglider verify <s3_url>` - Verify file integrity ### Current Usage Examples ```bash # Upload a file -deltaglider put myfile.zip s3://bucket/path/to/file.zip +deltaglider cp myfile.zip s3://bucket/path/to/file.zip -# Download a file (auto-detects .delta) -deltaglider get s3://bucket/path/to/file.zip +# Download a file +deltaglider cp s3://bucket/path/to/file.zip . # Verify integrity deltaglider verify s3://bucket/path/to/file.zip.delta @@ -168,18 +170,7 @@ Additional flags specific to DeltaGlider's delta compression: 3. Create migration guide from aws-cli 4. Performance benchmarks comparing to aws-cli -## Migration Path for Existing Users - -### Alias Support During Transition -```bash -# Old command -> New command mapping -deltaglider put FILE S3_URL -> deltaglider cp FILE S3_URL -deltaglider get S3_URL -> deltaglider cp S3_URL . 
-deltaglider verify S3_URL -> deltaglider ls --verify S3_URL -``` - -### Environment Variables -- `DELTAGLIDER_LEGACY_MODE=1` - Use old command syntax +## Environment Variables - `DELTAGLIDER_AWS_COMPAT=1` - Strict AWS S3 CLI compatibility mode ## Success Criteria diff --git a/docs/case-study-readonlyrest.md b/docs/case-study-readonlyrest.md index 3b0642c..1764e2d 100644 --- a/docs/case-study-readonlyrest.md +++ b/docs/case-study-readonlyrest.md @@ -57,7 +57,7 @@ aws s3 cp readonlyrest-1.66.1_es8.0.0.zip s3://releases/ # Size on S3: 82.5MB # With DeltaGlider -deltaglider put readonlyrest-1.66.1_es8.0.0.zip s3://releases/ +deltaglider cp readonlyrest-1.66.1_es8.0.0.zip s3://releases/ # Size on S3: 65KB (99.92% smaller!) ``` @@ -186,7 +186,7 @@ This intelligence meant our 127,455 checksum files were uploaded directly, avoid ```bash # Simple integration into our CI/CD - aws s3 cp $FILE s3://releases/ -+ deltaglider put $FILE s3://releases/ ++ deltaglider cp $FILE s3://releases/ ``` ### Week 4: Full Migration @@ -253,10 +253,10 @@ Storage costs scale linearly with data growth. Without DeltaGlider: pip install deltaglider # Upload a file (automatic compression) -deltaglider put my-release-v1.0.0.zip s3://releases/ +deltaglider cp my-release-v1.0.0.zip s3://releases/ # Download (automatic reconstruction) -deltaglider get s3://releases/my-release-v1.0.0.zip +deltaglider cp s3://releases/my-release-v1.0.0.zip . # It's that simple. 
``` @@ -277,12 +277,12 @@ completely_different: 0% # No compression (uploaded as-is) **GitHub Actions**: ```yaml - name: Upload Release - run: deltaglider put dist/*.zip s3://releases/${{ github.ref_name }}/ + run: deltaglider cp dist/*.zip s3://releases/${{ github.ref_name }}/ ``` **Jenkins Pipeline**: ```groovy -sh "deltaglider put ${WORKSPACE}/target/*.jar s3://artifacts/" +sh "deltaglider cp ${WORKSPACE}/target/*.jar s3://artifacts/" ``` **Python Script**: @@ -327,7 +327,7 @@ python calculate_savings.py --path /your/releases # Try it yourself docker run -p 9000:9000 minio/minio # Local S3 pip install deltaglider -deltaglider put your-file.zip s3://test/ +deltaglider cp your-file.zip s3://test/ ``` --- diff --git a/src/deltaglider/app/cli/main.py b/src/deltaglider/app/cli/main.py index 668d1ef..e82676d 100644 --- a/src/deltaglider/app/cli/main.py +++ b/src/deltaglider/app/cli/main.py @@ -16,7 +16,7 @@ from ...adapters import ( UtcClockAdapter, XdeltaAdapter, ) -from ...core import DeltaService, DeltaSpace, ObjectKey +from ...core import DeltaService, ObjectKey from ...ports import MetricsPort from .aws_compat import ( copy_s3_to_s3, @@ -556,130 +556,6 @@ def sync( sys.exit(1) -@cli.command() -@click.argument("file", type=click.Path(exists=True, path_type=Path)) -@click.argument("s3_url") -@click.option("--max-ratio", type=float, help="Max delta/file ratio (default: 0.5)") -@click.pass_obj -def put(service: DeltaService, file: Path, s3_url: str, max_ratio: float | None) -> None: - """Upload file as reference or delta (legacy command, use 'cp' instead).""" - # Parse S3 URL - if not s3_url.startswith("s3://"): - click.echo(f"Error: Invalid S3 URL: {s3_url}", err=True) - sys.exit(1) - - # Extract bucket and prefix - s3_path = s3_url[5:].rstrip("/") - parts = s3_path.split("/", 1) - bucket = parts[0] - prefix = parts[1] if len(parts) > 1 else "" - - delta_space = DeltaSpace(bucket=bucket, prefix=prefix) - - try: - summary = service.put(file, delta_space, max_ratio) - 
- # Output JSON summary - output = { - "operation": summary.operation, - "bucket": summary.bucket, - "key": summary.key, - "original_name": summary.original_name, - "file_size": summary.file_size, - "file_sha256": summary.file_sha256, - } - - if summary.delta_size is not None: - output["delta_size"] = summary.delta_size - output["delta_ratio"] = round(summary.delta_ratio or 0, 3) - - if summary.ref_key: - output["ref_key"] = summary.ref_key - output["ref_sha256"] = summary.ref_sha256 - - output["cache_hit"] = summary.cache_hit - - click.echo(json.dumps(output, indent=2)) - - except Exception as e: - click.echo(f"Error: {e}", err=True) - sys.exit(1) - - -@cli.command() -@click.argument("s3_url") -@click.option("-o", "--output", type=click.Path(path_type=Path), help="Output file path") -@click.pass_obj -def get(service: DeltaService, s3_url: str, output: Path | None) -> None: - """Download and hydrate delta file. - - The S3 URL can be either: - - Full path to delta file: s3://bucket/path/to/file.zip.delta - - Path to original file (will append .delta): s3://bucket/path/to/file.zip - """ - # Parse S3 URL - if not s3_url.startswith("s3://"): - click.echo(f"Error: Invalid S3 URL: {s3_url}", err=True) - sys.exit(1) - - s3_path = s3_url[5:] - parts = s3_path.split("/", 1) - if len(parts) != 2: - click.echo(f"Error: Invalid S3 URL: {s3_url}", err=True) - sys.exit(1) - - bucket = parts[0] - key = parts[1] - - # Try to determine if this is a direct file or needs .delta appended - # First try the key as-is - obj_key = ObjectKey(bucket=bucket, key=key) - - # Check if the file exists using the service's storage port - # which already has proper credentials configured - try: - # Try to head the object as-is - obj_head = service.storage.head(f"{bucket}/{key}") - if obj_head is not None: - click.echo(f"Found file: s3://{bucket}/{key}") - else: - # If not found and doesn't end with .delta, try adding .delta - if not key.endswith(".delta"): - delta_key = f"{key}.delta" - delta_head 
= service.storage.head(f"{bucket}/{delta_key}") - if delta_head is not None: - key = delta_key - obj_key = ObjectKey(bucket=bucket, key=key) - click.echo(f"Found delta file: s3://{bucket}/{key}") - else: - click.echo( - f"Error: File not found: s3://{bucket}/{key} (also tried .delta)", err=True - ) - sys.exit(1) - else: - click.echo(f"Error: File not found: s3://{bucket}/{key}", err=True) - sys.exit(1) - except Exception: - # For unexpected errors, just proceed with the original key - click.echo(f"Warning: Could not check file existence, proceeding with: s3://{bucket}/{key}") - - # Determine output path - if output is None: - # Extract original name from delta name - if key.endswith(".delta"): - output = Path(Path(key).stem) - else: - output = Path(Path(key).name) - - try: - service.get(obj_key, output) - click.echo(f"Successfully retrieved: {output}") - - except Exception as e: - click.echo(f"Error: {e}", err=True) - sys.exit(1) - - @cli.command() @click.argument("s3_url") @click.pass_obj diff --git a/tests/e2e/test_localstack.py b/tests/e2e/test_localstack.py index ef82139..9efcad1 100644 --- a/tests/e2e/test_localstack.py +++ b/tests/e2e/test_localstack.py @@ -72,7 +72,7 @@ class TestLocalStackE2E: file2.write_text("Plugin version 1.0.1 content with minor changes") # Upload first file (becomes reference) - result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/plugins/"]) + result = runner.invoke(cli, ["cp", str(file1), f"s3://{test_bucket}/plugins/"]) assert result.exit_code == 0 output1 = extract_json_from_cli_output(result.output) assert output1["operation"] == "create_reference" @@ -85,7 +85,7 @@ class TestLocalStackE2E: assert "plugins/plugin-v1.0.0.zip.delta" in keys # Upload second file (creates delta) - result = runner.invoke(cli, ["put", str(file2), f"s3://{test_bucket}/plugins/"]) + result = runner.invoke(cli, ["cp", str(file2), f"s3://{test_bucket}/plugins/"]) assert result.exit_code == 0 output2 = 
extract_json_from_cli_output(result.output) assert output2["operation"] == "create_delta" @@ -97,9 +97,8 @@ class TestLocalStackE2E: result = runner.invoke( cli, [ - "get", + "cp", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta", - "-o", str(output_file), ], ) @@ -130,10 +129,10 @@ class TestLocalStackE2E: file_b1.write_text("Application B version 1") # Upload to different deltaspaces - result = runner.invoke(cli, ["put", str(file_a1), f"s3://{test_bucket}/apps/app-a/"]) + result = runner.invoke(cli, ["cp", str(file_a1), f"s3://{test_bucket}/apps/app-a/"]) assert result.exit_code == 0 - result = runner.invoke(cli, ["put", str(file_b1), f"s3://{test_bucket}/apps/app-b/"]) + result = runner.invoke(cli, ["cp", str(file_b1), f"s3://{test_bucket}/apps/app-b/"]) assert result.exit_code == 0 # Verify each deltaspace has its own reference @@ -160,14 +159,14 @@ class TestLocalStackE2E: file2.write_text("B" * 1000) # Completely different # Upload first file - result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/test/"]) + result = runner.invoke(cli, ["cp", str(file1), f"s3://{test_bucket}/test/"]) assert result.exit_code == 0 # Upload second file with low max-ratio result = runner.invoke( cli, [ - "put", + "cp", str(file2), f"s3://{test_bucket}/test/", "--max-ratio", diff --git a/tests/integration/test_get_command.py b/tests/integration/test_get_command.py deleted file mode 100644 index 57a889d..0000000 --- a/tests/integration/test_get_command.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Integration test for get command.""" - -import tempfile -from pathlib import Path -from unittest.mock import Mock, patch - -import pytest -from click.testing import CliRunner - -from deltaglider.app.cli.main import cli -from deltaglider.core import ObjectKey - - -@pytest.fixture -def mock_service(): - """Create a mock DeltaService.""" - return Mock() - - -def test_get_command_with_original_name(mock_service): - """Test get command with original filename (auto-appends 
.delta).""" - runner = CliRunner() - - # Mock the service.get method and storage.head - mock_service.get = Mock() - mock_service.storage.head = Mock( - side_effect=[ - None, # First check for original file returns None - Mock(), # Second check for .delta file returns something - ] - ) - - with patch("deltaglider.app.cli.main.create_service", return_value=mock_service): - # Run get with original filename (should auto-append .delta) - result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip"]) - - # Check it was successful - assert result.exit_code == 0 - assert "Found delta file: s3://test-bucket/data/myfile.zip.delta" in result.output - assert "Successfully retrieved: myfile.zip" in result.output - - # Verify the service was called with the correct arguments - mock_service.get.assert_called_once() - call_args = mock_service.get.call_args - obj_key = call_args[0][0] - output_path = call_args[0][1] - - assert isinstance(obj_key, ObjectKey) - assert obj_key.bucket == "test-bucket" - assert obj_key.key == "data/myfile.zip.delta" - assert output_path == Path("myfile.zip") - - -def test_get_command_with_delta_name(mock_service): - """Test get command with explicit .delta filename.""" - runner = CliRunner() - - # Mock the service.get method and storage.head - mock_service.get = Mock() - mock_service.storage.head = Mock(return_value=Mock()) # File exists - - with patch("deltaglider.app.cli.main.create_service", return_value=mock_service): - # Run get with explicit .delta filename - result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip.delta"]) - - # Check it was successful - assert result.exit_code == 0 - assert "Found file: s3://test-bucket/data/myfile.zip.delta" in result.output - assert "Successfully retrieved: myfile.zip" in result.output - - # Verify the service was called with the correct arguments - mock_service.get.assert_called_once() - call_args = mock_service.get.call_args - obj_key = call_args[0][0] - output_path = call_args[0][1] - 
- assert isinstance(obj_key, ObjectKey) - assert obj_key.bucket == "test-bucket" - assert obj_key.key == "data/myfile.zip.delta" - assert output_path == Path("myfile.zip") - - -def test_get_command_with_output_option(mock_service): - """Test get command with custom output path.""" - runner = CliRunner() - - # Mock the service.get method and storage.head - mock_service.get = Mock() - mock_service.storage.head = Mock( - side_effect=[ - None, # First check for original file returns None - Mock(), # Second check for .delta file returns something - ] - ) - - with patch("deltaglider.app.cli.main.create_service", return_value=mock_service): - with tempfile.TemporaryDirectory() as tmpdir: - output_file = Path(tmpdir) / "custom_output.zip" - - # Run get with custom output path - result = runner.invoke( - cli, ["get", "s3://test-bucket/data/myfile.zip", "-o", str(output_file)] - ) - - # Check it was successful - assert result.exit_code == 0 - assert f"Successfully retrieved: {output_file}" in result.output - - # Verify the service was called with the correct arguments - mock_service.get.assert_called_once() - call_args = mock_service.get.call_args - obj_key = call_args[0][0] - output_path = call_args[0][1] - - assert isinstance(obj_key, ObjectKey) - assert obj_key.bucket == "test-bucket" - assert obj_key.key == "data/myfile.zip.delta" - assert output_path == output_file - - -def test_get_command_error_handling(mock_service): - """Test get command error handling.""" - runner = CliRunner() - - # Mock the service.get method to raise an error - mock_service.get = Mock(side_effect=FileNotFoundError("Delta not found")) - - with patch("deltaglider.app.cli.main.create_service", return_value=mock_service): - # Run get command - result = runner.invoke(cli, ["get", "s3://test-bucket/data/missing.zip"]) - - # Check it failed with error message - assert result.exit_code == 1 - assert "Error: Delta not found" in result.output - - -def test_get_command_invalid_url(): - """Test get command 
with invalid S3 URL.""" - runner = CliRunner() - - # Run get with invalid URL - result = runner.invoke(cli, ["get", "http://invalid-url/file.zip"]) - - # Check it failed with error message - assert result.exit_code == 1 - assert "Error: Invalid S3 URL" in result.output