diff --git a/CLAUDE.md b/CLAUDE.md index 5b2f401..ad9df87 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -113,10 +113,10 @@ src/deltaglider/ ### Core Concepts -1. **Leaf**: A prefix in S3 where related files are stored. Contains a `reference.bin` file that serves as the base for delta compression. +1. **DeltaSpace**: A prefix in S3 where related files are stored for delta compression. Contains a `reference.bin` file that serves as the base against which deltas are computed. 2. **Delta Compression Flow**: - - First file uploaded to a Leaf becomes the reference (stored as `reference.bin`) + - First file uploaded to a DeltaSpace becomes the reference (stored as `reference.bin`) - Subsequent files are compared against the reference using xdelta3 - Only the differences (delta) are stored with `.delta` suffix - Metadata in S3 tags preserves original file info and delta relationships @@ -199,7 +199,7 @@ Core delta logic is in `src/deltaglider/core/service.py`: 4. **Atomic Operations**: All S3 operations are atomic - no partial states are left if operations fail. -5. **Reference File Updates**: Currently, the first file uploaded to a Leaf becomes the permanent reference. Future versions may implement reference rotation. +5. **Reference File Updates**: Currently, the first file uploaded to a DeltaSpace becomes the permanent reference. Future versions may implement reference rotation. 
## Performance Considerations diff --git a/README.md b/README.md index 8fdb714..39ff7e4 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ deltaglider rm -r s3://backups/2023/ ```python from pathlib import Path -from deltaglider.core import DeltaService, Leaf, ObjectKey +from deltaglider.core import DeltaService, DeltaSpace, ObjectKey from deltaglider.adapters import ( S3StorageAdapter, XdeltaAdapter, @@ -261,8 +261,8 @@ def create_service( service = create_service() # Upload a file with automatic delta compression -leaf = Leaf(bucket="my-releases", prefix="v2.0.0") -summary = service.put(Path("my-app-v2.0.0.zip"), leaf) +delta_space = DeltaSpace(bucket="my-releases", prefix="v2.0.0") +summary = service.put(Path("my-app-v2.0.0.zip"), delta_space) print(f"Operation: {summary.operation}") # 'create_reference' or 'create_delta' print(f"Stored at: s3://{summary.bucket}/{summary.key}") diff --git a/docs/deltaglider_architecture_guidelines.txt b/docs/deltaglider_architecture_guidelines.txt index 9c5bdeb..c51f557 100644 --- a/docs/deltaglider_architecture_guidelines.txt +++ b/docs/deltaglider_architecture_guidelines.txt @@ -42,21 +42,21 @@ interface DiffPort { interface HashPort { sha256(pathOrStream) -> Sha256 } interface CachePort { - refPath(bucket, leaf) -> Path - hasRef(bucket, leaf, sha) -> Bool - writeRef(bucket, leaf, src) -> Path - evict(bucket, leaf) + refPath(bucket, prefix) -> Path + hasRef(bucket, prefix, sha) -> Bool + writeRef(bucket, prefix, src) -> Path + evict(bucket, prefix) } interface DeltaService { - put(localFile, leaf, maxRatio) -> PutSummary + put(localFile, deltaSpace, maxRatio) -> PutSummary get(deltaKey, out) -> void verify(deltaKey) -> VerifyResult } 4. Domain Use-Cases ------------------- -put(localFile, leaf): +put(localFile, deltaSpace): - If no reference.bin: upload as reference, cache, create zero-diff delta. - Else: ensure cached reference valid, generate delta, upload with metadata. @@ -69,7 +69,7 @@ verify(deltaKey): 5. 
Object Model --------------- -- Leaf { bucket, prefix } +- DeltaSpace { bucket, prefix } - ObjectKey { bucket, key } - Sha256 { hex } - DeltaMeta { tool, original_name, file_sha256, file_size, created_at, ref_key, ref_sha256, delta_size, note? } diff --git a/docs/deltaglider_metadata_schema.txt b/docs/deltaglider_metadata_schema.txt index 8ef41a4..52738bd 100644 --- a/docs/deltaglider_metadata_schema.txt +++ b/docs/deltaglider_metadata_schema.txt @@ -12,7 +12,7 @@ General Rules Reference Object (`reference.bin`) --------------------------------- -Stored once per leaf prefix. +Stored once per DeltaSpace prefix. Required keys: - tool: deltaglider/0.1.0 @@ -31,7 +31,7 @@ Required keys: - file_sha256: SHA256 of hydrated file - file_size: size in bytes of hydrated file - created_at: ISO8601 UTC timestamp -- ref_key: key of reference file (e.g. path/to/leaf/reference.bin) +- ref_key: key of reference file (e.g. path/to/delta_space/reference.bin) - ref_sha256: SHA256 of reference file - delta_size: size in bytes of delta file - delta_cmd: "xdelta3 -e -9 -s reference.bin .delta" diff --git a/docs/deltaglider_specs.txt b/docs/deltaglider_specs.txt index 6316f76..15b6472 100644 --- a/docs/deltaglider_specs.txt +++ b/docs/deltaglider_specs.txt @@ -12,14 +12,14 @@ The cost of storing large binary artifacts (e.g., ZIP plugins, deliverables) on by only a few kilobytes. Current practice redundantly uploads full versions, wasting space and increasing transfer times. deltaglider is a CLI tool that transparently reduces storage overhead by representing a directory of similar large files as: -- A single reference file (reference.bin) in each leaf S3 prefix. +- A single reference file (reference.bin) in each DeltaSpace S3 prefix. - A set of delta files (.delta) encoding differences against the reference. This approach compresses storage usage to near-optimal while retaining simple semantics. Goals ----- -1. 
Save S3 space by storing only one full copy of similar files per leaf and small binary deltas for subsequent versions. +1. Save S3 space by storing only one full copy of similar files per DeltaSpace and small binary deltas for subsequent versions. 2. Transparent developer workflow – deltaglider put/get mirrors aws s3 cp. 3. Minimal state management – no manifests, no external databases. 4. Integrity assurance – strong hashing (SHA256) stored in metadata, verified on upload/restore. @@ -28,19 +28,19 @@ Goals Non-Goals --------- - Deduplication across multiple directories/prefixes. -- Streaming delta generation across multiple references (always one reference per leaf). +- Streaming delta generation across multiple references (always one reference per DeltaSpace). - Automatic background compaction or garbage collection. Terminology ----------- -- Leaf prefix: An S3 "directory" containing only files, no further sub-prefixes. -- Reference file: The first uploaded file in a leaf, stored as reference.bin. +- DeltaSpace: An S3 prefix containing related files for delta compression. +- Reference file: The first uploaded file in a DeltaSpace, stored as reference.bin. - Delta file: Result of running xdelta3 against the reference, named .delta. Architecture ------------ Reference Selection -- First uploaded file in a leaf becomes the reference. +- First uploaded file in a DeltaSpace becomes the reference. - Stored as reference.bin. - Original filename preserved in metadata of both reference.bin and zero-diff delta. @@ -68,7 +68,7 @@ Local Cache CLI Specification ----------------- -deltaglider put +deltaglider put - If no reference.bin: upload as reference.bin, upload zero-diff .delta. - If reference.bin exists: create delta, upload .delta with metadata. - Output JSON summary. 
diff --git a/src/deltaglider/adapters/cache_fs.py b/src/deltaglider/adapters/cache_fs.py index d6fdb2e..a0c2a05 100644 --- a/src/deltaglider/adapters/cache_fs.py +++ b/src/deltaglider/adapters/cache_fs.py @@ -15,30 +15,30 @@ class FsCacheAdapter(CachePort): self.base_dir = base_dir self.hasher = hasher - def ref_path(self, bucket: str, leaf: str) -> Path: + def ref_path(self, bucket: str, prefix: str) -> Path: """Get path where reference should be cached.""" - cache_dir = self.base_dir / bucket / leaf + cache_dir = self.base_dir / bucket / prefix return cache_dir / "reference.bin" - def has_ref(self, bucket: str, leaf: str, sha: str) -> bool: + def has_ref(self, bucket: str, prefix: str, sha: str) -> bool: """Check if reference exists and matches SHA.""" - path = self.ref_path(bucket, leaf) + path = self.ref_path(bucket, prefix) if not path.exists(): return False actual_sha = self.hasher.sha256(path) return actual_sha == sha - def write_ref(self, bucket: str, leaf: str, src: Path) -> Path: + def write_ref(self, bucket: str, prefix: str, src: Path) -> Path: """Cache reference file.""" - path = self.ref_path(bucket, leaf) + path = self.ref_path(bucket, prefix) path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, path) return path - def evict(self, bucket: str, leaf: str) -> None: + def evict(self, bucket: str, prefix: str) -> None: """Remove cached reference.""" - path = self.ref_path(bucket, leaf) + path = self.ref_path(bucket, prefix) if path.exists(): path.unlink() # Clean up empty directories diff --git a/src/deltaglider/app/cli/aws_compat.py b/src/deltaglider/app/cli/aws_compat.py index 8273ebc..04b2dea 100644 --- a/src/deltaglider/app/cli/aws_compat.py +++ b/src/deltaglider/app/cli/aws_compat.py @@ -5,7 +5,7 @@ from pathlib import Path import click -from ...core import DeltaService, Leaf, ObjectKey +from ...core import DeltaService, DeltaSpace, ObjectKey def is_s3_path(path: str) -> bool: @@ -55,7 +55,7 @@ def upload_file( if not key or 
key.endswith("/"): key = (key + local_path.name).lstrip("/") - leaf = Leaf(bucket=bucket, prefix="/".join(key.split("/")[:-1])) + delta_space = DeltaSpace(bucket=bucket, prefix="/".join(key.split("/")[:-1])) try: # Check if delta should be disabled @@ -69,7 +69,7 @@ def upload_file( click.echo(f"upload: '{local_path}' to 's3://{bucket}/{key}' ({file_size} bytes)") else: # Use delta compression - summary = service.put(local_path, leaf, max_ratio) + summary = service.put(local_path, delta_space, max_ratio) if not quiet: if summary.delta_size: diff --git a/src/deltaglider/app/cli/main.py b/src/deltaglider/app/cli/main.py index 512f8a8..04df848 100644 --- a/src/deltaglider/app/cli/main.py +++ b/src/deltaglider/app/cli/main.py @@ -16,7 +16,7 @@ from ...adapters import ( UtcClockAdapter, XdeltaAdapter, ) -from ...core import DeltaService, Leaf, ObjectKey +from ...core import DeltaService, DeltaSpace, ObjectKey from .aws_compat import ( copy_s3_to_s3, determine_operation, @@ -537,10 +537,10 @@ def put(service: DeltaService, file: Path, s3_url: str, max_ratio: float | None) bucket = parts[0] prefix = parts[1] if len(parts) > 1 else "" - leaf = Leaf(bucket=bucket, prefix=prefix) + delta_space = DeltaSpace(bucket=bucket, prefix=prefix) try: - summary = service.put(file, leaf, max_ratio) + summary = service.put(file, delta_space, max_ratio) # Output JSON summary output = { diff --git a/src/deltaglider/core/__init__.py b/src/deltaglider/core/__init__.py index 9b5c10b..3c5166b 100644 --- a/src/deltaglider/core/__init__.py +++ b/src/deltaglider/core/__init__.py @@ -12,7 +12,7 @@ from .errors import ( ) from .models import ( DeltaMeta, - Leaf, + DeltaSpace, ObjectKey, PutSummary, ReferenceMeta, @@ -30,7 +30,7 @@ __all__ = [ "DiffDecodeError", "StorageIOError", "PolicyViolationWarning", - "Leaf", + "DeltaSpace", "ObjectKey", "Sha256", "DeltaMeta", diff --git a/src/deltaglider/core/models.py b/src/deltaglider/core/models.py index 6c82d68..7f978ac 100644 --- 
a/src/deltaglider/core/models.py +++ b/src/deltaglider/core/models.py @@ -5,8 +5,8 @@ from datetime import datetime @dataclass(frozen=True) -class Leaf: - """S3 leaf prefix.""" +class DeltaSpace: + """S3 delta compression space - a prefix containing related files for delta compression.""" bucket: str prefix: str diff --git a/src/deltaglider/core/service.py b/src/deltaglider/core/service.py index 5ec179e..76f7f6d 100644 --- a/src/deltaglider/core/service.py +++ b/src/deltaglider/core/service.py @@ -25,7 +25,7 @@ from .errors import ( ) from .models import ( DeltaMeta, - Leaf, + DeltaSpace, ObjectKey, PutSummary, ReferenceMeta, @@ -93,7 +93,9 @@ class DeltaService: # Check simple extensions return any(name_lower.endswith(ext) for ext in self.delta_extensions) - def put(self, local_file: Path, leaf: Leaf, max_ratio: float | None = None) -> PutSummary: + def put( + self, local_file: Path, delta_space: DeltaSpace, max_ratio: float | None = None + ) -> PutSummary: """Upload file as reference or delta (for archive files) or directly (for other files).""" if max_ratio is None: max_ratio = self.max_ratio @@ -106,7 +108,7 @@ class DeltaService: self.logger.info( "Starting put operation", file=str(local_file), - leaf=f"{leaf.bucket}/{leaf.prefix}", + leaf=f"{delta_space.bucket}/{delta_space.prefix}", size=file_size, ) @@ -119,23 +121,25 @@ class DeltaService: "Uploading file directly (no delta for this type)", file_type=Path(original_name).suffix, ) - summary = self._upload_direct(local_file, leaf, file_sha256, original_name, file_size) + summary = self._upload_direct( + local_file, delta_space, file_sha256, original_name, file_size + ) else: # For archive files, use the delta compression system # Check for existing reference - ref_key = leaf.reference_key() - ref_head = self.storage.head(f"{leaf.bucket}/{ref_key}") + ref_key = delta_space.reference_key() + ref_head = self.storage.head(f"{delta_space.bucket}/{ref_key}") if ref_head is None: # Create reference summary = 
self._create_reference( - local_file, leaf, file_sha256, original_name, file_size + local_file, delta_space, file_sha256, original_name, file_size ) else: # Create delta summary = self._create_delta( local_file, - leaf, + delta_space, ref_head, file_sha256, original_name, @@ -147,7 +151,7 @@ class DeltaService: self.logger.log_operation( op="put", key=summary.key, - leaf=f"{leaf.bucket}/{leaf.prefix}", + leaf=f"{delta_space.bucket}/{delta_space.prefix}", sizes={"file": file_size, "delta": summary.delta_size or file_size}, durations={"total": duration}, cache_hit=summary.cache_hit, @@ -197,17 +201,19 @@ class DeltaService: leaf_prefix = "/".join(ref_parts[:-1]) else: leaf_prefix = "" - leaf = Leaf(bucket=object_key.bucket, prefix=leaf_prefix) + delta_space = DeltaSpace(bucket=object_key.bucket, prefix=leaf_prefix) - cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, delta_meta.ref_sha256) + cache_hit = self.cache.has_ref( + delta_space.bucket, delta_space.prefix, delta_meta.ref_sha256 + ) if not cache_hit: - self._cache_reference(leaf, delta_meta.ref_sha256) + self._cache_reference(delta_space, delta_meta.ref_sha256) # Download delta and decode with tempfile.TemporaryDirectory() as tmpdir: tmp_path = Path(tmpdir) delta_path = tmp_path / "delta" - ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix) + ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix) out_path = tmp_path / "output" # Download delta @@ -241,7 +247,7 @@ class DeltaService: self.logger.log_operation( op="get", key=object_key.key, - leaf=f"{leaf.bucket}/{leaf.prefix}", + leaf=f"{delta_space.bucket}/{delta_space.prefix}", sizes={"delta": delta_meta.delta_size, "file": delta_meta.file_size}, durations={"total": duration}, cache_hit=cache_hit, @@ -285,14 +291,14 @@ class DeltaService: def _create_reference( self, local_file: Path, - leaf: Leaf, + delta_space: DeltaSpace, file_sha256: str, original_name: str, file_size: int, ) -> PutSummary: """Create reference file.""" - 
ref_key = leaf.reference_key() - full_ref_key = f"{leaf.bucket}/{ref_key}" + ref_key = delta_space.reference_key() + full_ref_key = f"{delta_space.bucket}/{ref_key}" # Create reference metadata ref_meta = ReferenceMeta( @@ -320,14 +326,16 @@ class DeltaService: ref_sha256 = file_sha256 # Cache reference - cached_path = self.cache.write_ref(leaf.bucket, leaf.prefix, local_file) + cached_path = self.cache.write_ref(delta_space.bucket, delta_space.prefix, local_file) self.logger.debug("Cached reference", path=str(cached_path)) # Also create zero-diff delta delta_key = ( - f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta" + f"{delta_space.prefix}/{original_name}.delta" + if delta_space.prefix + else f"{original_name}.delta" ) - full_delta_key = f"{leaf.bucket}/{delta_key}" + full_delta_key = f"{delta_space.bucket}/{delta_key}" with tempfile.NamedTemporaryFile() as zero_delta: # Create empty delta using xdelta3 @@ -357,7 +365,7 @@ class DeltaService: self.metrics.increment("deltaglider.reference.created") return PutSummary( operation="create_reference", - bucket=leaf.bucket, + bucket=delta_space.bucket, key=ref_key, original_name=original_name, file_size=file_size, @@ -367,7 +375,7 @@ class DeltaService: def _create_delta( self, local_file: Path, - leaf: Leaf, + delta_space: DeltaSpace, ref_head: ObjectHead, file_sha256: str, original_name: str, @@ -375,15 +383,15 @@ class DeltaService: max_ratio: float, ) -> PutSummary: """Create delta file.""" - ref_key = leaf.reference_key() + ref_key = delta_space.reference_key() ref_sha256 = ref_head.metadata["file_sha256"] # Ensure reference is cached - cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, ref_sha256) + cache_hit = self.cache.has_ref(delta_space.bucket, delta_space.prefix, ref_sha256) if not cache_hit: - self._cache_reference(leaf, ref_sha256) + self._cache_reference(delta_space, ref_sha256) - ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix) + ref_path = 
self.cache.ref_path(delta_space.bucket, delta_space.prefix) # Create delta with tempfile.NamedTemporaryFile(suffix=".delta") as delta_file: @@ -412,9 +420,11 @@ class DeltaService: # Create delta metadata delta_key = ( - f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta" + f"{delta_space.prefix}/{original_name}.delta" + if delta_space.prefix + else f"{original_name}.delta" ) - full_delta_key = f"{leaf.bucket}/{delta_key}" + full_delta_key = f"{delta_space.bucket}/{delta_key}" delta_meta = DeltaMeta( tool=self.tool_version, @@ -445,7 +455,7 @@ class DeltaService: return PutSummary( operation="create_delta", - bucket=leaf.bucket, + bucket=delta_space.bucket, key=delta_key, original_name=original_name, file_size=file_size, @@ -457,10 +467,10 @@ class DeltaService: cache_hit=cache_hit, ) - def _cache_reference(self, leaf: Leaf, expected_sha: str) -> None: + def _cache_reference(self, delta_space: DeltaSpace, expected_sha: str) -> None: """Download and cache reference.""" - ref_key = leaf.reference_key() - full_ref_key = f"{leaf.bucket}/{ref_key}" + ref_key = delta_space.reference_key() + full_ref_key = f"{delta_space.bucket}/{ref_key}" self.logger.info("Caching reference", key=ref_key) @@ -482,7 +492,7 @@ class DeltaService: ) # Cache it - self.cache.write_ref(leaf.bucket, leaf.prefix, tmp_path) + self.cache.write_ref(delta_space.bucket, delta_space.prefix, tmp_path) tmp_path.unlink() def _get_direct( @@ -533,18 +543,18 @@ class DeltaService: def _upload_direct( self, local_file: Path, - leaf: Leaf, + delta_space: DeltaSpace, file_sha256: str, original_name: str, file_size: int, ) -> PutSummary: """Upload file directly to S3 without delta compression.""" # Construct the key path - if leaf.prefix: - key = f"{leaf.prefix}/{original_name}" + if delta_space.prefix: + key = f"{delta_space.prefix}/{original_name}" else: key = original_name - full_key = f"{leaf.bucket}/{key}" + full_key = f"{delta_space.bucket}/{key}" # Create metadata for the 
file metadata = { @@ -568,7 +578,7 @@ class DeltaService: return PutSummary( operation="upload_direct", - bucket=leaf.bucket, + bucket=delta_space.bucket, key=key, original_name=original_name, file_size=file_size, diff --git a/src/deltaglider/ports/cache.py b/src/deltaglider/ports/cache.py index 1f32080..bd76395 100644 --- a/src/deltaglider/ports/cache.py +++ b/src/deltaglider/ports/cache.py @@ -7,18 +7,18 @@ from typing import Protocol class CachePort(Protocol): """Port for cache operations.""" - def ref_path(self, bucket: str, leaf: str) -> Path: + def ref_path(self, bucket: str, prefix: str) -> Path: """Get path where reference should be cached.""" ... - def has_ref(self, bucket: str, leaf: str, sha: str) -> bool: + def has_ref(self, bucket: str, prefix: str, sha: str) -> bool: """Check if reference exists and matches SHA.""" ... - def write_ref(self, bucket: str, leaf: str, src: Path) -> Path: + def write_ref(self, bucket: str, prefix: str, src: Path) -> Path: """Cache reference file.""" ... - def evict(self, bucket: str, leaf: str) -> None: + def evict(self, bucket: str, prefix: str) -> None: """Remove cached reference.""" ... 
diff --git a/tests/integration/test_full_workflow.py b/tests/integration/test_full_workflow.py index 20d9150..e23409c 100644 --- a/tests/integration/test_full_workflow.py +++ b/tests/integration/test_full_workflow.py @@ -3,7 +3,7 @@ import io from pathlib import Path -from deltaglider.core import Leaf, ObjectKey +from deltaglider.core import DeltaSpace, ObjectKey def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff): @@ -25,7 +25,7 @@ def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff): mock_diff.decode.side_effect = decode_side_effect - leaf = Leaf(bucket="test-bucket", prefix="test/data") + delta_space = DeltaSpace(bucket="test-bucket", prefix="test/data") # Storage state tracking storage_data = {} @@ -76,28 +76,28 @@ def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff): mock_storage.get.side_effect = mock_get # Step 1: Put the first file (creates reference) - summary1 = service.put(file1, leaf) + summary1 = service.put(file1, delta_space) assert summary1.operation == "create_reference" assert summary1.key == "test/data/reference.bin" # Verify reference was stored - ref_key = f"{leaf.bucket}/{leaf.reference_key()}" + ref_key = f"{delta_space.bucket}/{delta_space.reference_key()}" assert ref_key in storage_data assert storage_data[ref_key]["content"] == file1_content # Step 2: Put the second file (creates delta) - summary2 = service.put(file2, leaf) + summary2 = service.put(file2, delta_space) assert summary2.operation == "create_delta" assert summary2.key == "test/data/version2.zip.delta" assert summary2.delta_size is not None assert summary2.ref_key == "test/data/reference.bin" # Verify delta was stored - delta_key = f"{leaf.bucket}/{summary2.key}" + delta_key = f"{delta_space.bucket}/{summary2.key}" assert delta_key in storage_data # Step 3: Get the delta file back - obj_key = ObjectKey(bucket=leaf.bucket, key=summary2.key) + obj_key = ObjectKey(bucket=delta_space.bucket, key=summary2.key) 
service.get(obj_key, output_file) # Step 4: Verify the recovered file matches the original @@ -118,7 +118,7 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff): mock_diff.decode.side_effect = decode_side_effect - leaf = Leaf(bucket="test-bucket", prefix="archive") + delta_space = DeltaSpace(bucket="test-bucket", prefix="archive") # Storage state tracking storage_data = {} @@ -169,7 +169,7 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff): mock_storage.get.side_effect = mock_get # Put the file - summary = service.put(test_file, leaf) + summary = service.put(test_file, delta_space) # Get it back using original name (without .delta) # The service should internally look for "mydata.zip.delta" @@ -178,9 +178,9 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff): # Use the key without .delta suffix if summary.operation == "create_reference": # If it's a reference, the zero-diff delta was created - obj_key = ObjectKey(bucket=leaf.bucket, key="archive/mydata.zip.delta") + obj_key = ObjectKey(bucket=delta_space.bucket, key="archive/mydata.zip.delta") else: - obj_key = ObjectKey(bucket=leaf.bucket, key=summary.key) + obj_key = ObjectKey(bucket=delta_space.bucket, key=summary.key) service.get(obj_key, output_file) diff --git a/tests/unit/test_core_service.py b/tests/unit/test_core_service.py index 2ec93ac..81d064a 100644 --- a/tests/unit/test_core_service.py +++ b/tests/unit/test_core_service.py @@ -5,7 +5,7 @@ import warnings import pytest from deltaglider.core import ( - Leaf, + DeltaSpace, NotFoundError, ObjectKey, PolicyViolationWarning, @@ -19,12 +19,12 @@ class TestDeltaServicePut: def test_create_reference_first_file(self, service, sample_file, mock_storage): """Test creating reference for first file.""" # Setup - leaf = Leaf(bucket="test-bucket", prefix="test/prefix") + delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix") mock_storage.head.return_value = None 
# No reference exists mock_storage.put.return_value = PutResult(etag="abc123") # Execute - summary = service.put(sample_file, leaf) + summary = service.put(sample_file, delta_space) # Verify assert summary.operation == "create_reference" @@ -41,7 +41,7 @@ class TestDeltaServicePut: def test_create_delta_subsequent_file(self, service, sample_file, mock_storage, mock_diff): """Test creating delta for subsequent file.""" # Setup - leaf = Leaf(bucket="test-bucket", prefix="test/prefix") + delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix") # Create reference content and compute its SHA import io @@ -68,12 +68,12 @@ class TestDeltaServicePut: mock_storage.get.return_value = io.BytesIO(ref_content) # Create cached reference with matching content - ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix) + ref_path = service.cache.ref_path(delta_space.bucket, delta_space.prefix) ref_path.parent.mkdir(parents=True, exist_ok=True) ref_path.write_bytes(ref_content) # Execute - summary = service.put(sample_file, leaf) + summary = service.put(sample_file, delta_space) # Verify assert summary.operation == "create_delta" @@ -89,7 +89,7 @@ class TestDeltaServicePut: def test_delta_ratio_warning(self, service, sample_file, mock_storage, mock_diff): """Test warning when delta ratio exceeds threshold.""" # Setup - leaf = Leaf(bucket="test-bucket", prefix="test/prefix") + delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix") # Create reference content and compute its SHA import io @@ -119,14 +119,14 @@ class TestDeltaServicePut: mock_diff.encode.side_effect = large_encode # Create cached reference with matching content - ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix) + ref_path = service.cache.ref_path(delta_space.bucket, delta_space.prefix) ref_path.parent.mkdir(parents=True, exist_ok=True) ref_path.write_bytes(ref_content) # Execute and check warning with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - 
service.put(sample_file, leaf, max_ratio=0.1) + service.put(sample_file, delta_space, max_ratio=0.1) assert len(w) == 1 assert issubclass(w[0].category, PolicyViolationWarning)