mirror of https://github.com/beshu-tech/deltaglider.git (synced 2026-01-11 22:30:48 +01:00)
refactor: Rename Leaf to DeltaSpace for semantic clarity
- Renamed Leaf class to DeltaSpace throughout the codebase
- Updated all imports, method signatures, and variable names
- Updated documentation and comments to reflect the new naming
- DeltaSpace better represents a container for delta-compressed files

The term "DeltaSpace" is more semantically accurate than "Leaf" as it represents a space/container for managing related files with delta compression, not a terminal node in a tree structure.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -113,10 +113,10 @@ src/deltaglider/
 
 ### Core Concepts
 
-1. **Leaf**: A prefix in S3 where related files are stored. Contains a `reference.bin` file that serves as the base for delta compression.
+1. **DeltaSpace**: A prefix in S3 where related files are stored for delta compression. Contains a `reference.bin` file that serves as the base for delta compression.
 
 2. **Delta Compression Flow**:
-   - First file uploaded to a Leaf becomes the reference (stored as `reference.bin`)
+   - First file uploaded to a DeltaSpace becomes the reference (stored as `reference.bin`)
    - Subsequent files are compared against the reference using xdelta3
    - Only the differences (delta) are stored with `.delta` suffix
    - Metadata in S3 tags preserves original file info and delta relationships
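Taken together, the flow above can be sketched end to end with a local directory standing in for the S3 prefix; `put_file` is a hypothetical helper for illustration, not the deltaglider API:

```python
# Minimal sketch of the DeltaSpace flow, with a local directory standing in
# for the S3 prefix. `put_file` is a hypothetical helper, not deltaglider API.
import subprocess
from pathlib import Path


def put_file(delta_space_dir: Path, src: Path) -> Path:
    """First file becomes reference.bin; later files become <name>.delta."""
    delta_space_dir.mkdir(parents=True, exist_ok=True)
    ref = delta_space_dir / "reference.bin"
    if not ref.exists():
        ref.write_bytes(src.read_bytes())  # first upload: stored in full
        return ref
    delta = delta_space_dir / f"{src.name}.delta"
    # Same invocation the spec records as delta_cmd:
    #   xdelta3 -e -9 -s reference.bin <file> <file>.delta
    subprocess.run(
        ["xdelta3", "-e", "-9", "-s", str(ref), str(src), str(delta)],
        check=True,
    )
    return delta
```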
@@ -199,7 +199,7 @@ Core delta logic is in `src/deltaglider/core/service.py`:
 
 4. **Atomic Operations**: All S3 operations are atomic - no partial states are left if operations fail.
 
-5. **Reference File Updates**: Currently, the first file uploaded to a Leaf becomes the permanent reference. Future versions may implement reference rotation.
+5. **Reference File Updates**: Currently, the first file uploaded to a DeltaSpace becomes the permanent reference. Future versions may implement reference rotation.
 
 ## Performance Considerations
 
@@ -196,7 +196,7 @@ deltaglider rm -r s3://backups/2023/
 
 ```python
 from pathlib import Path
-from deltaglider.core import DeltaService, Leaf, ObjectKey
+from deltaglider.core import DeltaService, DeltaSpace, ObjectKey
 from deltaglider.adapters import (
     S3StorageAdapter,
     XdeltaAdapter,
@@ -261,8 +261,8 @@ def create_service(
 service = create_service()
 
 # Upload a file with automatic delta compression
-leaf = Leaf(bucket="my-releases", prefix="v2.0.0")
-summary = service.put(Path("my-app-v2.0.0.zip"), leaf)
+delta_space = DeltaSpace(bucket="my-releases", prefix="v2.0.0")
+summary = service.put(Path("my-app-v2.0.0.zip"), delta_space)
 
 print(f"Operation: {summary.operation}")  # 'create_reference' or 'create_delta'
 print(f"Stored at: s3://{summary.bucket}/{summary.key}")
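For symmetry with the upload example above, the get path decodes a delta against the cached reference. A hedged sketch using the xdelta3 CLI directly; `restore_file` is an illustrative helper, not the library API:

```python
# Sketch of the restore (get) path: apply a stored delta to the cached
# reference with xdelta3 -d. `restore_file` is illustrative, not deltaglider API.
import subprocess
from pathlib import Path


def restore_file(ref: Path, delta: Path, out: Path) -> None:
    # -d decodes; -s must name the same reference used at encode time
    subprocess.run(
        ["xdelta3", "-d", "-s", str(ref), str(delta), str(out)],
        check=True,
    )
```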
@@ -42,21 +42,21 @@ interface DiffPort {
 interface HashPort { sha256(pathOrStream) -> Sha256 }
 
 interface CachePort {
-  refPath(bucket, leaf) -> Path
-  hasRef(bucket, leaf, sha) -> Bool
-  writeRef(bucket, leaf, src) -> Path
-  evict(bucket, leaf)
+  refPath(bucket, prefix) -> Path
+  hasRef(bucket, prefix, sha) -> Bool
+  writeRef(bucket, prefix, src) -> Path
+  evict(bucket, prefix)
 }
 
 interface DeltaService {
-  put(localFile, leaf, maxRatio) -> PutSummary
+  put(localFile, deltaSpace, maxRatio) -> PutSummary
   get(deltaKey, out) -> void
   verify(deltaKey) -> VerifyResult
 }
 
 4. Domain Use-Cases
 -------------------
-put(localFile, leaf):
+put(localFile, deltaSpace):
 - If no reference.bin: upload as reference, cache, create zero-diff delta.
 - Else: ensure cached reference valid, generate delta, upload with metadata.
 
@@ -69,7 +69,7 @@ verify(deltaKey):
 
 5. Object Model
 ---------------
-- Leaf { bucket, prefix }
+- DeltaSpace { bucket, prefix }
 - ObjectKey { bucket, key }
 - Sha256 { hex }
 - DeltaMeta { tool, original_name, file_sha256, file_size, created_at, ref_key, ref_sha256, delta_size, note? }
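The object model above maps naturally onto frozen dataclasses; a hedged sketch in which the field names follow the list but the exact Python types are assumptions:

```python
# Hedged sketch of the object model as frozen dataclasses; field names come
# from the list above, the concrete types are assumptions.
from dataclasses import dataclass
from datetime import datetime


@dataclass(frozen=True)
class DeltaSpace:
    bucket: str
    prefix: str


@dataclass(frozen=True)
class DeltaMeta:
    tool: str
    original_name: str
    file_sha256: str
    file_size: int
    created_at: datetime
    ref_key: str
    ref_sha256: str
    delta_size: int
    note: str | None = None  # optional, per the trailing "note?"
```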
@@ -12,7 +12,7 @@ General Rules
 
 Reference Object (`reference.bin`)
 ---------------------------------
-Stored once per leaf prefix.
+Stored once per DeltaSpace prefix.
 
 Required keys:
 - tool: deltaglider/0.1.0
 
@@ -31,7 +31,7 @@ Required keys:
 - file_sha256: SHA256 of hydrated file
 - file_size: size in bytes of hydrated file
 - created_at: ISO8601 UTC timestamp
-- ref_key: key of reference file (e.g. path/to/leaf/reference.bin)
+- ref_key: key of reference file (e.g. path/to/delta_space/reference.bin)
 - ref_sha256: SHA256 of reference file
 - delta_size: size in bytes of delta file
 - delta_cmd: "xdelta3 -e -9 -s reference.bin <file> <file>.delta"
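The integrity check implied by file_sha256 is a straight digest comparison after hydration; a sketch with illustrative helper names:

```python
# Sketch: recompute file_sha256 over a hydrated file and compare it with the
# value stored in metadata. `sha256_hex` and `verify_hydrated` are illustrative.
import hashlib
from pathlib import Path


def sha256_hex(path: Path) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()


def verify_hydrated(path: Path, expected_sha256: str) -> bool:
    return sha256_hex(path) == expected_sha256
```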
@@ -12,14 +12,14 @@ The cost of storing large binary artifacts (e.g., ZIP plugins, deliverables) on
 by only a few kilobytes. Current practice redundantly uploads full versions, wasting space and increasing transfer times.
 
 deltaglider is a CLI tool that transparently reduces storage overhead by representing a directory of similar large files as:
-- A single reference file (reference.bin) in each leaf S3 prefix.
+- A single reference file (reference.bin) in each DeltaSpace S3 prefix.
 - A set of delta files (<original>.delta) encoding differences against the reference.
 
 This approach compresses storage usage to near-optimal while retaining simple semantics.
 
 Goals
 -----
-1. Save S3 space by storing only one full copy of similar files per leaf and small binary deltas for subsequent versions.
+1. Save S3 space by storing only one full copy of similar files per DeltaSpace and small binary deltas for subsequent versions.
 2. Transparent developer workflow – deltaglider put/get mirrors aws s3 cp.
 3. Minimal state management – no manifests, no external databases.
 4. Integrity assurance – strong hashing (SHA256) stored in metadata, verified on upload/restore.
 
@@ -28,19 +28,19 @@ Goals
 Non-Goals
 ---------
 - Deduplication across multiple directories/prefixes.
-- Streaming delta generation across multiple references (always one reference per leaf).
+- Streaming delta generation across multiple references (always one reference per DeltaSpace).
 - Automatic background compaction or garbage collection.
 
 Terminology
 -----------
-- Leaf prefix: An S3 "directory" containing only files, no further sub-prefixes.
-- Reference file: The first uploaded file in a leaf, stored as reference.bin.
+- DeltaSpace: An S3 prefix containing related files for delta compression.
+- Reference file: The first uploaded file in a DeltaSpace, stored as reference.bin.
 - Delta file: Result of running xdelta3 against the reference, named <original>.delta.
 
 Architecture
 ------------
 Reference Selection
-- First uploaded file in a leaf becomes the reference.
+- First uploaded file in a DeltaSpace becomes the reference.
 - Stored as reference.bin.
 - Original filename preserved in metadata of both reference.bin and zero-diff delta.
@@ -68,7 +68,7 @@ Local Cache
 
 CLI Specification
 -----------------
-deltaglider put <file> <s3://bucket/path/to/leaf/>
+deltaglider put <file> <s3://bucket/path/to/delta_space/>
 - If no reference.bin: upload <file> as reference.bin, upload zero-diff <file>.delta.
 - If reference.bin exists: create delta, upload <file>.delta with metadata.
 - Output JSON summary.
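Because put prints a JSON summary on stdout, the CLI is easy to script; a sketch from Python, in which the exact summary field names ("operation", "delta_size") are assumptions based on PutSummary elsewhere in this diff:

```python
# Sketch: drive the CLI from Python and read its JSON summary. The summary
# field names are assumptions inferred from PutSummary in this diff.
import json
import subprocess

proc = subprocess.run(
    ["deltaglider", "put", "my-app-v2.0.0.zip", "s3://my-releases/v2.0.0/"],
    capture_output=True, text=True, check=True,
)
summary = json.loads(proc.stdout)
print(summary["operation"], summary.get("delta_size"))
```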
@@ -15,30 +15,30 @@ class FsCacheAdapter(CachePort):
         self.base_dir = base_dir
         self.hasher = hasher
 
-    def ref_path(self, bucket: str, leaf: str) -> Path:
+    def ref_path(self, bucket: str, prefix: str) -> Path:
         """Get path where reference should be cached."""
-        cache_dir = self.base_dir / bucket / leaf
+        cache_dir = self.base_dir / bucket / prefix
         return cache_dir / "reference.bin"
 
-    def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
+    def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
         """Check if reference exists and matches SHA."""
-        path = self.ref_path(bucket, leaf)
+        path = self.ref_path(bucket, prefix)
         if not path.exists():
             return False
 
         actual_sha = self.hasher.sha256(path)
         return actual_sha == sha
 
-    def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
+    def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
         """Cache reference file."""
-        path = self.ref_path(bucket, leaf)
+        path = self.ref_path(bucket, prefix)
         path.parent.mkdir(parents=True, exist_ok=True)
         shutil.copy2(src, path)
         return path
 
-    def evict(self, bucket: str, leaf: str) -> None:
+    def evict(self, bucket: str, prefix: str) -> None:
         """Remove cached reference."""
-        path = self.ref_path(bucket, leaf)
+        path = self.ref_path(bucket, prefix)
         if path.exists():
             path.unlink()
             # Clean up empty directories
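A hedged usage sketch of the adapter above: the constructor arguments are inferred from the fields it sets, the import path is assumed, and the stand-in hasher only needs the .sha256(path) method that has_ref calls:

```python
# Usage sketch for FsCacheAdapter. Constructor signature inferred from the
# assigned fields; the import path and _Sha256Hasher stand-in are assumptions.
import hashlib
from pathlib import Path

from deltaglider.adapters import FsCacheAdapter  # import path assumed


class _Sha256Hasher:
    def sha256(self, path: Path) -> str:
        return hashlib.sha256(path.read_bytes()).hexdigest()


hasher = _Sha256Hasher()
cache = FsCacheAdapter(base_dir=Path("/tmp/dg-cache"), hasher=hasher)
ref = cache.write_ref("my-bucket", "v2.0.0", Path("local-reference.bin"))
assert cache.has_ref("my-bucket", "v2.0.0", hasher.sha256(ref))
cache.evict("my-bucket", "v2.0.0")
```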
@@ -5,7 +5,7 @@ from pathlib import Path
 
 import click
 
-from ...core import DeltaService, Leaf, ObjectKey
+from ...core import DeltaService, DeltaSpace, ObjectKey
 
 
 def is_s3_path(path: str) -> bool:
@@ -55,7 +55,7 @@ def upload_file(
     if not key or key.endswith("/"):
         key = (key + local_path.name).lstrip("/")
 
-    leaf = Leaf(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
+    delta_space = DeltaSpace(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
 
     try:
         # Check if delta should be disabled
 
@@ -69,7 +69,7 @@ def upload_file(
         click.echo(f"upload: '{local_path}' to 's3://{bucket}/{key}' ({file_size} bytes)")
     else:
         # Use delta compression
-        summary = service.put(local_path, leaf, max_ratio)
+        summary = service.put(local_path, delta_space, max_ratio)
 
     if not quiet:
         if summary.delta_size:
@@ -16,7 +16,7 @@ from ...adapters import (
     UtcClockAdapter,
     XdeltaAdapter,
 )
-from ...core import DeltaService, Leaf, ObjectKey
+from ...core import DeltaService, DeltaSpace, ObjectKey
 from .aws_compat import (
     copy_s3_to_s3,
     determine_operation,
@@ -537,10 +537,10 @@ def put(service: DeltaService, file: Path, s3_url: str, max_ratio: float | None)
     bucket = parts[0]
     prefix = parts[1] if len(parts) > 1 else ""
 
-    leaf = Leaf(bucket=bucket, prefix=prefix)
+    delta_space = DeltaSpace(bucket=bucket, prefix=prefix)
 
     try:
-        summary = service.put(file, leaf, max_ratio)
+        summary = service.put(file, delta_space, max_ratio)
 
         # Output JSON summary
         output = {
@@ -12,7 +12,7 @@ from .errors import (
 )
 from .models import (
     DeltaMeta,
-    Leaf,
+    DeltaSpace,
     ObjectKey,
     PutSummary,
     ReferenceMeta,
 
@@ -30,7 +30,7 @@ __all__ = [
     "DiffDecodeError",
     "StorageIOError",
     "PolicyViolationWarning",
-    "Leaf",
+    "DeltaSpace",
     "ObjectKey",
     "Sha256",
     "DeltaMeta",
@@ -5,8 +5,8 @@ from datetime import datetime
 
 
 @dataclass(frozen=True)
-class Leaf:
-    """S3 leaf prefix."""
+class DeltaSpace:
+    """S3 delta compression space - a prefix containing related files for delta compression."""
 
     bucket: str
     prefix: str
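Elsewhere in this diff the service calls delta_space.reference_key(); that method is not shown in this hunk, but a plausible implementation, consistent with the spec and with the test assertion summary1.key == "test/data/reference.bin", would be:

```python
# Plausible sketch of the reference_key() method used elsewhere in this diff;
# the real implementation is not shown in this hunk.
def reference_key(self) -> str:
    return f"{self.prefix}/reference.bin" if self.prefix else "reference.bin"
```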
@@ -25,7 +25,7 @@ from .errors import (
 )
 from .models import (
     DeltaMeta,
-    Leaf,
+    DeltaSpace,
     ObjectKey,
     PutSummary,
     ReferenceMeta,
@@ -93,7 +93,9 @@ class DeltaService:
         # Check simple extensions
         return any(name_lower.endswith(ext) for ext in self.delta_extensions)
 
-    def put(self, local_file: Path, leaf: Leaf, max_ratio: float | None = None) -> PutSummary:
+    def put(
+        self, local_file: Path, delta_space: DeltaSpace, max_ratio: float | None = None
+    ) -> PutSummary:
         """Upload file as reference or delta (for archive files) or directly (for other files)."""
         if max_ratio is None:
             max_ratio = self.max_ratio
@@ -106,7 +108,7 @@ class DeltaService:
         self.logger.info(
             "Starting put operation",
             file=str(local_file),
-            leaf=f"{leaf.bucket}/{leaf.prefix}",
+            leaf=f"{delta_space.bucket}/{delta_space.prefix}",
             size=file_size,
         )
 
@@ -119,23 +121,25 @@ class DeltaService:
                 "Uploading file directly (no delta for this type)",
                 file_type=Path(original_name).suffix,
             )
-            summary = self._upload_direct(local_file, leaf, file_sha256, original_name, file_size)
+            summary = self._upload_direct(
+                local_file, delta_space, file_sha256, original_name, file_size
+            )
         else:
             # For archive files, use the delta compression system
             # Check for existing reference
-            ref_key = leaf.reference_key()
-            ref_head = self.storage.head(f"{leaf.bucket}/{ref_key}")
+            ref_key = delta_space.reference_key()
+            ref_head = self.storage.head(f"{delta_space.bucket}/{ref_key}")
 
             if ref_head is None:
                 # Create reference
                 summary = self._create_reference(
-                    local_file, leaf, file_sha256, original_name, file_size
+                    local_file, delta_space, file_sha256, original_name, file_size
                 )
             else:
                 # Create delta
                 summary = self._create_delta(
                     local_file,
-                    leaf,
+                    delta_space,
                     ref_head,
                     file_sha256,
                     original_name,
@@ -147,7 +151,7 @@ class DeltaService:
         self.logger.log_operation(
             op="put",
             key=summary.key,
-            leaf=f"{leaf.bucket}/{leaf.prefix}",
+            leaf=f"{delta_space.bucket}/{delta_space.prefix}",
            sizes={"file": file_size, "delta": summary.delta_size or file_size},
             durations={"total": duration},
             cache_hit=summary.cache_hit,
@@ -197,17 +201,19 @@ class DeltaService:
             leaf_prefix = "/".join(ref_parts[:-1])
         else:
             leaf_prefix = ""
-        leaf = Leaf(bucket=object_key.bucket, prefix=leaf_prefix)
+        delta_space = DeltaSpace(bucket=object_key.bucket, prefix=leaf_prefix)
 
-        cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, delta_meta.ref_sha256)
+        cache_hit = self.cache.has_ref(
+            delta_space.bucket, delta_space.prefix, delta_meta.ref_sha256
+        )
         if not cache_hit:
-            self._cache_reference(leaf, delta_meta.ref_sha256)
+            self._cache_reference(delta_space, delta_meta.ref_sha256)
 
         # Download delta and decode
         with tempfile.TemporaryDirectory() as tmpdir:
             tmp_path = Path(tmpdir)
             delta_path = tmp_path / "delta"
-            ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
+            ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
             out_path = tmp_path / "output"
 
             # Download delta
@@ -241,7 +247,7 @@ class DeltaService:
         self.logger.log_operation(
             op="get",
             key=object_key.key,
-            leaf=f"{leaf.bucket}/{leaf.prefix}",
+            leaf=f"{delta_space.bucket}/{delta_space.prefix}",
             sizes={"delta": delta_meta.delta_size, "file": delta_meta.file_size},
             durations={"total": duration},
             cache_hit=cache_hit,
@@ -285,14 +291,14 @@ class DeltaService:
     def _create_reference(
         self,
         local_file: Path,
-        leaf: Leaf,
+        delta_space: DeltaSpace,
         file_sha256: str,
         original_name: str,
         file_size: int,
     ) -> PutSummary:
         """Create reference file."""
-        ref_key = leaf.reference_key()
-        full_ref_key = f"{leaf.bucket}/{ref_key}"
+        ref_key = delta_space.reference_key()
+        full_ref_key = f"{delta_space.bucket}/{ref_key}"
 
         # Create reference metadata
         ref_meta = ReferenceMeta(
@@ -320,14 +326,16 @@ class DeltaService:
         ref_sha256 = file_sha256
 
         # Cache reference
-        cached_path = self.cache.write_ref(leaf.bucket, leaf.prefix, local_file)
+        cached_path = self.cache.write_ref(delta_space.bucket, delta_space.prefix, local_file)
         self.logger.debug("Cached reference", path=str(cached_path))
 
         # Also create zero-diff delta
         delta_key = (
-            f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
+            f"{delta_space.prefix}/{original_name}.delta"
+            if delta_space.prefix
+            else f"{original_name}.delta"
         )
-        full_delta_key = f"{leaf.bucket}/{delta_key}"
+        full_delta_key = f"{delta_space.bucket}/{delta_key}"
 
         with tempfile.NamedTemporaryFile() as zero_delta:
             # Create empty delta using xdelta3
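One plausible reading of "empty delta using xdelta3" is encoding the reference against itself, which yields a delta of a few bytes that hydrates back to the full file; a standalone sketch under that assumption:

```python
# Standalone sketch of a zero-diff delta: encode a file against itself.
# This is an assumption about the mechanism, not the service's actual code.
# -f forces overwrite, since NamedTemporaryFile pre-creates the target file.
import subprocess
import tempfile
from pathlib import Path

src = Path("reference.bin")  # illustrative input
with tempfile.NamedTemporaryFile(suffix=".delta") as zero_delta:
    subprocess.run(
        ["xdelta3", "-e", "-9", "-f", "-s", str(src), str(src), zero_delta.name],
        check=True,
    )
    print(Path(zero_delta.name).stat().st_size, "bytes of delta")
```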
@@ -357,7 +365,7 @@ class DeltaService:
         self.metrics.increment("deltaglider.reference.created")
         return PutSummary(
             operation="create_reference",
-            bucket=leaf.bucket,
+            bucket=delta_space.bucket,
             key=ref_key,
             original_name=original_name,
             file_size=file_size,
@@ -367,7 +375,7 @@ class DeltaService:
     def _create_delta(
         self,
         local_file: Path,
-        leaf: Leaf,
+        delta_space: DeltaSpace,
         ref_head: ObjectHead,
         file_sha256: str,
         original_name: str,
@@ -375,15 +383,15 @@ class DeltaService:
         max_ratio: float,
     ) -> PutSummary:
         """Create delta file."""
-        ref_key = leaf.reference_key()
+        ref_key = delta_space.reference_key()
         ref_sha256 = ref_head.metadata["file_sha256"]
 
         # Ensure reference is cached
-        cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, ref_sha256)
+        cache_hit = self.cache.has_ref(delta_space.bucket, delta_space.prefix, ref_sha256)
         if not cache_hit:
-            self._cache_reference(leaf, ref_sha256)
+            self._cache_reference(delta_space, ref_sha256)
 
-        ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
+        ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
 
         # Create delta
         with tempfile.NamedTemporaryFile(suffix=".delta") as delta_file:
@@ -412,9 +420,11 @@ class DeltaService:
 
         # Create delta metadata
         delta_key = (
-            f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
+            f"{delta_space.prefix}/{original_name}.delta"
+            if delta_space.prefix
+            else f"{original_name}.delta"
         )
-        full_delta_key = f"{leaf.bucket}/{delta_key}"
+        full_delta_key = f"{delta_space.bucket}/{delta_key}"
 
         delta_meta = DeltaMeta(
             tool=self.tool_version,
@@ -445,7 +455,7 @@ class DeltaService:
 
         return PutSummary(
             operation="create_delta",
-            bucket=leaf.bucket,
+            bucket=delta_space.bucket,
             key=delta_key,
             original_name=original_name,
             file_size=file_size,
@@ -457,10 +467,10 @@ class DeltaService:
             cache_hit=cache_hit,
         )
 
-    def _cache_reference(self, leaf: Leaf, expected_sha: str) -> None:
+    def _cache_reference(self, delta_space: DeltaSpace, expected_sha: str) -> None:
         """Download and cache reference."""
-        ref_key = leaf.reference_key()
-        full_ref_key = f"{leaf.bucket}/{ref_key}"
+        ref_key = delta_space.reference_key()
+        full_ref_key = f"{delta_space.bucket}/{ref_key}"
 
         self.logger.info("Caching reference", key=ref_key)
 
@@ -482,7 +492,7 @@ class DeltaService:
         )
 
         # Cache it
-        self.cache.write_ref(leaf.bucket, leaf.prefix, tmp_path)
+        self.cache.write_ref(delta_space.bucket, delta_space.prefix, tmp_path)
         tmp_path.unlink()
 
     def _get_direct(
@@ -533,18 +543,18 @@ class DeltaService:
     def _upload_direct(
         self,
         local_file: Path,
-        leaf: Leaf,
+        delta_space: DeltaSpace,
         file_sha256: str,
         original_name: str,
         file_size: int,
     ) -> PutSummary:
         """Upload file directly to S3 without delta compression."""
         # Construct the key path
-        if leaf.prefix:
-            key = f"{leaf.prefix}/{original_name}"
+        if delta_space.prefix:
+            key = f"{delta_space.prefix}/{original_name}"
         else:
             key = original_name
-        full_key = f"{leaf.bucket}/{key}"
+        full_key = f"{delta_space.bucket}/{key}"
 
         # Create metadata for the file
         metadata = {
 
@@ -568,7 +578,7 @@ class DeltaService:
 
         return PutSummary(
             operation="upload_direct",
-            bucket=leaf.bucket,
+            bucket=delta_space.bucket,
             key=key,
             original_name=original_name,
             file_size=file_size,
@@ -7,18 +7,18 @@ from typing import Protocol
 class CachePort(Protocol):
     """Port for cache operations."""
 
-    def ref_path(self, bucket: str, leaf: str) -> Path:
+    def ref_path(self, bucket: str, prefix: str) -> Path:
         """Get path where reference should be cached."""
         ...
 
-    def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
+    def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
         """Check if reference exists and matches SHA."""
         ...
 
-    def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
+    def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
         """Cache reference file."""
         ...
 
-    def evict(self, bucket: str, leaf: str) -> None:
+    def evict(self, bucket: str, prefix: str) -> None:
         """Remove cached reference."""
         ...
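CachePort is a typing.Protocol, so adapters such as FsCacheAdapter above satisfy it structurally, without inheriting from it; a sketch of what that buys callers (the helper and the import path are illustrative assumptions):

```python
# Sketch: structural typing in action. Any object with matching method
# signatures type-checks as CachePort; no subclassing is required.
from pathlib import Path

from deltaglider.ports import CachePort  # import path assumed


def warm_cache(cache: CachePort, bucket: str, prefix: str, src: Path) -> Path:
    """Hypothetical helper: pre-populate the cache with a known reference."""
    return cache.write_ref(bucket, prefix, src)
```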
@@ -3,7 +3,7 @@
 import io
 from pathlib import Path
 
-from deltaglider.core import Leaf, ObjectKey
+from deltaglider.core import DeltaSpace, ObjectKey
 
 
 def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff):
@@ -25,7 +25,7 @@ def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff):
 
     mock_diff.decode.side_effect = decode_side_effect
 
-    leaf = Leaf(bucket="test-bucket", prefix="test/data")
+    delta_space = DeltaSpace(bucket="test-bucket", prefix="test/data")
 
     # Storage state tracking
     storage_data = {}
@@ -76,28 +76,28 @@ def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff):
     mock_storage.get.side_effect = mock_get
 
     # Step 1: Put the first file (creates reference)
-    summary1 = service.put(file1, leaf)
+    summary1 = service.put(file1, delta_space)
     assert summary1.operation == "create_reference"
     assert summary1.key == "test/data/reference.bin"
 
     # Verify reference was stored
-    ref_key = f"{leaf.bucket}/{leaf.reference_key()}"
+    ref_key = f"{delta_space.bucket}/{delta_space.reference_key()}"
     assert ref_key in storage_data
     assert storage_data[ref_key]["content"] == file1_content
 
     # Step 2: Put the second file (creates delta)
-    summary2 = service.put(file2, leaf)
+    summary2 = service.put(file2, delta_space)
     assert summary2.operation == "create_delta"
     assert summary2.key == "test/data/version2.zip.delta"
     assert summary2.delta_size is not None
     assert summary2.ref_key == "test/data/reference.bin"
 
     # Verify delta was stored
-    delta_key = f"{leaf.bucket}/{summary2.key}"
+    delta_key = f"{delta_space.bucket}/{summary2.key}"
     assert delta_key in storage_data
 
     # Step 3: Get the delta file back
-    obj_key = ObjectKey(bucket=leaf.bucket, key=summary2.key)
+    obj_key = ObjectKey(bucket=delta_space.bucket, key=summary2.key)
     service.get(obj_key, output_file)
 
     # Step 4: Verify the recovered file matches the original
@@ -118,7 +118,7 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff):
 
     mock_diff.decode.side_effect = decode_side_effect
 
-    leaf = Leaf(bucket="test-bucket", prefix="archive")
+    delta_space = DeltaSpace(bucket="test-bucket", prefix="archive")
 
     # Storage state tracking
     storage_data = {}
@@ -169,7 +169,7 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff):
     mock_storage.get.side_effect = mock_get
 
     # Put the file
-    summary = service.put(test_file, leaf)
+    summary = service.put(test_file, delta_space)
 
     # Get it back using original name (without .delta)
     # The service should internally look for "mydata.zip.delta"
@@ -178,9 +178,9 @@ def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff):
     # Use the key without .delta suffix
     if summary.operation == "create_reference":
         # If it's a reference, the zero-diff delta was created
-        obj_key = ObjectKey(bucket=leaf.bucket, key="archive/mydata.zip.delta")
+        obj_key = ObjectKey(bucket=delta_space.bucket, key="archive/mydata.zip.delta")
     else:
-        obj_key = ObjectKey(bucket=leaf.bucket, key=summary.key)
+        obj_key = ObjectKey(bucket=delta_space.bucket, key=summary.key)
 
     service.get(obj_key, output_file)
 
@@ -5,7 +5,7 @@ import warnings
 import pytest
 
 from deltaglider.core import (
-    Leaf,
+    DeltaSpace,
     NotFoundError,
     ObjectKey,
     PolicyViolationWarning,
@@ -19,12 +19,12 @@ class TestDeltaServicePut:
     def test_create_reference_first_file(self, service, sample_file, mock_storage):
         """Test creating reference for first file."""
         # Setup
-        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
+        delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix")
         mock_storage.head.return_value = None  # No reference exists
         mock_storage.put.return_value = PutResult(etag="abc123")
 
         # Execute
-        summary = service.put(sample_file, leaf)
+        summary = service.put(sample_file, delta_space)
 
         # Verify
         assert summary.operation == "create_reference"
@@ -41,7 +41,7 @@ class TestDeltaServicePut:
     def test_create_delta_subsequent_file(self, service, sample_file, mock_storage, mock_diff):
         """Test creating delta for subsequent file."""
         # Setup
-        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
+        delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix")
 
         # Create reference content and compute its SHA
         import io
@@ -68,12 +68,12 @@ class TestDeltaServicePut:
         mock_storage.get.return_value = io.BytesIO(ref_content)
 
         # Create cached reference with matching content
-        ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix)
+        ref_path = service.cache.ref_path(delta_space.bucket, delta_space.prefix)
         ref_path.parent.mkdir(parents=True, exist_ok=True)
         ref_path.write_bytes(ref_content)
 
         # Execute
-        summary = service.put(sample_file, leaf)
+        summary = service.put(sample_file, delta_space)
 
         # Verify
         assert summary.operation == "create_delta"
@@ -89,7 +89,7 @@ class TestDeltaServicePut:
     def test_delta_ratio_warning(self, service, sample_file, mock_storage, mock_diff):
         """Test warning when delta ratio exceeds threshold."""
         # Setup
-        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
+        delta_space = DeltaSpace(bucket="test-bucket", prefix="test/prefix")
 
         # Create reference content and compute its SHA
         import io
@@ -119,14 +119,14 @@ class TestDeltaServicePut:
         mock_diff.encode.side_effect = large_encode
 
         # Create cached reference with matching content
-        ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix)
+        ref_path = service.cache.ref_path(delta_space.bucket, delta_space.prefix)
         ref_path.parent.mkdir(parents=True, exist_ok=True)
         ref_path.write_bytes(ref_content)
 
         # Execute and check warning
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
-            service.put(sample_file, leaf, max_ratio=0.1)
+            service.put(sample_file, delta_space, max_ratio=0.1)
 
         assert len(w) == 1
         assert issubclass(w[0].category, PolicyViolationWarning)
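The policy this last test exercises: when the delta grows beyond max_ratio times the original size, the service emits PolicyViolationWarning rather than failing. An illustrative restatement, not the service's actual code:

```python
# Illustrative restatement of the ratio policy the test above exercises;
# the real check lives in DeltaService, this is not its actual code.
import warnings

from deltaglider.core import PolicyViolationWarning


def check_ratio(delta_size: int, file_size: int, max_ratio: float) -> None:
    if file_size and delta_size / file_size > max_ratio:
        warnings.warn(
            f"delta ratio {delta_size / file_size:.2f} exceeds max_ratio {max_ratio}",
            PolicyViolationWarning,
        )
```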