refactor: Rename Leaf to DeltaSpace for semantic clarity

- Renamed Leaf class to DeltaSpace throughout the codebase
- Updated imports, method signatures, and variable names (the `leaf=` log field and the local `leaf_prefix` in DeltaService keep their old names)
- Updated documentation and comments to reflect the new naming
- DeltaSpace better represents a container for delta-compressed files

The name "DeltaSpace" is more semantically accurate than "Leaf" because the
class represents a space/container for managing related files with delta
compression, not a terminal node in a tree structure.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Simone Scarduzio
2025-09-23 08:05:20 +02:00
parent 0613de9a5e
commit fb3ad0e076
14 changed files with 112 additions and 102 deletions

View File

@@ -15,30 +15,30 @@ class FsCacheAdapter(CachePort):
self.base_dir = base_dir
self.hasher = hasher
def ref_path(self, bucket: str, leaf: str) -> Path:
def ref_path(self, bucket: str, prefix: str) -> Path:
"""Get path where reference should be cached."""
cache_dir = self.base_dir / bucket / leaf
cache_dir = self.base_dir / bucket / prefix
return cache_dir / "reference.bin"
def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
"""Check if reference exists and matches SHA."""
path = self.ref_path(bucket, leaf)
path = self.ref_path(bucket, prefix)
if not path.exists():
return False
actual_sha = self.hasher.sha256(path)
return actual_sha == sha
def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
"""Cache reference file."""
path = self.ref_path(bucket, leaf)
path = self.ref_path(bucket, prefix)
path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, path)
return path
def evict(self, bucket: str, leaf: str) -> None:
def evict(self, bucket: str, prefix: str) -> None:
"""Remove cached reference."""
path = self.ref_path(bucket, leaf)
path = self.ref_path(bucket, prefix)
if path.exists():
path.unlink()
# Clean up empty directories

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import click
from ...core import DeltaService, Leaf, ObjectKey
from ...core import DeltaService, DeltaSpace, ObjectKey
def is_s3_path(path: str) -> bool:
@@ -55,7 +55,7 @@ def upload_file(
if not key or key.endswith("/"):
key = (key + local_path.name).lstrip("/")
leaf = Leaf(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
delta_space = DeltaSpace(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
try:
# Check if delta should be disabled
@@ -69,7 +69,7 @@ def upload_file(
click.echo(f"upload: '{local_path}' to 's3://{bucket}/{key}' ({file_size} bytes)")
else:
# Use delta compression
summary = service.put(local_path, leaf, max_ratio)
summary = service.put(local_path, delta_space, max_ratio)
if not quiet:
if summary.delta_size:

View File

@@ -16,7 +16,7 @@ from ...adapters import (
UtcClockAdapter,
XdeltaAdapter,
)
from ...core import DeltaService, Leaf, ObjectKey
from ...core import DeltaService, DeltaSpace, ObjectKey
from .aws_compat import (
copy_s3_to_s3,
determine_operation,
@@ -537,10 +537,10 @@ def put(service: DeltaService, file: Path, s3_url: str, max_ratio: float | None)
bucket = parts[0]
prefix = parts[1] if len(parts) > 1 else ""
leaf = Leaf(bucket=bucket, prefix=prefix)
delta_space = DeltaSpace(bucket=bucket, prefix=prefix)
try:
summary = service.put(file, leaf, max_ratio)
summary = service.put(file, delta_space, max_ratio)
# Output JSON summary
output = {

View File

@@ -12,7 +12,7 @@ from .errors import (
)
from .models import (
DeltaMeta,
Leaf,
DeltaSpace,
ObjectKey,
PutSummary,
ReferenceMeta,
@@ -30,7 +30,7 @@ __all__ = [
"DiffDecodeError",
"StorageIOError",
"PolicyViolationWarning",
"Leaf",
"DeltaSpace",
"ObjectKey",
"Sha256",
"DeltaMeta",

View File

@@ -5,8 +5,8 @@ from datetime import datetime
@dataclass(frozen=True)
class Leaf:
"""S3 leaf prefix."""
class DeltaSpace:
"""S3 delta compression space - a prefix containing related files for delta compression."""
bucket: str
prefix: str

View File

@@ -25,7 +25,7 @@ from .errors import (
)
from .models import (
DeltaMeta,
Leaf,
DeltaSpace,
ObjectKey,
PutSummary,
ReferenceMeta,
@@ -93,7 +93,9 @@ class DeltaService:
# Check simple extensions
return any(name_lower.endswith(ext) for ext in self.delta_extensions)
def put(self, local_file: Path, leaf: Leaf, max_ratio: float | None = None) -> PutSummary:
def put(
self, local_file: Path, delta_space: DeltaSpace, max_ratio: float | None = None
) -> PutSummary:
"""Upload file as reference or delta (for archive files) or directly (for other files)."""
if max_ratio is None:
max_ratio = self.max_ratio
@@ -106,7 +108,7 @@ class DeltaService:
self.logger.info(
"Starting put operation",
file=str(local_file),
leaf=f"{leaf.bucket}/{leaf.prefix}",
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
size=file_size,
)
@@ -119,23 +121,25 @@ class DeltaService:
"Uploading file directly (no delta for this type)",
file_type=Path(original_name).suffix,
)
summary = self._upload_direct(local_file, leaf, file_sha256, original_name, file_size)
summary = self._upload_direct(
local_file, delta_space, file_sha256, original_name, file_size
)
else:
# For archive files, use the delta compression system
# Check for existing reference
ref_key = leaf.reference_key()
ref_head = self.storage.head(f"{leaf.bucket}/{ref_key}")
ref_key = delta_space.reference_key()
ref_head = self.storage.head(f"{delta_space.bucket}/{ref_key}")
if ref_head is None:
# Create reference
summary = self._create_reference(
local_file, leaf, file_sha256, original_name, file_size
local_file, delta_space, file_sha256, original_name, file_size
)
else:
# Create delta
summary = self._create_delta(
local_file,
leaf,
delta_space,
ref_head,
file_sha256,
original_name,
@@ -147,7 +151,7 @@ class DeltaService:
self.logger.log_operation(
op="put",
key=summary.key,
leaf=f"{leaf.bucket}/{leaf.prefix}",
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
sizes={"file": file_size, "delta": summary.delta_size or file_size},
durations={"total": duration},
cache_hit=summary.cache_hit,
@@ -197,17 +201,19 @@ class DeltaService:
leaf_prefix = "/".join(ref_parts[:-1])
else:
leaf_prefix = ""
leaf = Leaf(bucket=object_key.bucket, prefix=leaf_prefix)
delta_space = DeltaSpace(bucket=object_key.bucket, prefix=leaf_prefix)
cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, delta_meta.ref_sha256)
cache_hit = self.cache.has_ref(
delta_space.bucket, delta_space.prefix, delta_meta.ref_sha256
)
if not cache_hit:
self._cache_reference(leaf, delta_meta.ref_sha256)
self._cache_reference(delta_space, delta_meta.ref_sha256)
# Download delta and decode
with tempfile.TemporaryDirectory() as tmpdir:
tmp_path = Path(tmpdir)
delta_path = tmp_path / "delta"
ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
out_path = tmp_path / "output"
# Download delta
@@ -241,7 +247,7 @@ class DeltaService:
self.logger.log_operation(
op="get",
key=object_key.key,
leaf=f"{leaf.bucket}/{leaf.prefix}",
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
sizes={"delta": delta_meta.delta_size, "file": delta_meta.file_size},
durations={"total": duration},
cache_hit=cache_hit,
@@ -285,14 +291,14 @@ class DeltaService:
def _create_reference(
self,
local_file: Path,
leaf: Leaf,
delta_space: DeltaSpace,
file_sha256: str,
original_name: str,
file_size: int,
) -> PutSummary:
"""Create reference file."""
ref_key = leaf.reference_key()
full_ref_key = f"{leaf.bucket}/{ref_key}"
ref_key = delta_space.reference_key()
full_ref_key = f"{delta_space.bucket}/{ref_key}"
# Create reference metadata
ref_meta = ReferenceMeta(
@@ -320,14 +326,16 @@ class DeltaService:
ref_sha256 = file_sha256
# Cache reference
cached_path = self.cache.write_ref(leaf.bucket, leaf.prefix, local_file)
cached_path = self.cache.write_ref(delta_space.bucket, delta_space.prefix, local_file)
self.logger.debug("Cached reference", path=str(cached_path))
# Also create zero-diff delta
delta_key = (
f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
f"{delta_space.prefix}/{original_name}.delta"
if delta_space.prefix
else f"{original_name}.delta"
)
full_delta_key = f"{leaf.bucket}/{delta_key}"
full_delta_key = f"{delta_space.bucket}/{delta_key}"
with tempfile.NamedTemporaryFile() as zero_delta:
# Create empty delta using xdelta3
@@ -357,7 +365,7 @@ class DeltaService:
self.metrics.increment("deltaglider.reference.created")
return PutSummary(
operation="create_reference",
bucket=leaf.bucket,
bucket=delta_space.bucket,
key=ref_key,
original_name=original_name,
file_size=file_size,
@@ -367,7 +375,7 @@ class DeltaService:
def _create_delta(
self,
local_file: Path,
leaf: Leaf,
delta_space: DeltaSpace,
ref_head: ObjectHead,
file_sha256: str,
original_name: str,
@@ -375,15 +383,15 @@ class DeltaService:
max_ratio: float,
) -> PutSummary:
"""Create delta file."""
ref_key = leaf.reference_key()
ref_key = delta_space.reference_key()
ref_sha256 = ref_head.metadata["file_sha256"]
# Ensure reference is cached
cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, ref_sha256)
cache_hit = self.cache.has_ref(delta_space.bucket, delta_space.prefix, ref_sha256)
if not cache_hit:
self._cache_reference(leaf, ref_sha256)
self._cache_reference(delta_space, ref_sha256)
ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
# Create delta
with tempfile.NamedTemporaryFile(suffix=".delta") as delta_file:
@@ -412,9 +420,11 @@ class DeltaService:
# Create delta metadata
delta_key = (
f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
f"{delta_space.prefix}/{original_name}.delta"
if delta_space.prefix
else f"{original_name}.delta"
)
full_delta_key = f"{leaf.bucket}/{delta_key}"
full_delta_key = f"{delta_space.bucket}/{delta_key}"
delta_meta = DeltaMeta(
tool=self.tool_version,
@@ -445,7 +455,7 @@ class DeltaService:
return PutSummary(
operation="create_delta",
bucket=leaf.bucket,
bucket=delta_space.bucket,
key=delta_key,
original_name=original_name,
file_size=file_size,
@@ -457,10 +467,10 @@ class DeltaService:
cache_hit=cache_hit,
)
def _cache_reference(self, leaf: Leaf, expected_sha: str) -> None:
def _cache_reference(self, delta_space: DeltaSpace, expected_sha: str) -> None:
"""Download and cache reference."""
ref_key = leaf.reference_key()
full_ref_key = f"{leaf.bucket}/{ref_key}"
ref_key = delta_space.reference_key()
full_ref_key = f"{delta_space.bucket}/{ref_key}"
self.logger.info("Caching reference", key=ref_key)
@@ -482,7 +492,7 @@ class DeltaService:
)
# Cache it
self.cache.write_ref(leaf.bucket, leaf.prefix, tmp_path)
self.cache.write_ref(delta_space.bucket, delta_space.prefix, tmp_path)
tmp_path.unlink()
def _get_direct(
@@ -533,18 +543,18 @@ class DeltaService:
def _upload_direct(
self,
local_file: Path,
leaf: Leaf,
delta_space: DeltaSpace,
file_sha256: str,
original_name: str,
file_size: int,
) -> PutSummary:
"""Upload file directly to S3 without delta compression."""
# Construct the key path
if leaf.prefix:
key = f"{leaf.prefix}/{original_name}"
if delta_space.prefix:
key = f"{delta_space.prefix}/{original_name}"
else:
key = original_name
full_key = f"{leaf.bucket}/{key}"
full_key = f"{delta_space.bucket}/{key}"
# Create metadata for the file
metadata = {
@@ -568,7 +578,7 @@ class DeltaService:
return PutSummary(
operation="upload_direct",
bucket=leaf.bucket,
bucket=delta_space.bucket,
key=key,
original_name=original_name,
file_size=file_size,

View File

@@ -7,18 +7,18 @@ from typing import Protocol
class CachePort(Protocol):
"""Port for cache operations."""
def ref_path(self, bucket: str, leaf: str) -> Path:
def ref_path(self, bucket: str, prefix: str) -> Path:
"""Get path where reference should be cached."""
...
def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
"""Check if reference exists and matches SHA."""
...
def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
"""Cache reference file."""
...
def evict(self, bucket: str, leaf: str) -> None:
def evict(self, bucket: str, prefix: str) -> None:
"""Remove cached reference."""
...