mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-03-29 13:32:02 +02:00
refactor: Rename Leaf to DeltaSpace for semantic clarity
- Renamed Leaf class to DeltaSpace throughout the codebase
- Updated all imports, method signatures, and variable names
- Updated documentation and comments to reflect the new naming
- DeltaSpace better represents a container for delta-compressed files

The term "DeltaSpace" is more semantically accurate than "Leaf" as it represents a space/container for managing related files with delta compression, not a terminal node in a tree structure.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -15,30 +15,30 @@ class FsCacheAdapter(CachePort):
|
||||
self.base_dir = base_dir
|
||||
self.hasher = hasher
|
||||
|
||||
def ref_path(self, bucket: str, leaf: str) -> Path:
|
||||
def ref_path(self, bucket: str, prefix: str) -> Path:
|
||||
"""Get path where reference should be cached."""
|
||||
cache_dir = self.base_dir / bucket / leaf
|
||||
cache_dir = self.base_dir / bucket / prefix
|
||||
return cache_dir / "reference.bin"
|
||||
|
||||
def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
|
||||
def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
|
||||
"""Check if reference exists and matches SHA."""
|
||||
path = self.ref_path(bucket, leaf)
|
||||
path = self.ref_path(bucket, prefix)
|
||||
if not path.exists():
|
||||
return False
|
||||
|
||||
actual_sha = self.hasher.sha256(path)
|
||||
return actual_sha == sha
|
||||
|
||||
def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
|
||||
def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
|
||||
"""Cache reference file."""
|
||||
path = self.ref_path(bucket, leaf)
|
||||
path = self.ref_path(bucket, prefix)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src, path)
|
||||
return path
|
||||
|
||||
def evict(self, bucket: str, leaf: str) -> None:
|
||||
def evict(self, bucket: str, prefix: str) -> None:
|
||||
"""Remove cached reference."""
|
||||
path = self.ref_path(bucket, leaf)
|
||||
path = self.ref_path(bucket, prefix)
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
# Clean up empty directories
|
||||
|
||||
@@ -5,7 +5,7 @@ from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from ...core import DeltaService, Leaf, ObjectKey
|
||||
from ...core import DeltaService, DeltaSpace, ObjectKey
|
||||
|
||||
|
||||
def is_s3_path(path: str) -> bool:
|
||||
@@ -55,7 +55,7 @@ def upload_file(
|
||||
if not key or key.endswith("/"):
|
||||
key = (key + local_path.name).lstrip("/")
|
||||
|
||||
leaf = Leaf(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
|
||||
delta_space = DeltaSpace(bucket=bucket, prefix="/".join(key.split("/")[:-1]))
|
||||
|
||||
try:
|
||||
# Check if delta should be disabled
|
||||
@@ -69,7 +69,7 @@ def upload_file(
|
||||
click.echo(f"upload: '{local_path}' to 's3://{bucket}/{key}' ({file_size} bytes)")
|
||||
else:
|
||||
# Use delta compression
|
||||
summary = service.put(local_path, leaf, max_ratio)
|
||||
summary = service.put(local_path, delta_space, max_ratio)
|
||||
|
||||
if not quiet:
|
||||
if summary.delta_size:
|
||||
|
||||
@@ -16,7 +16,7 @@ from ...adapters import (
|
||||
UtcClockAdapter,
|
||||
XdeltaAdapter,
|
||||
)
|
||||
from ...core import DeltaService, Leaf, ObjectKey
|
||||
from ...core import DeltaService, DeltaSpace, ObjectKey
|
||||
from .aws_compat import (
|
||||
copy_s3_to_s3,
|
||||
determine_operation,
|
||||
@@ -537,10 +537,10 @@ def put(service: DeltaService, file: Path, s3_url: str, max_ratio: float | None)
|
||||
bucket = parts[0]
|
||||
prefix = parts[1] if len(parts) > 1 else ""
|
||||
|
||||
leaf = Leaf(bucket=bucket, prefix=prefix)
|
||||
delta_space = DeltaSpace(bucket=bucket, prefix=prefix)
|
||||
|
||||
try:
|
||||
summary = service.put(file, leaf, max_ratio)
|
||||
summary = service.put(file, delta_space, max_ratio)
|
||||
|
||||
# Output JSON summary
|
||||
output = {
|
||||
|
||||
@@ -12,7 +12,7 @@ from .errors import (
|
||||
)
|
||||
from .models import (
|
||||
DeltaMeta,
|
||||
Leaf,
|
||||
DeltaSpace,
|
||||
ObjectKey,
|
||||
PutSummary,
|
||||
ReferenceMeta,
|
||||
@@ -30,7 +30,7 @@ __all__ = [
|
||||
"DiffDecodeError",
|
||||
"StorageIOError",
|
||||
"PolicyViolationWarning",
|
||||
"Leaf",
|
||||
"DeltaSpace",
|
||||
"ObjectKey",
|
||||
"Sha256",
|
||||
"DeltaMeta",
|
||||
|
||||
@@ -5,8 +5,8 @@ from datetime import datetime
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Leaf:
|
||||
"""S3 leaf prefix."""
|
||||
class DeltaSpace:
|
||||
"""S3 delta compression space - a prefix containing related files for delta compression."""
|
||||
|
||||
bucket: str
|
||||
prefix: str
|
||||
|
||||
@@ -25,7 +25,7 @@ from .errors import (
|
||||
)
|
||||
from .models import (
|
||||
DeltaMeta,
|
||||
Leaf,
|
||||
DeltaSpace,
|
||||
ObjectKey,
|
||||
PutSummary,
|
||||
ReferenceMeta,
|
||||
@@ -93,7 +93,9 @@ class DeltaService:
|
||||
# Check simple extensions
|
||||
return any(name_lower.endswith(ext) for ext in self.delta_extensions)
|
||||
|
||||
def put(self, local_file: Path, leaf: Leaf, max_ratio: float | None = None) -> PutSummary:
|
||||
def put(
|
||||
self, local_file: Path, delta_space: DeltaSpace, max_ratio: float | None = None
|
||||
) -> PutSummary:
|
||||
"""Upload file as reference or delta (for archive files) or directly (for other files)."""
|
||||
if max_ratio is None:
|
||||
max_ratio = self.max_ratio
|
||||
@@ -106,7 +108,7 @@ class DeltaService:
|
||||
self.logger.info(
|
||||
"Starting put operation",
|
||||
file=str(local_file),
|
||||
leaf=f"{leaf.bucket}/{leaf.prefix}",
|
||||
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
|
||||
size=file_size,
|
||||
)
|
||||
|
||||
@@ -119,23 +121,25 @@ class DeltaService:
|
||||
"Uploading file directly (no delta for this type)",
|
||||
file_type=Path(original_name).suffix,
|
||||
)
|
||||
summary = self._upload_direct(local_file, leaf, file_sha256, original_name, file_size)
|
||||
summary = self._upload_direct(
|
||||
local_file, delta_space, file_sha256, original_name, file_size
|
||||
)
|
||||
else:
|
||||
# For archive files, use the delta compression system
|
||||
# Check for existing reference
|
||||
ref_key = leaf.reference_key()
|
||||
ref_head = self.storage.head(f"{leaf.bucket}/{ref_key}")
|
||||
ref_key = delta_space.reference_key()
|
||||
ref_head = self.storage.head(f"{delta_space.bucket}/{ref_key}")
|
||||
|
||||
if ref_head is None:
|
||||
# Create reference
|
||||
summary = self._create_reference(
|
||||
local_file, leaf, file_sha256, original_name, file_size
|
||||
local_file, delta_space, file_sha256, original_name, file_size
|
||||
)
|
||||
else:
|
||||
# Create delta
|
||||
summary = self._create_delta(
|
||||
local_file,
|
||||
leaf,
|
||||
delta_space,
|
||||
ref_head,
|
||||
file_sha256,
|
||||
original_name,
|
||||
@@ -147,7 +151,7 @@ class DeltaService:
|
||||
self.logger.log_operation(
|
||||
op="put",
|
||||
key=summary.key,
|
||||
leaf=f"{leaf.bucket}/{leaf.prefix}",
|
||||
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
|
||||
sizes={"file": file_size, "delta": summary.delta_size or file_size},
|
||||
durations={"total": duration},
|
||||
cache_hit=summary.cache_hit,
|
||||
@@ -197,17 +201,19 @@ class DeltaService:
|
||||
leaf_prefix = "/".join(ref_parts[:-1])
|
||||
else:
|
||||
leaf_prefix = ""
|
||||
leaf = Leaf(bucket=object_key.bucket, prefix=leaf_prefix)
|
||||
delta_space = DeltaSpace(bucket=object_key.bucket, prefix=leaf_prefix)
|
||||
|
||||
cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, delta_meta.ref_sha256)
|
||||
cache_hit = self.cache.has_ref(
|
||||
delta_space.bucket, delta_space.prefix, delta_meta.ref_sha256
|
||||
)
|
||||
if not cache_hit:
|
||||
self._cache_reference(leaf, delta_meta.ref_sha256)
|
||||
self._cache_reference(delta_space, delta_meta.ref_sha256)
|
||||
|
||||
# Download delta and decode
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmp_path = Path(tmpdir)
|
||||
delta_path = tmp_path / "delta"
|
||||
ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
|
||||
ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
|
||||
out_path = tmp_path / "output"
|
||||
|
||||
# Download delta
|
||||
@@ -241,7 +247,7 @@ class DeltaService:
|
||||
self.logger.log_operation(
|
||||
op="get",
|
||||
key=object_key.key,
|
||||
leaf=f"{leaf.bucket}/{leaf.prefix}",
|
||||
leaf=f"{delta_space.bucket}/{delta_space.prefix}",
|
||||
sizes={"delta": delta_meta.delta_size, "file": delta_meta.file_size},
|
||||
durations={"total": duration},
|
||||
cache_hit=cache_hit,
|
||||
@@ -285,14 +291,14 @@ class DeltaService:
|
||||
def _create_reference(
|
||||
self,
|
||||
local_file: Path,
|
||||
leaf: Leaf,
|
||||
delta_space: DeltaSpace,
|
||||
file_sha256: str,
|
||||
original_name: str,
|
||||
file_size: int,
|
||||
) -> PutSummary:
|
||||
"""Create reference file."""
|
||||
ref_key = leaf.reference_key()
|
||||
full_ref_key = f"{leaf.bucket}/{ref_key}"
|
||||
ref_key = delta_space.reference_key()
|
||||
full_ref_key = f"{delta_space.bucket}/{ref_key}"
|
||||
|
||||
# Create reference metadata
|
||||
ref_meta = ReferenceMeta(
|
||||
@@ -320,14 +326,16 @@ class DeltaService:
|
||||
ref_sha256 = file_sha256
|
||||
|
||||
# Cache reference
|
||||
cached_path = self.cache.write_ref(leaf.bucket, leaf.prefix, local_file)
|
||||
cached_path = self.cache.write_ref(delta_space.bucket, delta_space.prefix, local_file)
|
||||
self.logger.debug("Cached reference", path=str(cached_path))
|
||||
|
||||
# Also create zero-diff delta
|
||||
delta_key = (
|
||||
f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
|
||||
f"{delta_space.prefix}/{original_name}.delta"
|
||||
if delta_space.prefix
|
||||
else f"{original_name}.delta"
|
||||
)
|
||||
full_delta_key = f"{leaf.bucket}/{delta_key}"
|
||||
full_delta_key = f"{delta_space.bucket}/{delta_key}"
|
||||
|
||||
with tempfile.NamedTemporaryFile() as zero_delta:
|
||||
# Create empty delta using xdelta3
|
||||
@@ -357,7 +365,7 @@ class DeltaService:
|
||||
self.metrics.increment("deltaglider.reference.created")
|
||||
return PutSummary(
|
||||
operation="create_reference",
|
||||
bucket=leaf.bucket,
|
||||
bucket=delta_space.bucket,
|
||||
key=ref_key,
|
||||
original_name=original_name,
|
||||
file_size=file_size,
|
||||
@@ -367,7 +375,7 @@ class DeltaService:
|
||||
def _create_delta(
|
||||
self,
|
||||
local_file: Path,
|
||||
leaf: Leaf,
|
||||
delta_space: DeltaSpace,
|
||||
ref_head: ObjectHead,
|
||||
file_sha256: str,
|
||||
original_name: str,
|
||||
@@ -375,15 +383,15 @@ class DeltaService:
|
||||
max_ratio: float,
|
||||
) -> PutSummary:
|
||||
"""Create delta file."""
|
||||
ref_key = leaf.reference_key()
|
||||
ref_key = delta_space.reference_key()
|
||||
ref_sha256 = ref_head.metadata["file_sha256"]
|
||||
|
||||
# Ensure reference is cached
|
||||
cache_hit = self.cache.has_ref(leaf.bucket, leaf.prefix, ref_sha256)
|
||||
cache_hit = self.cache.has_ref(delta_space.bucket, delta_space.prefix, ref_sha256)
|
||||
if not cache_hit:
|
||||
self._cache_reference(leaf, ref_sha256)
|
||||
self._cache_reference(delta_space, ref_sha256)
|
||||
|
||||
ref_path = self.cache.ref_path(leaf.bucket, leaf.prefix)
|
||||
ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
|
||||
|
||||
# Create delta
|
||||
with tempfile.NamedTemporaryFile(suffix=".delta") as delta_file:
|
||||
@@ -412,9 +420,11 @@ class DeltaService:
|
||||
|
||||
# Create delta metadata
|
||||
delta_key = (
|
||||
f"{leaf.prefix}/{original_name}.delta" if leaf.prefix else f"{original_name}.delta"
|
||||
f"{delta_space.prefix}/{original_name}.delta"
|
||||
if delta_space.prefix
|
||||
else f"{original_name}.delta"
|
||||
)
|
||||
full_delta_key = f"{leaf.bucket}/{delta_key}"
|
||||
full_delta_key = f"{delta_space.bucket}/{delta_key}"
|
||||
|
||||
delta_meta = DeltaMeta(
|
||||
tool=self.tool_version,
|
||||
@@ -445,7 +455,7 @@ class DeltaService:
|
||||
|
||||
return PutSummary(
|
||||
operation="create_delta",
|
||||
bucket=leaf.bucket,
|
||||
bucket=delta_space.bucket,
|
||||
key=delta_key,
|
||||
original_name=original_name,
|
||||
file_size=file_size,
|
||||
@@ -457,10 +467,10 @@ class DeltaService:
|
||||
cache_hit=cache_hit,
|
||||
)
|
||||
|
||||
def _cache_reference(self, leaf: Leaf, expected_sha: str) -> None:
|
||||
def _cache_reference(self, delta_space: DeltaSpace, expected_sha: str) -> None:
|
||||
"""Download and cache reference."""
|
||||
ref_key = leaf.reference_key()
|
||||
full_ref_key = f"{leaf.bucket}/{ref_key}"
|
||||
ref_key = delta_space.reference_key()
|
||||
full_ref_key = f"{delta_space.bucket}/{ref_key}"
|
||||
|
||||
self.logger.info("Caching reference", key=ref_key)
|
||||
|
||||
@@ -482,7 +492,7 @@ class DeltaService:
|
||||
)
|
||||
|
||||
# Cache it
|
||||
self.cache.write_ref(leaf.bucket, leaf.prefix, tmp_path)
|
||||
self.cache.write_ref(delta_space.bucket, delta_space.prefix, tmp_path)
|
||||
tmp_path.unlink()
|
||||
|
||||
def _get_direct(
|
||||
@@ -533,18 +543,18 @@ class DeltaService:
|
||||
def _upload_direct(
|
||||
self,
|
||||
local_file: Path,
|
||||
leaf: Leaf,
|
||||
delta_space: DeltaSpace,
|
||||
file_sha256: str,
|
||||
original_name: str,
|
||||
file_size: int,
|
||||
) -> PutSummary:
|
||||
"""Upload file directly to S3 without delta compression."""
|
||||
# Construct the key path
|
||||
if leaf.prefix:
|
||||
key = f"{leaf.prefix}/{original_name}"
|
||||
if delta_space.prefix:
|
||||
key = f"{delta_space.prefix}/{original_name}"
|
||||
else:
|
||||
key = original_name
|
||||
full_key = f"{leaf.bucket}/{key}"
|
||||
full_key = f"{delta_space.bucket}/{key}"
|
||||
|
||||
# Create metadata for the file
|
||||
metadata = {
|
||||
@@ -568,7 +578,7 @@ class DeltaService:
|
||||
|
||||
return PutSummary(
|
||||
operation="upload_direct",
|
||||
bucket=leaf.bucket,
|
||||
bucket=delta_space.bucket,
|
||||
key=key,
|
||||
original_name=original_name,
|
||||
file_size=file_size,
|
||||
|
||||
@@ -7,18 +7,18 @@ from typing import Protocol
|
||||
class CachePort(Protocol):
|
||||
"""Port for cache operations."""
|
||||
|
||||
def ref_path(self, bucket: str, leaf: str) -> Path:
|
||||
def ref_path(self, bucket: str, prefix: str) -> Path:
|
||||
"""Get path where reference should be cached."""
|
||||
...
|
||||
|
||||
def has_ref(self, bucket: str, leaf: str, sha: str) -> bool:
|
||||
def has_ref(self, bucket: str, prefix: str, sha: str) -> bool:
|
||||
"""Check if reference exists and matches SHA."""
|
||||
...
|
||||
|
||||
def write_ref(self, bucket: str, leaf: str, src: Path) -> Path:
|
||||
def write_ref(self, bucket: str, prefix: str, src: Path) -> Path:
|
||||
"""Cache reference file."""
|
||||
...
|
||||
|
||||
def evict(self, bucket: str, leaf: str) -> None:
|
||||
def evict(self, bucket: str, prefix: str) -> None:
|
||||
"""Remove cached reference."""
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user