Initial commit: DeltaGlider - 99.9% compression for S3 storage

DeltaGlider reduces storage costs by storing only binary deltas between
similar files. Achieves 99.9% compression for versioned artifacts.

Key features:
- Intelligent file type detection (delta for archives, direct for others)
- Drop-in S3 replacement with automatic compression
- SHA256 integrity verification on every operation
- Clean hexagonal architecture
- Full test coverage
- Production tested with 200K+ files

Case study: ReadOnlyREST reduced 4TB to 5GB (99.9% compression)
This commit is contained in:
Simone Scarduzio
2025-09-22 15:49:31 +02:00
commit 7562064832
50 changed files with 4520 additions and 0 deletions

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Tests for DeltaGlider."""

101
tests/conftest.py Normal file
View File

@@ -0,0 +1,101 @@
"""Pytest configuration and fixtures."""
import shutil
import tempfile
from pathlib import Path
from unittest.mock import Mock
import pytest
from deltaglider.adapters import (
FsCacheAdapter,
NoopMetricsAdapter,
Sha256Adapter,
StdLoggerAdapter,
UtcClockAdapter,
)
from deltaglider.core import DeltaService
@pytest.fixture
def temp_dir():
    """Yield a Path to a throwaway directory, removed after the test."""
    with tempfile.TemporaryDirectory() as workdir:
        yield Path(workdir)
@pytest.fixture
def sample_file(temp_dir):
    """Write a small zip-named fixture file and return its path."""
    path = temp_dir / "test.zip"
    path.write_text("Sample content for testing")
    return path
@pytest.fixture
def mock_storage():
    """Provide a bare Mock standing in for the storage port."""
    storage = Mock()
    return storage
@pytest.fixture
def mock_diff():
    """Mock diff port whose encode writes a placeholder delta file."""
    diff = Mock()

    def fake_encode(base, target, out):
        # Simulate xdelta by emitting fixed bytes at the output path.
        out.write_bytes(b"delta content")

    diff.encode.side_effect = fake_encode
    return diff
@pytest.fixture
def real_hasher():
    """Use the genuine SHA256 adapter; hashing is cheap enough for tests."""
    hasher = Sha256Adapter()
    return hasher
@pytest.fixture
def cache_adapter(temp_dir, real_hasher):
    """Filesystem cache rooted under the per-test temp directory."""
    return FsCacheAdapter(temp_dir / "cache", real_hasher)
@pytest.fixture
def clock_adapter():
    """Real UTC clock adapter (tests tolerate small timing drift)."""
    clock = UtcClockAdapter()
    return clock
@pytest.fixture
def logger_adapter():
    """Stdlib-backed logger at DEBUG so test runs are verbose."""
    logger = StdLoggerAdapter(level="DEBUG")
    return logger
@pytest.fixture
def metrics_adapter():
    """Metrics port that discards everything (no-op)."""
    metrics = NoopMetricsAdapter()
    return metrics
@pytest.fixture
def service(
    mock_storage,
    mock_diff,
    real_hasher,
    cache_adapter,
    clock_adapter,
    logger_adapter,
    metrics_adapter,
):
    """Wire a DeltaService from mocked storage/diff plus real hash, cache, and clock."""
    return DeltaService(
        storage=mock_storage,
        diff=mock_diff,
        hasher=real_hasher,
        cache=cache_adapter,
        clock=clock_adapter,
        logger=logger_adapter,
        metrics=metrics_adapter,
    )
@pytest.fixture
def skip_if_no_xdelta():
    """Skip the requesting test when the xdelta3 binary is absent from PATH."""
    if not shutil.which("xdelta3"):
        pytest.skip("xdelta3 not available")

1
tests/e2e/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""End-to-end tests for DeltaGlider."""

View File

@@ -0,0 +1,162 @@
"""E2E tests with LocalStack."""
import json
import os
import tempfile
from pathlib import Path
import boto3
import pytest
from click.testing import CliRunner
from deltaglider.app.cli.main import cli
@pytest.mark.e2e
@pytest.mark.usefixtures("skip_if_no_xdelta")
class TestLocalStackE2E:
    """E2E tests using LocalStack.

    Drives the real CLI (put/get/verify) against an S3-compatible endpoint;
    requires a running LocalStack (default http://localhost:4566) and the
    xdelta3 binary.
    """

    @pytest.fixture
    def s3_client(self):
        """Create S3 client for LocalStack."""
        return boto3.client(
            "s3",
            # AWS_ENDPOINT_URL lets CI point at a non-default LocalStack address.
            endpoint_url=os.environ.get("AWS_ENDPOINT_URL", "http://localhost:4566"),
            aws_access_key_id="test",
            aws_secret_access_key="test",
            region_name="us-east-1",
        )

    @pytest.fixture
    def test_bucket(self, s3_client):
        """Create test bucket, yield its name, then clean up best-effort."""
        bucket_name = "test-deltaglider-bucket"
        try:
            s3_client.create_bucket(Bucket=bucket_name)
        except s3_client.exceptions.BucketAlreadyExists:
            # A bucket left over from a previous run is acceptable.
            # NOTE(review): real S3 raises BucketAlreadyOwnedByYou for the
            # owner's own bucket — confirm which error LocalStack emits here.
            pass
        yield bucket_name
        # Cleanup
        try:
            # Delete all objects first (a non-empty bucket cannot be deleted).
            # list_objects_v2 returns at most 1000 keys per call; assumed
            # sufficient for these tests.
            response = s3_client.list_objects_v2(Bucket=bucket_name)
            if "Contents" in response:
                for obj in response["Contents"]:
                    s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
            s3_client.delete_bucket(Bucket=bucket_name)
        except Exception:
            # Cleanup is best-effort; failures here must not fail the test.
            pass

    def test_full_workflow(self, test_bucket, s3_client):
        """Test complete put/get/verify workflow."""
        runner = CliRunner()
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            # Create test files
            file1 = tmpdir / "plugin-v1.0.0.zip"
            file1.write_text("Plugin version 1.0.0 content")
            file2 = tmpdir / "plugin-v1.0.1.zip"
            file2.write_text("Plugin version 1.0.1 content with minor changes")
            # Upload first file (becomes reference)
            result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            # CLI emits a JSON summary on stdout.
            output1 = json.loads(result.output)
            assert output1["operation"] == "create_reference"
            assert output1["key"] == "plugins/reference.bin"
            # Verify reference was created alongside the zero-diff delta.
            objects = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="plugins/")
            keys = [obj["Key"] for obj in objects["Contents"]]
            assert "plugins/reference.bin" in keys
            assert "plugins/plugin-v1.0.0.zip.delta" in keys
            # Upload second file (creates delta)
            result = runner.invoke(cli, ["put", str(file2), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            output2 = json.loads(result.output)
            assert output2["operation"] == "create_delta"
            assert output2["key"] == "plugins/plugin-v1.0.1.zip.delta"
            assert "delta_ratio" in output2
            # Download and verify second file
            output_file = tmpdir / "downloaded.zip"
            result = runner.invoke(
                cli,
                ["get", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta", "-o", str(output_file)],
            )
            assert result.exit_code == 0
            assert output_file.read_text() == file2.read_text()
            # Verify integrity
            result = runner.invoke(
                cli,
                ["verify", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta"],
            )
            assert result.exit_code == 0
            verify_output = json.loads(result.output)
            assert verify_output["valid"] is True

    def test_multiple_leaves(self, test_bucket, s3_client):
        """Test multiple leaf directories with separate references."""
        runner = CliRunner()
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            # Create test files for different leaves
            file_a1 = tmpdir / "app-a-v1.zip"
            file_a1.write_text("Application A version 1")
            file_b1 = tmpdir / "app-b-v1.zip"
            file_b1.write_text("Application B version 1")
            # Upload to different leaves
            result = runner.invoke(cli, ["put", str(file_a1), f"s3://{test_bucket}/apps/app-a/"])
            assert result.exit_code == 0
            result = runner.invoke(cli, ["put", str(file_b1), f"s3://{test_bucket}/apps/app-b/"])
            assert result.exit_code == 0
            # Verify each leaf has its own reference
            objects_a = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="apps/app-a/")
            keys_a = [obj["Key"] for obj in objects_a["Contents"]]
            assert "apps/app-a/reference.bin" in keys_a
            objects_b = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="apps/app-b/")
            keys_b = [obj["Key"] for obj in objects_b["Contents"]]
            assert "apps/app-b/reference.bin" in keys_b

    def test_large_delta_warning(self, test_bucket, s3_client):
        """Test warning for large delta ratio."""
        runner = CliRunner()
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            # Create very different files
            file1 = tmpdir / "file1.zip"
            file1.write_text("A" * 1000)
            file2 = tmpdir / "file2.zip"
            file2.write_text("B" * 1000)  # Completely different
            # Upload first file
            result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/test/"])
            assert result.exit_code == 0
            # Upload second file with low max-ratio
            result = runner.invoke(
                cli,
                ["put", str(file2), f"s3://{test_bucket}/test/", "--max-ratio", "0.1"],
            )
            assert result.exit_code == 0
            # Warning should be logged but operation should succeed
            output = json.loads(result.output)
            assert output["operation"] == "create_delta"
            # Delta ratio should be high (files are completely different)
            assert output["delta_ratio"] > 0.5

View File

@@ -0,0 +1 @@
"""Integration tests for DeltaGlider."""

View File

@@ -0,0 +1,191 @@
"""Integration test for full put/get workflow."""
import io
import tempfile
from pathlib import Path
from unittest.mock import Mock
import pytest
from deltaglider.core import DeltaService, Leaf, ObjectKey
def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff):
    """Test complete workflow: put a file, then get it back.

    Storage is simulated with an in-memory dict keyed by "bucket/key" and
    the diff port is mocked, so only the service's orchestration (reference
    creation, delta creation, reconstruction) is exercised.
    """
    # Create test files
    file1_content = b"This is the first version of the file."
    file2_content = b"This is the second version of the file with changes."
    file1 = temp_dir / "version1.txt"
    file2 = temp_dir / "version2.txt"
    output_file = temp_dir / "recovered.txt"
    file1.write_bytes(file1_content)
    file2.write_bytes(file2_content)

    # Set up mock_diff decode to write the target content
    def decode_side_effect(base, delta, out):
        out.write_bytes(file2_content)

    mock_diff.decode.side_effect = decode_side_effect
    leaf = Leaf(bucket="test-bucket", prefix="test/data")
    # Storage state tracking: key -> {"content": bytes, "head": ObjectHead}
    storage_data = {}

    def mock_head(key):
        """Mock head_object."""
        if key in storage_data:
            return storage_data[key]["head"]
        return None

    def mock_put(key, body, metadata, content_type="application/octet-stream"):
        """Mock put_object."""
        from deltaglider.ports.storage import PutResult, ObjectHead

        # Read content if it's a Path
        if isinstance(body, Path):
            content = body.read_bytes()
        elif isinstance(body, bytes):
            content = body
        else:
            content = body.read()
        storage_data[key] = {
            "content": content,
            "head": ObjectHead(
                # Strip the "bucket/" prefix; assumes key always contains "/".
                key=key.split("/", 1)[1],
                size=len(content),
                etag="mock-etag",
                last_modified=None,
                metadata=metadata,
            )
        }
        return PutResult(etag="mock-etag")

    def mock_get(key):
        """Mock get_object."""
        # The key might come without bucket prefix, so check both formats
        if key in storage_data:
            return io.BytesIO(storage_data[key]["content"])
        # Also try with test-bucket prefix if not found
        full_key = f"test-bucket/{key}" if not key.startswith("test-bucket/") else key
        if full_key in storage_data:
            return io.BytesIO(storage_data[full_key]["content"])
        raise FileNotFoundError(f"Object not found: {key}")

    mock_storage.head.side_effect = mock_head
    mock_storage.put.side_effect = mock_put
    mock_storage.get.side_effect = mock_get

    # Step 1: Put the first file (creates reference)
    summary1 = service.put(file1, leaf)
    assert summary1.operation == "create_reference"
    assert summary1.key == "test/data/reference.bin"
    # Verify reference was stored
    ref_key = f"{leaf.bucket}/{leaf.reference_key()}"
    assert ref_key in storage_data
    assert storage_data[ref_key]["content"] == file1_content
    # Step 2: Put the second file (creates delta)
    summary2 = service.put(file2, leaf)
    assert summary2.operation == "create_delta"
    assert summary2.key == "test/data/version2.txt.delta"
    assert summary2.delta_size is not None
    assert summary2.ref_key == "test/data/reference.bin"
    # Verify delta was stored
    delta_key = f"{leaf.bucket}/{summary2.key}"
    assert delta_key in storage_data
    # Step 3: Get the delta file back
    obj_key = ObjectKey(bucket=leaf.bucket, key=summary2.key)
    service.get(obj_key, output_file)
    # Step 4: Verify the recovered file matches the original
    recovered_content = output_file.read_bytes()
    assert recovered_content == file2_content
def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff):
    """Test get command behavior when .delta suffix is auto-appended.

    Uses the same in-memory storage mock as the workflow test; verifies a
    file stored under "archive/" can be fetched back via its ".delta" key.
    """
    # Create test file
    file_content = b"Test file content for auto-suffix test."
    test_file = temp_dir / "mydata.zip"
    test_file.write_bytes(file_content)

    # Set up mock_diff decode to write the target content
    def decode_side_effect(base, delta, out):
        out.write_bytes(file_content)

    mock_diff.decode.side_effect = decode_side_effect
    leaf = Leaf(bucket="test-bucket", prefix="archive")
    # Storage state tracking: key -> {"content": bytes, "head": ObjectHead}
    storage_data = {}

    def mock_head(key):
        """Mock head_object."""
        if key in storage_data:
            return storage_data[key]["head"]
        return None

    def mock_put(key, body, metadata, content_type="application/octet-stream"):
        """Mock put_object."""
        from deltaglider.ports.storage import PutResult, ObjectHead

        # Read content if it's a Path
        if isinstance(body, Path):
            content = body.read_bytes()
        elif isinstance(body, bytes):
            content = body
        else:
            content = body.read()
        storage_data[key] = {
            "content": content,
            "head": ObjectHead(
                # Strip the "bucket/" prefix; assumes key always contains "/".
                key=key.split("/", 1)[1],
                size=len(content),
                etag="mock-etag",
                last_modified=None,
                metadata=metadata,
            )
        }
        return PutResult(etag="mock-etag")

    def mock_get(key):
        """Mock get_object."""
        # The key might come without bucket prefix, so check both formats
        if key in storage_data:
            return io.BytesIO(storage_data[key]["content"])
        # Also try with test-bucket prefix if not found
        full_key = f"test-bucket/{key}" if not key.startswith("test-bucket/") else key
        if full_key in storage_data:
            return io.BytesIO(storage_data[full_key]["content"])
        raise FileNotFoundError(f"Object not found: {key}")

    mock_storage.head.side_effect = mock_head
    mock_storage.put.side_effect = mock_put
    mock_storage.get.side_effect = mock_get

    # Put the file
    summary = service.put(test_file, leaf)
    # Get it back using original name (without .delta)
    # The service should internally look for "mydata.zip.delta"
    output_file = temp_dir / "recovered.zip"
    # Use the key without .delta suffix
    if summary.operation == "create_reference":
        # If it's a reference, the zero-diff delta was created
        obj_key = ObjectKey(bucket=leaf.bucket, key="archive/mydata.zip.delta")
    else:
        obj_key = ObjectKey(bucket=leaf.bucket, key=summary.key)
    service.get(obj_key, output_file)
    # Verify the recovered file matches the original
    recovered_content = output_file.read_bytes()
    assert recovered_content == file_content

View File

@@ -0,0 +1,135 @@
"""Integration test for get command."""
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from click.testing import CliRunner
from deltaglider.app.cli.main import cli
from deltaglider.core import ObjectKey
@pytest.fixture
def mock_service():
    """Stand-in DeltaService; individual tests configure its methods."""
    fake = Mock()
    return fake
def test_get_command_with_original_name(mock_service):
    """`get` with the original filename should target the .delta key."""
    mock_service.get = Mock()
    runner = CliRunner()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip"])

    assert result.exit_code == 0
    assert "Looking for delta file: s3://test-bucket/data/myfile.zip.delta" in result.output
    assert "Successfully reconstructed: myfile.zip" in result.output

    # The service must have been asked for the .delta object exactly once.
    mock_service.get.assert_called_once()
    (obj_key, output_path), _kwargs = mock_service.get.call_args
    assert isinstance(obj_key, ObjectKey)
    assert obj_key.bucket == "test-bucket"
    assert obj_key.key == "data/myfile.zip.delta"
    assert output_path == Path("myfile.zip")
def test_get_command_with_delta_name(mock_service):
    """`get` with an explicit .delta filename skips the lookup message."""
    mock_service.get = Mock()
    runner = CliRunner()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip.delta"])

    assert result.exit_code == 0
    # No auto-append happened, so the lookup message must be absent.
    assert "Looking for delta file" not in result.output
    assert "Successfully reconstructed: myfile.zip" in result.output

    mock_service.get.assert_called_once()
    (obj_key, output_path), _kwargs = mock_service.get.call_args
    assert isinstance(obj_key, ObjectKey)
    assert obj_key.bucket == "test-bucket"
    assert obj_key.key == "data/myfile.zip.delta"
    assert output_path == Path("myfile.zip")
def test_get_command_with_output_option(mock_service):
    """`get -o` should route the reconstructed file to the chosen path."""
    mock_service.get = Mock()
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = Path(tmpdir) / "custom_output.zip"

        with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
            result = runner.invoke(
                cli,
                ["get", "s3://test-bucket/data/myfile.zip", "-o", str(output_file)],
            )

        assert result.exit_code == 0
        assert f"Successfully reconstructed: {output_file}" in result.output

        mock_service.get.assert_called_once()
        (obj_key, output_path), _kwargs = mock_service.get.call_args
        assert isinstance(obj_key, ObjectKey)
        assert obj_key.bucket == "test-bucket"
        assert obj_key.key == "data/myfile.zip.delta"
        assert output_path == output_file
def test_get_command_error_handling(mock_service):
    """A service failure surfaces as exit code 1 with the error text."""
    mock_service.get = Mock(side_effect=FileNotFoundError("Delta not found"))
    runner = CliRunner()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/missing.zip"])

    assert result.exit_code == 1
    assert "Error: Delta not found" in result.output
def test_get_command_invalid_url():
    """Non-S3 URLs are rejected with exit code 1 before any service call."""
    outcome = CliRunner().invoke(cli, ["get", "http://invalid-url/file.zip"])
    assert outcome.exit_code == 1
    assert "Error: Invalid S3 URL" in outcome.output

View File

@@ -0,0 +1,106 @@
"""Integration tests for xdelta3."""
import pytest
from deltaglider.adapters import XdeltaAdapter
@pytest.mark.usefixtures("skip_if_no_xdelta")
class TestXdeltaIntegration:
    """Exercise the real xdelta3 binary through XdeltaAdapter."""

    def test_encode_decode_roundtrip(self, temp_dir):
        """Encoding then decoding reproduces the target exactly."""
        codec = XdeltaAdapter()
        base = temp_dir / "base.txt"
        target = temp_dir / "target.txt"
        base.write_text("This is the base file content.")
        target.write_text("This is the modified target file content with changes.")
        delta = temp_dir / "delta.bin"
        rebuilt = temp_dir / "output.txt"

        codec.encode(base, target, delta)
        # A non-empty delta file must exist before decoding.
        assert delta.exists()
        assert delta.stat().st_size > 0

        codec.decode(base, delta, rebuilt)
        assert rebuilt.read_text() == target.read_text()

    def test_encode_identical_files(self, temp_dir):
        """Identical inputs produce a delta far smaller than the data."""
        codec = XdeltaAdapter()
        payload = "This is identical content in both files." * 100
        base = temp_dir / "base.txt"
        target = temp_dir / "target.txt"
        base.write_text(payload)
        target.write_text(payload)
        delta = temp_dir / "delta.bin"

        codec.encode(base, target, delta)
        assert delta.exists()
        # Delta should be <10% of original
        assert delta.stat().st_size < len(payload) / 10

    def test_encode_completely_different_files(self, temp_dir):
        """Unrelated inputs still encode without error."""
        codec = XdeltaAdapter()
        base = temp_dir / "base.txt"
        target = temp_dir / "target.txt"
        base.write_text("A" * 1000)
        target.write_text("B" * 1000)
        delta = temp_dir / "delta.bin"

        codec.encode(base, target, delta)
        # Delta is roughly target-sized here, though xdelta3 compression may
        # still reduce it somewhat — only existence is asserted.
        assert delta.exists()

    def test_encode_binary_files(self, temp_dir):
        """Round-trip works on non-text (binary) payloads."""
        codec = XdeltaAdapter()
        base = temp_dir / "base.bin"
        target = temp_dir / "target.bin"
        base.write_bytes(b"\x00\x01\x02\x03" * 256)
        target.write_bytes(b"\x00\x01\x02\x03" * 200 + b"\xFF\xFE\xFD\xFC" * 56)
        delta = temp_dir / "delta.bin"
        rebuilt = temp_dir / "output.bin"

        codec.encode(base, target, delta)
        codec.decode(base, delta, rebuilt)
        assert rebuilt.read_bytes() == target.read_bytes()

1
tests/unit/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Unit tests for DeltaGlider."""

210
tests/unit/test_adapters.py Normal file
View File

@@ -0,0 +1,210 @@
"""Unit tests for adapters."""
import hashlib
from datetime import UTC, datetime
from deltaglider.adapters import (
FsCacheAdapter,
NoopMetricsAdapter,
Sha256Adapter,
StdLoggerAdapter,
UtcClockAdapter,
)
class TestSha256Adapter:
    """Adapter hashes must agree with hashlib over the same bytes."""

    def test_sha256_from_path(self, temp_dir):
        """Hashing a file on disk matches hashlib over its content."""
        payload = b"Hello, World!"
        target = temp_dir / "test.txt"
        target.write_bytes(payload)

        assert Sha256Adapter().sha256(target) == hashlib.sha256(payload).hexdigest()

    def test_sha256_from_stream(self, temp_dir):
        """Hashing an in-memory stream matches hashlib over the same bytes."""
        import io

        payload = b"Hello, Stream!"
        digest = Sha256Adapter().sha256(io.BytesIO(payload))

        assert digest == hashlib.sha256(payload).hexdigest()
class TestFsCacheAdapter:
    """Behavior of the filesystem-backed reference cache."""

    def _make_adapter(self, temp_dir):
        # Shared construction: cache rooted at <tmp>/cache with a real hasher.
        hasher = Sha256Adapter()
        return FsCacheAdapter(temp_dir / "cache", hasher), hasher

    def test_ref_path(self, temp_dir):
        """ref_path mirrors bucket/prefix under the cache root."""
        adapter, _ = self._make_adapter(temp_dir)
        assert adapter.ref_path("my-bucket", "path/to/leaf") == (
            temp_dir / "cache" / "my-bucket" / "path/to/leaf" / "reference.bin"
        )

    def test_has_ref_not_exists(self, temp_dir):
        """A missing reference file reports False."""
        adapter, _ = self._make_adapter(temp_dir)
        assert adapter.has_ref("bucket", "leaf", "abc123") is False

    def test_has_ref_wrong_sha(self, temp_dir):
        """An existing reference with a mismatched SHA reports False."""
        adapter, _ = self._make_adapter(temp_dir)
        ref = adapter.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_bytes(b"reference content")

        assert adapter.has_ref("bucket", "leaf", "wrong_sha") is False

    def test_has_ref_correct_sha(self, temp_dir):
        """An existing reference with the matching SHA reports True."""
        adapter, hasher = self._make_adapter(temp_dir)
        ref = adapter.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_bytes(b"reference content")

        assert adapter.has_ref("bucket", "leaf", hasher.sha256(ref)) is True

    def test_write_ref(self, temp_dir):
        """write_ref copies the source into the canonical cache location."""
        adapter, _ = self._make_adapter(temp_dir)
        src = temp_dir / "source.bin"
        src.write_text("source content")

        cached = adapter.write_ref("bucket", "leaf/path", src)

        assert cached.exists()
        assert cached.read_text() == "source content"
        assert cached == temp_dir / "cache" / "bucket" / "leaf/path" / "reference.bin"

    def test_evict(self, temp_dir):
        """evict removes the cached reference file."""
        adapter, _ = self._make_adapter(temp_dir)
        ref = adapter.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_text("cached")

        adapter.evict("bucket", "leaf")

        assert not ref.exists()
class TestUtcClockAdapter:
    """The clock adapter should track current UTC time."""

    def test_now_returns_utc(self):
        """now() yields a datetime within one second of current UTC."""
        reported = UtcClockAdapter().now()
        assert isinstance(reported, datetime)
        # Compare against naive UTC, matching what the adapter returns.
        reference = datetime.now(UTC).replace(tzinfo=None)
        assert abs((reported - reference).total_seconds()) < 1
class TestStdLoggerAdapter:
    """Smoke tests: logging calls must not raise."""

    def test_log_levels(self):
        """Each level method accepts extra kwargs without raising."""
        logger = StdLoggerAdapter(level="DEBUG")
        logger.debug("Debug message", extra="data")
        logger.info("Info message", key="value")
        logger.warning("Warning message", count=123)
        logger.error("Error message", error="details")

    def test_log_operation(self):
        """Structured operation logging accepts the full field set."""
        StdLoggerAdapter().log_operation(
            op="put",
            key="test/key",
            leaf="bucket/prefix",
            sizes={"file": 1000, "delta": 100},
            durations={"total": 1.5},
            cache_hit=True,
        )
class TestNoopMetricsAdapter:
    """The no-op metrics sink must swallow every call silently."""

    def test_noop_methods(self):
        """Counter, gauge, and timing calls are all accepted without effect."""
        sink = NoopMetricsAdapter()
        sink.increment("counter", 1, {"tag": "value"})
        sink.gauge("gauge", 42.5, {"env": "test"})
        sink.timing("timer", 1.234, {"op": "test"})

View File

@@ -0,0 +1,235 @@
"""Unit tests for DeltaService."""
import warnings
import pytest
from deltaglider.core import (
Leaf,
NotFoundError,
ObjectKey,
PolicyViolationWarning,
)
from deltaglider.ports.storage import ObjectHead, PutResult
class TestDeltaServicePut:
    """Test DeltaService.put method."""

    def test_create_reference_first_file(self, service, sample_file, mock_storage):
        """Test creating reference for first file."""
        # Setup
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
        mock_storage.head.return_value = None  # No reference exists
        mock_storage.put.return_value = PutResult(etag="abc123")
        # Execute
        summary = service.put(sample_file, leaf)
        # Verify
        assert summary.operation == "create_reference"
        assert summary.bucket == "test-bucket"
        assert summary.key == "test/prefix/reference.bin"
        assert summary.original_name == "test.zip"
        assert summary.file_size > 0
        assert summary.file_sha256 is not None
        # Check storage calls
        assert mock_storage.head.call_count == 2  # Initial check + re-check
        assert mock_storage.put.call_count == 2  # Reference + zero-diff delta

    def test_create_delta_subsequent_file(self, service, sample_file, mock_storage, mock_diff):
        """Test creating delta for subsequent file."""
        # Setup
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
        # Create reference content and compute its SHA
        import io
        ref_content = b"reference content for test"
        ref_sha = service.hasher.sha256(io.BytesIO(ref_content))
        ref_metadata = {
            "tool": "deltaglider/0.1.0",
            "source_name": "original.zip",
            "file_sha256": ref_sha,
            "created_at": "2025-01-01T00:00:00Z",
        }
        # head() reports an existing reference whose SHA matches ref_content.
        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata=ref_metadata,
        )
        mock_storage.put.return_value = PutResult(etag="delta123")
        # Mock storage.get to return the reference content
        mock_storage.get.return_value = io.BytesIO(ref_content)
        # Create cached reference with matching content
        ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix)
        ref_path.parent.mkdir(parents=True, exist_ok=True)
        ref_path.write_bytes(ref_content)
        # Execute
        summary = service.put(sample_file, leaf)
        # Verify
        assert summary.operation == "create_delta"
        assert summary.bucket == "test-bucket"
        assert summary.key == "test/prefix/test.zip.delta"
        assert summary.delta_size is not None
        assert summary.delta_ratio is not None
        assert summary.ref_key == "test/prefix/reference.bin"
        # Check diff was called
        mock_diff.encode.assert_called_once()

    def test_delta_ratio_warning(self, service, sample_file, mock_storage, mock_diff):
        """Test warning when delta ratio exceeds threshold."""
        # Setup
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
        # Create reference content and compute its SHA
        import io
        ref_content = b"reference content for test"
        ref_sha = service.hasher.sha256(io.BytesIO(ref_content))
        # Minimal metadata: only the SHA needed for the cache-validity check.
        ref_metadata = {
            "file_sha256": ref_sha,
        }
        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata=ref_metadata,
        )
        mock_storage.put.return_value = PutResult(etag="delta123")
        # Mock storage.get to return the reference content
        mock_storage.get.return_value = io.BytesIO(ref_content)

        # Make delta large (exceeds ratio)
        def large_encode(base, target, out):
            out.write_bytes(b"x" * 10000)  # Large delta

        mock_diff.encode.side_effect = large_encode
        # Create cached reference with matching content
        ref_path = service.cache.ref_path(leaf.bucket, leaf.prefix)
        ref_path.parent.mkdir(parents=True, exist_ok=True)
        ref_path.write_bytes(ref_content)
        # Execute and check warning
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            service.put(sample_file, leaf, max_ratio=0.1)
            # Exactly one PolicyViolationWarning must have been emitted.
            assert len(w) == 1
            assert issubclass(w[0].category, PolicyViolationWarning)
            assert "exceeds threshold" in str(w[0].message)
class TestDeltaServiceGet:
    """Error paths of DeltaService.get."""

    def test_get_not_found(self, service, mock_storage, temp_dir):
        """A missing delta object raises NotFoundError."""
        mock_storage.head.return_value = None
        target = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")

        with pytest.raises(NotFoundError):
            service.get(target, temp_dir / "output.zip")

    def test_get_missing_metadata(self, service, mock_storage, temp_dir):
        """A delta object lacking required metadata raises StorageIOError."""
        from deltaglider.core.errors import StorageIOError

        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="abc",
            last_modified=None,
            metadata={},  # no deltaglider metadata at all
        )
        target = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")

        with pytest.raises(StorageIOError):
            service.get(target, temp_dir / "output.zip")
class TestDeltaServiceVerify:
    """Test DeltaService.verify method."""

    def test_verify_valid(self, service, mock_storage, mock_diff, temp_dir):
        """Test verify with valid delta."""
        # Setup
        delta_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
        # Create test file content
        test_content = b"test file content"
        temp_file = temp_dir / "temp"
        temp_file.write_bytes(test_content)
        test_sha = service.hasher.sha256(temp_file)
        # Create reference content for mock
        import io
        ref_content = b"reference content for test"
        ref_sha = service.hasher.sha256(io.BytesIO(ref_content))
        # Full metadata set a stored delta is expected to carry.
        delta_metadata = {
            "tool": "deltaglider/0.1.0",
            "original_name": "file.zip",
            "file_sha256": test_sha,
            "file_size": str(len(test_content)),
            "created_at": "2025-01-01T00:00:00Z",
            "ref_key": "test/reference.bin",
            "ref_sha256": ref_sha,
            "delta_size": "100",
            "delta_cmd": "xdelta3 -e -9 -s reference.bin file.zip file.zip.delta",
        }
        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="delta123",
            last_modified=None,
            metadata=delta_metadata,
        )

        # Mock storage.get to return content based on which key is requested
        # Storage.get is called with full keys like "bucket/path/file"
        def get_side_effect(key):
            # Check the actual key passed
            if "delta" in key:
                return io.BytesIO(b"delta content")
            elif "reference.bin" in key:
                # Return reference content for the reference file
                return io.BytesIO(ref_content)
            else:
                # Default case - return reference content
                return io.BytesIO(ref_content)

        mock_storage.get.side_effect = get_side_effect

        # Setup mock diff decode to create correct file
        def decode_correct(base, delta, out):
            out.write_bytes(test_content)

        mock_diff.decode.side_effect = decode_correct
        # Create cached reference
        ref_path = service.cache.ref_path("test-bucket", "test")
        ref_path.parent.mkdir(parents=True, exist_ok=True)
        ref_path.write_bytes(ref_content)
        # Execute
        result = service.verify(delta_key)
        # Verify: reconstructed SHA must equal the recorded file_sha256.
        assert result.valid is True
        assert result.expected_sha256 == test_sha
        assert result.actual_sha256 == test_sha
        assert "verified" in result.message.lower()