mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-04-17 14:09:45 +02:00
Initial commit: DeltaGlider - 99.9% compression for S3 storage
DeltaGlider reduces storage costs by storing only binary deltas between similar files. Achieves 99.9% compression for versioned artifacts. Key features: - Intelligent file type detection (delta for archives, direct for others) - Drop-in S3 replacement with automatic compression - SHA256 integrity verification on every operation - Clean hexagonal architecture - Full test coverage - Production tested with 200K+ files Case study: ReadOnlyREST reduced 4TB to 5GB (99.9% compression)
This commit is contained in:
1
tests/unit/__init__.py
Normal file
1
tests/unit/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Unit tests for DeltaGlider."""
|
||||
210
tests/unit/test_adapters.py
Normal file
210
tests/unit/test_adapters.py
Normal file
@@ -0,0 +1,210 @@
|
||||
"""Unit tests for adapters."""
|
||||
|
||||
import hashlib
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from deltaglider.adapters import (
|
||||
FsCacheAdapter,
|
||||
NoopMetricsAdapter,
|
||||
Sha256Adapter,
|
||||
StdLoggerAdapter,
|
||||
UtcClockAdapter,
|
||||
)
|
||||
|
||||
|
||||
class TestSha256Adapter:
    """Exercise the SHA256 hashing adapter."""

    def test_sha256_from_path(self, temp_dir):
        """Hashing a file on disk matches hashlib's digest of its bytes."""
        payload = b"Hello, World!"
        target = temp_dir / "test.txt"
        target.write_bytes(payload)

        want = hashlib.sha256(payload).hexdigest()

        got = Sha256Adapter().sha256(target)

        assert got == want

    def test_sha256_from_stream(self, temp_dir):
        """Hashing an in-memory stream matches hashlib's digest."""
        import io

        payload = b"Hello, Stream!"
        want = hashlib.sha256(payload).hexdigest()

        got = Sha256Adapter().sha256(io.BytesIO(payload))

        assert got == want
|
||||
|
||||
|
||||
class TestFsCacheAdapter:
    """Exercise the filesystem-backed reference cache."""

    def test_ref_path(self, temp_dir):
        """Cache paths follow <root>/<bucket>/<leaf>/reference.bin."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        got = cache.ref_path("my-bucket", "path/to/leaf")

        assert got == temp_dir / "cache" / "my-bucket" / "path/to/leaf" / "reference.bin"

    def test_has_ref_not_exists(self, temp_dir):
        """A reference that was never written is reported absent."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        assert cache.has_ref("bucket", "leaf", "abc123") is False

    def test_has_ref_wrong_sha(self, temp_dir):
        """A cached file whose digest mismatches the expected SHA is absent."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        # Seed the cache with known bytes.
        location = cache.ref_path("bucket", "leaf")
        location.parent.mkdir(parents=True, exist_ok=True)
        location.write_bytes(b"reference content")

        assert cache.has_ref("bucket", "leaf", "wrong_sha") is False

    def test_has_ref_correct_sha(self, temp_dir):
        """A cached file whose digest matches the expected SHA is present."""
        digester = Sha256Adapter()
        cache = FsCacheAdapter(temp_dir / "cache", digester)

        # Seed the cache, then ask with the file's real digest.
        location = cache.ref_path("bucket", "leaf")
        location.parent.mkdir(parents=True, exist_ok=True)
        location.write_bytes(b"reference content")
        good_sha = digester.sha256(location)

        assert cache.has_ref("bucket", "leaf", good_sha) is True

    def test_write_ref(self, temp_dir):
        """Writing a reference copies the source file into the cache tree."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        origin = temp_dir / "source.bin"
        origin.write_text("source content")

        stored = cache.write_ref("bucket", "leaf/path", origin)

        assert stored.exists()
        assert stored.read_text() == "source content"
        assert stored == temp_dir / "cache" / "bucket" / "leaf/path" / "reference.bin"

    def test_evict(self, temp_dir):
        """Evicting removes the cached reference file from disk."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        location = cache.ref_path("bucket", "leaf")
        location.parent.mkdir(parents=True, exist_ok=True)
        location.write_text("cached")

        cache.evict("bucket", "leaf")

        assert not location.exists()
|
||||
|
||||
|
||||
class TestUtcClockAdapter:
    """Exercise the UTC clock adapter."""

    def test_now_returns_utc(self):
        """now() yields a datetime within one second of current UTC."""
        reading = UtcClockAdapter().now()

        assert isinstance(reading, datetime)
        # Strip tzinfo so the comparison is naive-vs-naive; the adapter is
        # presumably returning naive UTC — confirmed by the closeness check.
        reference = datetime.now(UTC).replace(tzinfo=None)
        assert abs((reading - reference).total_seconds()) < 1  # within 1 second
|
||||
|
||||
|
||||
class TestStdLoggerAdapter:
    """Exercise the stdlib logging adapter."""

    def test_log_levels(self):
        """Every level accepts structured keyword context without raising."""
        logger = StdLoggerAdapter(level="DEBUG")

        logger.debug("Debug message", extra="data")
        logger.info("Info message", key="value")
        logger.warning("Warning message", count=123)
        logger.error("Error message", error="details")

    def test_log_operation(self):
        """Structured operation logging accepts the full field set without raising."""
        logger = StdLoggerAdapter()

        logger.log_operation(
            op="put",
            key="test/key",
            leaf="bucket/prefix",
            sizes={"file": 1000, "delta": 100},
            durations={"total": 1.5},
            cache_hit=True,
        )
|
||||
|
||||
|
||||
class TestNoopMetricsAdapter:
    """Exercise the no-op metrics adapter."""

    def test_noop_methods(self):
        """Every metrics call is accepted and silently discarded."""
        metrics = NoopMetricsAdapter()

        metrics.increment("counter", 1, {"tag": "value"})
        metrics.gauge("gauge", 42.5, {"env": "test"})
        metrics.timing("timer", 1.234, {"op": "test"})
|
||||
235
tests/unit/test_core_service.py
Normal file
235
tests/unit/test_core_service.py
Normal file
@@ -0,0 +1,235 @@
|
||||
"""Unit tests for DeltaService."""
|
||||
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
from deltaglider.core import (
|
||||
Leaf,
|
||||
NotFoundError,
|
||||
ObjectKey,
|
||||
PolicyViolationWarning,
|
||||
)
|
||||
from deltaglider.ports.storage import ObjectHead, PutResult
|
||||
|
||||
|
||||
class TestDeltaServicePut:
    """Test DeltaService.put method."""

    def test_create_reference_first_file(self, service, sample_file, mock_storage):
        """The first upload into an empty leaf creates the reference object."""
        target = Leaf(bucket="test-bucket", prefix="test/prefix")
        mock_storage.head.return_value = None  # leaf has no reference yet
        mock_storage.put.return_value = PutResult(etag="abc123")

        outcome = service.put(sample_file, target)

        assert outcome.operation == "create_reference"
        assert outcome.bucket == "test-bucket"
        assert outcome.key == "test/prefix/reference.bin"
        assert outcome.original_name == "test.zip"
        assert outcome.file_size > 0
        assert outcome.file_sha256 is not None

        # Initial existence probe plus a re-check after upload.
        assert mock_storage.head.call_count == 2
        # One put for the reference, one for the zero-diff delta.
        assert mock_storage.put.call_count == 2

    def test_create_delta_subsequent_file(self, service, sample_file, mock_storage, mock_diff):
        """Later uploads against an existing reference produce a delta object."""
        import io

        target = Leaf(bucket="test-bucket", prefix="test/prefix")

        # Remote reference bytes and the SHA its metadata advertises.
        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata={
                "tool": "deltaglider/0.1.0",
                "source_name": "original.zip",
                "file_sha256": base_sha,
                "created_at": "2025-01-01T00:00:00Z",
            },
        )
        mock_storage.put.return_value = PutResult(etag="delta123")
        mock_storage.get.return_value = io.BytesIO(base_bytes)

        # Warm the local cache with bytes matching the remote reference.
        cached = service.cache.ref_path(target.bucket, target.prefix)
        cached.parent.mkdir(parents=True, exist_ok=True)
        cached.write_bytes(base_bytes)

        outcome = service.put(sample_file, target)

        assert outcome.operation == "create_delta"
        assert outcome.bucket == "test-bucket"
        assert outcome.key == "test/prefix/test.zip.delta"
        assert outcome.delta_size is not None
        assert outcome.delta_ratio is not None
        assert outcome.ref_key == "test/prefix/reference.bin"

        # The diff engine must have been engaged exactly once.
        mock_diff.encode.assert_called_once()

    def test_delta_ratio_warning(self, service, sample_file, mock_storage, mock_diff):
        """A delta larger than max_ratio triggers a PolicyViolationWarning."""
        import io

        target = Leaf(bucket="test-bucket", prefix="test/prefix")

        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata={"file_sha256": base_sha},
        )
        mock_storage.put.return_value = PutResult(etag="delta123")
        mock_storage.get.return_value = io.BytesIO(base_bytes)

        # Force an oversized delta so the ratio check must fire.
        def oversized_encode(base, src, out):
            out.write_bytes(b"x" * 10000)

        mock_diff.encode.side_effect = oversized_encode

        # Warm the local cache with bytes matching the remote reference.
        cached = service.cache.ref_path(target.bucket, target.prefix)
        cached.parent.mkdir(parents=True, exist_ok=True)
        cached.write_bytes(base_bytes)

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            service.put(sample_file, target, max_ratio=0.1)

            assert len(caught) == 1
            assert issubclass(caught[0].category, PolicyViolationWarning)
            assert "exceeds threshold" in str(caught[0].message)
|
||||
|
||||
|
||||
class TestDeltaServiceGet:
    """Test DeltaService.get method."""

    def test_get_not_found(self, service, mock_storage, temp_dir):
        """Fetching a delta that does not exist raises NotFoundError."""
        missing = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
        mock_storage.head.return_value = None

        with pytest.raises(NotFoundError):
            service.get(missing, temp_dir / "output.zip")

    def test_get_missing_metadata(self, service, mock_storage, temp_dir):
        """A delta object lacking required metadata raises StorageIOError."""
        from deltaglider.core.errors import StorageIOError

        broken = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="abc",
            last_modified=None,
            metadata={},  # required fields deliberately absent
        )

        with pytest.raises(StorageIOError):
            service.get(broken, temp_dir / "output.zip")
|
||||
|
||||
|
||||
class TestDeltaServiceVerify:
    """Test DeltaService.verify method."""

    def test_verify_valid(self, service, mock_storage, mock_diff, temp_dir):
        """verify() succeeds when the rebuilt file hashes to the recorded SHA."""
        import io

        delta_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")

        # Known plaintext whose SHA the delta metadata will advertise.
        expected_bytes = b"test file content"
        scratch = temp_dir / "temp"
        scratch.write_bytes(expected_bytes)
        expected_sha = service.hasher.sha256(scratch)

        # Reference bytes used by both the mocked storage and the local cache.
        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="delta123",
            last_modified=None,
            metadata={
                "tool": "deltaglider/0.1.0",
                "original_name": "file.zip",
                "file_sha256": expected_sha,
                "file_size": str(len(expected_bytes)),
                "created_at": "2025-01-01T00:00:00Z",
                "ref_key": "test/reference.bin",
                "ref_sha256": base_sha,
                "delta_size": "100",
                "delta_cmd": "xdelta3 -e -9 -s reference.bin file.zip file.zip.delta",
            },
        )

        # Storage.get receives full keys: serve delta bytes for delta keys
        # and reference bytes for everything else.
        def fetch(key):
            if "delta" in key:
                return io.BytesIO(b"delta content")
            return io.BytesIO(base_bytes)

        mock_storage.get.side_effect = fetch

        # Decoding always reproduces the expected plaintext.
        def decode_to_expected(base, delta, out):
            out.write_bytes(expected_bytes)

        mock_diff.decode.side_effect = decode_to_expected

        # Warm the local cache with the reference bytes.
        cached = service.cache.ref_path("test-bucket", "test")
        cached.parent.mkdir(parents=True, exist_ok=True)
        cached.write_bytes(base_bytes)

        outcome = service.verify(delta_key)

        assert outcome.valid is True
        assert outcome.expected_sha256 == expected_sha
        assert outcome.actual_sha256 == expected_sha
        assert "verified" in outcome.message.lower()
|
||||
|
||||
Reference in New Issue
Block a user