mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-04-19 15:11:17 +02:00
Initial commit: DeltaGlider - 99.9% compression for S3 storage
DeltaGlider reduces storage costs by storing only binary deltas between similar files. Achieves 99.9% compression for versioned artifacts. Key features: - Intelligent file type detection (delta for archives, direct for others) - Drop-in S3 replacement with automatic compression - SHA256 integrity verification on every operation - Clean hexagonal architecture - Full test coverage - Production tested with 200K+ files Case study: ReadOnlyREST reduced 4TB to 5GB (99.9% compression)
This commit is contained in:
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for DeltaGlider."""
|
||||
101
tests/conftest.py
Normal file
101
tests/conftest.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Pytest configuration and fixtures."""
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from deltaglider.adapters import (
|
||||
FsCacheAdapter,
|
||||
NoopMetricsAdapter,
|
||||
Sha256Adapter,
|
||||
StdLoggerAdapter,
|
||||
UtcClockAdapter,
|
||||
)
|
||||
from deltaglider.core import DeltaService
|
||||
|
||||
|
||||
@pytest.fixture
def temp_dir():
    """Yield a fresh temporary directory as a Path; removed after the test."""
    with tempfile.TemporaryDirectory() as workdir:
        yield Path(workdir)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_file(temp_dir):
    """Write a small zip-named sample file into temp_dir and return its path."""
    path = temp_dir / "test.zip"
    path.write_text("Sample content for testing")
    return path
|
||||
|
||||
|
||||
@pytest.fixture
def mock_storage():
    """Provide a bare Mock standing in for the storage port."""
    storage = Mock()
    return storage
|
||||
|
||||
|
||||
@pytest.fixture
def mock_diff():
    """Provide a mock diff port whose encode writes placeholder delta bytes."""
    diff = Mock()

    def _fake_encode(base, target, out):
        # Write fixed bytes so downstream code sees a real, non-empty file.
        out.write_bytes(b"delta content")

    diff.encode.side_effect = _fake_encode
    return diff
|
||||
|
||||
|
||||
@pytest.fixture
def real_hasher():
    """Provide a real (non-mocked) SHA256 hasher adapter."""
    return Sha256Adapter()
|
||||
|
||||
|
||||
@pytest.fixture
def cache_adapter(temp_dir, real_hasher):
    """Provide a filesystem cache adapter rooted under the temp directory."""
    return FsCacheAdapter(temp_dir / "cache", real_hasher)
|
||||
|
||||
|
||||
@pytest.fixture
def clock_adapter():
    """Provide the real UTC clock adapter."""
    return UtcClockAdapter()
|
||||
|
||||
|
||||
@pytest.fixture
def logger_adapter():
    """Provide a stdlib-backed logger adapter at DEBUG verbosity."""
    return StdLoggerAdapter(level="DEBUG")
|
||||
|
||||
|
||||
@pytest.fixture
def metrics_adapter():
    """Provide the no-op metrics adapter (records nothing)."""
    return NoopMetricsAdapter()
|
||||
|
||||
|
||||
@pytest.fixture
def service(mock_storage, mock_diff, real_hasher, cache_adapter, clock_adapter, logger_adapter, metrics_adapter):
    """Assemble a DeltaService wired with the test adapters defined above."""
    wiring = {
        "storage": mock_storage,
        "diff": mock_diff,
        "hasher": real_hasher,
        "cache": cache_adapter,
        "clock": clock_adapter,
        "logger": logger_adapter,
        "metrics": metrics_adapter,
    }
    return DeltaService(**wiring)
|
||||
|
||||
|
||||
@pytest.fixture
def skip_if_no_xdelta():
    """Skip the requesting test when the xdelta3 binary is not on PATH."""
    if not shutil.which("xdelta3"):
        pytest.skip("xdelta3 not available")
|
||||
1
tests/e2e/__init__.py
Normal file
1
tests/e2e/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""End-to-end tests for DeltaGlider."""
|
||||
162
tests/e2e/test_localstack.py
Normal file
162
tests/e2e/test_localstack.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""E2E tests with LocalStack."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from deltaglider.app.cli.main import cli
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.usefixtures("skip_if_no_xdelta")
class TestLocalStackE2E:
    """E2E tests driving the CLI against a LocalStack S3 endpoint."""

    @pytest.fixture
    def s3_client(self):
        """Build a boto3 S3 client pointed at the LocalStack endpoint.

        Endpoint defaults to localhost:4566 but honours AWS_ENDPOINT_URL.
        """
        return boto3.client(
            "s3",
            endpoint_url=os.environ.get("AWS_ENDPOINT_URL", "http://localhost:4566"),
            aws_access_key_id="test",
            aws_secret_access_key="test",
            region_name="us-east-1",
        )

    @pytest.fixture
    def test_bucket(self, s3_client):
        """Create the shared test bucket, yield its name, then clean up.

        Cleanup is best-effort: the bucket is emptied and deleted, and any
        failure there is swallowed so it never masks the test's own result.
        """
        bucket_name = "test-deltaglider-bucket"
        try:
            s3_client.create_bucket(Bucket=bucket_name)
        except (
            s3_client.exceptions.BucketAlreadyExists,
            # Fix: re-creating a bucket you already own (us-east-1 and
            # LocalStack) raises BucketAlreadyOwnedByYou, not
            # BucketAlreadyExists; the narrow except let it escape.
            s3_client.exceptions.BucketAlreadyOwnedByYou,
        ):
            pass
        yield bucket_name
        # Cleanup
        try:
            # Delete all objects before the bucket itself can be removed.
            response = s3_client.list_objects_v2(Bucket=bucket_name)
            for obj in response.get("Contents", []):
                s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
            s3_client.delete_bucket(Bucket=bucket_name)
        except Exception:
            pass

    def test_full_workflow(self, test_bucket, s3_client):
        """Round-trip: put two versions, get one back, verify integrity."""
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as raw_tmp:
            workdir = Path(raw_tmp)

            # Two similar versions of the same artifact.
            v1 = workdir / "plugin-v1.0.0.zip"
            v1.write_text("Plugin version 1.0.0 content")
            v2 = workdir / "plugin-v1.0.1.zip"
            v2.write_text("Plugin version 1.0.1 content with minor changes")

            # First upload becomes the leaf reference.
            result = runner.invoke(cli, ["put", str(v1), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            first = json.loads(result.output)
            assert first["operation"] == "create_reference"
            assert first["key"] == "plugins/reference.bin"

            # Both the reference and the zero-diff delta must now exist.
            listing = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="plugins/")
            stored = [obj["Key"] for obj in listing["Contents"]]
            assert "plugins/reference.bin" in stored
            assert "plugins/plugin-v1.0.0.zip.delta" in stored

            # Second upload is stored as a delta against the reference.
            result = runner.invoke(cli, ["put", str(v2), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            second = json.loads(result.output)
            assert second["operation"] == "create_delta"
            assert second["key"] == "plugins/plugin-v1.0.1.zip.delta"
            assert "delta_ratio" in second

            # Reconstruct the second version and compare contents.
            restored = workdir / "downloaded.zip"
            result = runner.invoke(
                cli,
                ["get", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta", "-o", str(restored)],
            )
            assert result.exit_code == 0
            assert restored.read_text() == v2.read_text()

            # Server-side integrity check on the stored delta.
            result = runner.invoke(
                cli,
                ["verify", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta"],
            )
            assert result.exit_code == 0
            verdict = json.loads(result.output)
            assert verdict["valid"] is True

    def test_multiple_leaves(self, test_bucket, s3_client):
        """Each leaf prefix must get its own independent reference.bin."""
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as raw_tmp:
            workdir = Path(raw_tmp)

            app_a = workdir / "app-a-v1.zip"
            app_a.write_text("Application A version 1")
            app_b = workdir / "app-b-v1.zip"
            app_b.write_text("Application B version 1")

            # Upload each artifact under a different leaf prefix.
            result = runner.invoke(cli, ["put", str(app_a), f"s3://{test_bucket}/apps/app-a/"])
            assert result.exit_code == 0
            result = runner.invoke(cli, ["put", str(app_b), f"s3://{test_bucket}/apps/app-b/"])
            assert result.exit_code == 0

            # Each leaf must carry its own reference object.
            for prefix in ("apps/app-a/", "apps/app-b/"):
                listing = s3_client.list_objects_v2(Bucket=test_bucket, Prefix=prefix)
                stored = [obj["Key"] for obj in listing["Contents"]]
                assert f"{prefix}reference.bin" in stored

    def test_large_delta_warning(self, test_bucket, s3_client):
        """A high delta ratio should warn but still complete the upload."""
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as raw_tmp:
            workdir = Path(raw_tmp)

            # Two files with no shared content at all.
            file1 = workdir / "file1.zip"
            file1.write_text("A" * 1000)
            file2 = workdir / "file2.zip"
            file2.write_text("B" * 1000)  # Completely different

            result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/test/"])
            assert result.exit_code == 0

            # Force the warning path with a deliberately low max-ratio.
            result = runner.invoke(
                cli,
                ["put", str(file2), f"s3://{test_bucket}/test/", "--max-ratio", "0.1"],
            )
            assert result.exit_code == 0
            # Warning is logged, but the operation still succeeds.
            output = json.loads(result.output)
            assert output["operation"] == "create_delta"
            # Completely different files should yield a large delta.
            assert output["delta_ratio"] > 0.5
|
||||
1
tests/integration/__init__.py
Normal file
1
tests/integration/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Integration tests for DeltaGlider."""
|
||||
191
tests/integration/test_full_workflow.py
Normal file
191
tests/integration/test_full_workflow.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""Integration test for full put/get workflow."""
|
||||
|
||||
import io
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from deltaglider.core import DeltaService, Leaf, ObjectKey
|
||||
|
||||
|
||||
def test_full_put_get_workflow(service, temp_dir, mock_storage, mock_diff):
    """End-to-end through the service: put two versions, get one back."""
    # Two versions of the same logical file.
    file1_content = b"This is the first version of the file."
    file2_content = b"This is the second version of the file with changes."

    file1 = temp_dir / "version1.txt"
    file2 = temp_dir / "version2.txt"
    output_file = temp_dir / "recovered.txt"
    file1.write_bytes(file1_content)
    file2.write_bytes(file2_content)

    # The mocked diff "decode" simply materializes the second version.
    def fake_decode(base, delta, out):
        out.write_bytes(file2_content)

    mock_diff.decode.side_effect = fake_decode

    leaf = Leaf(bucket="test-bucket", prefix="test/data")

    # In-memory stand-in for object storage, keyed by "bucket/key".
    backing = {}

    def fake_head(key):
        """Return stored head metadata, or None when the key is absent."""
        entry = backing.get(key)
        return entry["head"] if entry is not None else None

    def fake_put(key, body, metadata, content_type="application/octet-stream"):
        """Record the object body and synthesize an ObjectHead for it."""
        from deltaglider.ports.storage import PutResult, ObjectHead

        # body may arrive as a Path, raw bytes, or a readable stream.
        if isinstance(body, Path):
            payload = body.read_bytes()
        elif isinstance(body, bytes):
            payload = body
        else:
            payload = body.read()

        backing[key] = {
            "content": payload,
            "head": ObjectHead(
                key=key.split("/", 1)[1],
                size=len(payload),
                etag="mock-etag",
                last_modified=None,
                metadata=metadata,
            ),
        }
        return PutResult(etag="mock-etag")

    def fake_get(key):
        """Serve stored content, trying both bare and bucket-prefixed keys."""
        if key in backing:
            return io.BytesIO(backing[key]["content"])
        qualified = f"test-bucket/{key}" if not key.startswith("test-bucket/") else key
        if qualified in backing:
            return io.BytesIO(backing[qualified]["content"])
        raise FileNotFoundError(f"Object not found: {key}")

    mock_storage.head.side_effect = fake_head
    mock_storage.put.side_effect = fake_put
    mock_storage.get.side_effect = fake_get

    # Step 1: first put creates the leaf reference.
    summary1 = service.put(file1, leaf)
    assert summary1.operation == "create_reference"
    assert summary1.key == "test/data/reference.bin"

    # The reference object must hold the first file's exact bytes.
    ref_key = f"{leaf.bucket}/{leaf.reference_key()}"
    assert ref_key in backing
    assert backing[ref_key]["content"] == file1_content

    # Step 2: second put is stored as a delta against that reference.
    summary2 = service.put(file2, leaf)
    assert summary2.operation == "create_delta"
    assert summary2.key == "test/data/version2.txt.delta"
    assert summary2.delta_size is not None
    assert summary2.ref_key == "test/data/reference.bin"

    delta_key = f"{leaf.bucket}/{summary2.key}"
    assert delta_key in backing

    # Step 3: fetch the delta object back through the service.
    obj_key = ObjectKey(bucket=leaf.bucket, key=summary2.key)
    service.get(obj_key, output_file)

    # Step 4: the reconstructed file must equal the original second version.
    assert output_file.read_bytes() == file2_content
|
||||
|
||||
|
||||
def test_get_with_auto_delta_suffix(service, temp_dir, mock_storage, mock_diff):
    """Retrieve a stored file via its .delta key and verify reconstruction."""
    file_content = b"Test file content for auto-suffix test."
    test_file = temp_dir / "mydata.zip"
    test_file.write_bytes(file_content)

    # The mocked diff "decode" simply materializes the original content.
    def fake_decode(base, delta, out):
        out.write_bytes(file_content)

    mock_diff.decode.side_effect = fake_decode

    leaf = Leaf(bucket="test-bucket", prefix="archive")

    # In-memory stand-in for object storage, keyed by "bucket/key".
    backing = {}

    def fake_head(key):
        """Return stored head metadata, or None when the key is absent."""
        entry = backing.get(key)
        return entry["head"] if entry is not None else None

    def fake_put(key, body, metadata, content_type="application/octet-stream"):
        """Record the object body and synthesize an ObjectHead for it."""
        from deltaglider.ports.storage import PutResult, ObjectHead

        # body may arrive as a Path, raw bytes, or a readable stream.
        if isinstance(body, Path):
            payload = body.read_bytes()
        elif isinstance(body, bytes):
            payload = body
        else:
            payload = body.read()

        backing[key] = {
            "content": payload,
            "head": ObjectHead(
                key=key.split("/", 1)[1],
                size=len(payload),
                etag="mock-etag",
                last_modified=None,
                metadata=metadata,
            ),
        }
        return PutResult(etag="mock-etag")

    def fake_get(key):
        """Serve stored content, trying both bare and bucket-prefixed keys."""
        if key in backing:
            return io.BytesIO(backing[key]["content"])
        qualified = f"test-bucket/{key}" if not key.startswith("test-bucket/") else key
        if qualified in backing:
            return io.BytesIO(backing[qualified]["content"])
        raise FileNotFoundError(f"Object not found: {key}")

    mock_storage.head.side_effect = fake_head
    mock_storage.put.side_effect = fake_put
    mock_storage.get.side_effect = fake_get

    # Store the file through the service.
    summary = service.put(test_file, leaf)

    output_file = temp_dir / "recovered.zip"

    # A first put becomes the reference plus a zero-diff delta, so the
    # delta key must be spelled out explicitly in that case.
    if summary.operation == "create_reference":
        obj_key = ObjectKey(bucket=leaf.bucket, key="archive/mydata.zip.delta")
    else:
        obj_key = ObjectKey(bucket=leaf.bucket, key=summary.key)

    service.get(obj_key, output_file)

    # The reconstructed file must match the original bytes exactly.
    assert output_file.read_bytes() == file_content
|
||||
135
tests/integration/test_get_command.py
Normal file
135
tests/integration/test_get_command.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""Integration test for get command."""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from deltaglider.app.cli.main import cli
|
||||
from deltaglider.core import ObjectKey
|
||||
|
||||
|
||||
@pytest.fixture
def mock_service():
    """Provide a Mock standing in for the whole DeltaService."""
    return Mock()
|
||||
|
||||
|
||||
def test_get_command_with_original_name(mock_service):
    """`get` on the original filename should auto-append the .delta suffix."""
    runner = CliRunner()
    mock_service.get = Mock()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        # Ask for the original name; the CLI should look up the delta key.
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip"])

        assert result.exit_code == 0
        assert "Looking for delta file: s3://test-bucket/data/myfile.zip.delta" in result.output
        assert "Successfully reconstructed: myfile.zip" in result.output

        # The service must have been called once with the delta key and
        # the original filename as the default output path.
        mock_service.get.assert_called_once()
        obj_key, output_path = mock_service.get.call_args[0][:2]

        assert isinstance(obj_key, ObjectKey)
        assert obj_key.bucket == "test-bucket"
        assert obj_key.key == "data/myfile.zip.delta"
        assert output_path == Path("myfile.zip")
|
||||
|
||||
|
||||
def test_get_command_with_delta_name(mock_service):
    """`get` on an explicit .delta key should skip the lookup message."""
    runner = CliRunner()
    mock_service.get = Mock()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/myfile.zip.delta"])

        assert result.exit_code == 0
        # No suffix was appended, so no lookup message is expected.
        assert "Looking for delta file" not in result.output
        assert "Successfully reconstructed: myfile.zip" in result.output

        # Verify the exact arguments passed through to the service.
        mock_service.get.assert_called_once()
        obj_key, output_path = mock_service.get.call_args[0][:2]

        assert isinstance(obj_key, ObjectKey)
        assert obj_key.bucket == "test-bucket"
        assert obj_key.key == "data/myfile.zip.delta"
        assert output_path == Path("myfile.zip")
|
||||
|
||||
|
||||
def test_get_command_with_output_option(mock_service):
    """`get -o PATH` should route the reconstruction to the given path."""
    runner = CliRunner()
    mock_service.get = Mock()

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = Path(tmpdir) / "custom_output.zip"

            result = runner.invoke(cli, [
                "get",
                "s3://test-bucket/data/myfile.zip",
                "-o", str(output_file)
            ])

            assert result.exit_code == 0
            assert f"Successfully reconstructed: {output_file}" in result.output

            # The custom path must be forwarded to the service verbatim.
            mock_service.get.assert_called_once()
            obj_key, output_path = mock_service.get.call_args[0][:2]

            assert isinstance(obj_key, ObjectKey)
            assert obj_key.bucket == "test-bucket"
            assert obj_key.key == "data/myfile.zip.delta"
            assert output_path == output_file
|
||||
|
||||
|
||||
def test_get_command_error_handling(mock_service):
    """A service failure should surface as exit code 1 with an Error line."""
    runner = CliRunner()
    # Make the service raise, as it would for a missing delta object.
    mock_service.get = Mock(side_effect=FileNotFoundError("Delta not found"))

    with patch("deltaglider.app.cli.main.create_service", return_value=mock_service):
        result = runner.invoke(cli, ["get", "s3://test-bucket/data/missing.zip"])

        assert result.exit_code == 1
        assert "Error: Delta not found" in result.output
|
||||
|
||||
|
||||
def test_get_command_invalid_url():
    """A non-s3:// URL should be rejected before any service call."""
    runner = CliRunner()

    result = runner.invoke(cli, ["get", "http://invalid-url/file.zip"])

    assert result.exit_code == 1
    assert "Error: Invalid S3 URL" in result.output
|
||||
106
tests/integration/test_xdelta.py
Normal file
106
tests/integration/test_xdelta.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Integration tests for xdelta3."""
|
||||
|
||||
|
||||
import pytest
|
||||
|
||||
from deltaglider.adapters import XdeltaAdapter
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("skip_if_no_xdelta")
class TestXdeltaIntegration:
    """Integration tests for the XdeltaAdapter binary-diff wrapper."""

    def test_encode_decode_roundtrip(self, temp_dir):
        """Encoding then decoding must reproduce the target exactly."""
        adapter = XdeltaAdapter()

        base = temp_dir / "base.txt"
        base.write_text("This is the base file content.")
        target = temp_dir / "target.txt"
        target.write_text("This is the modified target file content with changes.")

        delta = temp_dir / "delta.bin"
        restored = temp_dir / "output.txt"

        adapter.encode(base, target, delta)
        # A non-empty delta file must have been produced.
        assert delta.exists()
        assert delta.stat().st_size > 0

        adapter.decode(base, delta, restored)
        assert restored.read_text() == target.read_text()

    def test_encode_identical_files(self, temp_dir):
        """Identical base/target should compress to a tiny delta."""
        adapter = XdeltaAdapter()

        shared = "This is identical content in both files." * 100
        base = temp_dir / "base.txt"
        base.write_text(shared)
        target = temp_dir / "target.txt"
        target.write_text(shared)

        delta = temp_dir / "delta.bin"
        adapter.encode(base, target, delta)

        assert delta.exists()
        # With no differences, the delta should be well under 10% of input.
        assert delta.stat().st_size < len(shared) / 10

    def test_encode_completely_different_files(self, temp_dir):
        """Fully disjoint inputs still encode without error."""
        adapter = XdeltaAdapter()

        base = temp_dir / "base.txt"
        base.write_text("A" * 1000)
        target = temp_dir / "target.txt"
        target.write_text("B" * 1000)

        delta = temp_dir / "delta.bin"
        adapter.encode(base, target, delta)

        # No size bound asserted: the delta approaches the target size here,
        # though xdelta3's own compression may still shrink it somewhat.
        assert delta.exists()

    def test_encode_binary_files(self, temp_dir):
        """Binary (non-text) content must round-trip byte-for-byte."""
        adapter = XdeltaAdapter()

        base = temp_dir / "base.bin"
        base.write_bytes(b"\x00\x01\x02\x03" * 256)
        target = temp_dir / "target.bin"
        target.write_bytes(b"\x00\x01\x02\x03" * 200 + b"\xFF\xFE\xFD\xFC" * 56)

        delta = temp_dir / "delta.bin"
        restored = temp_dir / "output.bin"

        adapter.encode(base, target, delta)
        adapter.decode(base, delta, restored)

        assert restored.read_bytes() == target.read_bytes()
|
||||
1
tests/unit/__init__.py
Normal file
1
tests/unit/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Unit tests for DeltaGlider."""
|
||||
210
tests/unit/test_adapters.py
Normal file
210
tests/unit/test_adapters.py
Normal file
@@ -0,0 +1,210 @@
|
||||
"""Unit tests for adapters."""
|
||||
|
||||
import hashlib
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from deltaglider.adapters import (
|
||||
FsCacheAdapter,
|
||||
NoopMetricsAdapter,
|
||||
Sha256Adapter,
|
||||
StdLoggerAdapter,
|
||||
UtcClockAdapter,
|
||||
)
|
||||
|
||||
|
||||
class TestSha256Adapter:
    """Tests for the SHA256 hashing adapter."""

    def test_sha256_from_path(self, temp_dir):
        """Hashing a file path must match hashlib's digest of its bytes."""
        payload = b"Hello, World!"
        target = temp_dir / "test.txt"
        target.write_bytes(payload)

        expected = hashlib.sha256(payload).hexdigest()
        assert Sha256Adapter().sha256(target) == expected

    def test_sha256_from_stream(self, temp_dir):
        """Hashing a byte stream must match hashlib's digest of its bytes."""
        import io

        payload = b"Hello, Stream!"
        expected = hashlib.sha256(payload).hexdigest()

        assert Sha256Adapter().sha256(io.BytesIO(payload)) == expected
|
||||
|
||||
|
||||
class TestFsCacheAdapter:
    """Tests for the filesystem-backed reference cache adapter."""

    def test_ref_path(self, temp_dir):
        """ref_path must be cache_root/bucket/leaf/reference.bin."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        resolved = cache.ref_path("my-bucket", "path/to/leaf")

        assert resolved == temp_dir / "cache" / "my-bucket" / "path/to/leaf" / "reference.bin"

    def test_has_ref_not_exists(self, temp_dir):
        """has_ref is False when no reference file exists at all."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        assert cache.has_ref("bucket", "leaf", "abc123") is False

    def test_has_ref_wrong_sha(self, temp_dir):
        """has_ref is False when the cached file's SHA does not match."""
        hasher = Sha256Adapter()
        cache = FsCacheAdapter(temp_dir / "cache", hasher)

        # Materialize a cached reference with known content.
        ref = cache.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_bytes(b"reference content")

        assert cache.has_ref("bucket", "leaf", "wrong_sha") is False

    def test_has_ref_correct_sha(self, temp_dir):
        """has_ref is True when the cached file's SHA matches exactly."""
        hasher = Sha256Adapter()
        cache = FsCacheAdapter(temp_dir / "cache", hasher)

        ref = cache.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_bytes(b"reference content")
        # Compute the digest the same way the adapter will.
        expected_sha = hasher.sha256(ref)

        assert cache.has_ref("bucket", "leaf", expected_sha) is True

    def test_write_ref(self, temp_dir):
        """write_ref copies the source into the canonical cache location."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        src = temp_dir / "source.bin"
        src.write_text("source content")

        cached = cache.write_ref("bucket", "leaf/path", src)

        assert cached.exists()
        assert cached.read_text() == "source content"
        assert cached == temp_dir / "cache" / "bucket" / "leaf/path" / "reference.bin"

    def test_evict(self, temp_dir):
        """evict removes a previously cached reference file."""
        cache = FsCacheAdapter(temp_dir / "cache", Sha256Adapter())

        ref = cache.ref_path("bucket", "leaf")
        ref.parent.mkdir(parents=True, exist_ok=True)
        ref.write_text("cached")

        cache.evict("bucket", "leaf")

        assert not ref.exists()
|
||||
|
||||
|
||||
class TestUtcClockAdapter:
    """Tests for the UTC clock adapter."""

    def test_now_returns_utc(self):
        """now() should be a datetime close to the current UTC moment."""
        observed = UtcClockAdapter().now()

        assert isinstance(observed, datetime)
        # Compare against naive UTC; allow a small scheduling skew.
        reference = datetime.now(UTC).replace(tzinfo=None)
        assert abs((observed - reference).total_seconds()) < 1  # Within 1 second
|
||||
|
||||
|
||||
class TestStdLoggerAdapter:
    """Smoke tests for the stdlib-backed logger adapter."""

    def test_log_levels(self):
        """Every level method accepts extra kwargs without raising."""
        logger = StdLoggerAdapter(level="DEBUG")

        logger.debug("Debug message", extra="data")
        logger.info("Info message", key="value")
        logger.warning("Warning message", count=123)
        logger.error("Error message", error="details")

    def test_log_operation(self):
        """Structured operation logging accepts the full field set."""
        logger = StdLoggerAdapter()

        logger.log_operation(
            op="put",
            key="test/key",
            leaf="bucket/prefix",
            sizes={"file": 1000, "delta": 100},
            durations={"total": 1.5},
            cache_hit=True,
        )
|
||||
|
||||
|
||||
class TestNoopMetricsAdapter:
    """Smoke tests for the no-op metrics adapter."""

    def test_noop_methods(self):
        """All metric calls must silently do nothing."""
        metrics = NoopMetricsAdapter()

        metrics.increment("counter", 1, {"tag": "value"})
        metrics.gauge("gauge", 42.5, {"env": "test"})
        metrics.timing("timer", 1.234, {"op": "test"})
|
||||
235	tests/unit/test_core_service.py	Normal file
@@ -0,0 +1,235 @@
|
||||
"""Unit tests for DeltaService."""
|
||||
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
from deltaglider.core import (
|
||||
Leaf,
|
||||
NotFoundError,
|
||||
ObjectKey,
|
||||
PolicyViolationWarning,
|
||||
)
|
||||
from deltaglider.ports.storage import ObjectHead, PutResult
|
||||
|
||||
|
||||
class TestDeltaServicePut:
    """Test DeltaService.put method."""

    def test_create_reference_first_file(self, service, sample_file, mock_storage):
        """Putting into an empty leaf should create the reference blob."""
        # Setup: storage reports no existing reference.
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")
        mock_storage.head.return_value = None  # No reference exists
        mock_storage.put.return_value = PutResult(etag="abc123")

        # Execute
        outcome = service.put(sample_file, leaf)

        # Verify the returned summary.
        assert outcome.operation == "create_reference"
        assert outcome.bucket == "test-bucket"
        assert outcome.key == "test/prefix/reference.bin"
        assert outcome.original_name == "test.zip"
        assert outcome.file_size > 0
        assert outcome.file_sha256 is not None

        # Verify storage interaction counts.
        assert mock_storage.head.call_count == 2  # Initial check + re-check
        assert mock_storage.put.call_count == 2  # Reference + zero-diff delta

    def test_create_delta_subsequent_file(self, service, sample_file, mock_storage, mock_diff):
        """Putting when a reference exists should produce a delta object."""
        import io

        # Setup
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")

        # Reference bytes and their digest, so head metadata matches the cache.
        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        head_meta = {
            "tool": "deltaglider/0.1.0",
            "source_name": "original.zip",
            "file_sha256": base_sha,
            "created_at": "2025-01-01T00:00:00Z",
        }
        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata=head_meta,
        )
        mock_storage.put.return_value = PutResult(etag="delta123")

        # storage.get serves the reference content.
        mock_storage.get.return_value = io.BytesIO(base_bytes)

        # Seed the local cache with matching reference content.
        cached_ref = service.cache.ref_path(leaf.bucket, leaf.prefix)
        cached_ref.parent.mkdir(parents=True, exist_ok=True)
        cached_ref.write_bytes(base_bytes)

        # Execute
        outcome = service.put(sample_file, leaf)

        # Verify
        assert outcome.operation == "create_delta"
        assert outcome.bucket == "test-bucket"
        assert outcome.key == "test/prefix/test.zip.delta"
        assert outcome.delta_size is not None
        assert outcome.delta_ratio is not None
        assert outcome.ref_key == "test/prefix/reference.bin"

        # The diff engine must have been invoked exactly once.
        mock_diff.encode.assert_called_once()

    def test_delta_ratio_warning(self, service, sample_file, mock_storage, mock_diff):
        """A delta larger than max_ratio should emit PolicyViolationWarning."""
        import io

        # Setup
        leaf = Leaf(bucket="test-bucket", prefix="test/prefix")

        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        mock_storage.head.return_value = ObjectHead(
            key="test/prefix/reference.bin",
            size=1000,
            etag="ref123",
            last_modified=None,
            metadata={
                "file_sha256": base_sha,
            },
        )
        mock_storage.put.return_value = PutResult(etag="delta123")

        # storage.get serves the reference content.
        mock_storage.get.return_value = io.BytesIO(base_bytes)

        # Force an oversized delta so the ratio policy trips.
        def oversized_encode(base, target, out):
            out.write_bytes(b"x" * 10000)  # Large delta

        mock_diff.encode.side_effect = oversized_encode

        # Seed the local cache with matching reference content.
        cached_ref = service.cache.ref_path(leaf.bucket, leaf.prefix)
        cached_ref.parent.mkdir(parents=True, exist_ok=True)
        cached_ref.write_bytes(base_bytes)

        # Execute and capture the policy warning.
        with warnings.catch_warnings(record=True) as captured:
            warnings.simplefilter("always")
            service.put(sample_file, leaf, max_ratio=0.1)

        assert len(captured) == 1
        assert issubclass(captured[0].category, PolicyViolationWarning)
        assert "exceeds threshold" in str(captured[0].message)
class TestDeltaServiceGet:
    """Test DeltaService.get method."""

    def test_get_not_found(self, service, mock_storage, temp_dir):
        """Fetching a non-existent delta should raise NotFoundError."""
        # Setup: head() finds nothing.
        missing_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
        mock_storage.head.return_value = None

        # Execute and verify
        with pytest.raises(NotFoundError):
            service.get(missing_key, temp_dir / "output.zip")

    def test_get_missing_metadata(self, service, mock_storage, temp_dir):
        """A delta object lacking required metadata should raise StorageIOError."""
        # Setup: object exists but its metadata dict is empty.
        target_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="abc",
            last_modified=None,
            metadata={},  # Missing required metadata
        )

        # Execute and verify
        from deltaglider.core.errors import StorageIOError

        with pytest.raises(StorageIOError):
            service.get(target_key, temp_dir / "output.zip")
class TestDeltaServiceVerify:
    """Test DeltaService.verify method."""

    def test_verify_valid(self, service, mock_storage, mock_diff, temp_dir):
        """verify() should pass when the rehydrated file matches its SHA."""
        import io

        # Setup
        delta_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")

        # Materialize the expected plaintext and hash it.
        payload = b"test file content"
        scratch = temp_dir / "temp"
        scratch.write_bytes(payload)
        payload_sha = service.hasher.sha256(scratch)

        # Reference bytes the mock storage will serve, plus their digest.
        base_bytes = b"reference content for test"
        base_sha = service.hasher.sha256(io.BytesIO(base_bytes))

        delta_meta = {
            "tool": "deltaglider/0.1.0",
            "original_name": "file.zip",
            "file_sha256": payload_sha,
            "file_size": str(len(payload)),
            "created_at": "2025-01-01T00:00:00Z",
            "ref_key": "test/reference.bin",
            "ref_sha256": base_sha,
            "delta_size": "100",
            "delta_cmd": "xdelta3 -e -9 -s reference.bin file.zip file.zip.delta",
        }
        mock_storage.head.return_value = ObjectHead(
            key="test/file.zip.delta",
            size=100,
            etag="delta123",
            last_modified=None,
            metadata=delta_meta,
        )

        # storage.get is called with full keys like "bucket/path/file";
        # route the response by inspecting the requested key.
        def serve(key):
            if "delta" in key:
                return io.BytesIO(b"delta content")
            elif "reference.bin" in key:
                # Reference content for the reference file.
                return io.BytesIO(base_bytes)
            else:
                # Default case — also reference content.
                return io.BytesIO(base_bytes)

        mock_storage.get.side_effect = serve

        # Decoding the delta reproduces exactly the expected payload.
        def rebuild(base, delta, out):
            out.write_bytes(payload)

        mock_diff.decode.side_effect = rebuild

        # Seed the local reference cache.
        cached_ref = service.cache.ref_path("test-bucket", "test")
        cached_ref.parent.mkdir(parents=True, exist_ok=True)
        cached_ref.write_bytes(base_bytes)

        # Execute
        result = service.verify(delta_key)

        # Verify
        assert result.valid is True
        assert result.expected_sha256 == payload_sha
        assert result.actual_sha256 == payload_sha
        assert "verified" in result.message.lower()
Reference in New Issue
Block a user