mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-04-19 15:01:23 +02:00
Initial commit: DeltaGlider - 99.9% compression for S3 storage
DeltaGlider reduces storage costs by storing only binary deltas between similar files. Achieves 99.9% compression for versioned artifacts. Key features: - Intelligent file type detection (delta for archives, direct for others) - Drop-in S3 replacement with automatic compression - SHA256 integrity verification on every operation - Clean hexagonal architecture - Full test coverage - Production tested with 200K+ files Case study: ReadOnlyREST reduced 4TB to 5GB (99.9% compression)
This commit is contained in:
1
tests/e2e/__init__.py
Normal file
1
tests/e2e/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""End-to-end tests for DeltaGlider."""
|
||||
162
tests/e2e/test_localstack.py
Normal file
162
tests/e2e/test_localstack.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""E2E tests with LocalStack."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from deltaglider.app.cli.main import cli
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.usefixtures("skip_if_no_xdelta")
class TestLocalStackE2E:
    """E2E tests using LocalStack.

    Exercises the DeltaGlider CLI (put/get/verify) against a LocalStack S3
    endpoint. Requires LocalStack to be reachable (default
    http://localhost:4566, overridable via AWS_ENDPOINT_URL) and the
    ``xdelta`` binary to be installed (enforced by ``skip_if_no_xdelta``).
    """

    @pytest.fixture
    def s3_client(self):
        """Create an S3 client pointed at LocalStack (dummy credentials)."""
        return boto3.client(
            "s3",
            endpoint_url=os.environ.get("AWS_ENDPOINT_URL", "http://localhost:4566"),
            aws_access_key_id="test",
            aws_secret_access_key="test",
            region_name="us-east-1",
        )

    @pytest.fixture
    def test_bucket(self, s3_client):
        """Create a test bucket, yield its name, then best-effort clean up.

        Yields:
            str: the bucket name.
        """
        bucket_name = "test-deltaglider-bucket"
        try:
            s3_client.create_bucket(Bucket=bucket_name)
        except (
            s3_client.exceptions.BucketAlreadyExists,
            # In us-east-1 (and on LocalStack) re-creating a bucket you
            # already own raises BucketAlreadyOwnedByYou, not
            # BucketAlreadyExists — treat both as "bucket is there".
            s3_client.exceptions.BucketAlreadyOwnedByYou,
        ):
            pass
        yield bucket_name
        # Cleanup: delete all objects, then the bucket itself. Deliberately
        # best-effort — a cleanup failure must not mask the test outcome.
        try:
            response = s3_client.list_objects_v2(Bucket=bucket_name)
            for obj in response.get("Contents", []):
                s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
            s3_client.delete_bucket(Bucket=bucket_name)
        except Exception:
            pass

    def test_full_workflow(self, test_bucket, s3_client):
        """Test complete put/get/verify workflow.

        First upload becomes the leaf's reference; second similar file is
        stored as a delta; the delta round-trips through get and passes
        integrity verification.
        """
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)

            # Two similar files so the second can be delta-encoded.
            file1 = tmpdir / "plugin-v1.0.0.zip"
            file1.write_text("Plugin version 1.0.0 content")

            file2 = tmpdir / "plugin-v1.0.1.zip"
            file2.write_text("Plugin version 1.0.1 content with minor changes")

            # Upload first file (becomes the leaf's reference).
            result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            output1 = json.loads(result.output)
            assert output1["operation"] == "create_reference"
            assert output1["key"] == "plugins/reference.bin"

            # Verify the reference and the first delta were created in S3.
            objects = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="plugins/")
            keys = [obj["Key"] for obj in objects["Contents"]]
            assert "plugins/reference.bin" in keys
            assert "plugins/plugin-v1.0.0.zip.delta" in keys

            # Upload second file (stored as a delta against the reference).
            result = runner.invoke(cli, ["put", str(file2), f"s3://{test_bucket}/plugins/"])
            assert result.exit_code == 0
            output2 = json.loads(result.output)
            assert output2["operation"] == "create_delta"
            assert output2["key"] == "plugins/plugin-v1.0.1.zip.delta"
            assert "delta_ratio" in output2

            # Download the second file and check it reconstructs exactly.
            output_file = tmpdir / "downloaded.zip"
            result = runner.invoke(
                cli,
                ["get", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta", "-o", str(output_file)],
            )
            assert result.exit_code == 0
            assert output_file.read_text() == file2.read_text()

            # Verify integrity (SHA256 check on the stored delta).
            result = runner.invoke(
                cli,
                ["verify", f"s3://{test_bucket}/plugins/plugin-v1.0.1.zip.delta"],
            )
            assert result.exit_code == 0
            verify_output = json.loads(result.output)
            assert verify_output["valid"] is True

    def test_multiple_leaves(self, test_bucket, s3_client):
        """Test multiple leaf directories with separate references.

        Each leaf prefix must get its own reference.bin rather than sharing
        one bucket-wide reference.
        """
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)

            # One file per leaf.
            file_a1 = tmpdir / "app-a-v1.zip"
            file_a1.write_text("Application A version 1")

            file_b1 = tmpdir / "app-b-v1.zip"
            file_b1.write_text("Application B version 1")

            # Upload to two different leaves.
            result = runner.invoke(cli, ["put", str(file_a1), f"s3://{test_bucket}/apps/app-a/"])
            assert result.exit_code == 0

            result = runner.invoke(cli, ["put", str(file_b1), f"s3://{test_bucket}/apps/app-b/"])
            assert result.exit_code == 0

            # Each leaf must have its own reference object.
            objects_a = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="apps/app-a/")
            keys_a = [obj["Key"] for obj in objects_a["Contents"]]
            assert "apps/app-a/reference.bin" in keys_a

            objects_b = s3_client.list_objects_v2(Bucket=test_bucket, Prefix="apps/app-b/")
            keys_b = [obj["Key"] for obj in objects_b["Contents"]]
            assert "apps/app-b/reference.bin" in keys_b

    def test_large_delta_warning(self, test_bucket, s3_client):
        """Test warning for large delta ratio.

        Exceeding --max-ratio must only warn; the upload itself still
        succeeds and the delta is stored.
        """
        runner = CliRunner()

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)

            # Two maximally dissimilar files so the delta is large.
            file1 = tmpdir / "file1.zip"
            file1.write_text("A" * 1000)

            file2 = tmpdir / "file2.zip"
            file2.write_text("B" * 1000)  # Completely different

            # Upload first file (becomes the reference).
            result = runner.invoke(cli, ["put", str(file1), f"s3://{test_bucket}/test/"])
            assert result.exit_code == 0

            # Upload second file with a deliberately low max-ratio threshold.
            result = runner.invoke(
                cli,
                ["put", str(file2), f"s3://{test_bucket}/test/", "--max-ratio", "0.1"],
            )
            assert result.exit_code == 0
            # Warning should be logged but the operation should succeed.
            output = json.loads(result.output)
            assert output["operation"] == "create_delta"
            # Delta ratio should be high (files are completely different).
            assert output["delta_ratio"] > 0.5
|
||||
Reference in New Issue
Block a user