fix: Handle regular S3 objects without DeltaGlider metadata

- get_object() now transparently downloads regular S3 objects
- Falls back to direct download when file_sha256 metadata is missing
- Enables DeltaGlider to work with existing S3 buckets
- Add test for downloading regular S3 files

Fixes issue where get_object() would fail with StorageIOError ("Missing
metadata on ...") when trying to download objects uploaded outside of DeltaGlider.

This allows users to:
- Browse existing S3 buckets with non-DeltaGlider objects
- Download any S3 object regardless of upload method
- Use DeltaGlider as a drop-in S3 client replacement

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Simone Scarduzio
2025-10-06 14:44:25 +02:00
parent 34c871b0d7
commit 9c1659a1f1
3 changed files with 61 additions and 10 deletions

View File

@@ -21,7 +21,6 @@ from .errors import (
IntegrityMismatchError,
NotFoundError,
PolicyViolationWarning,
StorageIOError,
)
from .models import (
DeltaMeta,
@@ -171,10 +170,28 @@ class DeltaService:
if obj_head is None:
raise NotFoundError(f"Object not found: {object_key.key}")
# Check if this is a regular S3 object (not uploaded via DeltaGlider)
# Regular S3 objects won't have DeltaGlider metadata
if "file_sha256" not in obj_head.metadata:
raise StorageIOError(f"Missing metadata on {object_key.key}")
# This is a regular S3 object, download it directly
self.logger.info(
"Downloading regular S3 object (no DeltaGlider metadata)",
key=object_key.key,
)
self._get_direct(object_key, obj_head, out)
duration = (self.clock.now() - start_time).total_seconds()
self.logger.log_operation(
op="get",
key=object_key.key,
deltaspace=f"{object_key.bucket}",
sizes={"file": obj_head.size},
durations={"total": duration},
cache_hit=False,
)
self.metrics.timing("deltaglider.get.duration", duration)
return
# Check if this is a direct upload (non-delta)
# Check if this is a direct upload (non-delta) uploaded via DeltaGlider
if obj_head.metadata.get("compression") == "none":
# Direct download without delta processing
self._get_direct(object_key, obj_head, out)

View File

@@ -258,6 +258,26 @@ class TestBoto3Compatibility:
content = response["Body"].read()
assert content == b"Test Content"
def test_get_object_regular_s3_file(self, client):
    """Test get_object with regular S3 files (not uploaded via DeltaGlider).

    Regression test: get_object() must fall back to a direct download when
    the stored object carries no DeltaGlider metadata (e.g. it was uploaded
    by a plain S3 client rather than through DeltaGlider).
    """
    content = b"Regular S3 File Content"
    # Add as a regular S3 object WITHOUT DeltaGlider metadata.
    # Seeding the storage mapping directly bypasses DeltaGlider's upload
    # path entirely (presumably an in-memory fake backend — the exact
    # fixture shape comes from `client`, defined elsewhere).
    client.service.storage.objects["test-bucket/regular-file.pdf"] = {
        "data": content,
        "size": len(content),
        "metadata": {},  # No DeltaGlider metadata
    }
    # Should successfully download the regular S3 object instead of
    # raising because "file_sha256" is missing from the metadata.
    response = client.get_object(Bucket="test-bucket", Key="regular-file.pdf")
    assert "Body" in response
    downloaded_content = response["Body"].read()
    assert downloaded_content == content
    assert response["ContentLength"] == len(content)
def test_list_objects(self, client):
"""Test list_objects with various options."""
# List all objects (default: FetchMetadata=False)

View File

@@ -147,22 +147,36 @@ class TestDeltaServiceGet:
service.get(delta_key, temp_dir / "output.zip")
def test_get_missing_metadata(self, service, mock_storage, temp_dir):
"""Test get with missing metadata."""
"""Test get with missing metadata (regular S3 object)."""
# Setup
delta_key = ObjectKey(bucket="test-bucket", key="test/file.zip.delta")
# Create test content
test_content = b"regular S3 file content"
# Mock a regular S3 object without DeltaGlider metadata
mock_storage.head.return_value = ObjectHead(
key="test/file.zip.delta",
size=100,
size=len(test_content),
etag="abc",
last_modified=None,
metadata={}, # Missing required metadata
metadata={}, # Missing DeltaGlider metadata - this is a regular S3 object
)
# Execute and verify
from deltaglider.core.errors import StorageIOError
# Mock the storage.get to return the content
from unittest.mock import MagicMock
with pytest.raises(StorageIOError):
service.get(delta_key, temp_dir / "output.zip")
mock_stream = MagicMock()
mock_stream.read.side_effect = [test_content, b""] # Return content then EOF
mock_storage.get.return_value = mock_stream
# Execute - should successfully download regular S3 object
output_path = temp_dir / "output.zip"
service.get(delta_key, output_path)
# Verify - file should be downloaded
assert output_path.exists()
assert output_path.read_bytes() == test_content
class TestDeltaServiceVerify: