mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-01-11 14:20:33 +01:00
feat: Implement boto3-compatible dict responses (Phase 2)
Changed list_objects() to return boto3-compatible dict instead of custom
ListObjectsResponse dataclass. This makes DeltaGlider a true drop-in replacement
for boto3.client('s3').
Changes:
- list_objects() now returns dict[str, Any] with boto3-compatible structure:
* Contents: list[S3Object] (dict with Key, Size, LastModified, etc.)
* CommonPrefixes: list[dict] for folder simulation
* IsTruncated, NextContinuationToken for pagination
* DeltaGlider metadata stored in standard Metadata field
- Updated all client methods that use list_objects() to work with dict responses:
* find_similar_files()
* get_bucket_stats()
* CLI ls command
- Updated all tests to use dict access (response['Contents']) instead of
dataclass access (response.contents)
- Updated examples/boto3_compatible_types.py to demonstrate usage
- DeltaGlider-specific metadata now in Metadata field:
* deltaglider-is-delta: "true"/"false"
* deltaglider-original-size: string number
* deltaglider-compression-ratio: string number or "unknown"
* deltaglider-reference-key: optional string
Benefits:
- True drop-in replacement for boto3
- No learning curve - if you know boto3, you know DeltaGlider
- Works with any boto3-compatible library
- Type safety through TypedDict (no boto3 import needed)
- Zero runtime overhead (TypedDict compiles to plain dict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
314
docs/BOTO3_COMPATIBILITY_VISION.md
Normal file
314
docs/BOTO3_COMPATIBILITY_VISION.md
Normal file
@@ -0,0 +1,314 @@
|
||||
# boto3 Compatibility Vision
|
||||
|
||||
## Current State (v4.2.3)
|
||||
|
||||
DeltaGlider currently uses custom dataclasses for responses:
|
||||
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsResponse, ObjectInfo
|
||||
|
||||
client = create_client()
|
||||
response: ListObjectsResponse = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response.contents: # Custom field name
|
||||
print(f"{obj.key}: {obj.size}") # Custom ObjectInfo dataclass
|
||||
```
|
||||
|
||||
**Problems:**
|
||||
- ❌ Not a true drop-in replacement for boto3
|
||||
- ❌ Users need to learn DeltaGlider-specific types
|
||||
- ❌ Can't use with tools expecting boto3 responses
|
||||
- ❌ Different API surface (`.contents` vs `['Contents']`)
|
||||
|
||||
## Target State (v5.0.0)
|
||||
|
||||
DeltaGlider should return native boto3-compatible dicts with TypedDict type hints:
|
||||
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsV2Response
|
||||
|
||||
client = create_client()
|
||||
response: ListObjectsV2Response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']: # boto3-compatible!
|
||||
print(f"{obj['Key']}: {obj['Size']}") # Works exactly like boto3
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- ✅ **True drop-in replacement** - swap `boto3.client('s3')` with `create_client()`
|
||||
- ✅ **No learning curve** - if you know boto3, you know DeltaGlider
|
||||
- ✅ **Tool compatibility** - works with any library expecting boto3 types
|
||||
- ✅ **Type safety** - TypedDict provides IDE autocomplete without boto3 import
|
||||
- ✅ **Zero runtime overhead** - TypedDict compiles to plain dict
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Type Definitions ✅ (DONE)
|
||||
|
||||
Created `deltaglider/types.py` with comprehensive TypedDict definitions:
|
||||
|
||||
```python
|
||||
from typing import TypedDict, NotRequired
|
||||
from datetime import datetime
|
||||
|
||||
class S3Object(TypedDict):
|
||||
Key: str
|
||||
Size: int
|
||||
LastModified: datetime
|
||||
ETag: NotRequired[str]
|
||||
StorageClass: NotRequired[str]
|
||||
|
||||
class ListObjectsV2Response(TypedDict):
|
||||
Contents: list[S3Object]
|
||||
CommonPrefixes: NotRequired[list[dict[str, str]]]
|
||||
IsTruncated: NotRequired[bool]
|
||||
NextContinuationToken: NotRequired[str]
|
||||
```
|
||||
|
||||
**Key insight:** TypedDict provides type safety at development time but compiles to plain `dict` at runtime!
|
||||
|
||||
### Phase 2: Refactor Client Methods (TODO)
|
||||
|
||||
Update all client methods to return boto3-compatible dicts:
|
||||
|
||||
#### `list_objects()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def list_objects(...) -> ListObjectsResponse: # Custom dataclass
|
||||
return ListObjectsResponse(
|
||||
name=bucket,
|
||||
contents=[ObjectInfo(...), ...] # Custom dataclass
|
||||
)
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def list_objects(...) -> ListObjectsV2Response: # TypedDict
|
||||
return {
|
||||
'Contents': [
|
||||
{
|
||||
'Key': 'file.zip', # .delta suffix already stripped
|
||||
'Size': 1024,
|
||||
'LastModified': datetime(...),
|
||||
'ETag': '"abc123"',
|
||||
}
|
||||
],
|
||||
'CommonPrefixes': [{'Prefix': 'dir/'}],
|
||||
'IsTruncated': False,
|
||||
}
|
||||
```
|
||||
|
||||
**Key changes:**
|
||||
1. Return plain dict instead of custom dataclass
|
||||
2. Use boto3 field names: `Contents` not `contents`, `Key` not `key`
|
||||
3. Strip `.delta` suffix transparently (already done)
|
||||
4. Hide `reference.bin` files (already done)
|
||||
|
||||
#### `put_object()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def put_object(...) -> dict[str, Any]:
|
||||
return {
|
||||
"ETag": etag,
|
||||
"VersionId": None,
|
||||
"DeltaGliderInfo": {...} # Custom field
|
||||
}
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def put_object(...) -> PutObjectResponse: # TypedDict
|
||||
return {
|
||||
'ETag': etag,
|
||||
'ResponseMetadata': {'HTTPStatusCode': 200},
|
||||
# DeltaGlider metadata goes in Metadata field
|
||||
'Metadata': {
|
||||
'deltaglider-is-delta': 'true',
|
||||
'deltaglider-compression-ratio': '0.99'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `get_object()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def get_object(...) -> dict[str, Any]:
|
||||
return {
|
||||
"Body": data,
|
||||
"ContentLength": len(data),
|
||||
"DeltaGliderInfo": {...} # Custom field
|
||||
}
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def get_object(...) -> GetObjectResponse: # TypedDict
|
||||
return {
|
||||
'Body': data, # bytes, not StreamingBody (simpler!)
|
||||
'ContentLength': len(data),
|
||||
'LastModified': datetime(...),
|
||||
'ETag': '"abc123"',
|
||||
'Metadata': { # DeltaGlider metadata here
|
||||
'deltaglider-is-delta': 'true'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `delete_object()`, `delete_objects()`, `head_object()`, etc.
|
||||
|
||||
All follow the same pattern: return boto3-compatible dicts with TypedDict hints.
|
||||
|
||||
### Phase 3: Backward Compatibility (TODO)
|
||||
|
||||
Keep old dataclasses for 1-2 versions with deprecation warnings:
|
||||
|
||||
```python
|
||||
class ListObjectsResponse:
|
||||
"""DEPRECATED: Use dict responses with ListObjectsV2Response type hint.
|
||||
|
||||
This will be removed in v6.0.0. Update your code:
|
||||
|
||||
Before:
|
||||
response.contents[0].key
|
||||
|
||||
After:
|
||||
response['Contents'][0]['Key']
|
||||
"""
|
||||
def __init__(self, data: dict):
|
||||
warnings.warn(
|
||||
"ListObjectsResponse dataclass is deprecated. "
|
||||
"Use dict responses with ListObjectsV2Response type hint.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2
|
||||
)
|
||||
self._data = data
|
||||
|
||||
@property
|
||||
def contents(self):
|
||||
return [ObjectInfo(obj) for obj in self._data.get('Contents', [])]
|
||||
```
|
||||
|
||||
### Phase 4: Update Documentation (TODO)
|
||||
|
||||
1. Update all examples to use dict responses
|
||||
2. Add migration guide from v4.x to v5.0
|
||||
3. Update BOTO3_COMPATIBILITY.md
|
||||
4. Add "Drop-in Replacement" marketing language
|
||||
|
||||
### Phase 5: Update Tests (TODO)
|
||||
|
||||
Convert all tests from:
|
||||
```python
|
||||
assert response.contents[0].key == "file.zip"
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
assert response['Contents'][0]['Key'] == "file.zip"
|
||||
```
|
||||
|
||||
## Migration Guide (for users)
|
||||
|
||||
### v4.x → v5.0
|
||||
|
||||
**Old code (v4.x):**
|
||||
```python
|
||||
from deltaglider import create_client
|
||||
|
||||
client = create_client()
|
||||
response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response.contents: # Dataclass attribute
|
||||
print(f"{obj.key}: {obj.size}") # Dataclass attributes
|
||||
```
|
||||
|
||||
**New code (v5.0):**
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsV2Response
|
||||
|
||||
client = create_client()
|
||||
response: ListObjectsV2Response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']: # Dict key (boto3-compatible)
|
||||
print(f"{obj['Key']}: {obj['Size']}") # Dict keys (boto3-compatible)
|
||||
```
|
||||
|
||||
**Or even simpler - no type hint needed:**
|
||||
```python
|
||||
client = create_client()
|
||||
response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']:
|
||||
print(f"{obj['Key']}: {obj['Size']}")
|
||||
```
|
||||
|
||||
## Benefits Summary
|
||||
|
||||
### For Users
|
||||
- **Zero learning curve** - if you know boto3, you're done
|
||||
- **Drop-in replacement** - literally change one line (client creation)
|
||||
- **Type safety** - TypedDict provides autocomplete without boto3 dependency
|
||||
- **Tool compatibility** - works with all boto3-compatible libraries
|
||||
|
||||
### For DeltaGlider
|
||||
- **Simpler codebase** - no custom dataclasses to maintain
|
||||
- **Better marketing** - true "drop-in replacement" claim
|
||||
- **Easier testing** - test against boto3 behavior directly
|
||||
- **Future-proof** - if boto3 adds fields, users can access them immediately
|
||||
|
||||
## Technical Details
|
||||
|
||||
### How TypedDict Works
|
||||
|
||||
```python
|
||||
from typing import TypedDict
|
||||
|
||||
class MyResponse(TypedDict):
|
||||
Key: str
|
||||
Size: int
|
||||
|
||||
# At runtime, this is just a dict!
|
||||
response: MyResponse = {'Key': 'file.zip', 'Size': 1024}
|
||||
print(type(response)) # <class 'dict'>
|
||||
|
||||
# But mypy and IDEs understand the structure
|
||||
response['Key'] # ✅ Autocomplete works!
|
||||
response['Nonexistent'] # ❌ Mypy error: Key 'Nonexistent' not found
|
||||
```
|
||||
|
||||
### DeltaGlider-Specific Metadata
|
||||
|
||||
Store in standard boto3 `Metadata` field:
|
||||
|
||||
```python
|
||||
{
|
||||
'Key': 'file.zip',
|
||||
'Size': 1024,
|
||||
'Metadata': {
|
||||
# DeltaGlider-specific fields (prefixed for safety)
|
||||
'deltaglider-is-delta': 'true',
|
||||
'deltaglider-compression-ratio': '0.99',
|
||||
'deltaglider-original-size': '100000',
|
||||
'deltaglider-reference-key': 'releases/v1.0.0/reference.bin',
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This is:
|
||||
- ✅ boto3-compatible (Metadata is a standard field)
|
||||
- ✅ Namespaced (deltaglider- prefix prevents conflicts)
|
||||
- ✅ Optional (tools can ignore it)
|
||||
- ✅ Type-safe (Metadata: NotRequired[dict[str, str]])
|
||||
|
||||
## Status
|
||||
|
||||
- ✅ **Phase 1:** TypedDict definitions created
|
||||
- ⏳ **Phase 2:** Refactor client methods (IN PROGRESS)
|
||||
- ⏳ **Phase 3:** Backward compatibility (TODO)
|
||||
- ⏳ **Phase 4:** Documentation updates (TODO)
|
||||
- ⏳ **Phase 5:** Test updates (TODO)
|
||||
|
||||
**Target:** v5.0.0 release
|
||||
@@ -1,7 +1,11 @@
|
||||
"""Example: Using boto3-compatible types without importing boto3.
|
||||
"""Example: Using boto3-compatible responses without importing boto3.
|
||||
|
||||
This demonstrates how DeltaGlider provides full type safety without
|
||||
requiring boto3 imports in user code.
|
||||
This demonstrates how DeltaGlider provides full type safety and boto3 compatibility
|
||||
without requiring boto3 imports in user code.
|
||||
|
||||
As of v5.0.0, DeltaGlider returns plain dicts (not custom dataclasses) that are
|
||||
100% compatible with boto3 S3 responses. You get IDE autocomplete through TypedDict
|
||||
type hints without any runtime overhead.
|
||||
"""
|
||||
|
||||
from deltaglider import ListObjectsV2Response, S3Object, create_client
|
||||
@@ -17,6 +21,7 @@ def process_files(bucket: str, prefix: str) -> None:
|
||||
Bucket=bucket, Prefix=prefix, Delimiter="/"
|
||||
)
|
||||
|
||||
# Response is a plain dict - 100% boto3-compatible
|
||||
# TypedDict provides autocomplete and type checking
|
||||
for obj in response["Contents"]:
|
||||
# obj is typed as S3Object - all fields have autocomplete!
|
||||
@@ -24,6 +29,12 @@ def process_files(bucket: str, prefix: str) -> None:
|
||||
size: int = obj["Size"] # ✅ IDE knows this is int
|
||||
print(f"{key}: {size} bytes")
|
||||
|
||||
# DeltaGlider metadata is in the standard Metadata field
|
||||
metadata = obj.get("Metadata", {})
|
||||
if metadata.get("deltaglider-is-delta") == "true":
|
||||
compression = metadata.get("deltaglider-compression-ratio", "unknown")
|
||||
print(f" └─ Delta file (compression: {compression})")
|
||||
|
||||
# Optional fields work too
|
||||
for prefix_dict in response.get("CommonPrefixes", []):
|
||||
print(f"Directory: {prefix_dict['Prefix']}")
|
||||
@@ -49,3 +60,5 @@ if __name__ == "__main__":
|
||||
print("✅ Full type safety without boto3 imports!")
|
||||
print("✅ 100% compatible with boto3")
|
||||
print("✅ Drop-in replacement")
|
||||
print("✅ Plain dict responses (not custom dataclasses)")
|
||||
print("✅ DeltaGlider metadata in standard Metadata field")
|
||||
|
||||
@@ -259,18 +259,18 @@ def ls(
|
||||
return f"{size_float:.1f}P"
|
||||
|
||||
# List objects using SDK (automatically filters .delta and reference.bin)
|
||||
from deltaglider.client import DeltaGliderClient, ListObjectsResponse
|
||||
from deltaglider.client import DeltaGliderClient
|
||||
|
||||
client = DeltaGliderClient(service)
|
||||
dg_response: ListObjectsResponse = client.list_objects(
|
||||
dg_response = client.list_objects(
|
||||
Bucket=bucket_name, Prefix=prefix_str, MaxKeys=10000, Delimiter="/" if not recursive else ""
|
||||
)
|
||||
objects = dg_response.contents
|
||||
objects = dg_response["Contents"]
|
||||
|
||||
# Filter by recursive flag
|
||||
if not recursive:
|
||||
# Show common prefixes (subdirectories) from S3 response
|
||||
for common_prefix in dg_response.common_prefixes:
|
||||
for common_prefix in dg_response.get("CommonPrefixes", []):
|
||||
prefix_path = common_prefix.get("Prefix", "")
|
||||
# Show only the directory name, not the full path
|
||||
if prefix_str:
|
||||
@@ -283,7 +283,8 @@ def ls(
|
||||
# Only show files at current level (not in subdirectories)
|
||||
filtered_objects = []
|
||||
for obj in objects:
|
||||
rel_path = obj.key[len(prefix_str):] if prefix_str else obj.key
|
||||
obj_key = obj["Key"]
|
||||
rel_path = obj_key[len(prefix_str):] if prefix_str else obj_key
|
||||
# Only include if it's a direct child (no / in relative path)
|
||||
if "/" not in rel_path and rel_path:
|
||||
filtered_objects.append(obj)
|
||||
@@ -294,23 +295,24 @@ def ls(
|
||||
total_count = 0
|
||||
|
||||
for obj in objects:
|
||||
total_size += obj.size
|
||||
total_size += obj["Size"]
|
||||
total_count += 1
|
||||
|
||||
# Format the display
|
||||
size_str = format_bytes(obj.size)
|
||||
size_str = format_bytes(obj["Size"])
|
||||
# last_modified is a string from SDK, parse it if needed
|
||||
if isinstance(obj.last_modified, str):
|
||||
last_modified = obj.get("LastModified", "")
|
||||
if isinstance(last_modified, str):
|
||||
# Already a string, extract date portion
|
||||
date_str = obj.last_modified[:19].replace("T", " ")
|
||||
date_str = last_modified[:19].replace("T", " ")
|
||||
else:
|
||||
date_str = obj.last_modified.strftime("%Y-%m-%d %H:%M:%S")
|
||||
date_str = last_modified.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
# Show only the filename relative to current prefix (like AWS CLI)
|
||||
if prefix_str:
|
||||
display_key = obj.key[len(prefix_str):]
|
||||
display_key = obj["Key"][len(prefix_str):]
|
||||
else:
|
||||
display_key = obj.key
|
||||
display_key = obj["Key"]
|
||||
|
||||
click.echo(f"{date_str} {size_str:>10} {display_key}")
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from .client_delete_helpers import delete_with_delta_suffix
|
||||
from .client_models import (
|
||||
BucketStats,
|
||||
CompressionEstimate,
|
||||
ListObjectsResponse,
|
||||
ObjectInfo,
|
||||
UploadSummary,
|
||||
)
|
||||
@@ -197,7 +196,7 @@ class DeltaGliderClient:
|
||||
StartAfter: str | None = None,
|
||||
FetchMetadata: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> ListObjectsResponse:
|
||||
) -> dict[str, Any]:
|
||||
"""List objects in bucket with smart metadata fetching.
|
||||
|
||||
This method optimizes performance by:
|
||||
@@ -227,11 +226,11 @@ class DeltaGliderClient:
|
||||
# Fast listing for UI display (no metadata)
|
||||
response = client.list_objects(Bucket='releases', MaxKeys=100)
|
||||
|
||||
# Paginated listing
|
||||
# Paginated listing (boto3-compatible dict response)
|
||||
response = client.list_objects(
|
||||
Bucket='releases',
|
||||
MaxKeys=50,
|
||||
ContinuationToken=response.next_continuation_token
|
||||
ContinuationToken=response.get('NextContinuationToken')
|
||||
)
|
||||
|
||||
# Detailed listing with compression stats (slower, only for analytics)
|
||||
@@ -265,7 +264,7 @@ class DeltaGliderClient:
|
||||
"is_truncated": False,
|
||||
}
|
||||
|
||||
# Convert to ObjectInfo objects with smart metadata fetching
|
||||
# Convert to boto3-compatible S3Object dicts
|
||||
contents = []
|
||||
for obj in result.get("objects", []):
|
||||
# Skip reference.bin files (internal files, never exposed to users)
|
||||
@@ -280,20 +279,21 @@ class DeltaGliderClient:
|
||||
if is_delta:
|
||||
display_key = display_key[:-6] # Remove .delta suffix
|
||||
|
||||
# Create object info with basic data (no HEAD request)
|
||||
info = ObjectInfo(
|
||||
key=display_key, # Use cleaned key without .delta
|
||||
size=obj["size"],
|
||||
last_modified=obj.get("last_modified", ""),
|
||||
etag=obj.get("etag"),
|
||||
storage_class=obj.get("storage_class", "STANDARD"),
|
||||
# DeltaGlider fields
|
||||
original_size=obj["size"], # For non-delta, original = stored
|
||||
compressed_size=obj["size"],
|
||||
is_delta=is_delta,
|
||||
compression_ratio=0.0 if not is_delta else None,
|
||||
reference_key=None,
|
||||
)
|
||||
# Create boto3-compatible S3Object dict
|
||||
s3_obj: dict[str, Any] = {
|
||||
"Key": display_key, # Use cleaned key without .delta
|
||||
"Size": obj["size"],
|
||||
"LastModified": obj.get("last_modified", ""),
|
||||
"ETag": obj.get("etag"),
|
||||
"StorageClass": obj.get("storage_class", "STANDARD"),
|
||||
}
|
||||
|
||||
# Add DeltaGlider metadata in optional Metadata field
|
||||
deltaglider_metadata: dict[str, str] = {
|
||||
"deltaglider-is-delta": str(is_delta).lower(),
|
||||
"deltaglider-original-size": str(obj["size"]),
|
||||
"deltaglider-compression-ratio": "0.0" if not is_delta else "unknown",
|
||||
}
|
||||
|
||||
# SMART METADATA FETCHING:
|
||||
# 1. NEVER fetch metadata for non-delta files (no point)
|
||||
@@ -304,28 +304,45 @@ class DeltaGliderClient:
|
||||
if obj_head and obj_head.metadata:
|
||||
metadata = obj_head.metadata
|
||||
# Update with actual compression stats
|
||||
info.original_size = int(metadata.get("file_size", obj["size"]))
|
||||
info.compression_ratio = float(metadata.get("compression_ratio", 0.0))
|
||||
info.reference_key = metadata.get("ref_key")
|
||||
original_size = int(metadata.get("file_size", obj["size"]))
|
||||
compression_ratio = float(metadata.get("compression_ratio", 0.0))
|
||||
reference_key = metadata.get("ref_key")
|
||||
|
||||
deltaglider_metadata["deltaglider-original-size"] = str(original_size)
|
||||
deltaglider_metadata["deltaglider-compression-ratio"] = str(compression_ratio)
|
||||
if reference_key:
|
||||
deltaglider_metadata["deltaglider-reference-key"] = reference_key
|
||||
except Exception as e:
|
||||
# Log but don't fail the listing
|
||||
self.service.logger.debug(f"Failed to fetch metadata for {obj['key']}: {e}")
|
||||
|
||||
contents.append(info)
|
||||
s3_obj["Metadata"] = deltaglider_metadata
|
||||
contents.append(s3_obj)
|
||||
|
||||
# Build response with pagination support
|
||||
response = ListObjectsResponse(
|
||||
name=Bucket,
|
||||
prefix=Prefix,
|
||||
delimiter=Delimiter,
|
||||
max_keys=MaxKeys,
|
||||
contents=contents,
|
||||
common_prefixes=[{"Prefix": p} for p in result.get("common_prefixes", [])],
|
||||
is_truncated=result.get("is_truncated", False),
|
||||
next_continuation_token=result.get("next_continuation_token"),
|
||||
continuation_token=ContinuationToken,
|
||||
key_count=len(contents),
|
||||
)
|
||||
# Build boto3-compatible response dict
|
||||
response: dict[str, Any] = {
|
||||
"Contents": contents,
|
||||
"Name": Bucket,
|
||||
"Prefix": Prefix,
|
||||
"KeyCount": len(contents),
|
||||
"MaxKeys": MaxKeys,
|
||||
}
|
||||
|
||||
# Add optional fields
|
||||
if Delimiter:
|
||||
response["Delimiter"] = Delimiter
|
||||
|
||||
common_prefixes = result.get("common_prefixes", [])
|
||||
if common_prefixes:
|
||||
response["CommonPrefixes"] = [{"Prefix": p} for p in common_prefixes]
|
||||
|
||||
if result.get("is_truncated"):
|
||||
response["IsTruncated"] = True
|
||||
if result.get("next_continuation_token"):
|
||||
response["NextContinuationToken"] = result["next_continuation_token"]
|
||||
|
||||
if ContinuationToken:
|
||||
response["ContinuationToken"] = ContinuationToken
|
||||
|
||||
return response
|
||||
|
||||
@@ -987,12 +1004,13 @@ class DeltaGliderClient:
|
||||
base_name = Path(filename).stem
|
||||
ext = Path(filename).suffix
|
||||
|
||||
for obj in response.contents:
|
||||
obj_base = Path(obj.key).stem
|
||||
obj_ext = Path(obj.key).suffix
|
||||
for obj in response["Contents"]:
|
||||
obj_key = obj["Key"]
|
||||
obj_base = Path(obj_key).stem
|
||||
obj_ext = Path(obj_key).suffix
|
||||
|
||||
# Skip delta files and references
|
||||
if obj.key.endswith(".delta") or obj.key.endswith("reference.bin"):
|
||||
if obj_key.endswith(".delta") or obj_key.endswith("reference.bin"):
|
||||
continue
|
||||
|
||||
score = 0.0
|
||||
@@ -1014,10 +1032,10 @@ class DeltaGliderClient:
|
||||
if score > 0.5:
|
||||
similar.append(
|
||||
{
|
||||
"Key": obj.key,
|
||||
"Size": obj.size,
|
||||
"Key": obj_key,
|
||||
"Size": obj["Size"],
|
||||
"Similarity": score,
|
||||
"LastModified": obj.last_modified,
|
||||
"LastModified": obj["LastModified"],
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1103,12 +1121,40 @@ class DeltaGliderClient:
|
||||
FetchMetadata=detailed_stats, # Only fetch metadata if detailed stats requested
|
||||
)
|
||||
|
||||
all_objects.extend(response.contents)
|
||||
# Extract S3Objects from response (with Metadata containing DeltaGlider info)
|
||||
for obj_dict in response["Contents"]:
|
||||
# Convert dict back to ObjectInfo for backward compatibility with stats calculation
|
||||
metadata = obj_dict.get("Metadata", {})
|
||||
# Parse compression ratio safely (handle "unknown" value)
|
||||
compression_ratio_str = metadata.get("deltaglider-compression-ratio", "0.0")
|
||||
try:
|
||||
compression_ratio = (
|
||||
float(compression_ratio_str)
|
||||
if compression_ratio_str != "unknown"
|
||||
else 0.0
|
||||
)
|
||||
except ValueError:
|
||||
compression_ratio = 0.0
|
||||
|
||||
if not response.is_truncated:
|
||||
all_objects.append(
|
||||
ObjectInfo(
|
||||
key=obj_dict["Key"],
|
||||
size=obj_dict["Size"],
|
||||
last_modified=obj_dict.get("LastModified", ""),
|
||||
etag=obj_dict.get("ETag"),
|
||||
storage_class=obj_dict.get("StorageClass", "STANDARD"),
|
||||
original_size=int(metadata.get("deltaglider-original-size", obj_dict["Size"])),
|
||||
compressed_size=obj_dict["Size"],
|
||||
is_delta=metadata.get("deltaglider-is-delta", "false") == "true",
|
||||
compression_ratio=compression_ratio,
|
||||
reference_key=metadata.get("deltaglider-reference-key"),
|
||||
)
|
||||
)
|
||||
|
||||
if not response.get("IsTruncated"):
|
||||
break
|
||||
|
||||
continuation_token = response.next_continuation_token
|
||||
continuation_token = response.get("NextContinuationToken")
|
||||
|
||||
# Calculate statistics
|
||||
total_size = 0
|
||||
|
||||
@@ -10,7 +10,6 @@ This allows DeltaGlider to be a true drop-in replacement for boto3.s3.Client.
|
||||
from datetime import datetime
|
||||
from typing import Any, Literal, NotRequired, TypedDict
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# S3 Object Types
|
||||
# ============================================================================
|
||||
|
||||
@@ -10,7 +10,6 @@ from deltaglider import create_client
|
||||
from deltaglider.client import (
|
||||
BucketStats,
|
||||
CompressionEstimate,
|
||||
ListObjectsResponse,
|
||||
ObjectInfo,
|
||||
)
|
||||
|
||||
@@ -279,27 +278,35 @@ class TestBoto3Compatibility:
|
||||
assert response["ContentLength"] == len(content)
|
||||
|
||||
def test_list_objects(self, client):
|
||||
"""Test list_objects with various options."""
|
||||
"""Test list_objects with various options (boto3-compatible dict response)."""
|
||||
# List all objects (default: FetchMetadata=False)
|
||||
response = client.list_objects(Bucket="test-bucket")
|
||||
|
||||
assert isinstance(response, ListObjectsResponse)
|
||||
assert response.key_count > 0
|
||||
assert len(response.contents) > 0
|
||||
# Response is now a boto3-compatible dict (not ListObjectsResponse)
|
||||
assert isinstance(response, dict)
|
||||
assert response["KeyCount"] > 0
|
||||
assert len(response["Contents"]) > 0
|
||||
|
||||
# Verify S3Object structure
|
||||
for obj in response["Contents"]:
|
||||
assert "Key" in obj
|
||||
assert "Size" in obj
|
||||
assert "LastModified" in obj
|
||||
assert "Metadata" in obj # DeltaGlider metadata
|
||||
|
||||
# Test with FetchMetadata=True (should only affect delta files)
|
||||
response_with_metadata = client.list_objects(Bucket="test-bucket", FetchMetadata=True)
|
||||
assert isinstance(response_with_metadata, ListObjectsResponse)
|
||||
assert response_with_metadata.key_count > 0
|
||||
assert isinstance(response_with_metadata, dict)
|
||||
assert response_with_metadata["KeyCount"] > 0
|
||||
|
||||
def test_list_objects_with_delimiter(self, client):
|
||||
"""Test list_objects with delimiter for folder simulation."""
|
||||
"""Test list_objects with delimiter for folder simulation (boto3-compatible dict response)."""
|
||||
response = client.list_objects(Bucket="test-bucket", Prefix="", Delimiter="/")
|
||||
|
||||
# Should have common prefixes for folders
|
||||
assert len(response.common_prefixes) > 0
|
||||
assert {"Prefix": "folder1/"} in response.common_prefixes
|
||||
assert {"Prefix": "folder2/"} in response.common_prefixes
|
||||
assert len(response.get("CommonPrefixes", [])) > 0
|
||||
assert {"Prefix": "folder1/"} in response["CommonPrefixes"]
|
||||
assert {"Prefix": "folder2/"} in response["CommonPrefixes"]
|
||||
|
||||
def test_delete_object(self, client):
|
||||
"""Test delete_object."""
|
||||
|
||||
@@ -53,8 +53,11 @@ class TestSDKFiltering:
|
||||
client = DeltaGliderClient(service)
|
||||
response = client.list_objects(Bucket="test-bucket", Prefix="releases/")
|
||||
|
||||
# Response is now a boto3-compatible dict
|
||||
contents = response["Contents"]
|
||||
|
||||
# Verify .delta suffix is stripped
|
||||
keys = [obj.key for obj in response.contents]
|
||||
keys = [obj["Key"] for obj in contents]
|
||||
assert "releases/app-v1.zip" in keys
|
||||
assert "releases/app-v2.zip" in keys
|
||||
assert "releases/README.md" in keys
|
||||
@@ -63,8 +66,12 @@ class TestSDKFiltering:
|
||||
for key in keys:
|
||||
assert not key.endswith(".delta"), f"Found .delta suffix in: {key}"
|
||||
|
||||
# Verify is_delta flag is set correctly
|
||||
delta_objects = [obj for obj in response.contents if obj.is_delta]
|
||||
# Verify is_delta flag is set correctly in Metadata
|
||||
delta_objects = [
|
||||
obj
|
||||
for obj in contents
|
||||
if obj.get("Metadata", {}).get("deltaglider-is-delta") == "true"
|
||||
]
|
||||
assert len(delta_objects) == 2
|
||||
|
||||
def test_list_objects_filters_reference_bin(self):
|
||||
@@ -106,15 +113,18 @@ class TestSDKFiltering:
|
||||
client = DeltaGliderClient(service)
|
||||
response = client.list_objects(Bucket="test-bucket", Prefix="releases/")
|
||||
|
||||
# Response is now a boto3-compatible dict
|
||||
contents = response["Contents"]
|
||||
|
||||
# Verify NO reference.bin files in output
|
||||
keys = [obj.key for obj in response.contents]
|
||||
keys = [obj["Key"] for obj in contents]
|
||||
for key in keys:
|
||||
assert not key.endswith("reference.bin"), f"Found reference.bin in: {key}"
|
||||
|
||||
# Should only have the app.zip (with .delta stripped)
|
||||
assert len(response.contents) == 1
|
||||
assert response.contents[0].key == "releases/app.zip"
|
||||
assert response.contents[0].is_delta is True
|
||||
assert len(contents) == 1
|
||||
assert contents[0]["Key"] == "releases/app.zip"
|
||||
assert contents[0].get("Metadata", {}).get("deltaglider-is-delta") == "true"
|
||||
|
||||
def test_list_objects_combined_filtering(self):
|
||||
"""Test filtering of both .delta and reference.bin together."""
|
||||
@@ -170,12 +180,15 @@ class TestSDKFiltering:
|
||||
client = DeltaGliderClient(service)
|
||||
response = client.list_objects(Bucket="test-bucket", Prefix="data/")
|
||||
|
||||
# Response is now a boto3-compatible dict
|
||||
contents = response["Contents"]
|
||||
|
||||
# Should filter out 2 reference.bin files
|
||||
# Should strip .delta from 3 files
|
||||
# Should keep 1 regular file as-is
|
||||
assert len(response.contents) == 4 # 3 deltas + 1 regular file
|
||||
assert len(contents) == 4 # 3 deltas + 1 regular file
|
||||
|
||||
keys = [obj.key for obj in response.contents]
|
||||
keys = [obj["Key"] for obj in contents]
|
||||
expected_keys = ["data/file1.zip", "data/file2.zip", "data/file3.txt", "data/sub/app.jar"]
|
||||
assert sorted(keys) == sorted(expected_keys)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user