mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-01-11 14:20:33 +01:00
updates
This commit is contained in:
@@ -1,28 +1,18 @@
|
||||
# boto3 Compatibility Vision
|
||||
|
||||
## Current State (v4.2.3)
|
||||
DeltaGlider is a drop-in replacement for boto3's S3 client. This document spells out what “drop-in”
|
||||
means in practice so new projects can adopt the SDK with confidence.
|
||||
|
||||
DeltaGlider currently uses custom dataclasses for responses:
|
||||
## Current State (v5.x and newer)
|
||||
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsResponse, ObjectInfo
|
||||
|
||||
client = create_client()
|
||||
response: ListObjectsResponse = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response.contents: # Custom field name
|
||||
print(f"{obj.key}: {obj.size}") # Custom ObjectInfo dataclass
|
||||
```
|
||||
|
||||
**Problems:**
|
||||
- ❌ Not a true drop-in replacement for boto3
|
||||
- ❌ Users need to learn DeltaGlider-specific types
|
||||
- ❌ Can't use with tools expecting boto3 responses
|
||||
- ❌ Different API surface (`.contents` vs `['Contents']`)
|
||||
|
||||
## Target State (v5.0.0)
|
||||
|
||||
DeltaGlider should return native boto3-compatible dicts with TypedDict type hints:
|
||||
- `DeltaGliderClient` methods such as `list_objects`, `put_object`, `get_object`, `delete_object`,
|
||||
`delete_objects`, `head_object`, etc. return **boto3-compatible dicts**.
|
||||
- TypedDict aliases in `deltaglider.types` (e.g. `ListObjectsV2Response`, `PutObjectResponse`) give
|
||||
IDE/type-checking support without importing boto3.
|
||||
- DeltaGlider-specific metadata lives inside standard boto3 fields (typically `Metadata`), so tools
|
||||
that ignore those keys see the exact same structures as they would from boto3.
|
||||
- Tests and documentation exercise and describe the boto3-style responses (`response['Contents']`
|
||||
instead of `response.contents`).
|
||||
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsV2Response
|
||||
@@ -30,239 +20,35 @@ from deltaglider import create_client, ListObjectsV2Response
|
||||
client = create_client()
|
||||
response: ListObjectsV2Response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']: # boto3-compatible!
|
||||
print(f"{obj['Key']}: {obj['Size']}") # Works exactly like boto3
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- ✅ **True drop-in replacement** - swap `boto3.client('s3')` with `create_client()`
|
||||
- ✅ **No learning curve** - if you know boto3, you know DeltaGlider
|
||||
- ✅ **Tool compatibility** - works with any library expecting boto3 types
|
||||
- ✅ **Type safety** - TypedDict provides IDE autocomplete without boto3 import
|
||||
- ✅ **Zero runtime overhead** - TypedDict compiles to plain dict
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Type Definitions ✅ (DONE)
|
||||
|
||||
Created `deltaglider/types.py` with comprehensive TypedDict definitions:
|
||||
|
||||
```python
|
||||
from typing import TypedDict, NotRequired
|
||||
from datetime import datetime
|
||||
|
||||
class S3Object(TypedDict):
|
||||
Key: str
|
||||
Size: int
|
||||
LastModified: datetime
|
||||
ETag: NotRequired[str]
|
||||
StorageClass: NotRequired[str]
|
||||
|
||||
class ListObjectsV2Response(TypedDict):
|
||||
Contents: list[S3Object]
|
||||
CommonPrefixes: NotRequired[list[dict[str, str]]]
|
||||
IsTruncated: NotRequired[bool]
|
||||
NextContinuationToken: NotRequired[str]
|
||||
```
|
||||
|
||||
**Key insight:** TypedDict provides type safety at development time but compiles to plain `dict` at runtime!
|
||||
|
||||
### Phase 2: Refactor Client Methods (TODO)
|
||||
|
||||
Update all client methods to return boto3-compatible dicts:
|
||||
|
||||
#### `list_objects()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def list_objects(...) -> ListObjectsResponse: # Custom dataclass
|
||||
return ListObjectsResponse(
|
||||
name=bucket,
|
||||
contents=[ObjectInfo(...), ...] # Custom dataclass
|
||||
)
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def list_objects(...) -> ListObjectsV2Response: # TypedDict
|
||||
return {
|
||||
'Contents': [
|
||||
{
|
||||
'Key': 'file.zip', # .delta suffix already stripped
|
||||
'Size': 1024,
|
||||
'LastModified': datetime(...),
|
||||
'ETag': '"abc123"',
|
||||
}
|
||||
],
|
||||
'CommonPrefixes': [{'Prefix': 'dir/'}],
|
||||
'IsTruncated': False,
|
||||
}
|
||||
```
|
||||
|
||||
**Key changes:**
|
||||
1. Return plain dict instead of custom dataclass
|
||||
2. Use boto3 field names: `Contents` not `contents`, `Key` not `key`
|
||||
3. Strip `.delta` suffix transparently (already done)
|
||||
4. Hide `reference.bin` files (already done)
|
||||
|
||||
#### `put_object()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def put_object(...) -> dict[str, Any]:
|
||||
return {
|
||||
"ETag": etag,
|
||||
"VersionId": None,
|
||||
"DeltaGliderInfo": {...} # Custom field
|
||||
}
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def put_object(...) -> PutObjectResponse: # TypedDict
|
||||
return {
|
||||
'ETag': etag,
|
||||
'ResponseMetadata': {'HTTPStatusCode': 200},
|
||||
# DeltaGlider metadata goes in Metadata field
|
||||
'Metadata': {
|
||||
'deltaglider-is-delta': 'true',
|
||||
'deltaglider-compression-ratio': '0.99'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `get_object()`
|
||||
|
||||
**Before:**
|
||||
```python
|
||||
def get_object(...) -> dict[str, Any]:
|
||||
return {
|
||||
"Body": data,
|
||||
"ContentLength": len(data),
|
||||
"DeltaGliderInfo": {...} # Custom field
|
||||
}
|
||||
```
|
||||
|
||||
**After:**
|
||||
```python
|
||||
def get_object(...) -> GetObjectResponse: # TypedDict
|
||||
return {
|
||||
'Body': data, # bytes, not StreamingBody (simpler!)
|
||||
'ContentLength': len(data),
|
||||
'LastModified': datetime(...),
|
||||
'ETag': '"abc123"',
|
||||
'Metadata': { # DeltaGlider metadata here
|
||||
'deltaglider-is-delta': 'true'
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `delete_object()`, `delete_objects()`, `head_object()`, etc.
|
||||
|
||||
All follow the same pattern: return boto3-compatible dicts with TypedDict hints.
|
||||
|
||||
### Phase 3: Backward Compatibility (TODO)
|
||||
|
||||
Keep old dataclasses for 1-2 versions with deprecation warnings:
|
||||
|
||||
```python
|
||||
class ListObjectsResponse:
|
||||
"""DEPRECATED: Use dict responses with ListObjectsV2Response type hint.
|
||||
|
||||
This will be removed in v6.0.0. Update your code:
|
||||
|
||||
Before:
|
||||
response.contents[0].key
|
||||
|
||||
After:
|
||||
response['Contents'][0]['Key']
|
||||
"""
|
||||
def __init__(self, data: dict):
|
||||
warnings.warn(
|
||||
"ListObjectsResponse dataclass is deprecated. "
|
||||
"Use dict responses with ListObjectsV2Response type hint.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2
|
||||
)
|
||||
self._data = data
|
||||
|
||||
@property
|
||||
def contents(self):
|
||||
return [ObjectInfo(obj) for obj in self._data.get('Contents', [])]
|
||||
```
|
||||
|
||||
### Phase 4: Update Documentation (TODO)
|
||||
|
||||
1. Update all examples to use dict responses
|
||||
2. Add migration guide from v4.x to v5.0
|
||||
3. Update BOTO3_COMPATIBILITY.md
|
||||
4. Add "Drop-in Replacement" marketing language
|
||||
|
||||
### Phase 5: Update Tests (TODO)
|
||||
|
||||
Convert all tests from:
|
||||
```python
|
||||
assert response.contents[0].key == "file.zip"
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
assert response['Contents'][0]['Key'] == "file.zip"
|
||||
```
|
||||
|
||||
## Migration Guide (for users)
|
||||
|
||||
### v4.x → v5.0
|
||||
|
||||
**Old code (v4.x):**
|
||||
```python
|
||||
from deltaglider import create_client
|
||||
|
||||
client = create_client()
|
||||
response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response.contents: # Dataclass attribute
|
||||
print(f"{obj.key}: {obj.size}") # Dataclass attributes
|
||||
```
|
||||
|
||||
**New code (v5.0):**
|
||||
```python
|
||||
from deltaglider import create_client, ListObjectsV2Response
|
||||
|
||||
client = create_client()
|
||||
response: ListObjectsV2Response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']: # Dict key (boto3-compatible)
|
||||
print(f"{obj['Key']}: {obj['Size']}") # Dict keys (boto3-compatible)
|
||||
```
|
||||
|
||||
**Or even simpler - no type hint needed:**
|
||||
```python
|
||||
client = create_client()
|
||||
response = client.list_objects(Bucket='my-bucket')
|
||||
|
||||
for obj in response['Contents']:
|
||||
print(f"{obj['Key']}: {obj['Size']}")
|
||||
print(f"{obj['Key']}: {obj['Size']} bytes")
|
||||
```
|
||||
|
||||
## Key Design Points
|
||||
|
||||
- **TypedDict everywhere** – `put_object`, `get_object`, `list_objects`, `delete_object`, etc.
|
||||
return the same shapes boto3 does. Use the provided aliases (`ListObjectsV2Response`,
|
||||
`PutObjectResponse`, …) for IDE/completion help.
|
||||
- **Metadata namespace** – DeltaGlider-specific flags such as `deltaglider-is-delta` live under the
|
||||
regular `Metadata` key so every response remains valid boto3 output.
|
||||
- **No shims required** – responses are plain dicts. If you already know boto3, you already know how
|
||||
to consume DeltaGlider outputs.
|
||||
|
||||
## Benefits Summary
|
||||
|
||||
### For Users
|
||||
- **Zero learning curve** - if you know boto3, you're done
|
||||
- **Drop-in replacement** - literally change one line (client creation)
|
||||
- **Type safety** - TypedDict provides autocomplete without boto3 dependency
|
||||
- **Tool compatibility** - works with all boto3-compatible libraries
|
||||
- **Zero learning curve** – identical data structures to boto3.
|
||||
- **Tooling compatibility** – works with any boto3-aware tool or library.
|
||||
- **Type safety** – TypedDicts provide IDE autocomplete even without boto3 installed.
|
||||
|
||||
### For DeltaGlider
|
||||
- **Simpler codebase** - no custom dataclasses to maintain
|
||||
- **Better marketing** - true "drop-in replacement" claim
|
||||
- **Easier testing** - test against boto3 behavior directly
|
||||
- **Future-proof** - if boto3 adds fields, users can access them immediately
|
||||
- **Cleaner internals** – no custom dataclasses to maintain.
|
||||
- **Simpler docs/tests** – examples mirror boto3 verbatim.
|
||||
- **Marketing accuracy** – "drop-in replacement" is now literal.
|
||||
|
||||
## Technical Details
|
||||
|
||||
### How TypedDict Works
|
||||
|
||||
### TypedDict refresher
|
||||
```python
|
||||
from typing import TypedDict
|
||||
|
||||
@@ -270,47 +56,29 @@ class MyResponse(TypedDict):
|
||||
Key: str
|
||||
Size: int
|
||||
|
||||
# At runtime, this is just a dict!
|
||||
response: MyResponse = {'Key': 'file.zip', 'Size': 1024}
|
||||
print(type(response)) # <class 'dict'>
|
||||
|
||||
# But mypy and IDEs understand the structure
|
||||
response['Key'] # ✅ Autocomplete works!
|
||||
response['Nonexistent'] # ❌ Mypy error: Key 'Nonexistent' not found
|
||||
resp: MyResponse = {'Key': 'file.zip', 'Size': 1024}
|
||||
print(type(resp)) # <class 'dict'>
|
||||
```
|
||||
At runtime the structure is still a plain `dict`, but static type-checkers understand the shape.
|
||||
|
||||
### DeltaGlider-Specific Metadata
|
||||
|
||||
Store in standard boto3 `Metadata` field:
|
||||
### DeltaGlider Metadata
|
||||
|
||||
Delta-specific fields live inside the standard `Metadata` map. Example list_objects entry:
|
||||
```python
|
||||
{
|
||||
'Key': 'file.zip',
|
||||
'Size': 1024,
|
||||
'Metadata': {
|
||||
# DeltaGlider-specific fields (prefixed for safety)
|
||||
'deltaglider-is-delta': 'true',
|
||||
'deltaglider-compression-ratio': '0.99',
|
||||
'deltaglider-original-size': '100000',
|
||||
'deltaglider-reference-key': 'releases/v1.0.0/reference.bin',
|
||||
'deltaglider-original-size': '50000000',
|
||||
}
|
||||
}
|
||||
```
|
||||
These keys are namespaced (`deltaglider-...`) so they are safe to ignore if not needed.
|
||||
|
||||
This is:
|
||||
- ✅ boto3-compatible (Metadata is a standard field)
|
||||
- ✅ Namespaced (deltaglider- prefix prevents conflicts)
|
||||
- ✅ Optional (tools can ignore it)
|
||||
- ✅ Type-safe (Metadata: NotRequired[dict[str, str]])
|
||||
## Status Snapshot
|
||||
|
||||
## Status
|
||||
|
||||
- ✅ **Phase 1:** TypedDict definitions created
|
||||
- ✅ **Phase 2:** `list_objects()` refactored to return boto3-compatible dict
|
||||
- ⏳ **Phase 3:** Refactor remaining methods (`put_object`, `get_object`, etc.) (TODO)
|
||||
- ⏳ **Phase 4:** Backward compatibility with deprecation warnings (TODO)
|
||||
- ⏳ **Phase 5:** Documentation updates (TODO)
|
||||
- ⏳ **Phase 6:** Full test coverage updates (PARTIAL - list_objects tests done)
|
||||
|
||||
**Current:** v4.2.3+ (Phase 2 complete - `list_objects()` boto3-compatible)
|
||||
**Target:** v5.0.0 release (all phases complete)
|
||||
- ✅ TypedDict builders are used everywhere (`build_list_objects_response`, etc.).
|
||||
- ✅ Tests assert boto3-style dict access (`response['Contents']`).
|
||||
- ✅ Documentation (README, SDK docs, examples) shows the boto3 syntax.
|
||||
|
||||
@@ -25,6 +25,7 @@ DeltaGlider's smart `list_objects` method eliminates the N+1 query problem by in
|
||||
|
||||
```python
|
||||
from deltaglider import create_client
|
||||
from deltaglider.client_models import BucketStats
|
||||
import time
|
||||
|
||||
client = create_client()
|
||||
@@ -299,15 +300,18 @@ detailed_compression_report('releases')
|
||||
|
||||
```python
|
||||
def list_buckets_with_stats():
|
||||
"""List all buckets and show cached statistics if available."""
|
||||
"""List buckets and augment with cached stats fetched on demand."""
|
||||
|
||||
# Pre-fetch stats for important buckets
|
||||
important_buckets = ['releases', 'backups']
|
||||
for bucket_name in important_buckets:
|
||||
client.get_bucket_stats(bucket_name, mode='detailed')
|
||||
|
||||
# List all buckets (includes cached stats automatically)
|
||||
response = client.list_buckets()
|
||||
stats_cache: dict[str, BucketStats | None] = {}
|
||||
|
||||
def ensure_stats(bucket_name: str) -> BucketStats | None:
|
||||
if bucket_name not in stats_cache:
|
||||
try:
|
||||
stats_cache[bucket_name] = client.get_bucket_stats(bucket_name)
|
||||
except Exception:
|
||||
stats_cache[bucket_name] = None
|
||||
return stats_cache[bucket_name]
|
||||
|
||||
print("All Buckets:")
|
||||
print(f"{'Name':<30} {'Objects':<10} {'Compression':<15} {'Cached'}")
|
||||
@@ -315,13 +319,12 @@ def list_buckets_with_stats():
|
||||
|
||||
for bucket in response['Buckets']:
|
||||
name = bucket['Name']
|
||||
stats = ensure_stats(name)
|
||||
|
||||
# Check if stats are cached
|
||||
if 'DeltaGliderStats' in bucket:
|
||||
stats = bucket['DeltaGliderStats']
|
||||
obj_count = f"{stats['ObjectCount']:,}"
|
||||
compression = f"{stats['AverageCompressionRatio']:.1%}"
|
||||
cached = "✓ (detailed)" if stats['Detailed'] else "✓ (quick)"
|
||||
if stats:
|
||||
obj_count = f"{stats.object_count:,}"
|
||||
compression = f"{stats.average_compression_ratio:.1%}"
|
||||
cached = "✓ (S3 cache)"
|
||||
else:
|
||||
obj_count = "N/A"
|
||||
compression = "N/A"
|
||||
|
||||
Reference in New Issue
Block a user