mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-03-30 22:12:06 +02:00
feat: Implement boto3-compatible dict responses (Phase 2)
Changed list_objects() to return a boto3-compatible dict instead of the custom
ListObjectsResponse dataclass. This makes DeltaGlider a true drop-in replacement
for boto3.client('s3').
Changes:
- list_objects() now returns dict[str, Any] with boto3-compatible structure:
* Contents: list[S3Object] (dict with Key, Size, LastModified, etc.)
* CommonPrefixes: list[dict] for folder simulation
* IsTruncated, NextContinuationToken for pagination
* DeltaGlider metadata stored in an optional per-object Metadata field
- Updated all client methods that use list_objects() to work with dict responses:
* find_similar_files()
* get_bucket_stats()
* CLI ls command
- Updated all tests to use dict access (response['Contents']) instead of
dataclass access (response.contents)
- Updated examples/boto3_compatible_types.py to demonstrate usage
- DeltaGlider-specific metadata now in Metadata field:
* deltaglider-is-delta: "true"/"false"
* deltaglider-original-size: string number
* deltaglider-compression-ratio: string number or "unknown"
* deltaglider-reference-key: optional string
Benefits:
- True drop-in replacement for boto3
- No learning curve - if you know boto3, you know DeltaGlider
- Works with any boto3-compatible library
- Type safety through TypedDict (no boto3 import needed)
- Zero runtime overhead (a TypedDict value is a plain dict at runtime)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -259,18 +259,18 @@ def ls(
|
||||
return f"{size_float:.1f}P"
|
||||
|
||||
# List objects using SDK (automatically filters .delta and reference.bin)
|
||||
from deltaglider.client import DeltaGliderClient, ListObjectsResponse
|
||||
from deltaglider.client import DeltaGliderClient
|
||||
|
||||
client = DeltaGliderClient(service)
|
||||
dg_response: ListObjectsResponse = client.list_objects(
|
||||
dg_response = client.list_objects(
|
||||
Bucket=bucket_name, Prefix=prefix_str, MaxKeys=10000, Delimiter="/" if not recursive else ""
|
||||
)
|
||||
objects = dg_response.contents
|
||||
objects = dg_response["Contents"]
|
||||
|
||||
# Filter by recursive flag
|
||||
if not recursive:
|
||||
# Show common prefixes (subdirectories) from S3 response
|
||||
for common_prefix in dg_response.common_prefixes:
|
||||
for common_prefix in dg_response.get("CommonPrefixes", []):
|
||||
prefix_path = common_prefix.get("Prefix", "")
|
||||
# Show only the directory name, not the full path
|
||||
if prefix_str:
|
||||
@@ -283,7 +283,8 @@ def ls(
|
||||
# Only show files at current level (not in subdirectories)
|
||||
filtered_objects = []
|
||||
for obj in objects:
|
||||
rel_path = obj.key[len(prefix_str):] if prefix_str else obj.key
|
||||
obj_key = obj["Key"]
|
||||
rel_path = obj_key[len(prefix_str):] if prefix_str else obj_key
|
||||
# Only include if it's a direct child (no / in relative path)
|
||||
if "/" not in rel_path and rel_path:
|
||||
filtered_objects.append(obj)
|
||||
@@ -294,23 +295,24 @@ def ls(
|
||||
total_count = 0
|
||||
|
||||
for obj in objects:
|
||||
total_size += obj.size
|
||||
total_size += obj["Size"]
|
||||
total_count += 1
|
||||
|
||||
# Format the display
|
||||
size_str = format_bytes(obj.size)
|
||||
size_str = format_bytes(obj["Size"])
|
||||
# last_modified is a string from SDK, parse it if needed
|
||||
if isinstance(obj.last_modified, str):
|
||||
last_modified = obj.get("LastModified", "")
|
||||
if isinstance(last_modified, str):
|
||||
# Already a string, extract date portion
|
||||
date_str = obj.last_modified[:19].replace("T", " ")
|
||||
date_str = last_modified[:19].replace("T", " ")
|
||||
else:
|
||||
date_str = obj.last_modified.strftime("%Y-%m-%d %H:%M:%S")
|
||||
date_str = last_modified.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
# Show only the filename relative to current prefix (like AWS CLI)
|
||||
if prefix_str:
|
||||
display_key = obj.key[len(prefix_str):]
|
||||
display_key = obj["Key"][len(prefix_str):]
|
||||
else:
|
||||
display_key = obj.key
|
||||
display_key = obj["Key"]
|
||||
|
||||
click.echo(f"{date_str} {size_str:>10} {display_key}")
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from .client_delete_helpers import delete_with_delta_suffix
|
||||
from .client_models import (
|
||||
BucketStats,
|
||||
CompressionEstimate,
|
||||
ListObjectsResponse,
|
||||
ObjectInfo,
|
||||
UploadSummary,
|
||||
)
|
||||
@@ -197,7 +196,7 @@ class DeltaGliderClient:
|
||||
StartAfter: str | None = None,
|
||||
FetchMetadata: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> ListObjectsResponse:
|
||||
) -> dict[str, Any]:
|
||||
"""List objects in bucket with smart metadata fetching.
|
||||
|
||||
This method optimizes performance by:
|
||||
@@ -227,11 +226,11 @@ class DeltaGliderClient:
|
||||
# Fast listing for UI display (no metadata)
|
||||
response = client.list_objects(Bucket='releases', MaxKeys=100)
|
||||
|
||||
# Paginated listing
|
||||
# Paginated listing (boto3-compatible dict response)
|
||||
response = client.list_objects(
|
||||
Bucket='releases',
|
||||
MaxKeys=50,
|
||||
ContinuationToken=response.next_continuation_token
|
||||
ContinuationToken=response.get('NextContinuationToken')
|
||||
)
|
||||
|
||||
# Detailed listing with compression stats (slower, only for analytics)
|
||||
@@ -265,7 +264,7 @@ class DeltaGliderClient:
|
||||
"is_truncated": False,
|
||||
}
|
||||
|
||||
# Convert to ObjectInfo objects with smart metadata fetching
|
||||
# Convert to boto3-compatible S3Object dicts
|
||||
contents = []
|
||||
for obj in result.get("objects", []):
|
||||
# Skip reference.bin files (internal files, never exposed to users)
|
||||
@@ -280,20 +279,21 @@ class DeltaGliderClient:
|
||||
if is_delta:
|
||||
display_key = display_key[:-6] # Remove .delta suffix
|
||||
|
||||
# Create object info with basic data (no HEAD request)
|
||||
info = ObjectInfo(
|
||||
key=display_key, # Use cleaned key without .delta
|
||||
size=obj["size"],
|
||||
last_modified=obj.get("last_modified", ""),
|
||||
etag=obj.get("etag"),
|
||||
storage_class=obj.get("storage_class", "STANDARD"),
|
||||
# DeltaGlider fields
|
||||
original_size=obj["size"], # For non-delta, original = stored
|
||||
compressed_size=obj["size"],
|
||||
is_delta=is_delta,
|
||||
compression_ratio=0.0 if not is_delta else None,
|
||||
reference_key=None,
|
||||
)
|
||||
# Create boto3-compatible S3Object dict
|
||||
s3_obj: dict[str, Any] = {
|
||||
"Key": display_key, # Use cleaned key without .delta
|
||||
"Size": obj["size"],
|
||||
"LastModified": obj.get("last_modified", ""),
|
||||
"ETag": obj.get("etag"),
|
||||
"StorageClass": obj.get("storage_class", "STANDARD"),
|
||||
}
|
||||
|
||||
# Add DeltaGlider metadata in optional Metadata field
|
||||
deltaglider_metadata: dict[str, str] = {
|
||||
"deltaglider-is-delta": str(is_delta).lower(),
|
||||
"deltaglider-original-size": str(obj["size"]),
|
||||
"deltaglider-compression-ratio": "0.0" if not is_delta else "unknown",
|
||||
}
|
||||
|
||||
# SMART METADATA FETCHING:
|
||||
# 1. NEVER fetch metadata for non-delta files (no point)
|
||||
@@ -304,28 +304,45 @@ class DeltaGliderClient:
|
||||
if obj_head and obj_head.metadata:
|
||||
metadata = obj_head.metadata
|
||||
# Update with actual compression stats
|
||||
info.original_size = int(metadata.get("file_size", obj["size"]))
|
||||
info.compression_ratio = float(metadata.get("compression_ratio", 0.0))
|
||||
info.reference_key = metadata.get("ref_key")
|
||||
original_size = int(metadata.get("file_size", obj["size"]))
|
||||
compression_ratio = float(metadata.get("compression_ratio", 0.0))
|
||||
reference_key = metadata.get("ref_key")
|
||||
|
||||
deltaglider_metadata["deltaglider-original-size"] = str(original_size)
|
||||
deltaglider_metadata["deltaglider-compression-ratio"] = str(compression_ratio)
|
||||
if reference_key:
|
||||
deltaglider_metadata["deltaglider-reference-key"] = reference_key
|
||||
except Exception as e:
|
||||
# Log but don't fail the listing
|
||||
self.service.logger.debug(f"Failed to fetch metadata for {obj['key']}: {e}")
|
||||
|
||||
contents.append(info)
|
||||
s3_obj["Metadata"] = deltaglider_metadata
|
||||
contents.append(s3_obj)
|
||||
|
||||
# Build response with pagination support
|
||||
response = ListObjectsResponse(
|
||||
name=Bucket,
|
||||
prefix=Prefix,
|
||||
delimiter=Delimiter,
|
||||
max_keys=MaxKeys,
|
||||
contents=contents,
|
||||
common_prefixes=[{"Prefix": p} for p in result.get("common_prefixes", [])],
|
||||
is_truncated=result.get("is_truncated", False),
|
||||
next_continuation_token=result.get("next_continuation_token"),
|
||||
continuation_token=ContinuationToken,
|
||||
key_count=len(contents),
|
||||
)
|
||||
# Build boto3-compatible response dict
|
||||
response: dict[str, Any] = {
|
||||
"Contents": contents,
|
||||
"Name": Bucket,
|
||||
"Prefix": Prefix,
|
||||
"KeyCount": len(contents),
|
||||
"MaxKeys": MaxKeys,
|
||||
}
|
||||
|
||||
# Add optional fields
|
||||
if Delimiter:
|
||||
response["Delimiter"] = Delimiter
|
||||
|
||||
common_prefixes = result.get("common_prefixes", [])
|
||||
if common_prefixes:
|
||||
response["CommonPrefixes"] = [{"Prefix": p} for p in common_prefixes]
|
||||
|
||||
if result.get("is_truncated"):
|
||||
response["IsTruncated"] = True
|
||||
if result.get("next_continuation_token"):
|
||||
response["NextContinuationToken"] = result["next_continuation_token"]
|
||||
|
||||
if ContinuationToken:
|
||||
response["ContinuationToken"] = ContinuationToken
|
||||
|
||||
return response
|
||||
|
||||
@@ -987,12 +1004,13 @@ class DeltaGliderClient:
|
||||
base_name = Path(filename).stem
|
||||
ext = Path(filename).suffix
|
||||
|
||||
for obj in response.contents:
|
||||
obj_base = Path(obj.key).stem
|
||||
obj_ext = Path(obj.key).suffix
|
||||
for obj in response["Contents"]:
|
||||
obj_key = obj["Key"]
|
||||
obj_base = Path(obj_key).stem
|
||||
obj_ext = Path(obj_key).suffix
|
||||
|
||||
# Skip delta files and references
|
||||
if obj.key.endswith(".delta") or obj.key.endswith("reference.bin"):
|
||||
if obj_key.endswith(".delta") or obj_key.endswith("reference.bin"):
|
||||
continue
|
||||
|
||||
score = 0.0
|
||||
@@ -1014,10 +1032,10 @@ class DeltaGliderClient:
|
||||
if score > 0.5:
|
||||
similar.append(
|
||||
{
|
||||
"Key": obj.key,
|
||||
"Size": obj.size,
|
||||
"Key": obj_key,
|
||||
"Size": obj["Size"],
|
||||
"Similarity": score,
|
||||
"LastModified": obj.last_modified,
|
||||
"LastModified": obj["LastModified"],
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1103,12 +1121,40 @@ class DeltaGliderClient:
|
||||
FetchMetadata=detailed_stats, # Only fetch metadata if detailed stats requested
|
||||
)
|
||||
|
||||
all_objects.extend(response.contents)
|
||||
# Extract S3Objects from response (with Metadata containing DeltaGlider info)
|
||||
for obj_dict in response["Contents"]:
|
||||
# Convert dict back to ObjectInfo for backward compatibility with stats calculation
|
||||
metadata = obj_dict.get("Metadata", {})
|
||||
# Parse compression ratio safely (handle "unknown" value)
|
||||
compression_ratio_str = metadata.get("deltaglider-compression-ratio", "0.0")
|
||||
try:
|
||||
compression_ratio = (
|
||||
float(compression_ratio_str)
|
||||
if compression_ratio_str != "unknown"
|
||||
else 0.0
|
||||
)
|
||||
except ValueError:
|
||||
compression_ratio = 0.0
|
||||
|
||||
if not response.is_truncated:
|
||||
all_objects.append(
|
||||
ObjectInfo(
|
||||
key=obj_dict["Key"],
|
||||
size=obj_dict["Size"],
|
||||
last_modified=obj_dict.get("LastModified", ""),
|
||||
etag=obj_dict.get("ETag"),
|
||||
storage_class=obj_dict.get("StorageClass", "STANDARD"),
|
||||
original_size=int(metadata.get("deltaglider-original-size", obj_dict["Size"])),
|
||||
compressed_size=obj_dict["Size"],
|
||||
is_delta=metadata.get("deltaglider-is-delta", "false") == "true",
|
||||
compression_ratio=compression_ratio,
|
||||
reference_key=metadata.get("deltaglider-reference-key"),
|
||||
)
|
||||
)
|
||||
|
||||
if not response.get("IsTruncated"):
|
||||
break
|
||||
|
||||
continuation_token = response.next_continuation_token
|
||||
continuation_token = response.get("NextContinuationToken")
|
||||
|
||||
# Calculate statistics
|
||||
total_size = 0
|
||||
|
||||
@@ -10,7 +10,6 @@ This allows DeltaGlider to be a true drop-in replacement for boto3.s3.Client.
|
||||
from datetime import datetime
|
||||
from typing import Any, Literal, NotRequired, TypedDict
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# S3 Object Types
|
||||
# ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user