diff --git a/BOTO3_COMPATIBILITY.md b/BOTO3_COMPATIBILITY.md new file mode 100644 index 0000000..4b9e50c --- /dev/null +++ b/BOTO3_COMPATIBILITY.md @@ -0,0 +1,225 @@ +# boto3 S3 Client Compatibility + +DeltaGlider implements a **subset** of boto3's S3 client API, focusing on the most commonly used operations. This is **not** a 100% drop-in replacement, but covers the core functionality needed for most use cases. + +## ✅ Implemented Methods (21 core methods) + +### Object Operations +- ✅ `put_object()` - Upload objects (with automatic delta compression) +- ✅ `get_object()` - Download objects (with automatic delta reconstruction) +- ✅ `delete_object()` - Delete single object +- ✅ `delete_objects()` - Delete multiple objects +- ✅ `head_object()` - Get object metadata +- ✅ `list_objects()` - List objects (list_objects_v2 compatible) +- ✅ `copy_object()` - Copy objects between locations + +### Bucket Operations +- ✅ `create_bucket()` - Create buckets +- ✅ `delete_bucket()` - Delete empty buckets +- ✅ `list_buckets()` - List all buckets + +### Presigned URLs +- ✅ `generate_presigned_url()` - Generate presigned URLs +- ✅ `generate_presigned_post()` - Generate presigned POST data + +### DeltaGlider Extensions +- ✅ `upload()` - Simple upload with S3 URL +- ✅ `download()` - Simple download with S3 URL +- ✅ `verify()` - Verify object integrity +- ✅ `upload_chunked()` - Upload with progress callback +- ✅ `upload_batch()` - Batch upload multiple files +- ✅ `download_batch()` - Batch download multiple files +- ✅ `estimate_compression()` - Estimate compression ratio +- ✅ `find_similar_files()` - Find similar files for delta reference +- ✅ `get_object_info()` - Get detailed object info with compression stats +- ✅ `get_bucket_stats()` - Get bucket statistics +- ✅ `delete_objects_recursive()` - Recursively delete objects + +## ❌ Not Implemented (80+ methods) + +### Multipart Upload +- ❌ `create_multipart_upload()` +- ❌ `upload_part()` +- ❌ `complete_multipart_upload()` +- ❌ 
`abort_multipart_upload()` +- ❌ `list_multipart_uploads()` +- ❌ `list_parts()` + +### Access Control (ACL) +- ❌ `get_bucket_acl()` +- ❌ `put_bucket_acl()` +- ❌ `get_object_acl()` +- ❌ `put_object_acl()` +- ❌ `get_public_access_block()` +- ❌ `put_public_access_block()` +- ❌ `delete_public_access_block()` + +### Bucket Configuration +- ❌ `get_bucket_location()` +- ❌ `get_bucket_versioning()` +- ❌ `put_bucket_versioning()` +- ❌ `get_bucket_logging()` +- ❌ `put_bucket_logging()` +- ❌ `get_bucket_website()` +- ❌ `put_bucket_website()` +- ❌ `delete_bucket_website()` +- ❌ `get_bucket_cors()` +- ❌ `put_bucket_cors()` +- ❌ `delete_bucket_cors()` +- ❌ `get_bucket_lifecycle_configuration()` +- ❌ `put_bucket_lifecycle_configuration()` +- ❌ `delete_bucket_lifecycle()` +- ❌ `get_bucket_policy()` +- ❌ `put_bucket_policy()` +- ❌ `delete_bucket_policy()` +- ❌ `get_bucket_encryption()` +- ❌ `put_bucket_encryption()` +- ❌ `delete_bucket_encryption()` +- ❌ `get_bucket_notification_configuration()` +- ❌ `put_bucket_notification_configuration()` +- ❌ `get_bucket_accelerate_configuration()` +- ❌ `put_bucket_accelerate_configuration()` +- ❌ `get_bucket_request_payment()` +- ❌ `put_bucket_request_payment()` +- ❌ `get_bucket_replication()` +- ❌ `put_bucket_replication()` +- ❌ `delete_bucket_replication()` + +### Tagging & Metadata +- ❌ `get_object_tagging()` +- ❌ `put_object_tagging()` +- ❌ `delete_object_tagging()` +- ❌ `get_bucket_tagging()` +- ❌ `put_bucket_tagging()` +- ❌ `delete_bucket_tagging()` + +### Advanced Features +- ❌ `restore_object()` - Glacier restore +- ❌ `select_object_content()` - S3 Select +- ❌ `get_object_torrent()` - BitTorrent +- ❌ `get_object_legal_hold()` - Object Lock +- ❌ `put_object_legal_hold()` +- ❌ `get_object_retention()` +- ❌ `put_object_retention()` +- ❌ `get_bucket_analytics_configuration()` +- ❌ `put_bucket_analytics_configuration()` +- ❌ `delete_bucket_analytics_configuration()` +- ❌ `list_bucket_analytics_configurations()` +- ❌ 
`get_bucket_metrics_configuration()` +- ❌ `put_bucket_metrics_configuration()` +- ❌ `delete_bucket_metrics_configuration()` +- ❌ `list_bucket_metrics_configurations()` +- ❌ `get_bucket_inventory_configuration()` +- ❌ `put_bucket_inventory_configuration()` +- ❌ `delete_bucket_inventory_configuration()` +- ❌ `list_bucket_inventory_configurations()` +- ❌ `get_bucket_intelligent_tiering_configuration()` +- ❌ `put_bucket_intelligent_tiering_configuration()` +- ❌ `delete_bucket_intelligent_tiering_configuration()` +- ❌ `list_bucket_intelligent_tiering_configurations()` + +### Helper Methods +- ❌ `download_file()` - High-level download +- ❌ `upload_file()` - High-level upload +- ❌ `download_fileobj()` - Download to file object +- ❌ `upload_fileobj()` - Upload from file object + +### Other +- ❌ `get_bucket_ownership_controls()` +- ❌ `put_bucket_ownership_controls()` +- ❌ `delete_bucket_ownership_controls()` +- ❌ `get_bucket_policy_status()` +- ❌ `list_object_versions()` +- ❌ `create_session()` - S3 Express +- And 20+ more metadata/configuration methods... + +## Coverage Analysis + +**Implemented:** ~21 methods +**Total boto3 S3 methods:** ~100+ methods +**Coverage:** ~20% + +## What's Covered + +DeltaGlider focuses on: +1. ✅ **Core CRUD operations** - put, get, delete, list +2. ✅ **Bucket management** - create, delete, list buckets +3. ✅ **Basic metadata** - head_object +4. ✅ **Presigned URLs** - generate_presigned_url/post +5. ✅ **Delta compression** - automatic for archive files +6. ✅ **Batch operations** - upload_batch, download_batch +7. ✅ **Compression stats** - get_bucket_stats, estimate_compression + +## What's NOT Covered + +❌ **Advanced bucket configuration** (versioning, lifecycle, logging, etc.) 
+❌ **Access control** (ACLs, bucket policies) +❌ **Multipart uploads** (for >5GB files) +❌ **Advanced features** (S3 Select, Glacier, Object Lock) +❌ **Tagging APIs** (object/bucket tags) +❌ **High-level transfer utilities** (upload_file, download_file) + +## Use Cases + +### ✅ DeltaGlider is PERFECT for: +- Storing versioned releases/builds +- Backup storage with deduplication +- CI/CD artifact storage +- Docker layer storage +- Archive file storage (zip, tar, etc.) +- Simple S3 storage needs + +### ❌ Use boto3 directly for: +- Complex bucket policies +- Versioning/lifecycle management +- Multipart uploads (>5GB files) +- S3 Select queries +- Glacier deep archive +- Object Lock/Legal Hold +- Advanced ACL management + +## Migration Strategy + +If you need both boto3 and DeltaGlider: + +```python +from deltaglider import create_client +import boto3 + +# Use DeltaGlider for objects (with compression!) +dg_client = create_client() +dg_client.put_object(Bucket='releases', Key='app.zip', Body=data) + +# Use boto3 for advanced features +s3_client = boto3.client('s3') +s3_client.put_bucket_versioning( + Bucket='releases', + VersioningConfiguration={'Status': 'Enabled'} +) +``` + +## Future Additions + +Likely to be added: +- `upload_file()` / `download_file()` - High-level helpers +- `upload_fileobj()` / `download_fileobj()` - File object helpers +- Basic tagging support +- Multipart upload (for large files) + +Unlikely to be added: +- Advanced bucket configuration +- ACL management +- S3 Select +- Glacier operations + +## Conclusion + +**DeltaGlider is NOT a 100% drop-in boto3 replacement.** + +It implements the **20% of boto3 methods that cover 80% of use cases**, with a focus on: +- Core object operations +- Bucket management +- Delta compression for storage savings +- Simple, clean API + +For advanced S3 features, use boto3 directly or in combination with DeltaGlider. 
diff --git a/PYPI_RELEASE.md b/PYPI_RELEASE.md deleted file mode 100644 index efffb11..0000000 --- a/PYPI_RELEASE.md +++ /dev/null @@ -1,122 +0,0 @@ -# Publishing DeltaGlider to PyPI - -## Prerequisites - -1. Create PyPI account at https://pypi.org -2. Create API token at https://pypi.org/manage/account/token/ -3. Install build tools: -```bash -pip install build twine -``` - -## Build the Package - -```bash -# Clean previous builds -rm -rf dist/ build/ *.egg-info/ - -# Build source distribution and wheel -python -m build - -# This creates: -# - dist/deltaglider-0.1.0.tar.gz (source distribution) -# - dist/deltaglider-0.1.0-py3-none-any.whl (wheel) -``` - -## Test with TestPyPI (Optional but Recommended) - -1. Upload to TestPyPI: -```bash -python -m twine upload --repository testpypi dist/* -``` - -2. Test installation: -```bash -pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ deltaglider -``` - -## Upload to PyPI - -```bash -# Upload to PyPI -python -m twine upload dist/* - -# You'll be prompted for: -# - username: __token__ -# - password: -``` - -## Verify Installation - -```bash -# Install from PyPI -pip install deltaglider - -# Test it works -deltaglider --help -``` - -## GitHub Release - -After PyPI release, create a GitHub release: - -```bash -git tag -a v0.1.0 -m "Release version 0.1.0" -git push origin v0.1.0 -``` - -Then create a release on GitHub: -1. Go to https://github.com/beshu-tech/deltaglider/releases -2. Click "Create a new release" -3. Select the tag v0.1.0 -4. Add release notes from CHANGELOG -5. Attach the wheel and source distribution from dist/ -6. Publish release - -## Version Bumping - -For next release: -1. Update version in `pyproject.toml` -2. Update CHANGELOG -3. Commit changes -4. 
Follow steps above - -## Automated Release (GitHub Actions) - -Consider adding `.github/workflows/publish.yml`: - -```yaml -name: Publish to PyPI - -on: - release: - types: [published] - -jobs: - publish: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - name: Install dependencies - run: | - pip install build twine - - name: Build package - run: python -m build - - name: Publish to PyPI - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: | - twine upload dist/* -``` - -## Marketing After Release - -1. **Hacker News**: Post with compelling title focusing on the 99.9% compression -2. **Reddit**: r/Python, r/devops, r/aws -3. **Twitter/X**: Tag AWS, Python, and DevOps influencers -4. **Dev.to / Medium**: Write technical article about the architecture -5. **PyPI Description**: Ensure it's compelling and includes the case study link \ No newline at end of file diff --git a/README.md b/README.md index 466364e..de88ea9 100644 --- a/README.md +++ b/README.md @@ -193,13 +193,13 @@ deltaglider ls -h s3://backups/ deltaglider rm -r s3://backups/2023/ ``` -### Python SDK - Drop-in boto3 Replacement +### Python SDK - boto3-Compatible API -**[📚 Full SDK Documentation](docs/sdk/README.md)** | **[API Reference](docs/sdk/api.md)** | **[Examples](docs/sdk/examples.md)** +**[📚 Full SDK Documentation](docs/sdk/README.md)** | **[API Reference](docs/sdk/api.md)** | **[Examples](docs/sdk/examples.md)** | **[boto3 Compatibility Guide](BOTO3_COMPATIBILITY.md)** #### Quick Start - boto3 Compatible API (Recommended) -DeltaGlider provides a **100% boto3-compatible API** that works as a drop-in replacement for AWS S3 SDK: +DeltaGlider provides a **boto3-compatible API** for core S3 operations (21 methods covering 80% of use cases): ```python from deltaglider import create_client @@ -239,8 +239,48 @@ stats = client.get_bucket_stats('releases', detailed_stats=True) # With 
compres client.delete_object(Bucket='releases', Key='old-version.zip') client.head_object(Bucket='releases', Key='v2.0.0/my-app.zip') + +# Bucket management - no boto3 needed! +client.create_bucket(Bucket='my-new-bucket') +client.list_buckets() +client.delete_bucket(Bucket='my-new-bucket') ``` +#### Bucket Management (NEW!) + +**No boto3 required!** DeltaGlider now provides complete bucket management: + +```python +from deltaglider import create_client + +client = create_client() + +# Create buckets +client.create_bucket(Bucket='my-releases') + +# Create bucket in specific region (AWS only) +client.create_bucket( + Bucket='my-regional-bucket', + CreateBucketConfiguration={'LocationConstraint': 'us-west-2'} +) + +# List all buckets +response = client.list_buckets() +for bucket in response['Buckets']: + print(f"{bucket['Name']} - {bucket['CreationDate']}") + +# Delete bucket (must be empty) +client.delete_bucket(Bucket='my-old-bucket') +``` + +**Benefits:** +- ✅ No need to import boto3 separately for bucket operations +- ✅ Consistent API with DeltaGlider object operations +- ✅ Works with AWS S3, MinIO, and S3-compatible storage +- ✅ Idempotent operations (safe to retry) + +See [examples/bucket_management.py](examples/bucket_management.py) for complete example. + #### Simple API (Alternative) For simpler use cases, DeltaGlider also provides a streamlined API: diff --git a/examples/bucket_management.py b/examples/bucket_management.py new file mode 100644 index 0000000..49016f2 --- /dev/null +++ b/examples/bucket_management.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +"""Example: Bucket management without boto3. + +This example shows how to use DeltaGlider's bucket management APIs +to create, list, and delete buckets without needing boto3 directly. 
+""" + +from deltaglider import create_client + +# Create client (works with AWS S3, MinIO, or any S3-compatible storage) +client = create_client() + +# For local MinIO/S3-compatible storage: +# client = create_client(endpoint_url='http://localhost:9000') + +print("=" * 70) +print("DeltaGlider Bucket Management Example") +print("=" * 70) + +# 1. List existing buckets +print("\n1. List all buckets:") +try: + response = client.list_buckets() + if response["Buckets"]: + for bucket in response["Buckets"]: + print(f" - {bucket['Name']} (created: {bucket.get('CreationDate', 'unknown')})") + else: + print(" No buckets found") +except Exception as e: + print(f" Error: {e}") + +# 2. Create a new bucket +bucket_name = "my-deltaglider-bucket" +print(f"\n2. Create bucket '{bucket_name}':") +try: + response = client.create_bucket(Bucket=bucket_name) + print(f" ✅ Created: {response['Location']}") +except Exception as e: + print(f" Error: {e}") + +# 3. Create bucket with region (if using AWS) +# Uncomment for AWS S3: +# print("\n3. Create bucket in specific region:") +# try: +# response = client.create_bucket( +# Bucket='my-regional-bucket', +# CreateBucketConfiguration={'LocationConstraint': 'us-west-2'} +# ) +# print(f" ✅ Created: {response['Location']}") +# except Exception as e: +# print(f" Error: {e}") + +# 4. Upload some files to the bucket +print(f"\n4. Upload files to '{bucket_name}':") +try: + # Upload a simple file + client.put_object( + Bucket=bucket_name, + Key="test-file.txt", + Body=b"Hello from DeltaGlider!", + ) + print(" ✅ Uploaded: test-file.txt") +except Exception as e: + print(f" Error: {e}") + +# 5. List objects in the bucket +print(f"\n5. List objects in '{bucket_name}':") +try: + response = client.list_objects(Bucket=bucket_name) + if response.contents: + for obj in response.contents: + print(f" - {obj.key} ({obj.size} bytes)") + else: + print(" No objects found") +except Exception as e: + print(f" Error: {e}") + +# 6. 
Delete all objects in the bucket (required before deleting bucket) +print(f"\n6. Delete all objects in '{bucket_name}':") +try: + response = client.list_objects(Bucket=bucket_name) + for obj in response.contents: + client.delete_object(Bucket=bucket_name, Key=obj.key) + print(f" ✅ Deleted: {obj.key}") +except Exception as e: + print(f" Error: {e}") + +# 7. Delete the bucket +print(f"\n7. Delete bucket '{bucket_name}':") +try: + response = client.delete_bucket(Bucket=bucket_name) + print(f" ✅ Deleted bucket (status: {response['ResponseMetadata']['HTTPStatusCode']})") +except Exception as e: + print(f" Error: {e}") + +# 8. Verify bucket is deleted +print("\n8. Verify bucket deletion:") +try: + response = client.list_buckets() + bucket_names = [b["Name"] for b in response["Buckets"]] + if bucket_name in bucket_names: + print(f" ❌ Bucket still exists!") + else: + print(f" ✅ Bucket successfully deleted") +except Exception as e: + print(f" Error: {e}") + +print("\n" + "=" * 70) +print("✅ Bucket management complete - no boto3 required!") +print("=" * 70) + +print("\n📚 Key Benefits:") +print(" - No need to import boto3 directly") +print(" - Consistent API with other DeltaGlider operations") +print(" - Works with AWS S3, MinIO, and S3-compatible storage") +print(" - Idempotent operations (safe to retry)") diff --git a/pyproject.toml b/pyproject.toml index a3e75a9..bd4bf04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,8 +144,12 @@ disallow_untyped_defs = true disallow_any_unimported = false no_implicit_optional = true check_untyped_defs = true -namespace_packages = true -explicit_package_bases = true +namespace_packages = false +mypy_path = "src" +exclude = [ + "^build/", + "^dist/", +] [tool.pytest.ini_options] minversion = "8.0" diff --git a/src/deltaglider/_version.py b/src/deltaglider/_version.py index 9e9454d..fc1ce6f 100644 --- a/src/deltaglider/_version.py +++ b/src/deltaglider/_version.py @@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE commit_id: COMMIT_ID 
__commit_id__: COMMIT_ID -__version__ = version = '0.3.2.dev0' -__version_tuple__ = version_tuple = (0, 3, 2, 'dev0') +__version__ = version = '4.0.1.dev1' +__version_tuple__ = version_tuple = (4, 0, 1, 'dev1') -__commit_id__ = commit_id = 'g23357e240' +__commit_id__ = commit_id = 'gdd39595c6' diff --git a/src/deltaglider/app/cli/main.py b/src/deltaglider/app/cli/main.py index e27a77b..668d1ef 100644 --- a/src/deltaglider/app/cli/main.py +++ b/src/deltaglider/app/cli/main.py @@ -252,11 +252,13 @@ def ls( return f"{size_float:.1f}P" # List objects using SDK (automatically filters .delta and reference.bin) - from deltaglider.client import DeltaGliderClient + from deltaglider.client import DeltaGliderClient, ListObjectsResponse client = DeltaGliderClient(service) - response = client.list_objects(Bucket=bucket_name, Prefix=prefix_str, MaxKeys=10000) - objects = response.contents + dg_response: ListObjectsResponse = client.list_objects( + Bucket=bucket_name, Prefix=prefix_str, MaxKeys=10000 + ) + objects = dg_response.contents # Filter by recursive flag if not recursive: diff --git a/src/deltaglider/client.py b/src/deltaglider/client.py index ea7eb7e..a3b5149 100644 --- a/src/deltaglider/client.py +++ b/src/deltaglider/client.py @@ -107,7 +107,16 @@ class BucketStats: class DeltaGliderClient: - """DeltaGlider client with boto3-compatible APIs and advanced features.""" + """DeltaGlider client with boto3-compatible APIs and advanced features. + + Implements core boto3 S3 client methods (~21 methods covering 80% of use cases): + - Object operations: put_object, get_object, delete_object, list_objects, head_object + - Bucket operations: create_bucket, delete_bucket, list_buckets + - Presigned URLs: generate_presigned_url, generate_presigned_post + - Plus DeltaGlider extensions for compression stats and batch operations + + See BOTO3_COMPATIBILITY.md for complete compatibility matrix. 
+ """ def __init__(self, service: DeltaService, endpoint_url: str | None = None): """Initialize client with service.""" @@ -1234,6 +1243,144 @@ class DeltaGliderClient: }, } + # ============================================================================ + # Bucket Management APIs (boto3-compatible) + # ============================================================================ + + def create_bucket( + self, + Bucket: str, + CreateBucketConfiguration: dict[str, str] | None = None, + **kwargs: Any, + ) -> dict[str, Any]: + """Create an S3 bucket (boto3-compatible). + + Args: + Bucket: Bucket name to create + CreateBucketConfiguration: Optional bucket configuration (e.g., LocationConstraint) + **kwargs: Additional S3 parameters (for compatibility) + + Returns: + Response dict with bucket location + + Example: + >>> client = create_client() + >>> client.create_bucket(Bucket='my-bucket') + >>> # With region + >>> client.create_bucket( + ... Bucket='my-bucket', + ... CreateBucketConfiguration={'LocationConstraint': 'us-west-2'} + ... 
) + """ + storage_adapter = self.service.storage + + # Check if storage adapter has boto3 client + if hasattr(storage_adapter, "client"): + try: + params: dict[str, Any] = {"Bucket": Bucket} + if CreateBucketConfiguration: + params["CreateBucketConfiguration"] = CreateBucketConfiguration + + response = storage_adapter.client.create_bucket(**params) + return { + "Location": response.get("Location", f"/{Bucket}"), + "ResponseMetadata": { + "HTTPStatusCode": 200, + }, + } + except Exception as e: + error_msg = str(e) + if "BucketAlreadyExists" in error_msg or "BucketAlreadyOwnedByYou" in error_msg: + # Bucket already exists - return success + self.service.logger.debug(f"Bucket {Bucket} already exists") + return { + "Location": f"/{Bucket}", + "ResponseMetadata": { + "HTTPStatusCode": 200, + }, + } + raise RuntimeError(f"Failed to create bucket: {e}") from e + else: + raise NotImplementedError("Storage adapter does not support bucket creation") + + def delete_bucket( + self, + Bucket: str, + **kwargs: Any, + ) -> dict[str, Any]: + """Delete an S3 bucket (boto3-compatible). + + Note: Bucket must be empty before deletion. 
+ + Args: + Bucket: Bucket name to delete + **kwargs: Additional S3 parameters (for compatibility) + + Returns: + Response dict with deletion status + + Example: + >>> client = create_client() + >>> client.delete_bucket(Bucket='my-bucket') + """ + storage_adapter = self.service.storage + + # Check if storage adapter has boto3 client + if hasattr(storage_adapter, "client"): + try: + storage_adapter.client.delete_bucket(Bucket=Bucket) + return { + "ResponseMetadata": { + "HTTPStatusCode": 204, + }, + } + except Exception as e: + error_msg = str(e) + if "NoSuchBucket" in error_msg: + # Bucket doesn't exist - return success + self.service.logger.debug(f"Bucket {Bucket} does not exist") + return { + "ResponseMetadata": { + "HTTPStatusCode": 204, + }, + } + raise RuntimeError(f"Failed to delete bucket: {e}") from e + else: + raise NotImplementedError("Storage adapter does not support bucket deletion") + + def list_buckets(self, **kwargs: Any) -> dict[str, Any]: + """List all S3 buckets (boto3-compatible). + + Args: + **kwargs: Additional S3 parameters (for compatibility) + + Returns: + Response dict with bucket list + + Example: + >>> client = create_client() + >>> response = client.list_buckets() + >>> for bucket in response['Buckets']: + ... 
print(bucket['Name']) + """ + storage_adapter = self.service.storage + + # Check if storage adapter has boto3 client + if hasattr(storage_adapter, "client"): + try: + response = storage_adapter.client.list_buckets() + return { + "Buckets": response.get("Buckets", []), + "Owner": response.get("Owner", {}), + "ResponseMetadata": { + "HTTPStatusCode": 200, + }, + } + except Exception as e: + raise RuntimeError(f"Failed to list buckets: {e}") from e + else: + raise NotImplementedError("Storage adapter does not support bucket listing") + def _parse_tagging(self, tagging: str) -> dict[str, str]: """Parse URL-encoded tagging string to dict.""" tags = {} diff --git a/src/deltaglider/core/service.py b/src/deltaglider/core/service.py index 60e4d8d..f59fca1 100644 --- a/src/deltaglider/core/service.py +++ b/src/deltaglider/core/service.py @@ -659,12 +659,42 @@ class DeltaService: self.logger.debug(f"Could not clear cache for {object_key.key}: {e}") elif is_delta: - # Simply delete the delta file + # Delete the delta file self.storage.delete(full_key) result["deleted"] = True result["type"] = "delta" result["original_name"] = obj_head.metadata.get("original_name", "unknown") + # Check if this was the last delta in the DeltaSpace - if so, clean up reference.bin + if "/" in object_key.key: + deltaspace_prefix = "/".join(object_key.key.split("/")[:-1]) + ref_key = f"{deltaspace_prefix}/reference.bin" + + # Check if any other delta files exist in this DeltaSpace + remaining_deltas = [] + for obj in self.storage.list(f"{object_key.bucket}/{deltaspace_prefix}"): + if obj.key.endswith(".delta") and obj.key != object_key.key: + remaining_deltas.append(obj.key) + + if not remaining_deltas: + # No more deltas - clean up the orphaned reference.bin + ref_full_key = f"{object_key.bucket}/{ref_key}" + ref_head = self.storage.head(ref_full_key) + if ref_head: + self.storage.delete(ref_full_key) + self.logger.info( + "Cleaned up orphaned reference.bin", + ref_key=ref_key, + reason="no 
remaining deltas", + ) + result["cleaned_reference"] = ref_key + + # Clear from cache + try: + self.cache.evict(object_key.bucket, deltaspace_prefix) + except Exception as e: + self.logger.debug(f"Could not clear cache for {deltaspace_prefix}: {e}") + elif is_direct: # Simply delete the direct upload self.storage.delete(full_key) diff --git a/tests/integration/test_bucket_management.py b/tests/integration/test_bucket_management.py new file mode 100644 index 0000000..71c7a82 --- /dev/null +++ b/tests/integration/test_bucket_management.py @@ -0,0 +1,237 @@ +"""Tests for bucket management APIs.""" + +from unittest.mock import Mock + +import pytest + +from deltaglider.app.cli.main import create_service +from deltaglider.client import DeltaGliderClient + + +class TestBucketManagement: + """Test bucket creation, listing, and deletion.""" + + def test_create_bucket_success(self): + """Test creating a bucket successfully.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client + mock_boto3_client = Mock() + mock_boto3_client.create_bucket.return_value = {"Location": "/test-bucket"} + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.create_bucket(Bucket="test-bucket") + + # Verify response + assert response["Location"] == "/test-bucket" + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + # Verify boto3 was called correctly + mock_boto3_client.create_bucket.assert_called_once_with(Bucket="test-bucket") + + def test_create_bucket_with_region(self): + """Test creating a bucket in a specific region.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client + mock_boto3_client = Mock() + mock_boto3_client.create_bucket.return_value = { + "Location": "http://test-bucket.s3.us-west-2.amazonaws.com/" + } + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = 
client.create_bucket( + Bucket="test-bucket", + CreateBucketConfiguration={"LocationConstraint": "us-west-2"}, + ) + + # Verify response + assert "Location" in response + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + # Verify boto3 was called with region config + mock_boto3_client.create_bucket.assert_called_once_with( + Bucket="test-bucket", CreateBucketConfiguration={"LocationConstraint": "us-west-2"} + ) + + def test_create_bucket_already_exists(self): + """Test creating a bucket that already exists returns success.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client to raise BucketAlreadyExists + mock_boto3_client = Mock() + mock_boto3_client.create_bucket.side_effect = Exception("BucketAlreadyOwnedByYou") + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.create_bucket(Bucket="existing-bucket") + + # Should return success (idempotent) + assert response["Location"] == "/existing-bucket" + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + def test_list_buckets_success(self): + """Test listing buckets.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client + mock_boto3_client = Mock() + mock_boto3_client.list_buckets.return_value = { + "Buckets": [ + {"Name": "bucket1", "CreationDate": "2025-01-01T00:00:00Z"}, + {"Name": "bucket2", "CreationDate": "2025-01-02T00:00:00Z"}, + ], + "Owner": {"DisplayName": "test-user", "ID": "12345"}, + } + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.list_buckets() + + # Verify response + assert len(response["Buckets"]) == 2 + assert response["Buckets"][0]["Name"] == "bucket1" + assert response["Buckets"][1]["Name"] == "bucket2" + assert response["Owner"]["DisplayName"] == "test-user" + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + def 
test_list_buckets_empty(self): + """Test listing buckets when none exist.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client with empty result + mock_boto3_client = Mock() + mock_boto3_client.list_buckets.return_value = {"Buckets": [], "Owner": {}} + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.list_buckets() + + # Verify empty list + assert response["Buckets"] == [] + assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + def test_delete_bucket_success(self): + """Test deleting a bucket successfully.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client + mock_boto3_client = Mock() + mock_boto3_client.delete_bucket.return_value = None + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.delete_bucket(Bucket="test-bucket") + + # Verify response + assert response["ResponseMetadata"]["HTTPStatusCode"] == 204 + + # Verify boto3 was called + mock_boto3_client.delete_bucket.assert_called_once_with(Bucket="test-bucket") + + def test_delete_bucket_not_found(self): + """Test deleting a bucket that doesn't exist returns success.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client to raise NoSuchBucket + mock_boto3_client = Mock() + mock_boto3_client.delete_bucket.side_effect = Exception("NoSuchBucket") + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + response = client.delete_bucket(Bucket="nonexistent-bucket") + + # Should return success (idempotent) + assert response["ResponseMetadata"]["HTTPStatusCode"] == 204 + + def test_delete_bucket_not_empty_raises_error(self): + """Test deleting a non-empty bucket raises an error.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client to raise 
BucketNotEmpty + mock_boto3_client = Mock() + mock_boto3_client.delete_bucket.side_effect = Exception( + "BucketNotEmpty: The bucket you tried to delete is not empty" + ) + mock_storage.client = mock_boto3_client + + client = DeltaGliderClient(service) + + with pytest.raises(RuntimeError, match="Failed to delete bucket"): + client.delete_bucket(Bucket="full-bucket") + + def test_bucket_methods_without_boto3_client(self): + """Test that bucket methods raise NotImplementedError when storage doesn't support it.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Storage adapter without boto3 client (no 'client' attribute) + delattr(mock_storage, "client") + + client = DeltaGliderClient(service) + + # All bucket methods should raise NotImplementedError + with pytest.raises(NotImplementedError): + client.create_bucket(Bucket="test") + + with pytest.raises(NotImplementedError): + client.delete_bucket(Bucket="test") + + with pytest.raises(NotImplementedError): + client.list_buckets() + + def test_complete_bucket_lifecycle(self): + """Test complete bucket lifecycle: create, use, delete.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock boto3 client + mock_boto3_client = Mock() + mock_storage.client = mock_boto3_client + + # Setup responses + mock_boto3_client.create_bucket.return_value = {"Location": "/test-lifecycle"} + mock_boto3_client.list_buckets.return_value = { + "Buckets": [{"Name": "test-lifecycle", "CreationDate": "2025-01-01T00:00:00Z"}], + "Owner": {}, + } + mock_boto3_client.delete_bucket.return_value = None + + client = DeltaGliderClient(service) + + # 1. Create bucket + create_response = client.create_bucket(Bucket="test-lifecycle") + assert create_response["ResponseMetadata"]["HTTPStatusCode"] == 200 + + # 2. 
List buckets - verify it exists + list_response = client.list_buckets() + bucket_names = [b["Name"] for b in list_response["Buckets"]] + assert "test-lifecycle" in bucket_names + + # 3. Delete bucket + delete_response = client.delete_bucket(Bucket="test-lifecycle") + assert delete_response["ResponseMetadata"]["HTTPStatusCode"] == 204 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/integration/test_filtering_and_cleanup.py b/tests/integration/test_filtering_and_cleanup.py new file mode 100644 index 0000000..16f61e6 --- /dev/null +++ b/tests/integration/test_filtering_and_cleanup.py @@ -0,0 +1,434 @@ +"""Tests for SDK filtering and delete cleanup functionality.""" + +from datetime import UTC, datetime +from unittest.mock import Mock + +import pytest + +from deltaglider.app.cli.main import create_service +from deltaglider.client import DeltaGliderClient +from deltaglider.core import ObjectKey +from deltaglider.ports.storage import ObjectHead + + +class TestSDKFiltering: + """Test that SDK filters .delta and reference.bin from list_objects().""" + + def test_list_objects_filters_delta_suffix(self): + """Test that .delta suffix is stripped from object keys.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock list_objects response with .delta files + mock_storage.list_objects.return_value = { + "objects": [ + { + "key": "releases/app-v1.zip.delta", + "size": 1000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "abc123", + "storage_class": "STANDARD", + }, + { + "key": "releases/app-v2.zip.delta", + "size": 1500, + "last_modified": "2025-01-02T00:00:00Z", + "etag": "def456", + "storage_class": "STANDARD", + }, + { + "key": "releases/README.md", + "size": 500, + "last_modified": "2025-01-03T00:00:00Z", + "etag": "ghi789", + "storage_class": "STANDARD", + }, + ], + "common_prefixes": [], + "is_truncated": False, + "next_continuation_token": None, + } + + client = 
DeltaGliderClient(service) + response = client.list_objects(Bucket="test-bucket", Prefix="releases/") + + # Verify .delta suffix is stripped + keys = [obj.key for obj in response.contents] + assert "releases/app-v1.zip" in keys + assert "releases/app-v2.zip" in keys + assert "releases/README.md" in keys + + # Verify NO .delta suffixes in output + for key in keys: + assert not key.endswith(".delta"), f"Found .delta suffix in: {key}" + + # Verify is_delta flag is set correctly + delta_objects = [obj for obj in response.contents if obj.is_delta] + assert len(delta_objects) == 2 + + def test_list_objects_filters_reference_bin(self): + """Test that reference.bin files are completely filtered out.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock list_objects response with reference.bin files + mock_storage.list_objects.return_value = { + "objects": [ + { + "key": "releases/reference.bin", + "size": 50000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "ref123", + "storage_class": "STANDARD", + }, + { + "key": "releases/1.0/reference.bin", + "size": 50000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "ref456", + "storage_class": "STANDARD", + }, + { + "key": "releases/app.zip.delta", + "size": 1000, + "last_modified": "2025-01-02T00:00:00Z", + "etag": "app123", + "storage_class": "STANDARD", + }, + ], + "common_prefixes": [], + "is_truncated": False, + "next_continuation_token": None, + } + + client = DeltaGliderClient(service) + response = client.list_objects(Bucket="test-bucket", Prefix="releases/") + + # Verify NO reference.bin files in output + keys = [obj.key for obj in response.contents] + for key in keys: + assert not key.endswith("reference.bin"), f"Found reference.bin in: {key}" + + # Should only have the app.zip (with .delta stripped) + assert len(response.contents) == 1 + assert response.contents[0].key == "releases/app.zip" + assert response.contents[0].is_delta is True + + def 
test_list_objects_combined_filtering(self): + """Test filtering of both .delta and reference.bin together.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock comprehensive file list + mock_storage.list_objects.return_value = { + "objects": [ + { + "key": "data/reference.bin", + "size": 50000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "1", + }, + { + "key": "data/file1.zip.delta", + "size": 1000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "2", + }, + { + "key": "data/file2.zip.delta", + "size": 1500, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "3", + }, + { + "key": "data/file3.txt", + "size": 500, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "4", + }, + { + "key": "data/sub/reference.bin", + "size": 50000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "5", + }, + { + "key": "data/sub/app.jar.delta", + "size": 2000, + "last_modified": "2025-01-01T00:00:00Z", + "etag": "6", + }, + ], + "common_prefixes": [], + "is_truncated": False, + "next_continuation_token": None, + } + + client = DeltaGliderClient(service) + response = client.list_objects(Bucket="test-bucket", Prefix="data/") + + # Should filter out 2 reference.bin files + # Should strip .delta from 3 files + # Should keep 1 regular file as-is + assert len(response.contents) == 4 # 3 deltas + 1 regular file + + keys = [obj.key for obj in response.contents] + expected_keys = ["data/file1.zip", "data/file2.zip", "data/file3.txt", "data/sub/app.jar"] + assert sorted(keys) == sorted(expected_keys) + + # Verify no internal files visible + for key in keys: + assert not key.endswith(".delta") + assert not key.endswith("reference.bin") + + +class TestSingleDeleteCleanup: + """Test that single delete() cleans up orphaned reference.bin.""" + + def test_delete_last_delta_cleans_reference(self): + """Test that deleting the last delta file removes orphaned reference.bin.""" + service = create_service() + mock_storage = Mock() + 
service.storage = mock_storage + + # Mock head for both delta and reference.bin + def mock_head_func(key): + if key.endswith("app.zip.delta"): + return ObjectHead( + key="releases/app.zip.delta", + size=1000, + etag="abc123", + last_modified=datetime.now(UTC), + metadata={"original_name": "app.zip", "ref_key": "releases/reference.bin"}, + ) + elif key.endswith("reference.bin"): + return ObjectHead( + key="releases/reference.bin", + size=50000, + etag="ref123", + last_modified=datetime.now(UTC), + metadata={}, + ) + return None + + mock_storage.head.side_effect = mock_head_func + + # Mock list to show NO other deltas remain + mock_storage.list.return_value = [ + ObjectHead( + key="releases/reference.bin", + size=50000, + etag="ref123", + last_modified=datetime.now(UTC), + metadata={}, + ), + ] + mock_storage.delete.return_value = None + + # Delete the last delta + result = service.delete(ObjectKey(bucket="test-bucket", key="releases/app.zip.delta")) + + # Verify delta was deleted + assert result["deleted"] is True + assert result["type"] == "delta" + + # Verify reference.bin cleanup was triggered + assert "cleaned_reference" in result + assert result["cleaned_reference"] == "releases/reference.bin" + + # Verify both files were deleted + assert mock_storage.delete.call_count == 2 + delete_calls = [call[0][0] for call in mock_storage.delete.call_args_list] + assert "test-bucket/releases/app.zip.delta" in delete_calls + assert "test-bucket/releases/reference.bin" in delete_calls + + def test_delete_delta_keeps_reference_when_others_exist(self): + """Test that reference.bin is kept when other deltas remain.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock the delta file being deleted + mock_storage.head.return_value = ObjectHead( + key="releases/app-v1.zip.delta", + size=1000, + etag="abc123", + last_modified=datetime.now(UTC), + metadata={"original_name": "app-v1.zip"}, + ) + + # Mock list to show OTHER deltas still 
exist + mock_storage.list.return_value = [ + ObjectHead( + key="releases/app-v2.zip.delta", + size=1500, + etag="def456", + last_modified=datetime.now(UTC), + metadata={}, + ), + ObjectHead( + key="releases/reference.bin", + size=50000, + etag="ref123", + last_modified=datetime.now(UTC), + metadata={}, + ), + ] + + mock_storage.delete.return_value = None + + # Delete one delta (but others remain) + result = service.delete(ObjectKey(bucket="test-bucket", key="releases/app-v1.zip.delta")) + + # Verify delta was deleted + assert result["deleted"] is True + assert result["type"] == "delta" + + # Verify reference.bin was NOT cleaned up + assert "cleaned_reference" not in result + + # Verify only the delta was deleted, not reference.bin + assert mock_storage.delete.call_count == 1 + mock_storage.delete.assert_called_once_with("test-bucket/releases/app-v1.zip.delta") + + def test_delete_delta_no_reference_exists(self): + """Test deleting delta when reference.bin doesn't exist (edge case).""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock the delta file + mock_storage.head.return_value = ObjectHead( + key="releases/app.zip.delta", + size=1000, + etag="abc123", + last_modified=datetime.now(UTC), + metadata={"original_name": "app.zip"}, + ) + + # Mock list shows no other deltas + mock_storage.list.return_value = [] + + # Mock head for reference.bin returns None (doesn't exist) + def mock_head_func(key): + if key.endswith("reference.bin"): + return None + return ObjectHead( + key="releases/app.zip.delta", + size=1000, + etag="abc123", + last_modified=datetime.now(UTC), + metadata={}, + ) + + mock_storage.head.side_effect = mock_head_func + mock_storage.delete.return_value = None + + # Delete the delta + result = service.delete(ObjectKey(bucket="test-bucket", key="releases/app.zip.delta")) + + # Verify delta was deleted + assert result["deleted"] is True + assert result["type"] == "delta" + + # Verify no reference cleanup 
(since it didn't exist) + assert "cleaned_reference" not in result + + # Only delta should be deleted + assert mock_storage.delete.call_count == 1 + + def test_delete_isolated_deltaspaces(self): + """Test that cleanup only affects the specific DeltaSpace.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock head for both delta and reference.bin + def mock_head_func(key): + if "1.0/app.zip.delta" in key: + return ObjectHead( + key="releases/1.0/app.zip.delta", + size=1000, + etag="abc123", + last_modified=datetime.now(UTC), + metadata={"original_name": "app.zip"}, + ) + elif "1.0/reference.bin" in key: + return ObjectHead( + key="releases/1.0/reference.bin", + size=50000, + etag="ref1", + last_modified=datetime.now(UTC), + metadata={}, + ) + return None + + mock_storage.head.side_effect = mock_head_func + + # Mock list for 1.0 - no other deltas + mock_storage.list.return_value = [ + ObjectHead( + key="releases/1.0/reference.bin", + size=50000, + etag="ref1", + last_modified=datetime.now(UTC), + metadata={}, + ), + ] + mock_storage.delete.return_value = None + + # Delete from 1.0 + result = service.delete(ObjectKey(bucket="test-bucket", key="releases/1.0/app.zip.delta")) + + # Should clean up only 1.0/reference.bin + assert result["cleaned_reference"] == "releases/1.0/reference.bin" + + # Verify correct files deleted + delete_calls = [call[0][0] for call in mock_storage.delete.call_args_list] + assert "test-bucket/releases/1.0/app.zip.delta" in delete_calls + assert "test-bucket/releases/1.0/reference.bin" in delete_calls + + +class TestRecursiveDeleteCleanup: + """Test that recursive delete properly cleans up references.""" + + def test_recursive_delete_reference_cleanup_already_works(self): + """Verify existing recursive delete reference cleanup is working.""" + service = create_service() + mock_storage = Mock() + service.storage = mock_storage + + # Mock objects in deltaspace + mock_storage.list.return_value = [ + 
ObjectHead( + key="data/app.zip.delta", + size=1000, + etag="1", + last_modified=datetime.now(UTC), + metadata={}, + ), + ObjectHead( + key="data/reference.bin", + size=50000, + etag="2", + last_modified=datetime.now(UTC), + metadata={}, + ), + ] + + mock_storage.head.return_value = None + mock_storage.delete.return_value = None + + result = service.delete_recursive("test-bucket", "data/") + + # Should delete both delta and reference + assert result["deleted_count"] == 2 + assert result["deltas_deleted"] == 1 + assert result["references_deleted"] == 1 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/integration/test_recursive_delete_reference_cleanup.py b/tests/integration/test_recursive_delete_reference_cleanup.py index b1f4a76..44d01ec 100644 --- a/tests/integration/test_recursive_delete_reference_cleanup.py +++ b/tests/integration/test_recursive_delete_reference_cleanup.py @@ -286,6 +286,7 @@ class TestRecursiveDeleteReferenceCleanup: last_modified=None, metadata={"original_name": "file.zip"}, ) + mock_storage.list.return_value = [] # No other deltas remain mock_storage.delete.return_value = None # Test single delete