mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-04-09 18:33:37 +02:00
fix: disable boto3 auto-checksums for S3-compatible endpoint support
boto3 1.36+ sends CRC32/CRC64 checksums by default on PUT requests. S3-compatible stores like Hetzner Object Storage reject these with BadRequest, breaking direct (non-delta) file uploads. This sets request_checksum_calculation="when_required" to restore compatibility while still working with AWS S3. Also pins runtime deps to major version ranges and adds S3 compat tests. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
22
CLAUDE.md
22
CLAUDE.md
@@ -256,4 +256,24 @@ Core delta logic is in `src/deltaglider/core/service.py`:
|
||||
- **Auto-Cleanup**: Corrupted or tampered cache files automatically deleted on decryption failures
|
||||
- **Persistent Keys**: Set `DG_CACHE_ENCRYPTION_KEY` only for cross-process cache sharing (use secrets management)
|
||||
- **Content-Addressed Storage**: SHA256-based filenames prevent collision attacks
|
||||
- **Zero-Trust Cache**: All cache operations include cryptographic validation
|
||||
- **Zero-Trust Cache**: All cache operations include cryptographic validation
|
||||
|
||||
## Dependency Management
|
||||
|
||||
### Pinning Strategy
|
||||
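Runtime dependencies in `pyproject.toml` use **compatible range pins** (`>=x.y.z,<UPPER_BOUND`), where the upper bound is normally the next major version. This prevents surprise breaking changes from major versions while still allowing patch and minor updates. (`cryptography` is the exception: it is capped at `<45.0.0`, several major versions above its `>=42.0.0` floor.)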
|
||||
|
||||
**Critical dependency: `boto3`** — This is the most breakage-prone dependency. AWS periodically changes default behaviors in minor releases (e.g., boto3 1.36+ added automatic request checksums that break S3-compatible stores like Hetzner Object Storage). The S3 adapter (`adapters/storage_s3.py`) explicitly sets `request_checksum_calculation="when_required"` and `response_checksum_validation="when_required"` to maintain compatibility with non-AWS S3 endpoints.
|
||||
|
||||
### Quarterly Dependency Refresh (do every ~3 months)
|
||||
1. **Check for updates**: `uv pip compile pyproject.toml --upgrade --dry-run`
|
||||
2. **Update in a branch**: bump version floors in `pyproject.toml` to current stable releases
|
||||
3. **Run full test suite**: `uv run pytest` (unit + integration)
|
||||
4. **Test against S3-compatible stores**: test a small file upload against Hetzner (or whichever non-AWS endpoint is in use) — boto3 updates are the most likely to break this
|
||||
5. **Rebuild Docker image** and test the same upload from the container
|
||||
6. **Check changelogs** for boto3, cryptography, and click for any deprecation notices or behavior changes
|
||||
|
||||
### Known Compatibility Constraints
|
||||
- **boto3**: Must use `request_checksum_calculation="when_required"` for Hetzner/MinIO compatibility. After any upgrade that changes boto3's default behavior, test direct uploads (non-delta path) of small files to non-AWS endpoints.
|
||||
- **cryptography**: Fernet API has been stable, but major versions may drop old OpenSSL support. Verify cache encryption still works after upgrades.
|
||||
- **click**: CLI argument parsing. Major versions may change decorator behavior. Run integration tests (`test_aws_cli_commands_v2.py`) after upgrades.
|
||||
@@ -49,11 +49,11 @@ classifiers = [
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"boto3>=1.35.0",
|
||||
"click>=8.1.0",
|
||||
"cryptography>=42.0.0",
|
||||
"python-dateutil>=2.9.0",
|
||||
"requests>=2.32.0",
|
||||
"boto3>=1.35.0,<2.0.0",
|
||||
"click>=8.1.0,<9.0.0",
|
||||
"cryptography>=42.0.0,<45.0.0",
|
||||
"python-dateutil>=2.9.0,<3.0.0",
|
||||
"requests>=2.32.0,<3.0.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
@@ -7,6 +7,7 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from ..ports.storage import ObjectHead, PutResult, StoragePort
|
||||
@@ -42,6 +43,13 @@ class S3StorageAdapter(StoragePort):
|
||||
client_params: dict[str, Any] = {
|
||||
"service_name": "s3",
|
||||
"endpoint_url": endpoint_url or os.environ.get("AWS_ENDPOINT_URL"),
|
||||
# Disable automatic request checksums (CRC32/CRC64) added in
|
||||
# boto3 1.36+. S3-compatible stores like Hetzner Object Storage
|
||||
# reject the checksum headers with BadRequest.
|
||||
"config": Config(
|
||||
request_checksum_calculation="when_required",
|
||||
response_checksum_validation="when_required",
|
||||
),
|
||||
}
|
||||
|
||||
# Merge in any additional boto3 kwargs (credentials, region, etc.)
|
||||
|
||||
70
tests/unit/test_s3_compat.py
Normal file
70
tests/unit/test_s3_compat.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Tests for S3-compatible storage compatibility.
|
||||
|
||||
Ensures the S3 adapter works with non-AWS S3 endpoints (Hetzner, MinIO, etc.)
|
||||
that don't support newer AWS-specific features like automatic request checksums.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from deltaglider.adapters.storage_s3 import S3StorageAdapter
|
||||
|
||||
|
||||
class TestS3CompatibleEndpoints:
    """Verify S3 adapter configuration for non-AWS endpoint compatibility."""

    def test_client_disables_automatic_checksums(self):
        """boto3 1.36+ sends CRC32/CRC64 checksums by default.

        S3-compatible stores (Hetzner, MinIO) reject these with BadRequest.
        The adapter must set request_checksum_calculation='when_required'
        (this test also checks response_checksum_validation).
        """
        with patch("deltaglider.adapters.storage_s3.boto3.client") as mock_client:
            S3StorageAdapter(endpoint_url="https://example.com")

            mock_client.assert_called_once()
            # call_args.kwargs and call_args[1] are the same mapping, so a
            # single lookup is enough; no positional-index fallback needed.
            config = mock_client.call_args.kwargs.get("config")

            assert config is not None, "boto3 client must be created with a Config object"
            assert config.request_checksum_calculation == "when_required"
            assert config.response_checksum_validation == "when_required"

    def test_put_object_no_checksum_kwargs(self, temp_dir):
        """put_object must not pass ChecksumAlgorithm or similar kwargs."""
        mock_client = MagicMock()
        mock_client.put_object.return_value = {"ETag": '"abc123"'}

        adapter = S3StorageAdapter(client=mock_client)

        test_file = temp_dir / "test.sha1"
        test_file.write_text("abc123")

        adapter.put(
            "my-bucket/test/test.sha1",
            test_file,
            {"compression": "none", "tool": "deltaglider"},
        )

        mock_client.put_object.assert_called_once()
        put_kwargs = mock_client.put_object.call_args.kwargs

        # Keyword arguments that would make the request carry an explicit
        # checksum; none of them may reach put_object.
        checksum_keys = {
            "ChecksumAlgorithm",
            "ChecksumCRC32",
            "ChecksumCRC32C",
            "ChecksumCRC64NVME",
            "ChecksumSHA1",
            "ChecksumSHA256",
            "ContentMD5",
        }
        passed_checksum_keys = checksum_keys & set(put_kwargs)
        assert not passed_checksum_keys, (
            f"put_object must not pass checksum kwargs for S3-compatible "
            f"endpoint support, but found: {passed_checksum_keys}"
        )

    def test_preconfigured_client_is_used_as_is(self):
        """When a pre-configured client is passed, it should be used directly."""
        mock_client = MagicMock()
        adapter = S3StorageAdapter(client=mock_client)
        assert adapter.client is mock_client
|
||||
Reference in New Issue
Block a user