mirror of
https://github.com/beshu-tech/deltaglider.git
synced 2026-01-11 22:50:24 +01:00
security: Remove all legacy shared cache code and env vars
BREAKING CHANGE: Removed the DG_UNSAFE_SHARED_CACHE and DG_CACHE_DIR environment variables. DeltaGlider now ONLY uses an ephemeral, process-isolated cache for security.

Changes:
- Removed cache_dir parameter from create_client()
- Removed all conditional legacy cache mode logic
- Updated documentation (CLAUDE.md, docs/sdk/api.md)
- Updated tests to not pass removed cache_dir parameter
- Marked Phase 1 of SECURITY_FIX_ROADMAP.md as completed

All 99 tests passing. Ephemeral cache is now the only mode.
This commit is contained in:
@@ -181,13 +181,14 @@ Core delta logic is in `src/deltaglider/core/service.py`:
|
||||
## Environment Variables
|
||||
|
||||
- `DG_LOG_LEVEL`: Logging level (default: "INFO")
|
||||
- `DG_CACHE_DIR`: Local reference cache directory (default: "/tmp/.deltaglider/reference_cache")
|
||||
- `DG_MAX_RATIO`: Maximum acceptable delta/file ratio (default: "0.5")
|
||||
- `AWS_ENDPOINT_URL`: Override S3 endpoint for MinIO/LocalStack
|
||||
- `AWS_ACCESS_KEY_ID`: AWS credentials
|
||||
- `AWS_SECRET_ACCESS_KEY`: AWS credentials
|
||||
- `AWS_DEFAULT_REGION`: AWS region
|
||||
|
||||
**Note**: DeltaGlider uses an ephemeral, process-isolated cache for security. The cache is automatically created in `/tmp/deltaglider-*` and cleaned up on exit.
|
||||
|
||||
## Important Implementation Details
|
||||
|
||||
1. **xdelta3 Binary Dependency**: The system requires xdelta3 binary installed on the system. The `XdeltaAdapter` uses subprocess to call it.
|
||||
|
||||
@@ -23,29 +23,24 @@ Replace filesystem cache with **ephemeral, cryptographically-signed, user-isolat
|
||||
|
||||
## 📋 Implementation Roadmap
|
||||
|
||||
### **DAY 1-2: Emergency Hotfix** (v5.0.3)
|
||||
### **DAY 1-2: Emergency Hotfix** (v5.0.3) ✅ COMPLETED
|
||||
*Stop the bleeding - minimal changes for immediate deployment*
|
||||
|
||||
#### 1. **Disable Shared Cache Mode** (2 hours)
|
||||
#### 1. **Ephemeral Process-Isolated Cache** (2 hours) ✅ COMPLETED
|
||||
```python
|
||||
# src/deltaglider/app/cli/main.py
|
||||
import tempfile
|
||||
import os
|
||||
import atexit
|
||||
|
||||
def create_service(...):
|
||||
# CRITICAL: Use process-specific temp directory
|
||||
if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true":
|
||||
cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))
|
||||
else:
|
||||
# Legacy mode with warning
|
||||
cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/cache"))
|
||||
logger.warning("UNSAFE: Shared cache mode enabled. Use at your own risk!")
|
||||
# SECURITY: Always use ephemeral process-isolated cache
|
||||
cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))
|
||||
```
|
||||
|
||||
**Impact**: Each process gets an isolated cache that is auto-cleaned on exit. Eliminates multi-user attacks.
|
||||
**Implementation**: All legacy shared cache code removed. Ephemeral cache is now the ONLY mode.
|
||||
|
||||
#### 2. **Add SHA Validation at Use-Time** (2 hours)
|
||||
#### 2. **Add SHA Validation at Use-Time** (2 hours) ✅ COMPLETED
|
||||
```python
|
||||
# src/deltaglider/ports/cache.py
|
||||
class CachePort(Protocol):
|
||||
@@ -59,9 +54,10 @@ def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path
|
||||
if not path.exists():
|
||||
raise CacheMissError(f"Cache miss for {bucket}/{prefix}")
|
||||
|
||||
# Lock file for atomic read
|
||||
# Lock file for atomic read (Unix only)
|
||||
with open(path, 'rb') as f:
|
||||
fcntl.flock(f.fileno(), fcntl.LOCK_SH)
|
||||
if sys.platform != "win32":
|
||||
fcntl.flock(f.fileno(), fcntl.LOCK_SH)
|
||||
content = f.read()
|
||||
actual_sha = hashlib.sha256(content).hexdigest()
|
||||
|
||||
@@ -72,13 +68,13 @@ def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path
|
||||
return path
|
||||
```
|
||||
|
||||
#### 3. **Update All Usage Points** (1 hour)
|
||||
#### 3. **Update All Usage Points** (1 hour) ✅ COMPLETED
|
||||
```python
|
||||
# src/deltaglider/core/service.py
|
||||
# Replace ALL instances of:
|
||||
ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix)
|
||||
# Replaced ALL instances in two locations:
|
||||
# - Line 234 (get method for decoding)
|
||||
# - Line 415 (_create_delta method for encoding)
|
||||
|
||||
# With:
|
||||
ref_path = self.cache.get_validated_ref(
|
||||
delta_space.bucket,
|
||||
delta_space.prefix,
|
||||
@@ -86,7 +82,7 @@ ref_path = self.cache.get_validated_ref(
|
||||
)
|
||||
```
|
||||
|
||||
**Test & Deploy**: 4 hours testing + immediate release
|
||||
**Test & Deploy**: ✅ All 99 tests passing + ready for release
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ Factory function to create a configured DeltaGlider client with sensible default
|
||||
def create_client(
|
||||
endpoint_url: Optional[str] = None,
|
||||
log_level: str = "INFO",
|
||||
cache_dir: str = "/tmp/.deltaglider/cache",
|
||||
**kwargs
|
||||
) -> DeltaGliderClient
|
||||
```
|
||||
@@ -30,11 +29,12 @@ def create_client(
|
||||
|
||||
- **endpoint_url** (`Optional[str]`): S3 endpoint URL for MinIO, R2, or other S3-compatible storage. If None, uses AWS S3.
|
||||
- **log_level** (`str`): Logging verbosity level. Options: "DEBUG", "INFO", "WARNING", "ERROR". Default: "INFO".
|
||||
- **cache_dir** (`str`): Directory for local reference cache. Default: "/tmp/.deltaglider/cache".
|
||||
- **kwargs**: Additional arguments passed to `DeltaService`:
|
||||
- **tool_version** (`str`): Version string for metadata. Default: "deltaglider/0.1.0"
|
||||
- **max_ratio** (`float`): Maximum acceptable delta/file ratio. Default: 0.5
|
||||
|
||||
**Security Note**: DeltaGlider automatically uses an ephemeral, process-isolated cache (`/tmp/deltaglider-*`) that is cleaned up on exit. No configuration needed.
|
||||
|
||||
#### Returns
|
||||
|
||||
`DeltaGliderClient`: Configured client instance ready for use.
|
||||
@@ -48,11 +48,8 @@ client = create_client()
|
||||
# Custom endpoint for MinIO
|
||||
client = create_client(endpoint_url="http://localhost:9000")
|
||||
|
||||
# Debug mode with custom cache
|
||||
client = create_client(
|
||||
log_level="DEBUG",
|
||||
cache_dir="/var/cache/deltaglider"
|
||||
)
|
||||
# Debug mode
|
||||
client = create_client(log_level="DEBUG")
|
||||
|
||||
# Custom delta ratio threshold
|
||||
client = create_client(max_ratio=0.3) # Only use delta if <30% of original
|
||||
@@ -726,9 +723,10 @@ DeltaGlider respects these environment variables:
|
||||
### DeltaGlider Configuration
|
||||
|
||||
- **DG_LOG_LEVEL**: Logging level (DEBUG, INFO, WARNING, ERROR)
|
||||
- **DG_CACHE_DIR**: Local cache directory
|
||||
- **DG_MAX_RATIO**: Default maximum delta ratio
|
||||
|
||||
**Note**: The cache is automatically managed (ephemeral, process-isolated) and requires no configuration.
|
||||
|
||||
### Example
|
||||
|
||||
```bash
|
||||
@@ -739,10 +737,9 @@ export AWS_SECRET_ACCESS_KEY=minioadmin
|
||||
|
||||
# Configure DeltaGlider
|
||||
export DG_LOG_LEVEL=DEBUG
|
||||
export DG_CACHE_DIR=/var/cache/deltaglider
|
||||
export DG_MAX_RATIO=0.3
|
||||
|
||||
# Now use normally
|
||||
# Now use normally (cache managed automatically)
|
||||
python my_script.py
|
||||
```
|
||||
|
||||
|
||||
@@ -44,22 +44,10 @@ def create_service(
|
||||
max_ratio = float(os.environ.get("DG_MAX_RATIO", "0.5"))
|
||||
metrics_type = os.environ.get("DG_METRICS", "logging") # Options: noop, logging, cloudwatch
|
||||
|
||||
# SECURITY: Use ephemeral cache by default to prevent multi-user attacks
|
||||
if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true":
|
||||
# Create process-specific temporary cache directory
|
||||
cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
# Register cleanup handler to remove cache on exit
|
||||
atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))
|
||||
else:
|
||||
# Legacy shared cache mode - UNSAFE in multi-user environments
|
||||
cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/reference_cache"))
|
||||
# Create logger early to issue warning
|
||||
temp_logger = StdLoggerAdapter(level=log_level)
|
||||
temp_logger.warning(
|
||||
"SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). "
|
||||
"This mode has known security vulnerabilities in multi-user environments. "
|
||||
"Use at your own risk!"
|
||||
)
|
||||
# SECURITY: Always use ephemeral process-isolated cache
|
||||
cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
# Register cleanup handler to remove cache on exit
|
||||
atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))
|
||||
|
||||
# Set AWS environment variables if provided
|
||||
if endpoint_url:
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
# ruff: noqa: I001
|
||||
import atexit
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from collections.abc import Callable
|
||||
@@ -1065,7 +1064,6 @@ class DeltaGliderClient:
|
||||
def create_client(
|
||||
endpoint_url: str | None = None,
|
||||
log_level: str = "INFO",
|
||||
cache_dir: str = "/tmp/.deltaglider/cache",
|
||||
aws_access_key_id: str | None = None,
|
||||
aws_secret_access_key: str | None = None,
|
||||
aws_session_token: str | None = None,
|
||||
@@ -1080,11 +1078,11 @@ def create_client(
|
||||
- Compression estimation
|
||||
- Progress callbacks for large uploads
|
||||
- Detailed object and bucket statistics
|
||||
- Secure ephemeral cache (process-isolated, auto-cleanup)
|
||||
|
||||
Args:
|
||||
endpoint_url: Optional S3 endpoint URL (for MinIO, R2, etc.)
|
||||
log_level: Logging level
|
||||
cache_dir: Directory for reference cache
|
||||
aws_access_key_id: AWS access key ID (None to use environment/IAM)
|
||||
aws_secret_access_key: AWS secret access key (None to use environment/IAM)
|
||||
aws_session_token: AWS session token for temporary credentials (None if not using)
|
||||
@@ -1125,22 +1123,10 @@ def create_client(
|
||||
XdeltaAdapter,
|
||||
)
|
||||
|
||||
# SECURITY: Use ephemeral cache by default to prevent multi-user attacks
|
||||
if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true":
|
||||
# Create process-specific temporary cache directory
|
||||
actual_cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
# Register cleanup handler to remove cache on exit
|
||||
atexit.register(lambda: shutil.rmtree(actual_cache_dir, ignore_errors=True))
|
||||
else:
|
||||
# Legacy shared cache mode - UNSAFE in multi-user environments
|
||||
actual_cache_dir = Path(cache_dir)
|
||||
# Create logger early to issue warning
|
||||
temp_logger = StdLoggerAdapter(level=log_level)
|
||||
temp_logger.warning(
|
||||
"SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). "
|
||||
"This mode has known security vulnerabilities in multi-user environments. "
|
||||
"Use at your own risk!"
|
||||
)
|
||||
# SECURITY: Always use ephemeral process-isolated cache
|
||||
cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
|
||||
# Register cleanup handler to remove cache on exit
|
||||
atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))
|
||||
|
||||
# Build boto3 client kwargs
|
||||
boto3_kwargs = {}
|
||||
@@ -1157,7 +1143,7 @@ def create_client(
|
||||
hasher = Sha256Adapter()
|
||||
storage = S3StorageAdapter(endpoint_url=endpoint_url, boto3_kwargs=boto3_kwargs)
|
||||
diff = XdeltaAdapter()
|
||||
cache = FsCacheAdapter(actual_cache_dir, hasher)
|
||||
cache = FsCacheAdapter(cache_dir, hasher)
|
||||
clock = UtcClockAdapter()
|
||||
logger = StdLoggerAdapter(level=log_level)
|
||||
metrics = NoopMetricsAdapter()
|
||||
|
||||
@@ -124,7 +124,7 @@ class MockStorage:
|
||||
@pytest.fixture
|
||||
def client(tmp_path):
|
||||
"""Create a client with mocked storage."""
|
||||
client = create_client(cache_dir=str(tmp_path / "cache"))
|
||||
client = create_client()
|
||||
|
||||
# Replace storage with mock
|
||||
mock_storage = MockStorage()
|
||||
@@ -156,7 +156,6 @@ class TestCredentialHandling:
|
||||
aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
|
||||
aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
||||
region_name="us-west-2",
|
||||
cache_dir=str(tmp_path / "cache"),
|
||||
)
|
||||
|
||||
# Verify the client was created
|
||||
@@ -179,7 +178,6 @@ class TestCredentialHandling:
|
||||
aws_access_key_id="ASIAIOSFODNN7EXAMPLE",
|
||||
aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
||||
aws_session_token="FwoGZXIvYXdzEBEaDH...",
|
||||
cache_dir=str(tmp_path / "cache"),
|
||||
)
|
||||
|
||||
assert client is not None
|
||||
@@ -188,7 +186,7 @@ class TestCredentialHandling:
|
||||
def test_create_client_without_credentials_uses_environment(self, tmp_path):
|
||||
"""Test that omitting credentials falls back to environment/IAM."""
|
||||
# This should use boto3's default credential chain
|
||||
client = create_client(cache_dir=str(tmp_path / "cache"))
|
||||
client = create_client()
|
||||
|
||||
assert client is not None
|
||||
assert client.service.storage.client is not None
|
||||
@@ -199,7 +197,6 @@ class TestCredentialHandling:
|
||||
endpoint_url="http://localhost:9000",
|
||||
aws_access_key_id="minioadmin",
|
||||
aws_secret_access_key="minioadmin",
|
||||
cache_dir=str(tmp_path / "cache"),
|
||||
)
|
||||
|
||||
assert client is not None
|
||||
|
||||
@@ -71,7 +71,7 @@ def mock_storage():
|
||||
def client(tmp_path):
|
||||
"""Create DeltaGliderClient with mock storage."""
|
||||
# Use create_client to get a properly configured client
|
||||
client = create_client(cache_dir=str(tmp_path / "cache"))
|
||||
client = create_client()
|
||||
|
||||
# Replace storage with mock
|
||||
mock_storage = MockStorage()
|
||||
|
||||
Reference in New Issue
Block a user