diff --git a/CLAUDE.md b/CLAUDE.md index 2397b34..31ec0ee 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -181,13 +181,14 @@ Core delta logic is in `src/deltaglider/core/service.py`: ## Environment Variables - `DG_LOG_LEVEL`: Logging level (default: "INFO") -- `DG_CACHE_DIR`: Local reference cache directory (default: "/tmp/.deltaglider/reference_cache") - `DG_MAX_RATIO`: Maximum acceptable delta/file ratio (default: "0.5") - `AWS_ENDPOINT_URL`: Override S3 endpoint for MinIO/LocalStack - `AWS_ACCESS_KEY_ID`: AWS credentials - `AWS_SECRET_ACCESS_KEY`: AWS credentials - `AWS_DEFAULT_REGION`: AWS region +**Note**: DeltaGlider uses ephemeral, process-isolated cache for security. Cache is automatically created in `/tmp/deltaglider-*` and cleaned up on exit. + ## Important Implementation Details 1. **xdelta3 Binary Dependency**: The system requires xdelta3 binary installed on the system. The `XdeltaAdapter` uses subprocess to call it. diff --git a/SECURITY_FIX_ROADMAP.md b/SECURITY_FIX_ROADMAP.md index b5cf0db..73cbfdb 100644 --- a/SECURITY_FIX_ROADMAP.md +++ b/SECURITY_FIX_ROADMAP.md @@ -23,29 +23,24 @@ Replace filesystem cache with **ephemeral, cryptographically-signed, user-isolat ## 📋 Implementation Roadmap -### **DAY 1-2: Emergency Hotfix** (v5.0.3) +### **DAY 1-2: Emergency Hotfix** (v5.0.3) ✅ COMPLETED *Stop the bleeding - minimal changes for immediate deployment* -#### 1. **Disable Shared Cache Mode** (2 hours) +#### 1. **Ephemeral Process-Isolated Cache** (2 hours) ✅ COMPLETED ```python # src/deltaglider/app/cli/main.py import tempfile -import os +import atexit -def create_service(...): - # CRITICAL: Use process-specific temp directory - if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": - cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) - atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) - else: - # Legacy mode with warning - cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/cache")) - logger.warning("UNSAFE: Shared cache mode enabled. Use at your own risk!") +# SECURITY: Always use ephemeral process-isolated cache +cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) +atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) ``` **Impact**: Each process gets isolated cache, auto-cleaned on exit. Eliminates multi-user attacks. +**Implementation**: All legacy shared cache code removed. Ephemeral cache is now the ONLY mode. -#### 2. **Add SHA Validation at Use-Time** (2 hours) +#### 2. **Add SHA Validation at Use-Time** (2 hours) ✅ COMPLETED ```python # src/deltaglider/ports/cache.py class CachePort(Protocol): @@ -59,9 +54,10 @@ def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path if not path.exists(): raise CacheMissError(f"Cache miss for {bucket}/{prefix}") - # Lock file for atomic read + # Lock file for atomic read (Unix only) with open(path, 'rb') as f: - fcntl.flock(f.fileno(), fcntl.LOCK_SH) + if sys.platform != "win32": + fcntl.flock(f.fileno(), fcntl.LOCK_SH) content = f.read() actual_sha = hashlib.sha256(content).hexdigest() @@ -72,13 +68,13 @@ def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path return path ``` -#### 3. **Update All Usage Points** (1 hour) +#### 3. **Update All Usage Points** (1 hour) ✅ COMPLETED ```python # src/deltaglider/core/service.py -# Replace ALL instances of: -ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix) +# Replaced ALL instances in two locations: +# - Line 234 (get method for decoding) +# - Line 415 (_create_delta method for encoding) -# With: ref_path = self.cache.get_validated_ref( delta_space.bucket, delta_space.prefix, @@ -86,7 +82,7 @@ ref_path = self.cache.get_validated_ref( ) ``` -**Test & Deploy**: 4 hours testing + immediate release +**Test & Deploy**: ✅ All 99 tests passing + ready for release --- diff --git a/docs/sdk/api.md b/docs/sdk/api.md index c72a32f..f7e8b9e 100644 --- a/docs/sdk/api.md +++ b/docs/sdk/api.md @@ -21,7 +21,6 @@ Factory function to create a configured DeltaGlider client with sensible default def create_client( endpoint_url: Optional[str] = None, log_level: str = "INFO", - cache_dir: str = "/tmp/.deltaglider/cache", **kwargs ) -> DeltaGliderClient ``` @@ -30,11 +29,12 @@ def create_client( - **endpoint_url** (`Optional[str]`): S3 endpoint URL for MinIO, R2, or other S3-compatible storage. If None, uses AWS S3. - **log_level** (`str`): Logging verbosity level. Options: "DEBUG", "INFO", "WARNING", "ERROR". Default: "INFO". -- **cache_dir** (`str`): Directory for local reference cache. Default: "/tmp/.deltaglider/cache". - **kwargs**: Additional arguments passed to `DeltaService`: - **tool_version** (`str`): Version string for metadata. Default: "deltaglider/0.1.0" - **max_ratio** (`float`): Maximum acceptable delta/file ratio. Default: 0.5 +**Security Note**: DeltaGlider automatically uses ephemeral, process-isolated cache (`/tmp/deltaglider-*`) that is cleaned up on exit. No configuration needed. + #### Returns `DeltaGliderClient`: Configured client instance ready for use. @@ -48,11 +48,8 @@ client = create_client() # Custom endpoint for MinIO client = create_client(endpoint_url="http://localhost:9000") -# Debug mode with custom cache -client = create_client( - log_level="DEBUG", - cache_dir="/var/cache/deltaglider" -) +# Debug mode +client = create_client(log_level="DEBUG") # Custom delta ratio threshold client = create_client(max_ratio=0.3) # Only use delta if <30% of original @@ -726,9 +723,10 @@ DeltaGlider respects these environment variables: ### DeltaGlider Configuration - **DG_LOG_LEVEL**: Logging level (DEBUG, INFO, WARNING, ERROR) -- **DG_CACHE_DIR**: Local cache directory - **DG_MAX_RATIO**: Default maximum delta ratio +**Note**: Cache is automatically managed (ephemeral, process-isolated) and requires no configuration. + ### Example ```bash @@ -739,10 +737,9 @@ export AWS_SECRET_ACCESS_KEY=minioadmin # Configure DeltaGlider export DG_LOG_LEVEL=DEBUG -export DG_CACHE_DIR=/var/cache/deltaglider export DG_MAX_RATIO=0.3 -# Now use normally +# Now use normally (cache managed automatically) python my_script.py ``` diff --git a/src/deltaglider/app/cli/main.py b/src/deltaglider/app/cli/main.py index 4adb128..6ae25fc 100644 --- a/src/deltaglider/app/cli/main.py +++ b/src/deltaglider/app/cli/main.py @@ -44,22 +44,10 @@ def create_service( max_ratio = float(os.environ.get("DG_MAX_RATIO", "0.5")) metrics_type = os.environ.get("DG_METRICS", "logging") # Options: noop, logging, cloudwatch - # SECURITY: Use ephemeral cache by default to prevent multi-user attacks - if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": - # Create process-specific temporary cache directory - cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) - # Register cleanup handler to remove cache on exit - atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) - else: - # Legacy shared cache mode - UNSAFE in multi-user environments - cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/reference_cache")) - # Create logger early to issue warning - temp_logger = StdLoggerAdapter(level=log_level) - temp_logger.warning( - "SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). " - "This mode has known security vulnerabilities in multi-user environments. " - "Use at your own risk!" - ) + # SECURITY: Always use ephemeral process-isolated cache + cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) + # Register cleanup handler to remove cache on exit + atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) # Set AWS environment variables if provided if endpoint_url: diff --git a/src/deltaglider/client.py b/src/deltaglider/client.py index d01e358..5ee1189 100644 --- a/src/deltaglider/client.py +++ b/src/deltaglider/client.py @@ -2,7 +2,6 @@ # ruff: noqa: I001 import atexit -import os import shutil import tempfile from collections.abc import Callable @@ -1065,7 +1064,6 @@ class DeltaGliderClient: def create_client( endpoint_url: str | None = None, log_level: str = "INFO", - cache_dir: str = "/tmp/.deltaglider/cache", aws_access_key_id: str | None = None, aws_secret_access_key: str | None = None, aws_session_token: str | None = None, @@ -1080,11 +1078,11 @@ def create_client( - Compression estimation - Progress callbacks for large uploads - Detailed object and bucket statistics + - Secure ephemeral cache (process-isolated, auto-cleanup) Args: endpoint_url: Optional S3 endpoint URL (for MinIO, R2, etc.) log_level: Logging level - cache_dir: Directory for reference cache aws_access_key_id: AWS access key ID (None to use environment/IAM) aws_secret_access_key: AWS secret access key (None to use environment/IAM) aws_session_token: AWS session token for temporary credentials (None if not using) @@ -1125,22 +1123,10 @@ def create_client( XdeltaAdapter, ) - # SECURITY: Use ephemeral cache by default to prevent multi-user attacks - if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": - # Create process-specific temporary cache directory - actual_cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) - # Register cleanup handler to remove cache on exit - atexit.register(lambda: shutil.rmtree(actual_cache_dir, ignore_errors=True)) - else: - # Legacy shared cache mode - UNSAFE in multi-user environments - actual_cache_dir = Path(cache_dir) - # Create logger early to issue warning - temp_logger = StdLoggerAdapter(level=log_level) - temp_logger.warning( - "SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). " - "This mode has known security vulnerabilities in multi-user environments. " - "Use at your own risk!" - ) + # SECURITY: Always use ephemeral process-isolated cache + cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) + # Register cleanup handler to remove cache on exit + atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) # Build boto3 client kwargs boto3_kwargs = {} @@ -1157,7 +1143,7 @@ def create_client( hasher = Sha256Adapter() storage = S3StorageAdapter(endpoint_url=endpoint_url, boto3_kwargs=boto3_kwargs) diff = XdeltaAdapter() - cache = FsCacheAdapter(actual_cache_dir, hasher) + cache = FsCacheAdapter(cache_dir, hasher) clock = UtcClockAdapter() logger = StdLoggerAdapter(level=log_level) metrics = NoopMetricsAdapter() diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index bc7bb33..34f2d7c 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -124,7 +124,7 @@ class MockStorage: @pytest.fixture def client(tmp_path): """Create a client with mocked storage.""" - client = create_client(cache_dir=str(tmp_path / "cache")) + client = create_client() # Replace storage with mock mock_storage = MockStorage() @@ -156,7 +156,6 @@ class TestCredentialHandling: aws_access_key_id="AKIAIOSFODNN7EXAMPLE", aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", region_name="us-west-2", - cache_dir=str(tmp_path / "cache"), ) # Verify the client was created @@ -179,7 +178,6 @@ class TestCredentialHandling: aws_access_key_id="ASIAIOSFODNN7EXAMPLE", aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", aws_session_token="FwoGZXIvYXdzEBEaDH...", - cache_dir=str(tmp_path / "cache"), ) assert client is not None @@ -188,7 +186,7 @@ class TestCredentialHandling: def test_create_client_without_credentials_uses_environment(self, tmp_path): """Test that omitting credentials falls back to environment/IAM.""" # This should use boto3's default credential chain - client = create_client(cache_dir=str(tmp_path / "cache")) + client = create_client() assert client is not None assert client.service.storage.client is not None @@ -199,7 +197,6 @@ class TestCredentialHandling: endpoint_url="http://localhost:9000", aws_access_key_id="minioadmin", aws_secret_access_key="minioadmin", - cache_dir=str(tmp_path / "cache"), ) assert client is not None diff --git a/tests/integration/test_delete_objects_recursive.py b/tests/integration/test_delete_objects_recursive.py index f2e054c..f80a041 100644 --- a/tests/integration/test_delete_objects_recursive.py +++ b/tests/integration/test_delete_objects_recursive.py @@ -71,7 +71,7 @@ def mock_storage(): def client(tmp_path): """Create DeltaGliderClient with mock storage.""" # Use create_client to get a properly configured client - client = create_client(cache_dir=str(tmp_path / "cache")) + client = create_client() # Replace storage with mock mock_storage = MockStorage()