diff --git a/SECURITY_FIX_ROADMAP.md b/SECURITY_FIX_ROADMAP.md new file mode 100644 index 0000000..b5cf0db --- /dev/null +++ b/SECURITY_FIX_ROADMAP.md @@ -0,0 +1,542 @@ +# 🛡️ DeltaGlider Security Fix Roadmap + +## Executive Summary +Critical security vulnerabilities have been identified in DeltaGlider's cache system that enable multi-user attacks, data exposure, and cache poisoning. This document provides a **chronological, actionable roadmap** to eliminate these threats through bold architectural changes. + +**Key Innovation**: Instead of patching individual issues, we propose a **"Zero-Trust Cache Architecture"** that eliminates entire classes of vulnerabilities. + +--- + +## 🚀 The Bold Solution: Ephemeral Signed Cache + +### Core Concept +Replace filesystem cache with **ephemeral, cryptographically-signed, user-isolated cache** that eliminates: +- TOCTOU vulnerabilities (no shared filesystem) +- Multi-user interference (process isolation) +- Cache poisoning (cryptographic signatures) +- Information disclosure (encrypted metadata) +- Cross-endpoint collision (content-addressed storage) + +**Note**: DeltaGlider is designed as a standalone CLI/SDK application. All solutions maintain this architecture without requiring external services. + +--- + +## 📋 Implementation Roadmap + +### **DAY 1-2: Emergency Hotfix** (v5.0.3) +*Stop the bleeding - minimal changes for immediate deployment* + +#### 1. **Disable Shared Cache Mode** (2 hours) +```python +# src/deltaglider/app/cli/main.py +import tempfile +import os + +def create_service(...): + # CRITICAL: Use process-specific temp directory + if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": + cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) + atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) + else: + # Legacy mode with warning + cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/cache")) + logger.warning("UNSAFE: Shared cache mode enabled. 
def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path:
    """Return the cached reference path after verifying its SHA256.

    The file is opened directly instead of being probed with ``exists()``
    first, so there is no gap between the existence check and the read.
    Content is hashed in fixed-size chunks while a shared ``flock`` is held,
    which keeps memory use constant for large reference files.

    Args:
        bucket: S3 bucket name.
        prefix: Prefix/deltaspace within the bucket.
        expected_sha: Hex SHA256 digest the cached file must have.

    Returns:
        Path of the cached file whose content matched ``expected_sha`` at
        the time it was read.

    Raises:
        CacheMissError: The cached file does not exist.
        CacheCorruptionError: The content hash differs from ``expected_sha``;
            the offending file is removed before raising.
    """
    path = self.ref_path(bucket, prefix)
    digest = hashlib.sha256()

    try:
        with open(path, "rb") as f:
            # Shared advisory lock for the duration of the read.
            fcntl.flock(f.fileno(), fcntl.LOCK_SH)
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
    except FileNotFoundError:
        raise CacheMissError(f"Cache miss for {bucket}/{prefix}") from None

    if digest.hexdigest() != expected_sha:
        # missing_ok: a concurrent cleanup must not mask the corruption error.
        path.unlink(missing_ok=True)
        raise CacheCorruptionError("SHA mismatch: cache corrupted")

    return path
def secure_makedirs(path: Path, mode: int = 0o700) -> None:
    """Create ``path`` as a directory with exactly ``mode`` permissions.

    If the path already exists it is only accepted when it is a real
    directory (not a symlink or a regular file) owned by the current user;
    its permissions are then corrected if they differ from ``mode``.

    Args:
        path: Directory to create. Missing parents are created as well; note
            that ``Path.mkdir`` applies ``mode`` only to the final component.
        mode: Permission bits the directory must end up with.

    Raises:
        NotADirectoryError: ``path`` exists but is a file or a symlink.
        SecurityError: ``path`` exists and is owned by a different user.
    """
    try:
        path.mkdir(parents=True, mode=mode, exist_ok=False)
    except FileExistsError:
        # lstat, not stat: a planted symlink must not be followed and then
        # mistaken for a directory we own.
        st = path.lstat()
        if not stat.S_ISDIR(st.st_mode):
            raise NotADirectoryError(
                f"{path} exists and is not a directory"
            ) from None
        if st.st_uid != os.getuid():
            raise SecurityError(f"Directory {path} owned by different user")
        if stat.S_IMODE(st.st_mode) != mode:
            os.chmod(path, mode)  # Fix permissions
    else:
        # mkdir's mode is masked by the process umask; enforce it explicitly.
        os.chmod(path, mode)
def get_cache_dir() -> "Path | None":
    """Single source of truth for the cache directory.

    Resolution order:

    1. ``DG_NO_CACHE == "true"``: caching is disabled, ``None`` is returned.
    2. ``DG_EPHEMERAL_CACHE == "true"``: a fresh temporary directory with the
       ``dg-cache-`` prefix is created and returned.
    3. Otherwise ``DG_CACHE_DIR`` (default ``~/.cache/deltaglider``) with a
       ``v2`` sub-directory that versions the cache format.

    Returns:
        The cache directory, or ``None`` when caching is disabled. Only the
        ephemeral directory is created here; the persistent one is just a
        computed path.
    """
    # Local import: the surrounding module header only imports os and Path.
    import tempfile

    if os.environ.get("DG_NO_CACHE") == "true":
        return None  # Feature flag to disable cache

    if os.environ.get("DG_EPHEMERAL_CACHE") == "true":
        return Path(tempfile.mkdtemp(prefix="dg-cache-"))

    # User-specific cache by default
    cache_base = os.environ.get(
        "DG_CACHE_DIR", os.path.expanduser("~/.cache/deltaglider")
    )
    return Path(cache_base) / "v2"  # Version cache format
class SignedCache(CachePort):
    """Cache with cryptographic signatures and expiry.

    Every cached reference is stored as ``<sha256>.bin`` next to a
    ``<sha256>.meta`` JSON file holding the entry metadata and an
    HMAC-SHA256 signature over it. Entries live under
    ``base_dir / signature[:8]``. An entry is served only when its signature
    verifies with this instance's secret, it has not expired, and the data
    file still hashes to the requested SHA256.
    """

    def __init__(self, base_dir: Path, secret_key: bytes = None):
        """Store the cache root and the signing secret.

        Args:
            base_dir: Root directory for signed entries.
            secret_key: HMAC key. When omitted (or empty) a random 32-byte
                per-session key is generated, so entries written by other
                instances will not verify.
        """
        self.base_dir = base_dir
        # Per-session key if not provided
        self.secret = secret_key or os.urandom(32)

    def _sign_metadata(self, metadata: dict) -> str:
        """Create HMAC signature for metadata."""
        # sort_keys makes the serialisation canonical so the signature is
        # independent of dict ordering.
        json_meta = json.dumps(metadata, sort_keys=True)
        return hmac.new(
            self.secret,
            json_meta.encode(),
            hashlib.sha256,
        ).hexdigest()

    @staticmethod
    def _sha256_file(path: Path) -> str:
        """Return the hex SHA256 of ``path``, read in fixed-size chunks."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _discard(*paths: Path) -> None:
        """Best-effort removal of the files of a rejected cache entry."""
        for p in paths:
            try:
                p.unlink()
            except OSError:
                pass  # Already gone or not removable; the entry is skipped anyway

    def write_ref(self, bucket: str, prefix: str, src: Path, sha256: str) -> Path:
        """Copy ``src`` into the cache and write its signed metadata.

        Args:
            bucket: S3 bucket name recorded in the metadata.
            prefix: Prefix/deltaspace recorded in the metadata.
            src: File to cache.
            sha256: Hex SHA256 used as the entry's file name.

        Returns:
            Path of the cached data file.
        """
        # Create signed metadata
        now = datetime.utcnow()
        metadata = {
            "sha256": sha256,
            "bucket": bucket,
            "prefix": prefix,
            "timestamp": now.isoformat(),
            "expires": (now + timedelta(hours=24)).isoformat(),
            "pid": os.getpid(),
            "uid": os.getuid(),
        }
        signature = self._sign_metadata(metadata)

        # Store data + metadata
        cache_dir = self.base_dir / signature[:8]  # Use signature prefix as namespace
        cache_dir.mkdir(parents=True, mode=0o700, exist_ok=True)

        data_path = cache_dir / f"{sha256}.bin"
        meta_path = cache_dir / f"{sha256}.meta"
        tmp_suffix = f".{os.getpid()}.tmp"
        tmp_data = cache_dir / f"{sha256}.bin{tmp_suffix}"
        tmp_meta = cache_dir / f"{sha256}.meta{tmp_suffix}"

        # Atomic writes: fill a private temp file, then rename it into place,
        # so readers never observe a partially written entry.
        try:
            shutil.copy2(src, tmp_data)
            os.chmod(tmp_data, 0o600)
            os.replace(tmp_data, data_path)

            with open(tmp_meta, "w") as f:
                json.dump({"metadata": metadata, "signature": signature}, f)
            os.chmod(tmp_meta, 0o600)
            os.replace(tmp_meta, meta_path)
        finally:
            # No-op on success (both temp files were renamed away).
            self._discard(tmp_data, tmp_meta)

        return data_path

    def get_validated_ref(self, bucket: str, prefix: str, sha256: str) -> Path:
        """Return the data path of a valid signed entry for ``sha256``.

        Entries that are unreadable, malformed, carry a bad signature, have
        expired, or whose data no longer matches ``sha256`` are removed
        (metadata and data file) and skipped.

        Raises:
            CacheMissError: No valid entry exists for ``sha256``.
        """
        # Materialise the matches first: entries are deleted while iterating.
        matches = list(Path(self.base_dir).glob(f"*/{sha256}.meta"))

        for meta_path in matches:
            data_path = meta_path.with_suffix(".bin")

            try:
                with open(meta_path) as f:
                    entry = json.load(f)

                # Verify signature
                expected_sig = self._sign_metadata(entry["metadata"])
                signature_ok = hmac.compare_digest(entry["signature"], expected_sig)

                # Check expiry
                expires = datetime.fromisoformat(entry["metadata"]["expires"])
                expired = datetime.utcnow() > expires
            except (OSError, ValueError, KeyError, TypeError):
                # Unreadable or malformed metadata is treated like tampering.
                self._discard(meta_path, data_path)
                continue

            if not signature_ok or expired:
                self._discard(meta_path, data_path)
                continue

            # Validate data integrity
            try:
                actual_sha = self._sha256_file(data_path)
            except OSError:
                self._discard(meta_path, data_path)
                continue
            if actual_sha != sha256:
                self._discard(meta_path, data_path)
                continue

            return data_path

        raise CacheMissError(f"No valid cache entry for {sha256}")
class CacheIntegrityMonitor:
    """Detect and alert on cache tampering attempts.

    Registers a watch on the cache directory for modify, delete and
    attribute-change events and reports suspicious ones through the
    ``"security"`` logger.

    NOTE(review): relies on a third-party ``inotify`` module exposing
    ``INotify``, ``add_watch`` and async iteration, and on a
    ``_is_our_modification`` helper that is not defined in this class —
    confirm both before use.
    """

    def __init__(self, cache_dir: Path) -> None:
        """Start watching ``cache_dir`` and set up the security logger."""
        self.cache_dir = cache_dir
        self.notifier = inotify.INotify()
        # IN_DELETE is subscribed here, but monitor() below only acts on
        # IN_MODIFY and IN_ATTRIB events.
        self.watch_desc = self.notifier.add_watch(
            str(cache_dir),
            inotify.IN_MODIFY | inotify.IN_DELETE | inotify.IN_ATTRIB
        )
        self.logger = logging.getLogger("security")

    async def monitor(self) -> None:
        """Monitor for unauthorized cache modifications.

        Consumes events from the notifier. A modification not attributed to
        this process is logged at CRITICAL level and the affected file is
        deleted; an attribute change is logged at WARNING level.
        """
        async for event in self.notifier:
            if event.mask & inotify.IN_MODIFY:
                # File modified - verify it was by our process
                if not self._is_our_modification(event):
                    self.logger.critical(
                        f"SECURITY: Unauthorized cache modification detected: {event.path}"
                    )
                    # Immediately invalidate affected cache
                    Path(event.path).unlink(missing_ok=True)

            elif event.mask & inotify.IN_ATTRIB:
                # Permission change - always suspicious
                self.logger.warning(
                    f"SECURITY: Cache permission change: {event.path}"
                )
class TestCacheSecurity:
    """Test all known attack vectors.

    The ``cache`` fixture and the ``test_file`` sample are expected to be
    provided by the surrounding test module.
    """

    def test_toctou_attack_prevented(self, cache):
        """Verify TOCTOU window is eliminated."""
        sha = "abc123"
        cache.write_ref("bucket", "prefix", test_file, sha)

        # Attacker thread tries to replace file during read
        def attacker():
            time.sleep(0.0001)  # Try to hit the TOCTOU window
            cache_path = cache.ref_path("bucket", "prefix", sha)
            cache_path.write_bytes(b"malicious")

        thread = threading.Thread(target=attacker)
        thread.start()

        try:
            # Should detect tampering
            with pytest.raises(CacheCorruptionError):
                cache.get_validated_ref("bucket", "prefix", sha)
        finally:
            # Never leave the attacker running into the next test.
            thread.join()

    def test_multi_user_isolation(self, cache, tmp_path):
        """Verify users can't access each other's cache."""
        # A per-test directory instead of a predictable shared /tmp path.
        shared_dir = tmp_path / "cache"

        # Create cache as user A
        cache_a = SignedCache(shared_dir, secret_key=b"key_a")
        cache_a.write_ref("bucket", "prefix", test_file, "sha_a")

        # Try to read as user B with different key
        cache_b = SignedCache(shared_dir, secret_key=b"key_b")

        with pytest.raises(CacheMissError):
            cache_b.get_validated_ref("bucket", "prefix", "sha_a")

    def test_cache_poisoning_prevented(self, cache):
        """Verify corrupted cache is detected."""
        sha = "abc123"
        cache.write_ref("bucket", "prefix", test_file, sha)

        # Corrupt the cache file
        cache_path = cache.ref_path("bucket", "prefix", sha)
        with open(cache_path, "ab") as f:
            f.write(b"corrupted")

        # Should detect corruption
        with pytest.raises(CacheCorruptionError):
            cache.get_validated_ref("bucket", "prefix", sha)
def migrate_cache(old_cache: "Path | None" = None) -> None:
    """Migrate from v5 shared cache to v6 secure cache.

    The old cache is never migrated. If it exists the user is asked whether
    to delete it; without an interactive terminal nothing is deleted and
    the cache is retained.

    Args:
        old_cache: Location of the legacy cache. Defaults to
            ``/tmp/.deltaglider/cache``.
            NOTE(review): the v5 CLI default shown in ``cli/main.py`` is
            ``/tmp/.deltaglider/reference_cache`` — confirm which location
            should be the default here.
    """
    # Local import: only needed for the interactivity check.
    import sys

    old_cache = Path("/tmp/.deltaglider/cache") if old_cache is None else Path(old_cache)

    if not old_cache.exists():
        return

    print("WARNING: Old insecure cache detected at", old_cache)
    print("This cache had security vulnerabilities and will not be migrated.")

    # Only prompt when a human can answer; input() would otherwise raise or
    # block in CI, cron jobs and piped invocations.
    response = ""
    if sys.stdin is not None and sys.stdin.isatty():
        try:
            response = input("Delete old cache? [y/N]: ")
        except EOFError:
            response = ""

    if response.strip().lower() == "y":
        shutil.rmtree(old_cache)
        print("Old cache deleted. New secure cache will be created on demand.")
    else:
        print("Old cache retained at", old_cache)
        print("Set DG_CACHE_DIR to use a different location.")
def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path:
    """Get cached reference, validating its SHA256 at the time of the call.

    The file is opened directly (no separate ``exists()`` probe) and hashed
    in fixed-size chunks, so memory use stays constant for large references.
    On Unix a shared advisory ``flock`` is held while reading. The lock is
    released before the path is returned, so the check covers the content as
    read here, not later modifications.

    Args:
        bucket: S3 bucket name
        prefix: Prefix/deltaspace within bucket
        expected_sha: Expected SHA256 hash

    Returns:
        Path to validated cached file

    Raises:
        CacheMissError: File not found in cache, or it could not be read
        CacheCorruptionError: SHA mismatch detected (the file is removed)
    """
    path = self.ref_path(bucket, prefix)
    digest = hashlib.sha256()

    try:
        with open(path, "rb") as f:
            # Acquire shared lock (Unix only); released when the file closes.
            if sys.platform != "win32":
                fcntl.flock(f.fileno(), fcntl.LOCK_SH)

            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
    except FileNotFoundError:
        raise CacheMissError(f"Cache miss for {bucket}/{prefix}") from None
    except OSError as e:
        raise CacheMissError(f"Cache read error for {bucket}/{prefix}: {e}") from e

    actual_sha = digest.hexdigest()
    if actual_sha != expected_sha:
        # File corrupted or tampered - remove it
        try:
            path.unlink()
        except OSError:
            pass  # Best effort cleanup

        raise CacheCorruptionError(
            f"Cache corruption detected for {bucket}/{prefix}: "
            f"expected {expected_sha}, got {actual_sha}"
        )

    return path
float(os.environ.get("DG_MAX_RATIO", "0.5")) metrics_type = os.environ.get("DG_METRICS", "logging") # Options: noop, logging, cloudwatch + # SECURITY: Use ephemeral cache by default to prevent multi-user attacks + if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": + # Create process-specific temporary cache directory + cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) + # Register cleanup handler to remove cache on exit + atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True)) + else: + # Legacy shared cache mode - UNSAFE in multi-user environments + cache_dir = Path(os.environ.get("DG_CACHE_DIR", "/tmp/.deltaglider/reference_cache")) + # Create logger early to issue warning + temp_logger = StdLoggerAdapter(level=log_level) + temp_logger.warning( + "SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). " + "This mode has known security vulnerabilities in multi-user environments. " + "Use at your own risk!" + ) + # Set AWS environment variables if provided if endpoint_url: os.environ["AWS_ENDPOINT_URL"] = endpoint_url diff --git a/src/deltaglider/client.py b/src/deltaglider/client.py index 0d54f57..d01e358 100644 --- a/src/deltaglider/client.py +++ b/src/deltaglider/client.py @@ -1,6 +1,9 @@ """DeltaGlider client with boto3-compatible APIs and advanced features.""" # ruff: noqa: I001 +import atexit +import os +import shutil import tempfile from collections.abc import Callable from pathlib import Path @@ -1122,6 +1125,23 @@ def create_client( XdeltaAdapter, ) + # SECURITY: Use ephemeral cache by default to prevent multi-user attacks + if os.environ.get("DG_UNSAFE_SHARED_CACHE") != "true": + # Create process-specific temporary cache directory + actual_cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp")) + # Register cleanup handler to remove cache on exit + atexit.register(lambda: shutil.rmtree(actual_cache_dir, ignore_errors=True)) + else: + # Legacy shared cache mode - UNSAFE in multi-user 
environments + actual_cache_dir = Path(cache_dir) + # Create logger early to issue warning + temp_logger = StdLoggerAdapter(level=log_level) + temp_logger.warning( + "SECURITY WARNING: Shared cache mode enabled (DG_UNSAFE_SHARED_CACHE=true). " + "This mode has known security vulnerabilities in multi-user environments. " + "Use at your own risk!" + ) + # Build boto3 client kwargs boto3_kwargs = {} if aws_access_key_id is not None: @@ -1137,7 +1157,7 @@ def create_client( hasher = Sha256Adapter() storage = S3StorageAdapter(endpoint_url=endpoint_url, boto3_kwargs=boto3_kwargs) diff = XdeltaAdapter() - cache = FsCacheAdapter(Path(cache_dir), hasher) + cache = FsCacheAdapter(actual_cache_dir, hasher) clock = UtcClockAdapter() logger = StdLoggerAdapter(level=log_level) metrics = NoopMetricsAdapter() diff --git a/src/deltaglider/core/errors.py b/src/deltaglider/core/errors.py index 45f7291..0f84d59 100644 --- a/src/deltaglider/core/errors.py +++ b/src/deltaglider/core/errors.py @@ -47,3 +47,15 @@ class PolicyViolationWarning(Warning): """Policy violation warning.""" pass + + +class CacheMissError(DeltaGliderError): + """Cache miss - file not found in cache.""" + + pass + + +class CacheCorruptionError(DeltaGliderError): + """Cache corruption - SHA mismatch or tampering detected.""" + + pass diff --git a/src/deltaglider/core/service.py b/src/deltaglider/core/service.py index 392f32e..61545d2 100644 --- a/src/deltaglider/core/service.py +++ b/src/deltaglider/core/service.py @@ -230,7 +230,10 @@ class DeltaService: with tempfile.TemporaryDirectory() as tmpdir: tmp_path = Path(tmpdir) delta_path = tmp_path / "delta" - ref_path = self.cache.ref_path(delta_space.bucket, delta_space.prefix) + # SECURITY: Use validated ref to prevent TOCTOU attacks + ref_path = self.cache.get_validated_ref( + delta_space.bucket, delta_space.prefix, delta_meta.ref_sha256 + ) out_path = tmp_path / "output" # Download delta @@ -408,7 +411,8 @@ class DeltaService: if not cache_hit: 
def get_validated_ref(self, bucket: str, prefix: str, expected_sha: str) -> Path:
    """Get cached reference, validating its SHA256 when called.

    Callers should use this instead of ref_path() so that the hash is
    checked at the time of use, not just at cache check time.

    NOTE(review): the return value is a plain Path, so this contract by
    itself does not cover changes made to the file after the call returns —
    confirm whether implementations are expected to guarantee more.

    Args:
        bucket: S3 bucket name
        prefix: Prefix/deltaspace within bucket
        expected_sha: Expected SHA256 hash of the file

    Returns:
        Path to the validated cached file

    Raises:
        CacheMissError: If cached file doesn't exist
        CacheCorruptionError: If SHA doesn't match (file corrupted or tampered)
    """
    ...