deltaglider-beshu-tech/src/deltaglider/client.py

"""DeltaGlider client with boto3-compatible APIs and advanced features."""

# ruff: noqa: I001
import atexit
import os
import shutil
import tempfile
from collections.abc import Callable
from pathlib import Path
from typing import Any, cast

from . import __version__
from .adapters.storage_s3 import S3StorageAdapter
from .client_delete_helpers import delete_with_delta_suffix
from .client_models import (
    BucketStats,
    CompressionEstimate,
    ObjectInfo,
    UploadSummary,
)

# fmt: off - Keep all client_operations imports together
from .client_operations import (
    create_bucket as _create_bucket,
    delete_bucket as _delete_bucket,
    download_batch as _download_batch,
    estimate_compression as _estimate_compression,
    find_similar_files as _find_similar_files,
    generate_presigned_post as _generate_presigned_post,
    generate_presigned_url as _generate_presigned_url,
    get_bucket_acl as _get_bucket_acl,
    get_bucket_stats as _get_bucket_stats,
    get_object_info as _get_object_info,
    list_buckets as _list_buckets,
    put_bucket_acl as _put_bucket_acl,
    upload_batch as _upload_batch,
    upload_chunked as _upload_chunked,
)

# fmt: on
from .client_operations.stats import StatsMode

from .core import DeltaService, DeltaSpace, ObjectKey
from .core.errors import NotFoundError
from .core.models import DeleteResult
from .core.object_listing import ObjectListing, list_objects_page
from .core.s3_uri import parse_s3_url
from .response_builders import (
    build_delete_response,
    build_get_response,
    build_list_objects_response,
    build_put_response,
)
from .types import CommonPrefix, S3Object


class DeltaGliderClient:
    """DeltaGlider client with boto3-compatible APIs and advanced features.

    Implements core boto3 S3 client methods (~21 methods covering 80% of use cases):
    - Object operations: put_object, get_object, delete_object, list_objects, head_object
    - Bucket operations: create_bucket, delete_bucket, list_buckets
    - Presigned URLs: generate_presigned_url, generate_presigned_post
    - Plus DeltaGlider extensions for compression stats and batch operations

    See BOTO3_COMPATIBILITY.md for complete compatibility matrix.
    """

    def __init__(self, service: DeltaService, endpoint_url: str | None = None):
        """Initialize client with service."""
        self.service = service
        self.endpoint_url = endpoint_url
        # Session-scoped bucket statistics cache (cleared with the client lifecycle)
        self._bucket_stats_cache: dict[str, dict[str, BucketStats]] = {}

    # -------------------------------------------------------------------------
    # Internal helpers
    # -------------------------------------------------------------------------

    def _invalidate_bucket_stats_cache(self, bucket: str | None = None) -> None:
        """Invalidate cached bucket statistics."""
        if bucket is None:
            self._bucket_stats_cache.clear()
        else:
            self._bucket_stats_cache.pop(bucket, None)

    def _store_bucket_stats_cache(
        self,
        bucket: str,
        mode: StatsMode,
        stats: BucketStats,
    ) -> None:
        """Store bucket statistics in the session cache."""
        bucket_cache = self._bucket_stats_cache.setdefault(bucket, {})
        bucket_cache[mode] = stats
        if mode == "detailed":
            bucket_cache["sampled"] = stats
            bucket_cache["quick"] = stats
        elif mode == "sampled":
            bucket_cache.setdefault("quick", stats)

    def _get_cached_bucket_stats(self, bucket: str, mode: StatsMode) -> BucketStats | None:
        """Retrieve cached stats for a bucket, preferring more detailed metrics when available."""
        bucket_cache = self._bucket_stats_cache.get(bucket)
        if not bucket_cache:
            return None
        if mode == "detailed":
            return bucket_cache.get("detailed")
        if mode == "sampled":
            return bucket_cache.get("sampled") or bucket_cache.get("detailed")
        return (
            bucket_cache.get("quick") or bucket_cache.get("sampled") or bucket_cache.get("detailed")
        )

    def _get_cached_bucket_stats_for_listing(
        self, bucket: str
    ) -> tuple[BucketStats | None, StatsMode | None]:
        """Return best cached stats for bucket listings."""
        bucket_cache = self._bucket_stats_cache.get(bucket)
        if not bucket_cache:
            return (None, None)
        if "detailed" in bucket_cache:
            return (bucket_cache["detailed"], "detailed")
        if "sampled" in bucket_cache:
            return (bucket_cache["sampled"], "sampled")
        if "quick" in bucket_cache:
            return (bucket_cache["quick"], "quick")
        return (None, None)

    # ============================================================================
    # Boto3-compatible APIs (matches S3 client interface)
    # ============================================================================

    def put_object(
        self,
        Bucket: str,
        Key: str,
        Body: bytes | str | Path | None = None,
        Metadata: dict[str, str] | None = None,
        ContentType: str | None = None,
        Tagging: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Upload an object to S3 with delta compression (boto3-compatible).

        This method uses DeltaGlider's delta compression for archive files.
        Files will be stored as .delta when appropriate (subsequent similar files).
        The GET operation transparently reconstructs the original file.

        Args:
            Bucket: S3 bucket name
            Key: Object key (specifies the deltaspace and filename)
            Body: Object data (bytes, string, or file path)
            Metadata: Object metadata
            ContentType: MIME type (currently unused but kept for compatibility)
            Tagging: Object tags as URL-encoded string (currently unused)
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with ETag and compression info
        """
        import tempfile

        # Handle Body parameter
        if Body is None:
            raise ValueError("Body parameter is required")

        # Write body to a temporary file for DeltaService.put()
        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(Key).suffix) as tmp_file:
            tmp_path = Path(tmp_file.name)

            # Write Body to temp file
            if isinstance(Body, bytes):
                tmp_file.write(Body)
            elif isinstance(Body, str):
                tmp_file.write(Body.encode("utf-8"))
            elif isinstance(Body, Path):
                tmp_file.write(Body.read_bytes())
            else:
                # Handle any other type by converting to string path
                path_str = str(Body)
                try:
                    tmp_file.write(Path(path_str).read_bytes())
                except Exception as e:
                    raise ValueError(
                        f"Invalid Body parameter: cannot read from {path_str}: {e}"
                    ) from e

        try:
            # Extract deltaspace prefix from Key
            # If Key has path separators, use parent as prefix
            key_path = Path(Key)
            if "/" in Key:
                # Use the parent directories as the deltaspace prefix
                prefix = str(key_path.parent)
                # Copy temp file with original filename for proper extension detection
                named_tmp = tmp_path.parent / key_path.name
                tmp_path.rename(named_tmp)
                tmp_path = named_tmp
            else:
                # No path, use empty prefix
                prefix = ""
                # Rename temp file to have the proper filename
                named_tmp = tmp_path.parent / Key
                tmp_path.rename(named_tmp)
                tmp_path = named_tmp

            # Create DeltaSpace and use DeltaService for compression
            delta_space = DeltaSpace(bucket=Bucket, prefix=prefix)

            # Use the service to put the file (handles delta compression automatically)
            summary = self.service.put(tmp_path, delta_space, max_ratio=0.5)

            # Calculate ETag from file content
            sha256_hash = self.service.hasher.sha256(tmp_path)

            # Build DeltaGlider compression info
            deltaglider_info: dict[str, Any] = {
                "OriginalSizeMB": summary.file_size / (1024 * 1024),
                "StoredSizeMB": (summary.delta_size or summary.file_size) / (1024 * 1024),
                "IsDelta": summary.delta_size is not None,
                "CompressionRatio": summary.delta_ratio or 1.0,
                "SavingsPercent": (
                    (
                        (summary.file_size - (summary.delta_size or summary.file_size))
                        / summary.file_size
                        * 100
                    )
                    if summary.file_size > 0
                    else 0.0
                ),
                "StoredAs": summary.key,
                "Operation": summary.operation,
            }

            # Return as dict[str, Any] for public API (TypedDict is a dict at runtime!)
            response = cast(
                dict[str, Any],
                build_put_response(
                    etag=f'"{sha256_hash}"',
                    deltaglider_info=deltaglider_info,
                ),
            )
            self._invalidate_bucket_stats_cache(Bucket)
            return response
        finally:
            # Clean up temp file
            if tmp_path.exists():
                tmp_path.unlink()

    def get_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Download an object from S3 (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with Body stream and metadata
        """
        # Download to temp file
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp_path = Path(tmp.name)

        self.download(
            s3_url=f"s3://{Bucket}/{Key}",
            output_path=tmp_path,
        )

        # Open file for streaming
        body = open(tmp_path, "rb")

        # Get metadata
        obj_head = self.service.storage.head(f"{Bucket}/{Key}")
        file_size = tmp_path.stat().st_size
        etag = f'"{self.service.hasher.sha256(tmp_path)}"'

        # Return as dict[str, Any] for public API (TypedDict is a dict at runtime!)
        return cast(
            dict[str, Any],
            build_get_response(
                body=body,  # type: ignore[arg-type]  # File object is compatible with bytes
                content_length=file_size,
                etag=etag,
                metadata=obj_head.metadata if obj_head else {},
            ),
        )

    def list_objects(
        self,
        Bucket: str,
        Prefix: str = "",
        Delimiter: str = "",
        MaxKeys: int = 1000,
        ContinuationToken: str | None = None,
        StartAfter: str | None = None,
        FetchMetadata: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """List objects in bucket with smart metadata fetching.

        This method optimizes performance by:
        - Never fetching metadata for non-delta files (they don't need it)
        - Only fetching metadata for delta files when explicitly requested
        - Supporting efficient pagination for large buckets

        Args:
            Bucket: S3 bucket name
            Prefix: Filter results to keys beginning with prefix
            Delimiter: Delimiter for grouping keys (e.g., '/' for folders)
            MaxKeys: Maximum number of keys to return (for pagination)
            ContinuationToken: Token from previous response for pagination
            StartAfter: Start listing after this key (for pagination)
            FetchMetadata: If True, fetch metadata ONLY for delta files (default: False)
            **kwargs: Additional parameters for compatibility

        Returns:
            ListObjectsResponse with objects and pagination info

        Performance Notes:
            - With FetchMetadata=False: ~50ms for 1000 objects (1 S3 API call)
            - With FetchMetadata=True: ~2-3s for 1000 objects (1 + N delta files API calls)
            - Non-delta files NEVER trigger HEAD requests (no metadata needed)

        Example:
            # Fast listing for UI display (no metadata)
            response = client.list_objects(Bucket='releases', MaxKeys=100)

            # Paginated listing (boto3-compatible dict response)
            response = client.list_objects(
                Bucket='releases',
                MaxKeys=50,
                ContinuationToken=response.get('NextContinuationToken')
            )

            # Detailed listing with compression stats (slower, only for analytics)
            response = client.list_objects(
                Bucket='releases',
                FetchMetadata=True  # Only fetches for delta files
            )
        """
        start_after = StartAfter or ContinuationToken
        try:
            listing = list_objects_page(
                self.service.storage,
                bucket=Bucket,
                prefix=Prefix,
                delimiter=Delimiter,
                max_keys=MaxKeys,
                start_after=start_after,
            )
        except NotImplementedError:
            if isinstance(self.service.storage, S3StorageAdapter):
                listing = list_objects_page(
                    self.service.storage,
                    bucket=Bucket,
                    prefix=Prefix,
                    delimiter=Delimiter,
                    max_keys=MaxKeys,
                    start_after=start_after,
                )
            else:
                listing = ObjectListing()

        # Convert to boto3-compatible S3Object TypedDicts (type-safe!)
        contents: list[S3Object] = []
        for obj in listing.objects:
            # Skip reference.bin files (internal files, never exposed to users)
            if obj["key"].endswith("/reference.bin") or obj["key"] == "reference.bin":
                continue

            # Determine file type
            is_delta = obj["key"].endswith(".delta")

            # Remove .delta suffix from display key (hide internal implementation)
            display_key = obj["key"]
            if is_delta:
                display_key = display_key[:-6]  # Remove .delta suffix

            # Build DeltaGlider metadata
            deltaglider_metadata: dict[str, str] = {
                "deltaglider-is-delta": str(is_delta).lower(),
                "deltaglider-original-size": str(obj["size"]),
                "deltaglider-compression-ratio": "0.0" if not is_delta else "unknown",
            }

            # SMART METADATA FETCHING:
            # 1. NEVER fetch metadata for non-delta files (no point)
            # 2. Only fetch for delta files when explicitly requested
            if FetchMetadata and is_delta:
                try:
                    obj_head = self.service.storage.head(f"{Bucket}/{obj['key']}")
                    if obj_head and obj_head.metadata:
                        metadata = obj_head.metadata
                        # Update with actual compression stats
                        original_size = int(metadata.get("file_size", obj["size"]))
                        compression_ratio = float(metadata.get("compression_ratio", 0.0))
                        reference_key = metadata.get("ref_key")

                        deltaglider_metadata["deltaglider-original-size"] = str(original_size)
                        deltaglider_metadata["deltaglider-compression-ratio"] = str(
                            compression_ratio
                        )
                        if reference_key:
                            deltaglider_metadata["deltaglider-reference-key"] = reference_key
                except Exception as e:
                    # Log but don't fail the listing
                    self.service.logger.debug(f"Failed to fetch metadata for {obj['key']}: {e}")

            # Create boto3-compatible S3Object TypedDict - mypy validates structure!
            s3_obj: S3Object = {
                "Key": display_key,  # Use cleaned key without .delta
                "Size": obj["size"],
                "LastModified": obj.get("last_modified", ""),
                "ETag": str(obj.get("etag", "")),
                "StorageClass": obj.get("storage_class", "STANDARD"),
                "Metadata": deltaglider_metadata,
            }
            contents.append(s3_obj)

        # Build type-safe boto3-compatible CommonPrefix TypedDicts
        common_prefixes = listing.common_prefixes
        common_prefix_dicts: list[CommonPrefix] | None = (
            [CommonPrefix(Prefix=p) for p in common_prefixes] if common_prefixes else None
        )

        # Return as dict[str, Any] for public API (TypedDict is a dict at runtime!)
        return cast(
            dict[str, Any],
            build_list_objects_response(
                bucket=Bucket,
                prefix=Prefix,
                delimiter=Delimiter,
                max_keys=MaxKeys,
                contents=contents,
                common_prefixes=common_prefix_dicts,
                is_truncated=listing.is_truncated,
                next_continuation_token=listing.next_continuation_token,
                continuation_token=ContinuationToken,
            ),
        )

    def delete_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Delete an object with delta awareness (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key (can be with or without .delta suffix)
            **kwargs: Additional parameters

        Returns:
            Response dict with deletion details
        """
        _, delete_result = delete_with_delta_suffix(self.service, Bucket, Key)

        # Build DeltaGlider-specific info
        deltaglider_info: dict[str, Any] = {
            "Type": delete_result.type,
            "Deleted": delete_result.deleted,
        }

        # Add warnings if any
        if delete_result.warnings:
            deltaglider_info["Warnings"] = delete_result.warnings

        # Add dependent delta count for references
        if delete_result.dependent_deltas:
            deltaglider_info["DependentDeltas"] = delete_result.dependent_deltas

        # Return as dict[str, Any] for public API (TypedDict is a dict at runtime!)
        response = cast(
            dict[str, Any],
            build_delete_response(
                delete_marker=False,
                status_code=204,
                deltaglider_info=deltaglider_info,
            ),
        )
        self._invalidate_bucket_stats_cache(Bucket)
        return response

    def delete_objects(
        self,
        Bucket: str,
        Delete: dict[str, Any],
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Delete multiple objects with delta awareness (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Delete: Dict with 'Objects' list of {'Key': key} dicts
            **kwargs: Additional parameters

        Returns:
            Response dict with deleted objects
        """
        deleted = []
        errors = []
        delta_info = []

        for obj in Delete.get("Objects", []):
            key = obj["Key"]
            try:
                actual_key, delete_result = delete_with_delta_suffix(self.service, Bucket, key)

                deleted_item = {"Key": key}
                if actual_key != key:
                    deleted_item["StoredKey"] = actual_key
                if delete_result.type:
                    deleted_item["Type"] = delete_result.type
                if delete_result.warnings:
                    deleted_item["Warnings"] = delete_result.warnings

                deleted.append(deleted_item)

                # Track delta-specific info
                if delete_result.type in ("delta", "reference"):
                    delta_info.append(
                        {
                            "Key": key,
                            "StoredKey": actual_key,
                            "Type": delete_result.type,
                            "DependentDeltas": delete_result.dependent_deltas,
                        }
                    )

            except NotFoundError as e:
                errors.append(
                    {
                        "Key": key,
                        "Code": "NoSuchKey",
                        "Message": str(e),
                    }
                )
            except Exception as e:
                errors.append(
                    {
                        "Key": key,
                        "Code": "InternalError",
                        "Message": str(e),
                    }
                )

        response: dict[str, Any] = {"Deleted": deleted}
        if errors:
            response["Errors"] = errors

        if delta_info:
            response["DeltaGliderInfo"] = {
                "DeltaFilesDeleted": len([d for d in delta_info if d["Type"] == "delta"]),
                "ReferencesDeleted": len([d for d in delta_info if d["Type"] == "reference"]),
                "Details": delta_info,
            }

        response["ResponseMetadata"] = {"HTTPStatusCode": 200}
        self._invalidate_bucket_stats_cache(Bucket)
        return response

    def delete_objects_recursive(
        self,
        Bucket: str,
        Prefix: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Recursively delete all objects under a prefix with delta awareness.

        Args:
            Bucket: S3 bucket name
            Prefix: Prefix to delete recursively
            **kwargs: Additional parameters

        Returns:
            Response dict with deletion statistics
        """
        single_results: list[dict[str, Any]] = []
        single_errors: list[str] = []

        # First, attempt to delete the prefix as a direct object (with delta fallback)
        if Prefix and not Prefix.endswith("/"):
            candidate_keys = [Prefix]
            if not Prefix.endswith(".delta"):
                candidate_keys.append(f"{Prefix}.delta")

            seen_candidates = set()
            for candidate in candidate_keys:
                if candidate in seen_candidates:
                    continue
                seen_candidates.add(candidate)

                obj_head = self.service.storage.head(f"{Bucket}/{candidate}")
                if not obj_head:
                    continue

                try:
                    actual_key, single_del = delete_with_delta_suffix(
                        self.service, Bucket, candidate
                    )
                    if single_del.deleted:
                        single_results.append(
                            {
                                "requested_key": candidate,
                                "actual_key": actual_key,
                                "result": single_del,
                            }
                        )
                except Exception as e:
                    single_errors.append(f"Failed to delete {candidate}: {e}")

        # Use core service's delta-aware recursive delete for remaining objects
        recursive_result = self.service.delete_recursive(Bucket, Prefix)

        # Aggregate results
        single_deleted_count = len(single_results)
        single_counts = {"delta": 0, "reference": 0, "direct": 0, "other": 0}
        single_details = []
        single_warnings: list[str] = []

        for item in single_results:
            dr: DeleteResult = item["result"]
            requested_key = item["requested_key"]
            actual_key = item["actual_key"]
            result_type = dr.type if dr.type in single_counts else "other"
            single_counts[result_type] += 1
            detail: dict[str, Any] = {
                "Key": requested_key,
                "Type": dr.type,
                "DependentDeltas": dr.dependent_deltas,
                "Warnings": dr.warnings,
            }
            if actual_key != requested_key:
                detail["StoredKey"] = actual_key
            single_details.append(detail)
            if dr.warnings:
                single_warnings.extend(dr.warnings)

        deleted_count = recursive_result.deleted_count + single_deleted_count
        failed_count = recursive_result.failed_count + len(single_errors)

        deltas_deleted = recursive_result.deltas_deleted + single_counts["delta"]
        references_deleted = recursive_result.references_deleted + single_counts["reference"]
        direct_deleted = recursive_result.direct_deleted + single_counts["direct"]
        other_deleted = recursive_result.other_deleted + single_counts["other"]

        response: dict[str, Any] = {
            "ResponseMetadata": {
                "HTTPStatusCode": 200,
            },
            "DeletedCount": deleted_count,
            "FailedCount": failed_count,
            "DeltaGliderInfo": {
                "DeltasDeleted": deltas_deleted,
                "ReferencesDeleted": references_deleted,
                "DirectDeleted": direct_deleted,
                "OtherDeleted": other_deleted,
            },
        }

        if recursive_result.errors:
            response["Errors"] = recursive_result.errors

        if recursive_result.warnings:
            response["Warnings"] = recursive_result.warnings

        if single_errors:
            errors_list = cast(list[str], response.setdefault("Errors", []))
            errors_list.extend(single_errors)

        if single_warnings:
            warnings_list = cast(list[str], response.setdefault("Warnings", []))
            warnings_list.extend(single_warnings)

        if single_details:
            response["DeltaGliderInfo"]["SingleDeletes"] = single_details  # type: ignore[index]

        self._invalidate_bucket_stats_cache(Bucket)
        return response

    def head_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Get object metadata (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            **kwargs: Additional parameters

        Returns:
            Response dict with object metadata
        """
        obj_head = self.service.storage.head(f"{Bucket}/{Key}")
        if not obj_head:
            raise FileNotFoundError(f"Object not found: s3://{Bucket}/{Key}")

        return {
            "ContentLength": obj_head.size,
            "ContentType": obj_head.metadata.get("content_type", "binary/octet-stream"),
            "ETag": obj_head.metadata.get("etag", ""),
            "LastModified": obj_head.metadata.get("last_modified", ""),
            "Metadata": obj_head.metadata,
            "ResponseMetadata": {
                "HTTPStatusCode": 200,
            },
        }

    # ============================================================================
    # Simple client methods (original DeltaGlider API)
    # ============================================================================

    def upload(
        self,
        file_path: str | Path,
        s3_url: str,
        tags: dict[str, str] | None = None,
        max_ratio: float = 0.5,
    ) -> UploadSummary:
        """Upload a file to S3 with automatic delta compression.

        Args:
            file_path: Local file to upload
            s3_url: S3 destination URL (s3://bucket/prefix/)
            tags: Optional tags to add to the object
            max_ratio: Maximum acceptable delta/file ratio (default 0.5)

        Returns:
            UploadSummary with compression statistics
        """
        file_path = Path(file_path)

        address = parse_s3_url(s3_url, strip_trailing_slash=True)
        bucket = address.bucket
        prefix = address.key

        # Create delta space and upload
        delta_space = DeltaSpace(bucket=bucket, prefix=prefix)
        summary = self.service.put(file_path, delta_space, max_ratio)

        # TODO: Add tags support when implemented

        # Convert to user-friendly summary
        is_delta = summary.delta_size is not None
        stored_size = summary.delta_size if is_delta else summary.file_size

        upload_summary = UploadSummary(
            operation=summary.operation,
            bucket=summary.bucket,
            key=summary.key,
            original_size=summary.file_size,
            stored_size=stored_size or summary.file_size,  # Ensure stored_size is never None
            is_delta=is_delta,
            delta_ratio=summary.delta_ratio or 0.0,
        )
        self._invalidate_bucket_stats_cache(bucket)
        return upload_summary

    def download(self, s3_url: str, output_path: str | Path) -> None:
        """Download and reconstruct a file from S3.

        Args:
            s3_url: S3 source URL (s3://bucket/key)
            output_path: Local destination path
        """
        output_path = Path(output_path)

        address = parse_s3_url(s3_url, allow_empty_key=False)
        bucket = address.bucket
        key = address.key

        # Auto-append .delta if the file doesn't exist without it
        # This allows users to specify the original name and we'll find the delta
        obj_key = ObjectKey(bucket=bucket, key=key)

        # Try to get metadata first to see if it exists
        try:
            self.service.get(obj_key, output_path)
        except Exception:
            # Try with .delta suffix
            if not key.endswith(".delta"):
                obj_key = ObjectKey(bucket=bucket, key=key + ".delta")
                self.service.get(obj_key, output_path)
            else:
                raise

    def verify(self, s3_url: str) -> bool:
        """Verify integrity of a stored file.

        Args:
            s3_url: S3 URL of the file to verify

        Returns:
            True if verification passed, False otherwise
        """
        address = parse_s3_url(s3_url, allow_empty_key=False)
        bucket = address.bucket
        key = address.key

        obj_key = ObjectKey(bucket=bucket, key=key)
        result = self.service.verify(obj_key)
        return result.valid

    # ============================================================================
    # DeltaGlider-specific APIs
    # ============================================================================

    def upload_chunked(
        self,
        file_path: str | Path,
        s3_url: str,
        chunk_size: int = 5 * 1024 * 1024,
        progress_callback: Callable[[int, int, int, int], None] | None = None,
        max_ratio: float = 0.5,
    ) -> UploadSummary:
        """Upload a file in chunks with progress callback.

        This method reads the file in chunks to avoid loading large files entirely into memory,
        making it suitable for uploading very large files. Progress is reported after each chunk.

        Args:
            file_path: Local file to upload
            s3_url: S3 destination URL (s3://bucket/path/filename)
            chunk_size: Size of each chunk in bytes (default 5MB)
            progress_callback: Callback(chunk_number, total_chunks, bytes_sent, total_bytes)
            max_ratio: Maximum acceptable delta/file ratio for compression

        Returns:
            UploadSummary with compression statistics

        Example:
            def on_progress(chunk_num, total_chunks, bytes_sent, total_bytes):
                percent = (bytes_sent / total_bytes) * 100
                print(f"Upload progress: {percent:.1f}%")

            client.upload_chunked(
                "large_file.zip",
                "s3://bucket/releases/large_file.zip",
                chunk_size=10 * 1024 * 1024,  # 10MB chunks
                progress_callback=on_progress
            )
        """
        result: UploadSummary = _upload_chunked(
            self, file_path, s3_url, chunk_size, progress_callback, max_ratio
        )
        return result

    def upload_batch(
        self,
        files: list[str | Path],
        s3_prefix: str,
        max_ratio: float = 0.5,
        progress_callback: Callable[[str, int, int], None] | None = None,
    ) -> list[UploadSummary]:
        """Upload multiple files in batch.

        Args:
            files: List of local file paths
            s3_prefix: S3 destination prefix (s3://bucket/prefix/)
            max_ratio: Maximum acceptable delta/file ratio
            progress_callback: Callback(filename, current_file_index, total_files)

        Returns:
            List of UploadSummary objects
        """
        return _upload_batch(self, files, s3_prefix, max_ratio, progress_callback)

    def download_batch(
        self,
        s3_urls: list[str],
        output_dir: str | Path,
        progress_callback: Callable[[str, int, int], None] | None = None,
    ) -> list[Path]:
        """Download multiple files in batch.

        Args:
            s3_urls: List of S3 URLs to download
            output_dir: Local directory to save files
            progress_callback: Callback(filename, current_file_index, total_files)

        Returns:
            List of downloaded file paths
        """
        return _download_batch(self, s3_urls, output_dir, progress_callback)

    def estimate_compression(
        self,
        file_path: str | Path,
        bucket: str,
        prefix: str = "",
        sample_size: int = 1024 * 1024,
    ) -> CompressionEstimate:
        """Estimate compression ratio before upload.

        Args:
            file_path: Local file to estimate
            bucket: Target bucket
            prefix: Target prefix (for finding similar files)
            sample_size: Bytes to sample for estimation (default 1MB)

        Returns:
            CompressionEstimate with predicted compression
        """
        result: CompressionEstimate = _estimate_compression(
            self, file_path, bucket, prefix, sample_size
        )
        return result

    def find_similar_files(
        self,
        bucket: str,
        prefix: str,
        filename: str,
        limit: int = 5,
    ) -> list[dict[str, Any]]:
        """Find similar files that could serve as references.

        Args:
            bucket: S3 bucket
            prefix: Prefix to search in
            filename: Filename to match against
            limit: Maximum number of results

        Returns:
            List of similar files with scores
        """
        return _find_similar_files(self, bucket, prefix, filename, limit)

    def get_object_info(self, s3_url: str) -> ObjectInfo:
        """Get detailed object information including compression stats.

        Args:
            s3_url: S3 URL of the object

        Returns:
            ObjectInfo with detailed metadata
        """
        result: ObjectInfo = _get_object_info(self, s3_url)
        return result

    def get_bucket_stats(
        self,
        bucket: str,
        mode: StatsMode = "quick",
        use_cache: bool = True,
        refresh_cache: bool = False,
    ) -> BucketStats:
        """Get statistics for a bucket with selectable accuracy modes and S3-based caching.

        Modes:
            - ``quick``: Fast listing-only stats (delta compression approximated).
            - ``sampled``: Fetch one delta HEAD per delta-space and reuse the ratio.
            - ``detailed``: Fetch metadata for every delta object (slowest, most accurate).

        Caching:
            - Stats are cached in S3 at ``.deltaglider/stats_{mode}.json``
            - Cache is automatically validated on every call (uses LIST operation)
            - If bucket changed, cache is recomputed automatically
            - Use ``refresh_cache=True`` to force recomputation
            - Use ``use_cache=False`` to skip caching entirely

        Args:
            bucket: S3 bucket name
            mode: Stats mode ("quick", "sampled", or "detailed")
            use_cache: If True, use S3-cached stats when available (default: True)
            refresh_cache: If True, force cache recomputation even if valid (default: False)

        Returns:
            BucketStats with compression and space savings info

        Performance:
            - With cache hit: ~50-100ms (LIST + cache read + validation)
            - quick (no cache): ~50ms per 1000 objects (LIST only)
            - sampled (no cache): ~60 HEAD calls per 60 delta-spaces plus LIST
            - detailed (no cache): ~2-3s per 1000 delta objects (LIST + HEAD per delta)

        Example:
            # Quick stats with caching (fast, ~100ms)
            stats = client.get_bucket_stats('releases')

            # Force refresh (slow, recomputes everything)
            stats = client.get_bucket_stats('releases', refresh_cache=True)

            # Skip cache entirely
            stats = client.get_bucket_stats('releases', use_cache=False)

            # Detailed stats with caching
            stats = client.get_bucket_stats('releases', mode='detailed')
        """
        if mode not in {"quick", "sampled", "detailed"}:
            raise ValueError(f"Unknown stats mode: {mode}")

        # Use S3-based caching from stats.py (replaces old in-memory cache)
        result: BucketStats = _get_bucket_stats(
            self, bucket, mode=mode, use_cache=use_cache, refresh_cache=refresh_cache
        )
        return result

    def generate_presigned_url(
        self,
        ClientMethod: str,
        Params: dict[str, Any],
        ExpiresIn: int = 3600,
    ) -> str:
        """Generate presigned URL (boto3-compatible).

        Args:
            ClientMethod: Method name ('get_object' or 'put_object')
            Params: Parameters dict with Bucket and Key
            ExpiresIn: URL expiration in seconds

        Returns:
            Presigned URL string
        """
        return _generate_presigned_url(self, ClientMethod, Params, ExpiresIn)

    def generate_presigned_post(
        self,
        Bucket: str,
        Key: str,
        Fields: dict[str, str] | None = None,
        Conditions: list[Any] | None = None,
        ExpiresIn: int = 3600,
    ) -> dict[str, Any]:
        """Generate presigned POST data for HTML forms (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            Fields: Additional fields to include
            Conditions: Upload conditions
            ExpiresIn: URL expiration in seconds

        Returns:
            Dict with 'url' and 'fields' for form submission
        """
        return _generate_presigned_post(self, Bucket, Key, Fields, Conditions, ExpiresIn)

    # ============================================================================
    # Bucket Management APIs (boto3-compatible)
    # ============================================================================

    def create_bucket(
        self,
        Bucket: str,
        CreateBucketConfiguration: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Create an S3 bucket (boto3-compatible).

        Args:
            Bucket: Bucket name to create
            CreateBucketConfiguration: Optional bucket configuration (e.g., LocationConstraint)
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with bucket location

        Example:
            >>> client = create_client()
            >>> client.create_bucket(Bucket='my-bucket')
            >>> # With region
            >>> client.create_bucket(
            ...     Bucket='my-bucket',
            ...     CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
            ... )
        """
        response = _create_bucket(self, Bucket, CreateBucketConfiguration, **kwargs)
        self._invalidate_bucket_stats_cache(Bucket)
        return response

    def delete_bucket(
        self,
        Bucket: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Delete an S3 bucket (boto3-compatible).

        Note: Bucket must be empty before deletion.

        Args:
            Bucket: Bucket name to delete
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with deletion status

        Example:
            >>> client = create_client()
            >>> client.delete_bucket(Bucket='my-bucket')
        """
        response = _delete_bucket(self, Bucket, **kwargs)
        self._invalidate_bucket_stats_cache(Bucket)
        return response

    def list_buckets(self, **kwargs: Any) -> dict[str, Any]:
        """List all S3 buckets (boto3-compatible).

        Args:
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with bucket list

        Example:
            >>> client = create_client()
            >>> response = client.list_buckets()
            >>> for bucket in response['Buckets']:
            ...     print(bucket['Name'])
        """
        return _list_buckets(self, **kwargs)

    def put_bucket_acl(
        self,
        Bucket: str,
        ACL: str | None = None,
        AccessControlPolicy: dict[str, Any] | None = None,
        GrantFullControl: str | None = None,
        GrantRead: str | None = None,
        GrantReadACP: str | None = None,
        GrantWrite: str | None = None,
        GrantWriteACP: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Set the ACL for an S3 bucket (boto3-compatible passthrough).

        Args:
            Bucket: Bucket name
            ACL: Canned ACL (private, public-read, public-read-write, authenticated-read)
            AccessControlPolicy: Full ACL policy dict
            GrantFullControl: Grants full control to the grantee
            GrantRead: Allows grantee to list objects in the bucket
            GrantReadACP: Allows grantee to read the bucket ACL
            GrantWrite: Allows grantee to create objects in the bucket
            GrantWriteACP: Allows grantee to write the ACL for the bucket
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with status
        """
        return _put_bucket_acl(
            self,
            Bucket,
            ACL=ACL,
            AccessControlPolicy=AccessControlPolicy,
            GrantFullControl=GrantFullControl,
            GrantRead=GrantRead,
            GrantReadACP=GrantReadACP,
            GrantWrite=GrantWrite,
            GrantWriteACP=GrantWriteACP,
            **kwargs,
        )

    def get_bucket_acl(
        self,
        Bucket: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Get the ACL for an S3 bucket (boto3-compatible passthrough).

        Args:
            Bucket: Bucket name
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with Owner and Grants
        """
        return _get_bucket_acl(self, Bucket, **kwargs)

    def _parse_tagging(self, tagging: str) -> dict[str, str]:
        """Parse URL-encoded tagging string to dict."""
        tags = {}
        if tagging:
            for pair in tagging.split("&"):
                if "=" in pair:
                    key, value = pair.split("=", 1)
                    tags[key] = value
        return tags

    # ============================================================================
    # Cache Management APIs (DeltaGlider Extensions)
    # ============================================================================

    def clear_cache(self) -> None:
        """Clear all cached reference files.

        Forcibly removes all cached data from memory or disk. This is essential for
        long-running applications that need to:
        - Free memory/disk space
        - Invalidate cache after configuration changes
        - Ensure fresh data fetch from S3
        - Clean up after tests

        **Important for Long-Running Applications**:
        Unlike the CLI which cleans up cache on exit, programmatic SDK usage
        requires manual cache management. Call this method periodically or when:
        - Application runs for extended periods (hours/days)
        - Memory usage is high
        - Configuration changes (endpoint, credentials, encryption key)
        - Testing scenarios requiring clean state

        **Effects**:
        - Filesystem cache: Removes all files from cache directory
        - Memory cache: Clears all in-memory data
        - Encrypted cache: Clears encryption key mappings
        - Next upload will re-fetch reference from S3

        **Example - Long-Running Service**:
        ```python
        from deltaglider import create_client
        import schedule
        import time

        client = create_client()

        def upload_task():
            client.put_object(Bucket='releases', Key='app.zip', Body=open('app.zip', 'rb'))

        def cleanup_task():
            client.clear_cache()  # Free resources every hour
            print("Cache cleared")

        # Upload every 10 minutes
        schedule.every(10).minutes.do(upload_task)

        # Clear cache every hour
        schedule.every().hour.do(cleanup_task)

        while True:
            schedule.run_pending()
            time.sleep(1)
        ```

        **Example - Test Cleanup**:
        ```python
        def test_upload():
            client = create_client()
            try:
                client.put_object(Bucket='test', Key='file.zip', Body=b'data')
            finally:
                client.clear_cache()  # Ensure clean state for next test
        ```

        **Example - After Configuration Change**:
        ```python
        client = create_client(endpoint_url='http://minio1:9000')
        client.put_object(Bucket='bucket', Key='file.zip', Body=b'data')

        # Switch to different endpoint
        client.clear_cache()  # Clear cache from old endpoint
        client = create_client(endpoint_url='http://minio2:9000')
        ```

        See Also:
        - `evict_cache()`: Remove specific cached reference
        - docs/CACHE_MANAGEMENT.md: Complete cache management guide
        """
        self._invalidate_bucket_stats_cache()
        self.service.cache.clear()

    def rehydrate_for_download(self, Bucket: str, Key: str, ExpiresIn: int = 3600) -> str | None:
        """Rehydrate a deltaglider-compressed file for direct download.

        If the file is deltaglider-compressed, this will:
        1. Download and decompress the file
        2. Re-upload to .deltaglider/tmp/ with expiration metadata
        3. Return the new temporary file key

        If the file is not deltaglider-compressed, returns None.

        Args:
            Bucket: S3 bucket name
            Key: Object key
            ExpiresIn: How long the temporary file should exist (seconds)

        Returns:
            New key for temporary file, or None if not deltaglider-compressed

        Example:
            >>> client = create_client()
            >>> temp_key = client.rehydrate_for_download(
            ...     Bucket='my-bucket',
            ...     Key='large-file.zip.delta',
            ...     ExpiresIn=3600  # 1 hour
            ... )
            >>> if temp_key:
            ...     # Generate presigned URL for the temporary file
            ...     url = client.generate_presigned_url(
            ...         'get_object',
            ...         Params={'Bucket': 'my-bucket', 'Key': temp_key},
            ...         ExpiresIn=3600
            ...     )
        """
        return self.service.rehydrate_for_download(Bucket, Key, ExpiresIn)

    def generate_presigned_url_with_rehydration(
        self,
        Bucket: str,
        Key: str,
        ExpiresIn: int = 3600,
    ) -> str:
        """Generate a presigned URL with automatic rehydration for deltaglider files.

        This method handles both regular and deltaglider-compressed files:
        - For regular files: Returns a standard presigned URL
        - For deltaglider files: Rehydrates to temporary location and returns presigned URL

        Args:
            Bucket: S3 bucket name
            Key: Object key
            ExpiresIn: URL expiration time in seconds

        Returns:
            Presigned URL for direct download

        Example:
            >>> client = create_client()
            >>> # Works for both regular and deltaglider files
            >>> url = client.generate_presigned_url_with_rehydration(
            ...     Bucket='my-bucket',
            ...     Key='any-file.zip',  # or 'any-file.zip.delta'
            ...     ExpiresIn=3600
            ... )
            >>> print(f"Download URL: {url}")
        """
        # Try to rehydrate if it's a deltaglider file
        temp_key = self.rehydrate_for_download(Bucket, Key, ExpiresIn)

        # Use the temporary key if rehydration occurred, otherwise use original
        download_key = temp_key if temp_key else Key

        # Extract the original filename for Content-Disposition header
        original_filename = Key.removesuffix(".delta") if Key.endswith(".delta") else Key
        if "/" in original_filename:
            original_filename = original_filename.split("/")[-1]

        # Generate presigned URL with Content-Disposition to force correct filename
        params = {"Bucket": Bucket, "Key": download_key}
        if temp_key:
            # For rehydrated files, set Content-Disposition to use original filename
            params["ResponseContentDisposition"] = f'attachment; filename="{original_filename}"'

        return self.generate_presigned_url("get_object", Params=params, ExpiresIn=ExpiresIn)

    def purge_temp_files(self, Bucket: str) -> dict[str, Any]:
        """Purge expired temporary files from .deltaglider/tmp/.

        Scans the .deltaglider/tmp/ prefix and deletes any files
        whose dg-expires-at metadata indicates they have expired.

        Args:
            Bucket: S3 bucket to purge temp files from

        Returns:
            dict with purge statistics including:
            - deleted_count: Number of files deleted
            - expired_count: Number of expired files found
            - error_count: Number of errors encountered
            - total_size_freed: Total bytes freed
            - duration_seconds: Operation duration
            - errors: List of error messages

        Example:
            >>> client = create_client()
            >>> result = client.purge_temp_files(Bucket='my-bucket')
            >>> print(f"Deleted {result['deleted_count']} expired files")
            >>> print(f"Freed {result['total_size_freed']} bytes")
        """
        return self.service.purge_temp_files(Bucket)


def create_client(
    endpoint_url: str | None = None,
    log_level: str = "INFO",
    aws_access_key_id: str | None = None,
    aws_secret_access_key: str | None = None,
    aws_session_token: str | None = None,
    region_name: str | None = None,
    **kwargs: Any,
) -> DeltaGliderClient:
    """Create a DeltaGlider client with boto3-compatible APIs.

    This client provides:
    - Boto3-compatible method names (put_object, get_object, etc.)
    - Batch operations (upload_batch, download_batch)
    - Compression estimation
    - Progress callbacks for large uploads
    - Detailed object and bucket statistics
    - Secure ephemeral cache (process-isolated, auto-cleanup)

    Args:
        endpoint_url: Optional S3 endpoint URL (for MinIO, R2, etc.)
        log_level: Logging level
        aws_access_key_id: AWS access key ID (None to use environment/IAM)
        aws_secret_access_key: AWS secret access key (None to use environment/IAM)
        aws_session_token: AWS session token for temporary credentials (None if not using)
        region_name: AWS region name (None for default)
        **kwargs: Additional arguments

    Returns:
        DeltaGliderClient instance

    Examples:
        >>> # Boto3-compatible usage with default credentials
        >>> client = create_client()
        >>> client.put_object(Bucket='my-bucket', Key='file.zip', Body=b'data')
        >>> response = client.get_object(Bucket='my-bucket', Key='file.zip')
        >>> data = response['Body'].read()

        >>> # With explicit credentials
        >>> client = create_client(
        ...     aws_access_key_id='AKIAIOSFODNN7EXAMPLE',
        ...     aws_secret_access_key='wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
        ... )

        >>> # Batch operations
        >>> results = client.upload_batch(['v1.zip', 'v2.zip'], 's3://bucket/releases/')

        >>> # Compression estimation
        >>> estimate = client.estimate_compression('new.zip', 'bucket', 'releases/')
        >>> print(f"Expected compression: {estimate.estimated_ratio:.1%}")
    """
    # Import here to avoid circular dependency
    from .adapters import (
        ContentAddressedCache,
        EncryptedCache,
        MemoryCache,
        NoopMetricsAdapter,
        S3StorageAdapter,
        Sha256Adapter,
        StdLoggerAdapter,
        UtcClockAdapter,
        XdeltaAdapter,
    )

    # SECURITY: Always use ephemeral process-isolated cache
    cache_dir = Path(tempfile.mkdtemp(prefix="deltaglider-", dir="/tmp"))
    # Register cleanup handler to remove cache on exit
    atexit.register(lambda: shutil.rmtree(cache_dir, ignore_errors=True))

    # Build boto3 client kwargs
    boto3_kwargs = {}
    if aws_access_key_id is not None:
        boto3_kwargs["aws_access_key_id"] = aws_access_key_id
    if aws_secret_access_key is not None:
        boto3_kwargs["aws_secret_access_key"] = aws_secret_access_key
    if aws_session_token is not None:
        boto3_kwargs["aws_session_token"] = aws_session_token
    if region_name is not None:
        boto3_kwargs["region_name"] = region_name

    # Create adapters
    hasher = Sha256Adapter()
    storage = S3StorageAdapter(endpoint_url=endpoint_url, boto3_kwargs=boto3_kwargs)
    diff = XdeltaAdapter()

    # SECURITY: Configurable cache with encryption and backend selection
    from .ports.cache import CachePort

    cache_backend = os.environ.get("DG_CACHE_BACKEND", "filesystem")  # Options: filesystem, memory
    base_cache: CachePort
    if cache_backend == "memory":
        max_size_mb = int(os.environ.get("DG_CACHE_MEMORY_SIZE_MB", "100"))
        base_cache = MemoryCache(hasher, max_size_mb=max_size_mb, temp_dir=cache_dir)
    else:
        # Filesystem-backed with Content-Addressed Storage
        base_cache = ContentAddressedCache(cache_dir, hasher)

    # Always apply encryption with ephemeral keys (security hardening)
    # Encryption key is optional via DG_CACHE_ENCRYPTION_KEY (ephemeral if not set)
    cache: CachePort = EncryptedCache.from_env(base_cache)

    clock = UtcClockAdapter()
    logger = StdLoggerAdapter(level=log_level)
    metrics = NoopMetricsAdapter()

    # Get default values (use real package version)
    tool_version = kwargs.pop("tool_version", f"deltaglider/{__version__}")
    max_ratio = kwargs.pop("max_ratio", 0.5)

    # Create service
    service = DeltaService(
        storage=storage,
        diff=diff,
        hasher=hasher,
        cache=cache,
        clock=clock,
        logger=logger,
        metrics=metrics,
        tool_version=tool_version,
        max_ratio=max_ratio,
        **kwargs,
    )

    return DeltaGliderClient(service, endpoint_url)