deltaglider-beshu-tech/src/deltaglider/client.py

"""DeltaGlider client with boto3-compatible APIs and advanced features."""

import tempfile
from collections.abc import Callable
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from .adapters.storage_s3 import S3StorageAdapter
from .core import DeltaService, DeltaSpace, ObjectKey


@dataclass
class UploadSummary:
    """Outcome of one upload, expressed in caller-friendly terms.

    All sizes are byte counts. ``stored_size`` is the number of bytes actually
    written, while ``original_size`` is the size of the uploaded file.
    """

    operation: str
    bucket: str
    key: str
    original_size: int
    stored_size: int
    is_delta: bool
    delta_ratio: float = 0.0

    @property
    def original_size_mb(self) -> float:
        """Return ``original_size`` expressed in units of 1024 * 1024 bytes."""
        bytes_per_mb = 1024 * 1024
        return self.original_size / bytes_per_mb

    @property
    def stored_size_mb(self) -> float:
        """Return ``stored_size`` expressed in units of 1024 * 1024 bytes."""
        bytes_per_mb = 1024 * 1024
        return self.stored_size / bytes_per_mb

    @property
    def savings_percent(self) -> float:
        """Return the share of the original size that was not stored, in percent."""
        if self.original_size == 0:
            # Nothing to compare against; also avoids dividing by zero
            return 0.0
        bytes_saved = self.original_size - self.stored_size
        saved_fraction = bytes_saved / self.original_size
        return saved_fraction * 100


@dataclass
class CompressionEstimate:
    """Compression estimate for a file.

    Returned by ``DeltaGliderClient.estimate_compression``; nothing is uploaded
    to produce it.
    """

    # Size of the local file in bytes
    original_size: int
    # Predicted stored size in bytes
    estimated_compressed_size: int
    # Predicted fraction of the original size that is saved (0.0 means no savings)
    estimated_ratio: float
    # Trust level of the estimate; estimate_compression uses values from 0.5 to 0.95
    confidence: float
    # Key of an existing object suggested as reference, when a similar file was found
    recommended_reference: str | None = None
    # Whether delta compression is recommended for this file
    should_use_delta: bool = True


@dataclass
class ObjectInfo:
    """Detailed object information with compression stats.

    Built by ``DeltaGliderClient.list_objects`` and
    ``DeltaGliderClient.get_object_info`` from a storage listing entry or head
    result plus the object's metadata.
    """

    # Object key inside the bucket
    key: str
    # Stored size in bytes, as reported by the storage layer
    size: int
    # Last-modified value as a string; empty when the storage layer gave none
    last_modified: str
    etag: str | None = None
    storage_class: str = "STANDARD"

    # DeltaGlider-specific fields
    # Taken from the "file_size" metadata entry, falling back to the stored size
    original_size: int | None = None
    # Same value as ``size`` when filled in by the client
    compressed_size: int | None = None
    # Taken from the "compression_ratio" metadata entry (0.0 when absent)
    compression_ratio: float | None = None
    # True when the key ends with ".delta"
    is_delta: bool = False
    # Taken from the "ref_key" metadata entry
    reference_key: str | None = None
    # NOTE(review): not populated by the client code visible here; stays at 0
    delta_chain_length: int = 0


@dataclass
class ListObjectsResponse:
    """Response from list_objects, compatible with boto3.

    ``DeltaGliderClient.list_objects`` echoes the request arguments (prefix,
    delimiter, max_keys, continuation_token) and fills the remaining fields
    from the storage adapter's listing result.
    """

    name: str  # Bucket name
    prefix: str = ""
    delimiter: str = ""
    max_keys: int = 1000
    # Each entry is a {"Prefix": <prefix>} dict
    common_prefixes: list[dict[str, str]] = field(default_factory=list)
    # One ObjectInfo per listed object
    contents: list[ObjectInfo] = field(default_factory=list)
    is_truncated: bool = False
    # Token reported by the storage listing for fetching the next page, if any
    next_continuation_token: str | None = None
    # Token the caller passed in for this page
    continuation_token: str | None = None
    # Number of entries in ``contents``
    key_count: int = 0

    @property
    def objects(self) -> list[ObjectInfo]:
        """Alias for contents, for convenience.

        Returns:
            The same list object as ``contents`` (not a copy)
        """
        return self.contents


@dataclass
class BucketStats:
    """Statistics for a bucket.

    Produced by ``DeltaGliderClient.get_bucket_stats`` from a full listing of
    the bucket.
    """

    bucket: str
    # Number of listed objects
    object_count: int
    # Sum of original (uncompressed) sizes in bytes
    total_size: int
    # Sum of stored sizes in bytes
    compressed_size: int
    # total_size minus compressed_size
    space_saved: int
    # space_saved divided by total_size (0.0 for an empty bucket)
    average_compression_ratio: float
    # Number of objects stored as deltas
    delta_objects: int
    # Presumably the number of objects stored directly (not as deltas) - TODO confirm
    direct_objects: int


class DeltaGliderClient:
    """DeltaGlider client with boto3-compatible APIs and advanced features."""

    def __init__(self, service: DeltaService, endpoint_url: str | None = None):
        """Initialize client with service."""
        self.service = service
        self.endpoint_url = endpoint_url
        self._multipart_uploads: dict[str, Any] = {}  # Track multipart uploads

    # ============================================================================
    # Boto3-compatible APIs (matches S3 client interface)
    # ============================================================================

    def put_object(
        self,
        Bucket: str,
        Key: str,
        Body: bytes | str | Path | None = None,
        Metadata: dict[str, str] | None = None,
        ContentType: str | None = None,
        Tagging: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Upload an object to S3 (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            Body: Object data (bytes, string, or file path)
            Metadata: Object metadata
            ContentType: MIME type
            Tagging: Object tags as URL-encoded string
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with ETag and version info
        """
        # Handle Body parameter
        if Body is None:
            raise ValueError("Body parameter is required")

        # Create temp file if Body is bytes or string
        cleanup_temp = False
        if isinstance(Body, (bytes, str)):
            # Create temp file with the actual key name to ensure proper naming
            temp_dir = Path(tempfile.gettempdir())
            tmp_path = temp_dir / Path(Key).name

            # If file exists, add unique suffix
            if tmp_path.exists():
                import uuid

                tmp_path = temp_dir / f"{uuid.uuid4()}_{Path(Key).name}"

            if isinstance(Body, str):
                tmp_path.write_text(Body)
            else:
                tmp_path.write_bytes(Body)
            cleanup_temp = True
        elif isinstance(Body, Path):
            tmp_path = Body
        else:
            tmp_path = Path(str(Body))

        try:
            # For boto3 compatibility, we need to handle the key differently
            # The base upload method expects a prefix and appends the filename
            # But put_object should store exactly at the specified key

            # Extract the directory part of the key
            key_parts = Key.rsplit("/", 1)
            if len(key_parts) > 1:
                # Key has a path component
                prefix = key_parts[0]
                s3_url = f"s3://{Bucket}/{prefix}/"
            else:
                # Key is just a filename
                s3_url = f"s3://{Bucket}/"

            # Use our upload method
            result = self.upload(
                file_path=tmp_path,
                s3_url=s3_url,
                tags=self._parse_tagging(Tagging) if Tagging else None,
            )

            # Return boto3-compatible response
            return {
                "ETag": f'"{self.service.hasher.sha256(tmp_path)}"',
                "ResponseMetadata": {
                    "HTTPStatusCode": 200,
                },
                # DeltaGlider extensions
                "DeltaGlider": {
                    "original_size": result.original_size,
                    "stored_size": result.stored_size,
                    "is_delta": result.is_delta,
                    "compression_ratio": result.delta_ratio,
                },
            }
        finally:
            # Clean up temp file
            if cleanup_temp and tmp_path.exists():
                tmp_path.unlink()

    def get_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Download an object from S3 (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            **kwargs: Additional S3 parameters (for compatibility)

        Returns:
            Response dict with Body stream and metadata
        """
        # Download to temp file
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp_path = Path(tmp.name)

        self.download(
            s3_url=f"s3://{Bucket}/{Key}",
            output_path=tmp_path,
        )

        # Open file for streaming
        body = open(tmp_path, "rb")

        # Get metadata
        obj_head = self.service.storage.head(f"{Bucket}/{Key}")

        return {
            "Body": body,  # File-like object
            "ContentLength": tmp_path.stat().st_size,
            "ContentType": obj_head.metadata.get("content_type", "binary/octet-stream")
            if obj_head
            else "binary/octet-stream",
            "ETag": f'"{self.service.hasher.sha256(tmp_path)}"',
            "Metadata": obj_head.metadata if obj_head else {},
            "ResponseMetadata": {
                "HTTPStatusCode": 200,
            },
        }

    def list_objects(
        self,
        Bucket: str,
        Prefix: str = "",
        Delimiter: str = "",
        MaxKeys: int = 1000,
        ContinuationToken: str | None = None,
        StartAfter: str | None = None,
        **kwargs: Any,
    ) -> ListObjectsResponse:
        """List objects in bucket (boto3-compatible).

        Every listed key is additionally looked up with a head request so that
        its DeltaGlider metadata can be attached to the result.

        Args:
            Bucket: S3 bucket name
            Prefix: Filter results to keys beginning with prefix
            Delimiter: Delimiter for grouping keys (e.g., '/' for folders)
            MaxKeys: Maximum number of keys to return
            ContinuationToken: Token for pagination; it is echoed back in the
                response but not forwarded to the storage adapter
            StartAfter: Start listing after this key
            **kwargs: Additional parameters for compatibility

        Returns:
            ListObjectsResponse with objects and common prefixes
        """
        storage = self.service.storage

        # NOTE(review): ContinuationToken never reaches the adapter, so a caller
        # that pages with it receives the same page again - confirm intended.
        if hasattr(storage, "list_objects") or isinstance(storage, S3StorageAdapter):
            listing = storage.list_objects(
                bucket=Bucket,
                prefix=Prefix,
                delimiter=Delimiter,
                max_keys=MaxKeys,
                start_after=StartAfter,
            )
        else:
            # Adapter cannot list at all: behave like an empty bucket
            listing = {
                "objects": [],
                "common_prefixes": [],
                "is_truncated": False,
            }

        def describe(entry: dict[str, Any]) -> ObjectInfo:
            """Combine one listing entry with its stored metadata."""
            object_key = entry["key"]
            # A ".delta" suffix marks delta files; anything else is a direct upload
            stored_as_delta = object_key.endswith(".delta")

            head = storage.head(f"{Bucket}/{object_key}")
            meta = head.metadata if head else {}

            return ObjectInfo(
                key=entry["key"],
                size=entry["size"],
                last_modified=entry.get("last_modified", ""),
                etag=entry.get("etag"),
                storage_class=entry.get("storage_class", "STANDARD"),
                # DeltaGlider fields
                original_size=int(meta.get("file_size", entry["size"])),
                compressed_size=entry["size"],
                is_delta=stored_as_delta,
                compression_ratio=float(meta.get("compression_ratio", 0.0)),
                reference_key=meta.get("ref_key"),
            )

        contents = [describe(entry) for entry in listing.get("objects", [])]
        common_prefixes = [{"Prefix": p} for p in listing.get("common_prefixes", [])]

        return ListObjectsResponse(
            name=Bucket,
            prefix=Prefix,
            delimiter=Delimiter,
            max_keys=MaxKeys,
            contents=contents,
            common_prefixes=common_prefixes,
            is_truncated=listing.get("is_truncated", False),
            next_continuation_token=listing.get("next_continuation_token"),
            continuation_token=ContinuationToken,
            key_count=len(contents),
        )

    def delete_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Delete an object with delta awareness (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            **kwargs: Additional parameters

        Returns:
            Response dict with deletion details. The ``DeltaGliderInfo`` section
            always carries ``Type`` and ``Deleted``; ``Warnings`` and
            ``DependentDeltas`` appear only when the service reported them.
        """
        # The core service performs the delta-aware delete
        outcome = self.service.delete(ObjectKey(bucket=Bucket, key=Key))

        details: dict[str, Any] = {
            "Type": outcome.get("type"),
            "Deleted": outcome.get("deleted", False),
        }

        # Optional extras are attached only when non-empty
        reported_warnings = outcome.get("warnings")
        if reported_warnings:
            details["Warnings"] = reported_warnings

        dependents = outcome.get("dependent_deltas")
        if dependents:
            details["DependentDeltas"] = dependents

        return {
            "DeleteMarker": False,
            "ResponseMetadata": {
                "HTTPStatusCode": 204,
            },
            "DeltaGliderInfo": details,
        }

    def delete_objects(
        self,
        Bucket: str,
        Delete: dict[str, Any],
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Delete multiple objects with delta awareness (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Delete: Dict with 'Objects' list of {'Key': key} dicts
            **kwargs: Additional parameters

        Returns:
            Response dict with deleted objects
        """
        deleted = []
        errors = []
        delta_info = []

        for obj in Delete.get("Objects", []):
            key = obj["Key"]
            try:
                object_key = ObjectKey(bucket=Bucket, key=key)
                delete_result = self.service.delete(object_key)

                deleted_item = {"Key": key}
                if delete_result.get("type"):
                    deleted_item["Type"] = delete_result["type"]
                if delete_result.get("warnings"):
                    deleted_item["Warnings"] = delete_result["warnings"]

                deleted.append(deleted_item)

                # Track delta-specific info
                if delete_result.get("type") in ["delta", "reference"]:
                    delta_info.append(
                        {
                            "Key": key,
                            "Type": delete_result["type"],
                            "DependentDeltas": delete_result.get("dependent_deltas", 0),
                        }
                    )

            except Exception as e:
                errors.append(
                    {
                        "Key": key,
                        "Code": "InternalError",
                        "Message": str(e),
                    }
                )

        response: dict[str, Any] = {"Deleted": deleted}
        if errors:
            response["Errors"] = errors

        if delta_info:
            response["DeltaGliderInfo"] = {
                "DeltaFilesDeleted": len([d for d in delta_info if d["Type"] == "delta"]),
                "ReferencesDeleted": len([d for d in delta_info if d["Type"] == "reference"]),
                "Details": delta_info,
            }

        response["ResponseMetadata"] = {"HTTPStatusCode": 200}
        return response

    def delete_objects_recursive(
        self,
        Bucket: str,
        Prefix: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Recursively delete all objects under a prefix with delta awareness.

        Args:
            Bucket: S3 bucket name
            Prefix: Prefix to delete recursively
            **kwargs: Additional parameters

        Returns:
            Response dict with deletion statistics
        """
        # Use core service's delta-aware recursive delete
        delete_result = self.service.delete_recursive(Bucket, Prefix)

        response = {
            "ResponseMetadata": {
                "HTTPStatusCode": 200,
            },
            "DeletedCount": delete_result.get("deleted_count", 0),
            "FailedCount": delete_result.get("failed_count", 0),
            "DeltaGliderInfo": {
                "DeltasDeleted": delete_result.get("deltas_deleted", 0),
                "ReferencesDeleted": delete_result.get("references_deleted", 0),
                "DirectDeleted": delete_result.get("direct_deleted", 0),
                "OtherDeleted": delete_result.get("other_deleted", 0),
            },
        }

        if delete_result.get("errors"):
            response["Errors"] = delete_result["errors"]

        if delete_result.get("warnings"):
            response["Warnings"] = delete_result["warnings"]

        return response

    def head_object(
        self,
        Bucket: str,
        Key: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Get object metadata (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            **kwargs: Additional parameters

        Returns:
            Response dict with object metadata
        """
        obj_head = self.service.storage.head(f"{Bucket}/{Key}")
        if not obj_head:
            raise FileNotFoundError(f"Object not found: s3://{Bucket}/{Key}")

        return {
            "ContentLength": obj_head.size,
            "ContentType": obj_head.metadata.get("content_type", "binary/octet-stream"),
            "ETag": obj_head.metadata.get("etag", ""),
            "LastModified": obj_head.metadata.get("last_modified", ""),
            "Metadata": obj_head.metadata,
            "ResponseMetadata": {
                "HTTPStatusCode": 200,
            },
        }

    # ============================================================================
    # Simple client methods (original DeltaGlider API)
    # ============================================================================

    def upload(
        self,
        file_path: str | Path,
        s3_url: str,
        tags: dict[str, str] | None = None,
        max_ratio: float = 0.5,
    ) -> UploadSummary:
        """Upload a file to S3 with automatic delta compression.

        Args:
            file_path: Local file to upload
            s3_url: S3 destination URL (s3://bucket/prefix/)
            tags: Optional tags to add to the object
            max_ratio: Maximum acceptable delta/file ratio (default 0.5)

        Returns:
            UploadSummary with compression statistics
        """
        file_path = Path(file_path)

        # Parse S3 URL
        if not s3_url.startswith("s3://"):
            raise ValueError(f"Invalid S3 URL: {s3_url}")

        s3_path = s3_url[5:].rstrip("/")
        parts = s3_path.split("/", 1)
        bucket = parts[0]
        prefix = parts[1] if len(parts) > 1 else ""

        # Create delta space and upload
        delta_space = DeltaSpace(bucket=bucket, prefix=prefix)
        summary = self.service.put(file_path, delta_space, max_ratio)

        # TODO: Add tags support when implemented

        # Convert to user-friendly summary
        is_delta = summary.delta_size is not None
        stored_size = summary.delta_size if is_delta else summary.file_size

        return UploadSummary(
            operation=summary.operation,
            bucket=summary.bucket,
            key=summary.key,
            original_size=summary.file_size,
            stored_size=stored_size or summary.file_size,  # Ensure stored_size is never None
            is_delta=is_delta,
            delta_ratio=summary.delta_ratio or 0.0,
        )

    def download(self, s3_url: str, output_path: str | Path) -> None:
        """Download and reconstruct a file from S3.

        Args:
            s3_url: S3 source URL (s3://bucket/key)
            output_path: Local destination path
        """
        output_path = Path(output_path)

        # Parse S3 URL
        if not s3_url.startswith("s3://"):
            raise ValueError(f"Invalid S3 URL: {s3_url}")

        s3_path = s3_url[5:]
        parts = s3_path.split("/", 1)
        if len(parts) < 2:
            raise ValueError(f"S3 URL must include key: {s3_url}")

        bucket = parts[0]
        key = parts[1]

        # Auto-append .delta if the file doesn't exist without it
        # This allows users to specify the original name and we'll find the delta
        obj_key = ObjectKey(bucket=bucket, key=key)

        # Try to get metadata first to see if it exists
        try:
            self.service.get(obj_key, output_path)
        except Exception:
            # Try with .delta suffix
            if not key.endswith(".delta"):
                obj_key = ObjectKey(bucket=bucket, key=key + ".delta")
                self.service.get(obj_key, output_path)
            else:
                raise

    def verify(self, s3_url: str) -> bool:
        """Verify integrity of a stored file.

        Args:
            s3_url: S3 URL of the file to verify

        Returns:
            True if verification passed, False otherwise
        """
        # Parse S3 URL
        if not s3_url.startswith("s3://"):
            raise ValueError(f"Invalid S3 URL: {s3_url}")

        s3_path = s3_url[5:]
        parts = s3_path.split("/", 1)
        if len(parts) < 2:
            raise ValueError(f"S3 URL must include key: {s3_url}")

        bucket = parts[0]
        key = parts[1]

        obj_key = ObjectKey(bucket=bucket, key=key)
        result = self.service.verify(obj_key)
        return result.valid

    # ============================================================================
    # DeltaGlider-specific APIs
    # ============================================================================

    def upload_chunked(
        self,
        file_path: str | Path,
        s3_url: str,
        chunk_size: int = 5 * 1024 * 1024,
        progress_callback: Callable[[int, int, int, int], None] | None = None,
        max_ratio: float = 0.5,
    ) -> UploadSummary:
        """Upload a file in chunks with progress callback.

        This method reads the file in chunks to avoid loading large files entirely into memory,
        making it suitable for uploading very large files. Progress is reported after each chunk.

        Args:
            file_path: Local file to upload
            s3_url: S3 destination URL (s3://bucket/path/filename)
            chunk_size: Size of each chunk in bytes (default 5MB)
            progress_callback: Callback(chunk_number, total_chunks, bytes_sent, total_bytes)
            max_ratio: Maximum acceptable delta/file ratio for compression

        Returns:
            UploadSummary with compression statistics

        Example:
            def on_progress(chunk_num, total_chunks, bytes_sent, total_bytes):
                percent = (bytes_sent / total_bytes) * 100
                print(f"Upload progress: {percent:.1f}%")

            client.upload_chunked(
                "large_file.zip",
                "s3://bucket/releases/large_file.zip",
                chunk_size=10 * 1024 * 1024,  # 10MB chunks
                progress_callback=on_progress
            )
        """
        file_path = Path(file_path)
        file_size = file_path.stat().st_size

        # For small files, just use regular upload
        if file_size <= chunk_size:
            if progress_callback:
                progress_callback(1, 1, file_size, file_size)
            return self.upload(file_path, s3_url, max_ratio=max_ratio)

        # Calculate chunks
        total_chunks = (file_size + chunk_size - 1) // chunk_size

        # Create a temporary file for chunked processing
        # For now, we read the entire file but report progress in chunks
        # Future enhancement: implement true streaming upload in storage adapter
        bytes_read = 0

        with open(file_path, "rb") as f:
            for chunk_num in range(1, total_chunks + 1):
                # Read chunk (simulated for progress reporting)
                chunk_data = f.read(chunk_size)
                bytes_read += len(chunk_data)

                if progress_callback:
                    progress_callback(chunk_num, total_chunks, bytes_read, file_size)

        # Perform the actual upload
        # TODO: When storage adapter supports streaming, pass chunks directly
        result = self.upload(file_path, s3_url, max_ratio=max_ratio)

        # Final progress callback
        if progress_callback:
            progress_callback(total_chunks, total_chunks, file_size, file_size)

        return result

    def upload_batch(
        self,
        files: list[str | Path],
        s3_prefix: str,
        max_ratio: float = 0.5,
        progress_callback: Callable[[str, int, int], None] | None = None,
    ) -> list[UploadSummary]:
        """Upload multiple files in batch.

        Args:
            files: List of local file paths
            s3_prefix: S3 destination prefix (s3://bucket/prefix/)
            max_ratio: Maximum acceptable delta/file ratio
            progress_callback: Callback(filename, current_file_index, total_files)

        Returns:
            List of UploadSummary objects
        """
        results = []

        for i, file_path in enumerate(files):
            file_path = Path(file_path)

            if progress_callback:
                progress_callback(file_path.name, i + 1, len(files))

            # Upload each file
            s3_url = f"{s3_prefix.rstrip('/')}/{file_path.name}"
            summary = self.upload(file_path, s3_url, max_ratio=max_ratio)
            results.append(summary)

        return results

    def download_batch(
        self,
        s3_urls: list[str],
        output_dir: str | Path,
        progress_callback: Callable[[str, int, int], None] | None = None,
    ) -> list[Path]:
        """Download multiple files in batch.

        Args:
            s3_urls: List of S3 URLs to download
            output_dir: Local directory to save files
            progress_callback: Callback(filename, current_file_index, total_files)

        Returns:
            List of downloaded file paths
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        results = []

        for i, s3_url in enumerate(s3_urls):
            # Extract filename from URL
            filename = s3_url.split("/")[-1]
            if filename.endswith(".delta"):
                filename = filename[:-6]  # Remove .delta suffix

            if progress_callback:
                progress_callback(filename, i + 1, len(s3_urls))

            output_path = output_dir / filename
            self.download(s3_url, output_path)
            results.append(output_path)

        return results

    def estimate_compression(
        self,
        file_path: str | Path,
        bucket: str,
        prefix: str = "",
        sample_size: int = 1024 * 1024,
    ) -> CompressionEstimate:
        """Estimate compression ratio before upload.

        The estimate is heuristic: it looks only at the file's size and final
        extension and, for archive-like files, at whether similar objects already
        exist under the target prefix. The file content is not read.

        Args:
            file_path: Local file to estimate
            bucket: Target bucket
            prefix: Target prefix (for finding similar files)
            sample_size: Bytes to sample for estimation (default 1MB); not used by
                the current heuristic

        Returns:
            CompressionEstimate with predicted compression
        """
        local_file = Path(file_path)
        size = local_file.stat().st_size
        suffix = local_file.suffix.lower()

        # Already compressed formats that won't benefit from delta
        already_compressed = {".jpg", ".jpeg", ".png", ".mp4", ".mp3", ".avi", ".mov"}

        # Archive and package formats that are candidates for delta compression
        delta_candidates = {
            ".zip",
            ".tar",
            ".gz",
            ".tar.gz",
            ".tgz",
            ".bz2",
            ".tar.bz2",
            ".xz",
            ".tar.xz",
            ".7z",
            ".rar",
            ".dmg",
            ".iso",
            ".pkg",
            ".deb",
            ".rpm",
            ".apk",
            ".jar",
            ".war",
            ".ear",
        }

        if suffix in already_compressed:
            return CompressionEstimate(
                original_size=size,
                estimated_compressed_size=size,
                estimated_ratio=0.0,
                confidence=0.95,
                should_use_delta=False,
            )

        if suffix not in delta_candidates:
            # Unknown type: be conservative and suggest delta only above 1MB
            return CompressionEstimate(
                original_size=size,
                estimated_compressed_size=size,
                estimated_ratio=0.0,
                confidence=0.5,
                should_use_delta=size > 1024 * 1024,
            )

        # Look for similar files in the target location
        candidates = self.find_similar_files(bucket, prefix, local_file.name)

        if candidates:
            # A similar version exists: 99% savings is typical in that case
            ratio, confidence, reference = 0.99, 0.9, candidates[0]["Key"]
        else:
            # First file of its kind: nothing to delta against
            ratio, confidence, reference = 0.0, 0.7, None

        return CompressionEstimate(
            original_size=size,
            estimated_compressed_size=int(size * (1 - ratio)),
            estimated_ratio=ratio,
            confidence=confidence,
            recommended_reference=reference,
            should_use_delta=True,
        )

    def find_similar_files(
        self,
        bucket: str,
        prefix: str,
        filename: str,
        limit: int = 5,
    ) -> list[dict[str, Any]]:
        """Find similar files that could serve as references.

        Args:
            bucket: S3 bucket
            prefix: Prefix to search in
            filename: Filename to match against
            limit: Maximum number of results

        Returns:
            List of similar files with scores
        """
        # List objects in the prefix
        response = self.list_objects(
            Bucket=bucket,
            Prefix=prefix,
            MaxKeys=1000,
        )

        similar: list[dict[str, Any]] = []
        base_name = Path(filename).stem
        ext = Path(filename).suffix

        for obj in response.contents:
            obj_base = Path(obj.key).stem
            obj_ext = Path(obj.key).suffix

            # Skip delta files and references
            if obj.key.endswith(".delta") or obj.key.endswith("reference.bin"):
                continue

            score = 0.0

            # Extension match
            if ext == obj_ext:
                score += 0.5

            # Base name similarity
            if base_name in obj_base or obj_base in base_name:
                score += 0.3

            # Version pattern match
            import re

            if re.search(r"v?\d+[\.\d]*", base_name) and re.search(r"v?\d+[\.\d]*", obj_base):
                score += 0.2

            if score > 0.5:
                similar.append(
                    {
                        "Key": obj.key,
                        "Size": obj.size,
                        "Similarity": score,
                        "LastModified": obj.last_modified,
                    }
                )

        # Sort by similarity
        similar.sort(key=lambda x: x["Similarity"], reverse=True)  # type: ignore

        return similar[:limit]

    def get_object_info(self, s3_url: str) -> ObjectInfo:
        """Look up one object and report its size and compression metadata.

        Args:
            s3_url: Location in the form ``s3://bucket/key``. A URL without a
                key part (``s3://bucket``) is looked up with an empty key.

        Returns:
            ObjectInfo populated from the storage head result and its metadata.

        Raises:
            ValueError: If ``s3_url`` does not start with ``s3://``.
            FileNotFoundError: If the storage adapter returns nothing for the object.
        """
        scheme = "s3://"
        if not s3_url.startswith(scheme):
            raise ValueError(f"Invalid S3 URL: {s3_url}")

        # Everything up to the first "/" is the bucket; the remainder is the key.
        bucket, _, key = s3_url[len(scheme) :].partition("/")

        head = self.service.storage.head(f"{bucket}/{key}")
        if not head:
            raise FileNotFoundError(f"Object not found: {s3_url}")

        meta = head.metadata
        stored_bytes = head.size

        return ObjectInfo(
            key=key,
            size=stored_bytes,
            last_modified=meta.get("last_modified", ""),
            etag=meta.get("etag"),
            # Without a recorded "file_size" the stored size is reported as the original size.
            original_size=int(meta.get("file_size", stored_bytes)),
            compressed_size=stored_bytes,
            compression_ratio=float(meta.get("compression_ratio", 0.0)),
            # Delta objects are recognised purely by their key suffix.
            is_delta=key.endswith(".delta"),
            reference_key=meta.get("ref_key"),
        )

    def get_bucket_stats(self, bucket: str) -> BucketStats:
        """Get statistics for a bucket.

        Pages through ``list_objects`` (1000 keys per request) until the
        listing is no longer truncated, then aggregates sizes and counts.

        Args:
            bucket: S3 bucket name

        Returns:
            BucketStats with compression and space savings info. The average
            compression ratio is ``space_saved / total_size`` (0.0 for an
            empty bucket).
        """
        # List all objects
        all_objects = []
        continuation_token = None

        while True:
            response = self.list_objects(
                Bucket=bucket,
                MaxKeys=1000,
                ContinuationToken=continuation_token,
            )

            all_objects.extend(response.contents)

            if not response.is_truncated:
                break

            next_token = response.next_continuation_token
            if not next_token or next_token == continuation_token:
                # A truncated page without a fresh token would make us request
                # the same page forever; stop and report what we have.
                self.service.logger.warning(
                    f"Listing of bucket {bucket} was truncated without a new "
                    "continuation token; statistics may be incomplete"
                )
                break

            continuation_token = next_token

        # Calculate stats
        total_size = 0
        compressed_size = 0
        delta_count = 0
        direct_count = 0

        for obj in all_objects:
            # What is actually stored, regardless of object kind.
            compressed_size += obj.size

            if obj.is_delta:
                delta_count += 1
                # Fall back to the stored size when the original size is unknown.
                total_size += obj.original_size or obj.size
            else:
                direct_count += 1
                total_size += obj.size

        space_saved = total_size - compressed_size
        avg_ratio = (space_saved / total_size) if total_size > 0 else 0.0

        return BucketStats(
            bucket=bucket,
            object_count=len(all_objects),
            total_size=total_size,
            compressed_size=compressed_size,
            space_saved=space_saved,
            average_compression_ratio=avg_ratio,
            delta_objects=delta_count,
            direct_objects=direct_count,
        )

    def _try_boto3_presigned_operation(self, operation: str, **kwargs: Any) -> Any | None:
        """Try to generate presigned operation using boto3 client, return None if not available."""
        storage_adapter = self.service.storage

        # Check if storage adapter has boto3 client
        if hasattr(storage_adapter, "client"):
            try:
                if operation == "url":
                    return str(storage_adapter.client.generate_presigned_url(**kwargs))
                elif operation == "post":
                    return dict(storage_adapter.client.generate_presigned_post(**kwargs))
            except AttributeError:
                # storage_adapter does not have a 'client' attribute
                pass
            except Exception as e:
                # Fall back to manual construction if needed
                self.service.logger.warning(f"Failed to generate presigned {operation}: {e}")

        return None

    def generate_presigned_url(
        self,
        ClientMethod: str,
        Params: dict[str, Any],
        ExpiresIn: int = 3600,
    ) -> str:
        """Generate presigned URL (boto3-compatible).

        The request is first delegated to ``_try_boto3_presigned_operation``.
        If that yields nothing, an unsigned placeholder URL is built and a
        warning is logged; the placeholder is for dev/testing only.

        Args:
            ClientMethod: Method name ('get_object' or 'put_object')
            Params: Parameters dict with Bucket and Key
            ExpiresIn: URL expiration in seconds

        Returns:
            Presigned URL string, or the placeholder
            ``<base>/<key>?expires=<ExpiresIn>`` when signing is unavailable.
        """
        # Try boto3 first, fallback to manual construction
        url = self._try_boto3_presigned_operation(
            "url",
            ClientMethod=ClientMethod,
            Params=Params,
            ExpiresIn=ExpiresIn,
        )
        if url is not None:
            return str(url)

        # Fallback: construct URL manually (less secure, for dev/testing only)
        bucket = Params.get("Bucket", "")
        key = Params.get("Key", "")

        if self.endpoint_url:
            # With a custom endpoint the bucket must appear in the path,
            # otherwise it is lost from the URL entirely. This matches the
            # "<endpoint>/<Bucket>" form used by generate_presigned_post.
            base_url = f"{self.endpoint_url}/{bucket}"
        else:
            base_url = f"https://{bucket}.s3.amazonaws.com"

        # Warning: This is not a real presigned URL, just a placeholder
        self.service.logger.warning("Using placeholder presigned URL - not suitable for production")
        return f"{base_url}/{key}?expires={ExpiresIn}"

    def generate_presigned_post(
        self,
        Bucket: str,
        Key: str,
        Fields: dict[str, str] | None = None,
        Conditions: list[Any] | None = None,
        ExpiresIn: int = 3600,
    ) -> dict[str, Any]:
        """Generate presigned POST data for HTML forms (boto3-compatible).

        Args:
            Bucket: S3 bucket name
            Key: Object key
            Fields: Additional fields to include
            Conditions: Upload conditions
            ExpiresIn: URL expiration in seconds

        Returns:
            Dict with 'url' and 'fields' for form submission
        """
        # Try boto3 first, fallback to manual construction
        response = self._try_boto3_presigned_operation(
            "post",
            Bucket=Bucket,
            Key=Key,
            Fields=Fields,
            Conditions=Conditions,
            ExpiresIn=ExpiresIn,
        )
        if response is not None:
            return dict(response)

        # Fallback: return minimal structure for compatibility
        if self.endpoint_url:
            url = f"{self.endpoint_url}/{Bucket}"
        else:
            url = f"https://{Bucket}.s3.amazonaws.com"

        return {
            "url": url,
            "fields": {
                "key": Key,
                **(Fields or {}),
            },
        }

    def _parse_tagging(self, tagging: str) -> dict[str, str]:
        """Parse URL-encoded tagging string to dict.

        The string has the query-string form ``k1=v1&k2=v2``. Names and values
        are percent-decoded (``+`` is read as a space), so ``a=hello%20world``
        yields ``{"a": "hello world"}``.

        Args:
            tagging: Encoded tag set; an empty or falsy value yields ``{}``.

        Returns:
            Mapping of decoded tag names to decoded tag values. Pairs without
            an ``=`` are ignored; a later duplicate name overrides an earlier one.
        """
        from urllib.parse import unquote_plus

        tags: dict[str, str] = {}
        if not tagging:
            return tags

        for pair in tagging.split("&"):
            name, sep, value = pair.partition("=")
            if not sep:
                # No "=" in this segment, so it is not a name/value pair.
                continue
            tags[unquote_plus(name)] = unquote_plus(value)
        return tags


def create_client(
    endpoint_url: str | None = None,
    log_level: str = "INFO",
    cache_dir: str = "/tmp/.deltaglider/cache",
    **kwargs: Any,
) -> DeltaGliderClient:
    """Build a ready-to-use DeltaGliderClient wired with the default adapters.

    The returned client offers:
    - Boto3-compatible method names (put_object, get_object, etc.)
    - Batch operations (upload_batch, download_batch)
    - Compression estimation
    - Progress callbacks for large uploads
    - Detailed object and bucket statistics

    Args:
        endpoint_url: Optional S3 endpoint URL (for MinIO, R2, etc.)
        log_level: Level handed to the standard logger adapter
        cache_dir: Directory used by the filesystem reference cache
        **kwargs: ``tool_version`` (default ``"deltaglider/0.2.0"``) and
            ``max_ratio`` (default ``0.5``) are picked out here; anything
            else is forwarded to ``DeltaService`` unchanged.

    Returns:
        DeltaGliderClient instance

    Examples:
        >>> # Boto3-compatible usage
        >>> client = create_client()
        >>> client.put_object(Bucket='my-bucket', Key='file.zip', Body=b'data')
        >>> response = client.get_object(Bucket='my-bucket', Key='file.zip')
        >>> data = response['Body'].read()

        >>> # Batch operations
        >>> results = client.upload_batch(['v1.zip', 'v2.zip'], 's3://bucket/releases/')

        >>> # Compression estimation
        >>> estimate = client.estimate_compression('new.zip', 'bucket', 'releases/')
        >>> print(f"Expected compression: {estimate.estimated_ratio:.1%}")
    """
    # Imported lazily to avoid a circular dependency at module import time.
    from .adapters import (
        FsCacheAdapter,
        NoopMetricsAdapter,
        S3StorageAdapter,
        Sha256Adapter,
        StdLoggerAdapter,
        UtcClockAdapter,
        XdeltaAdapter,
    )

    # Separate the options with defaults from the pass-through keyword arguments.
    version_tag = kwargs.pop("tool_version", "deltaglider/0.2.0")
    ratio_limit = kwargs.pop("max_ratio", 0.5)

    # The hasher is built first because the cache adapter needs it.
    sha256 = Sha256Adapter()
    collaborators = {
        "hasher": sha256,
        "storage": S3StorageAdapter(endpoint_url=endpoint_url),
        "diff": XdeltaAdapter(),
        "cache": FsCacheAdapter(Path(cache_dir), sha256),
        "clock": UtcClockAdapter(),
        "logger": StdLoggerAdapter(level=log_level),
        "metrics": NoopMetricsAdapter(),
    }

    delta_service = DeltaService(
        **collaborators,
        tool_version=version_tag,
        max_ratio=ratio_limit,
        **kwargs,
    )

    return DeltaGliderClient(delta_service, endpoint_url)