release: 1.0.4

ci: cache-nix-action preferred
magic cache doesn't work due to size
2026-06-25 11:26:17 +02:00 · 2026-06-23 10:32:38 +02:00 · 2026-06-23 08:01:04 +02:00 · 2026-06-23 07:36:13 +02:00 · 2026-06-22 19:26:53 +02:00 · 2026-06-22 15:29:30 +02:00
331 changed files with 47104 additions and 11234 deletions
@@ -0,0 +1,2 @@
 [alias]
 eval = "run -p evaluations --release --"
@@ -1,40 +0,0 @@
 # Git stuff
 .git/
 .gitignore
 .github
 # Node build artifacts
 **/node_modules/
 # Nix/Devenv environment files
 .direnv/
 .devenv/
 devenv.lock
 devenv.nix
 devenv.yaml
 docker-compose.yml
 .envrc
 .devenv.flake.nix
 flake.lock
 flake.nix
 # Rust build artifacts (crucial for multi-stage builds)
 **/target/
 # Runtime data directories
 data/
 database/
 # Local environment config (sensitive)
 .env
 # IDE specific
 .vscode/
 .idea/
 # OS specific
 .DS_Store
 Thumbs.db
 # Logs / Temporary files
 *.log
@@ -1,49 +0,0 @@
 - name: Prepare lib dir
  run: mkdir -p lib
 # Linux
 - name: Fetch ONNX Runtime (Linux)
  if: runner.os == 'Linux'
  env:
    ORT_VER: 1.22.0
  run: |
    set -euo pipefail
    ARCH="$(uname -m)"
    case "$ARCH" in
      x86_64)  URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-x64-${ORT_VER}.tgz" ;;
      aarch64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-aarch64-${ORT_VER}.tgz" ;;
      *) echo "Unsupported arch $ARCH"; exit 1 ;;
    esac
    curl -fsSL -o ort.tgz "$URL"
    tar -xzf ort.tgz
    cp -v onnxruntime-*/lib/libonnxruntime.so* lib/
 # macOS
 - name: Fetch ONNX Runtime (macOS)
  if: runner.os == 'macOS'
  env:
    ORT_VER: 1.22.0
  run: |
    set -euo pipefail
    curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
    tar -xzf ort.tgz
    # copy the main dylib; rename to stable name if needed
    cp -v onnxruntime-*/lib/libonnxruntime*.dylib lib/
    # optional: ensure a stable name
    if [ ! -f lib/libonnxruntime.dylib ]; then
      cp -v lib/libonnxruntime*.dylib lib/libonnxruntime.dylib
    fi
 # Windows
 - name: Fetch ONNX Runtime (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  env:
    ORT_VER: 1.22.0
  run: |
    $url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
    Invoke-WebRequest $url -OutFile ort.zip
    Expand-Archive ort.zip -DestinationPath ort
    $dll = Get-ChildItem -Recurse -Path ort -Filter onnxruntime.dll | Select-Object -First 1
    Copy-Item $dll.FullName lib\onnxruntime.dll
@@ -0,0 +1,30 @@
 # Continuous integration: runs the Nix flake checks on pushes to main and on manual dispatch.
 name: CI
 permissions:
  contents: read
  # NOTE(review): write access to Actions is presumably needed by the Nix cache step
  # to manage cache entries — confirm against the cache action's documentation.
  actions: write
 on:
  push:
    branches: [main]
  # Allows the workflow to be started by hand.
  workflow_dispatch:
 jobs:
  check:
    name: Format, lint, build & test
    runs-on: ubuntu-latest
    steps:
      # Check out the repository together with its submodules.
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: DeterminateSystems/determinate-nix-action@v3
      # The primary cache key hashes every *.nix file, flake.lock and Cargo.lock.
      # restore-prefixes-first-match supplies an OS-only key prefix (presumably the
      # fallback when the exact key misses — confirm against the action docs).
      - uses: nix-community/cache-nix-action@v7
        with:
          primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
          restore-prefixes-first-match: nix-${{ runner.os }}-
          gc-max-store-size-linux: 10G
      # A single flake check is the whole CI job; the step name lists what it is
      # expected to cover. --show-trace prints the evaluation trace on failure.
      - name: Check formatting, clippy lint, unit tests & ort version
        run: nix flake check --show-trace
@@ -7,7 +7,7 @@ on:
  pull_request:
  push:
    tags:
-      - '**[0-9]+.[0-9]+.[0-9]+*'
+      - "**[0-9]+.[0-9]+.[0-9]+*"
 jobs:
  plan:
@@ -17,6 +17,7 @@ jobs:
      tag: ${{ !github.event.pull_request && github.ref_name || '' }}
      tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }}
      publishing: ${{ !github.event.pull_request }}
      ort-version: ${{ steps.ort_version.outputs.value }}
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
@@ -24,9 +25,25 @@ jobs:
        with:
          submodules: recursive
      - name: Install Nix
        uses: DeterminateSystems/determinate-nix-action@v3
      - uses: nix-community/cache-nix-action@v7
        with:
          primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
          restore-prefixes-first-match: nix-${{ runner.os }}-
          gc-max-store-size-linux: 10G
      - name: Read ORT version from flake
        id: ort_version
        run: echo "value=$(nix eval .#lib.ortVersion --raw)" >> "$GITHUB_OUTPUT"
      - name: Verify ort-version matches nixpkgs onnxruntime
        run: nix flake check --system x86_64-linux
      - name: Install dist
        shell: bash
-        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.0/cargo-dist-installer.sh | sh"
+        run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh"
      - name: Cache dist
        uses: actions/upload-artifact@v4
@@ -67,6 +84,10 @@ jobs:
        with:
          submodules: recursive
      # Export the ONNX Runtime version computed by the plan job as ORT_VER so the
      # later "Fetch ONNX Runtime" steps can build their download URLs from it.
      - name: Load ONNX Runtime version
        shell: bash
        env:
          # Hand the job output over via the environment instead of interpolating
          # it into the script text, so its content is never parsed as shell code.
          PLAN_ORT_VERSION: ${{ needs.plan.outputs.ort-version }}
        run: |
          # Fail here with a clear message rather than later with a broken download URL.
          if [ -z "$PLAN_ORT_VERSION" ]; then
            echo "plan job did not provide an ONNX Runtime version" >&2
            exit 1
          fi
          echo "ORT_VER=$PLAN_ORT_VERSION" >> "$GITHUB_ENV"
      - name: Install Rust non-interactively if not already installed
        if: ${{ matrix.container }}
        run: |
@@ -107,8 +128,6 @@ jobs:
      - name: Fetch ONNX Runtime (Linux)
        if: runner.os == 'Linux'
        env:
          ORT_VER: 1.22.0
        run: |
          set -euo pipefail
          ARCH="$(uname -m)"
@@ -125,8 +144,6 @@ jobs:
      - name: Fetch ONNX Runtime (macOS)
        if: runner.os == 'macOS'
        env:
          ORT_VER: 1.22.0
        run: |
          set -euo pipefail
          curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
@@ -137,8 +154,6 @@ jobs:
      - name: Fetch ONNX Runtime (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        env:
          ORT_VER: 1.22.0
        run: |
          $url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
          Invoke-WebRequest $url -OutFile ort.zip
@@ -151,7 +166,6 @@ jobs:
          echo "lib/ contents:"
          ls -l lib || dir lib
      # ===== END: Injected ORT staging =====
      - name: Install dependencies
        run: |
          ${{ matrix.packages_install }}
@@ -179,21 +193,31 @@ jobs:
            ${{ env.BUILD_MANIFEST_NAME }}
  build_and_push_docker_image:
-    name: Build and Push Docker Image
+    name: Build and Push Docker Image (Nix)
    runs-on: ubuntu-latest
    needs: [plan]
    if: ${{ needs.plan.outputs.publishing == 'true' }}
    permissions:
      contents: read
      id-token: write
      packages: write
      actions: write
    steps:
-      - name: Checkout repository
+      - uses: actions/checkout@v4
        uses: actions/checkout@v4
        with:
          submodules: recursive
-      - name: Set up Docker Buildx
+      - name: Install Nix
-        uses: docker/setup-buildx-action@v3
+        uses: DeterminateSystems/determinate-nix-action@v3
      - uses: nix-community/cache-nix-action@v7
        with:
          primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix', '**/flake.lock', 'Cargo.lock') }}
          restore-prefixes-first-match: nix-${{ runner.os }}-
          gc-max-store-size-linux: 10G
      - name: Build Docker image with Nix
        run: nix build .#dockerImage -L --show-trace
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
@@ -208,15 +232,16 @@ jobs:
        with:
          images: ghcr.io/${{ github.repository }}
-      - name: Build and push Docker image
+      # Load the image tarball produced by the Nix build step (./result), retag it
+      # for the registry and push both the release tag and `latest`.
+      - name: Load and push Docker image
-        uses: docker/build-push-action@v5
+        env:
-        with:
+          IMAGE_NAME: ghcr.io/${{ github.repository }}
-          context: .
+          IMAGE_TAG: ${{ needs.plan.outputs.tag }}
-          push: true
+        run: |
-          tags: ${{ steps.meta.outputs.tags }}
+          set -euo pipefail
-          labels: ${{ steps.meta.outputs.labels }}
+          # `docker load` reports "Loaded image: <name>:<tag>"; reuse that reference
-          cache-from: type=gha
+          # instead of hard-coding a version that goes stale on every release.
-          cache-to: type=gha,mode=max
+          LOADED_IMAGE="$(docker load < result | sed -n 's/^Loaded image: //p' | head -n 1)"
+          if [ -z "$LOADED_IMAGE" ]; then
+            echo "docker load did not report a loaded image" >&2
+            exit 1
+          fi
+          docker tag "$LOADED_IMAGE" "$IMAGE_NAME:$IMAGE_TAG"
+          docker tag "$LOADED_IMAGE" "$IMAGE_NAME:latest"
+          docker push "$IMAGE_NAME:$IMAGE_TAG"
           docker push "$IMAGE_NAME:latest"
  build-global-artifacts:
    needs: [plan, build-local-artifacts]
@@ -10,6 +10,9 @@ result
 data
 database
 evaluations/cache/
 evaluations/reports/
 # Devenv
 .devenv*
 devenv.local.nix
@@ -21,3 +24,8 @@ devenv.local.nix
 .pre-commit-config.yaml
 # html-router/assets/style.css
 html-router/node_modules
 .fastembed_cache/
 # insta: pending (unreviewed) snapshots; accepted *.snap files are committed
 *.snap.new
 .insta.bak
@@ -1,62 +1,131 @@
 # Changelog
 ## Unreleased
 - Infra: CI workflow fixes. CI is now a nix flake check which includes compilation, caching and running tests, clippy, fmt, validation for ort version.
 - Docker-compose: The example now references the ghcr image so we can remove the Dockerfile and reduce maintenance scope.
 - Refactor: web scraping now uses `servo-fetch` (pure-Rust Servo engine) and PDF rendering uses `pdfium-render` (direct PDFium bindings) — reduces Docker image size by ~300MB, improves startup latency by ~100× for PDF rendering, and provides more stable output
 - Fix: added `pkgs.libglvnd` to `LD_LIBRARY_PATH` in devenv so Servo engine can find `libEGL.so` at runtime
 - Fix: updated Dockerfile to add `libegl1 libegl-mesa0 libgles2 libfontconfig1 libfreetype6` runtime dependencies for servo-fetch
 - Docs: updated architecture, features, and installation docs to reflect the new web processing stack
 - Fix: added pre-commit hooks to further maintain code consistency.
 - Security: updated some deps because dependabot told me, good bot.
 - Security: bump `async-openai` to 0.41.1 (feature-gated types, transcription API rename; removes `backoff` transitive dep)
 - Refactor: deduplicated test database setup across common/src/storage/.
 - Refactor: split knowledge-graph.js monolith into focused functions.
 - Evaluations: simplified crate layout — linear pipeline, sharded-only converted store, in-memory ingestion, `db/` and `cli/` modules; namespace reuse state in corpus manifest (removed `cache/snapshots/`); no legacy JSON/history compatibility (re-run `--warm` after upgrade)
 - Performance: ingestion skips per-task index rebuild; worker runs scheduled `REBUILD INDEX` (default every 24h via `index_rebuild_interval_secs`, `0` disables)
 - Performance: ingestion persists all artifacts in a single SurrealDB transaction per task (atomic replace by task id)
 - Performance: entity embeddings during ingestion use batched `embed_batch`, matching chunk embedding
 - Fix: ingestion reclaims tasks after a successful persist without re-running the pipeline when `mark_succeeded` failed
 - Fix: content deletion clears graph relationships via shared `TextContent::clear_ingested_children`
 - Fix: regression in the suggestion of relationships
 - Internal: extracted duplicate entity+embedding patterns into `HasEmbedding` and `EmbeddingRecord` traits with generic `store_with_embedding`, `delete_by_source_id`, and `vector_search` on `SurrealDbClient`.
 - Infra: `ort-version` file removed — version inlined in `flake.nix` and `devenv.nix`; `release.yml` reads it via `nix eval .#lib.ortVersion` from the plan job
 - Infra: `screenshot-graph.webp` and `.dockerignore` deleted — stale artifacts from Dockerfile era
 ## 1.0.3 (2026-06-12)
 - Search: filter results by type — knowledge entities, ingested content, or both
 - Admin: choose the local FastEmbed model from the admin UI; changes save immediately and apply after restart (re-embeds when the vector dimension changes)
 - Performance: pooled FastEmbed workers and batched embedding generation for faster ingestion and search
 - Performance: lower search and chat latency from backend allocation and retrieval optimizations
 - Fix: modal dialogs (scratchpad editor, admin prompts, entity creation) open and close more reliably
 - Fix: improved knowledge-entity relationship suggestions when creating entities manually
 - Fix: API key revocation now correctly clears the stored key
 ## 1.0.2 (2026-02-15)
 - Fix: edge case where navigation back to a chat page could trigger a new response generation
 - Fix: chat references now validate and render more reliably
 - Fix: improved admin access checks for restricted routes
 - Performance: faster chat sidebar loads from cached conversation archive data
 - API: harmonized ingest endpoint naming and added configurable ingest safety limits
 - Security: hardened query handling and ingestion logging to reduce injection and data exposure risk
 ## 1.0.1 (2026-02-11)
 - Shipped an S3 storage backend so content can be stored in object storage instead of local disk, with configuration support for S3 deployments.
 - Introduced user theme preferences with the new Obsidian Prism look and improved dark mode styling.
 - Fixed edge cases, including content deletion behavior and compatibility for older user records.
 ## 1.0.0 (2026-01-02)
 - **Locally generated embeddings are now default**. If you want to continue using API embeddings, set EMBEDDING_BACKEND to openai. This will download an ONNX model and recreate all embeddings, but in most instances it's well worth it, since it removes the network-bound call to create embeddings. Creating embeddings on my N100 device is extremely fast. Typically a search response is provided in less than 50ms.
 - Added a benchmarks crate for evaluating the retrieval process
 - Added fastembed embedding support, enables the use of local CPU generated embeddings, greatly improved latency if machine can handle it. Quick search has vastly better accuracy and is much faster, 50ms latency when testing compared to minimum 300ms.
 - Embeddings stored on own table.
 - Refactored retrieval pipeline to use the new, faster and more accurate strategy. Read [blog post](https://blog.stark.pub/posts/eval-retrieval-refactor/) for more details.
 ## Version 0.2.7 (2025-12-04)
 - Improved admin page, now only loads models when specifically requested. Groundwork for coming configuration features.
 - Fix: timezone aware info in scratchpad
 ## Version 0.2.6 (2025-10-29)
 - Added an opt-in FastEmbed-based reranking stage behind `reranking_enabled`. It improves retrieval accuracy by re-scoring hybrid results.
 - Fix: default name for relationships harmonized across application
 ## Version 0.2.5 (2025-10-24)
 - Added manual knowledge entity creation flows using a modal, with the option for suggested relationships
 - Scratchpad feature, with the feature to convert scratchpads to content.
 - Added knowledge entity search results to the global search
 - Backend fixes for improved performance when ingesting and retrieval
 ## Version 0.2.4 (2025-10-15)
 - Improved retrieval performance. Ingestion and chat now utilize full-text search, vector comparison and graph traversal.
 - Ingestion task archive
 ## Version 0.2.3 (2025-10-12)
 - Fix changing vector dimensions on a fresh database (#3)
 ## Version 0.2.2 (2025-10-07)
 - Support for ingestion of PDF files
 - Improved ingestion speed
 - Fix: deletion of items now works as expected
 - Fix enabling GPT-5 use via OpenAI API
 ## Version 0.2.1 (2025-09-24)
 - Fixed API JSON responses so iOS Shortcuts integrations keep working.
 ## Version 0.2.0 (2025-09-23)
 - Revamped the UI with a neobrutalist theme, better dark mode, and a D3-based knowledge graph.
 - Added pagination for entities and content plus new observability metrics on the dashboard.
 - Enabled audio ingestion and merged the new storage backend.
 - Improved performance, request filtering, and journalctl/systemd compatibility.
 ## Version 0.1.4 (2025-07-01)
 - Added image ingestion with configurable system settings and updated Docker Compose docs.
 - Hardened admin flows by fixing concurrent API/database calls and normalizing task statuses.
 ## Version 0.1.3 (2025-06-08)
 - Added support for AI providers beyond OpenAI.
 - Made the HTTP port configurable for deployments.
 - Smoothed graph mapper failures, long content tiles, and refreshed project documentation.
 ## Version 0.1.2 (2025-05-26)
 - Introduced full-text search across indexed knowledge.
 - Polished the UI with consistent titles, icon fallbacks, and improved markdown scrolling.
 - Fixed search result links and SurrealDB vector formatting glitches.
 ## Version 0.1.1 (2025-05-13)
 - Added streaming feedback to ingestion tasks for clearer progress updates.
 - Made the data storage path configurable.
 - Improved release tooling with Chromium-enabled Nix flakes, Docker builds, and migration/template fixes.
 ## Version 0.1.0 (2025-05-06)
 - Initial release with a SurrealDB-backed ingestion pipeline, job queue, vector search, and knowledge graph storage.
 - Delivered a chat experience featuring streaming responses, conversation history, markdown rendering, and customizable system prompts.
 - Introduced an admin console with analytics, registration and timezone controls, and job monitoring.
@@ -5,20 +5,26 @@ members = [
  "api-router",
  "html-router",
  "ingestion-pipeline",
-  "composite-retrieval",
+  "retrieval-pipeline",
-  "json-stream-parser"
+  "json-stream-parser",
  "evaluations",
 ]
-resolver = "2"
+resolver = "3"
 [workspace.dependencies]
 anyhow = "1.0.94"
-async-openai = "0.29.3"
+async-openai = { version = "0.41.1", features = [
  "chat-completion",
  "embedding",
  "audio",
  "model",
 ] }
 async-stream = "0.3.6"
 async-trait = "0.1.88"
 axum-htmx = "0.7.0"
-axum_session = "0.16"
+axum_session = "0.18"
-axum_session_auth = "0.16"
+axum_session_auth = "0.18"
-axum_session_surreal = "0.4"
+axum_session_surreal = "0.6"
 axum_typed_multipart = "0.16"
 axum = { version = "0.8", features = ["multipart", "macros"] }
 chrono-tz = "0.10.1"
@@ -26,7 +32,6 @@ chrono = { version = "0.4.39", features = ["serde"] }
 config = "0.15.4"
 dom_smoothie = "0.10.0"
 futures = "0.3.31"
 headless_chrome = "1.0.17"
 include_dir = "0.7.4"
 mime = "0.3.17"
 mime_guess = "2.0.5"
@@ -34,14 +39,16 @@ minijinja-autoreload = "2.5.0"
 minijinja-contrib = { version = "2.6.0", features = ["datetime", "timezone"] }
 minijinja-embed = { version = "2.8.0" }
 minijinja = { version = "2.5.0", features = ["loader", "multi_template"] }
-reqwest = {version = "0.12.12", features = ["charset", "json"]}
+reqwest = { version = "0.12.12", features = ["charset", "json"] }
 serde_json = "1.0.128"
 serde = { version = "1", features = ["derive"] }
 sha2 = "0.10.8"
-surrealdb-migrations = "2.2.2"
+surrealdb-migrations = "2.4.0"
-surrealdb = { version = "2", features = ["kv-mem"] }
+surrealdb = { version = "2.6" }
 tempfile = "3.12.0"
-text-splitter = "0.18.1"
+text-splitter = { version = "0.18.1", features = ["markdown", "tokenizers"] }
 tokenizers = { version = "0.20.4", features = ["http"] }
 unicode-normalization = "0.1.24"
 thiserror = "1.0.63"
 tokio-util = { version = "0.7.15", features = ["io"] }
 tokio = { version = "1", features = ["full"] }
@@ -53,15 +60,30 @@ url = { version = "2.5.2", features = ["serde"] }
 uuid = { version = "1.10.0", features = ["v4", "serde"] }
 tokio-retry = "0.3.0"
 base64 = "0.22.1"
-object_store = { version = "0.11.2" }
+object_store = { version = "0.11.2", features = ["aws"] }
 bytes = "1.7.1"
-state-machines = "0.2.0"
+state-machines = "0.9"
-fastembed = { version = "5.2.0", default-features = false, features = ["hf-hub-native-tls", "ort-load-dynamic"] }
+pdf-extract = "0.9"
 lopdf = "0.32"
 pdfium-auto = "0.3"
 pdfium-render = "0.8"
 servo-fetch = "0.13"
 tendril = "0.4"
 image = { version = "0.25", default-features = false, features = ["png"] }
 fastembed = { version = "5.2.0", default-features = false, features = [
  "hf-hub-native-tls",
  "ort-load-dynamic",
 ] }
 [profile.dist]
 inherits = "release"
 lto = "thin"
 [workspace.lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
  "cfg(feature, values(\"inspect\"))",
 ] }
 [workspace.lints.clippy]
 # Performance-focused lints
 perf = { level = "warn", priority = -1 }
@@ -77,7 +99,7 @@ implicit_clone = "warn"
 redundant_clone = "warn"
 # Security-focused lints
-integer_arithmetic = "warn"
+arithmetic_side_effects = "warn"
 indexing_slicing = "warn"
 unwrap_used = "warn"
 expect_used = "warn"
@@ -87,7 +109,7 @@ todo = "warn"
 # Async/Network lints
 async_yields_async = "warn"
-await_holding_invalid_state = "warn"
+await_holding_invalid_type = "warn"
 rc_buffer = "warn"
 # Maintainability-focused lints
@@ -103,9 +125,10 @@ missing_errors_doc = "allow"
 missing_panics_doc = "warn"
 module_name_repetitions = "warn"
 wildcard_dependencies = "warn"
-missing_docs_in_private_items = "warn"
+missing_docs_in_private_items = "allow"
 # Allow noisy lints that don't add value for this project
 manual_must_use = "allow"
 needless_raw_string_hashes = "allow"
 multiple_bound_locations = "allow"
 cargo_common_metadata = "allow"
 # Spelled with underscores like every other lint name in this table.
 multiple_crate_versions = "allow"
@@ -1,51 +0,0 @@
 # === Builder ===
 FROM rust:1.86-bookworm AS builder
 WORKDIR /usr/src/minne
 RUN apt-get update && apt-get install -y --no-install-recommends \
    pkg-config clang cmake git && rm -rf /var/lib/apt/lists/*
 # Cache deps
 COPY Cargo.toml Cargo.lock ./
 RUN mkdir -p api-router common composite-retrieval html-router ingestion-pipeline json-stream-parser main worker
 COPY api-router/Cargo.toml ./api-router/
 COPY common/Cargo.toml ./common/
 COPY composite-retrieval/Cargo.toml ./composite-retrieval/
 COPY html-router/Cargo.toml ./html-router/
 COPY ingestion-pipeline/Cargo.toml ./ingestion-pipeline/
 COPY json-stream-parser/Cargo.toml ./json-stream-parser/
 COPY main/Cargo.toml ./main/
 RUN cargo build --release --bin main --features ingestion-pipeline/docker || true
 # Build
 COPY . .
 RUN cargo build --release --bin main --features ingestion-pipeline/docker
 # === Runtime ===
 FROM debian:bookworm-slim
 # Chromium + runtime deps + OpenMP for ORT
 RUN apt-get update && apt-get install -y --no-install-recommends \
    chromium libnss3 libasound2 libgbm1 libxshmfence1 \
    ca-certificates fonts-dejavu fonts-noto-color-emoji \
    libgomp1 libstdc++6 curl \
  && rm -rf /var/lib/apt/lists/*
 # ONNX Runtime (CPU). Change if you bump ort.
 ARG ORT_VERSION=1.22.0
 RUN mkdir -p /opt/onnxruntime && \
    curl -fsSL -o /tmp/ort.tgz \
      "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar -xzf /tmp/ort.tgz -C /opt/onnxruntime --strip-components=1 && rm /tmp/ort.tgz
 ENV CHROME_BIN=/usr/bin/chromium \
    SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
    ORT_DYLIB_PATH=/opt/onnxruntime/lib/libonnxruntime.so
 # Non-root
 RUN useradd -m appuser
 USER appuser
 WORKDIR /home/appuser
 COPY --from=builder /usr/src/minne/target/release/main /usr/local/bin/main
 EXPOSE 3000
 CMD ["main"]
@@ -10,13 +10,13 @@
 ## Demo deployment
-To test _Minne_ out, enter [this](https://minne-demo.stark.pub) read-only demo deployment to view and test functionality out.
+To try _Minne_ out, visit the [read-only demo deployment](https://minne.stark.pub) and sign in to view and test its functionality.
 ## Noteworthy Features
 - **Search & Chat Interface** - Find content or knowledge instantly with full-text search, or use the chat mode and conversational AI to find and reason about content
 - **Manual and AI-assisted connections** - Build entities and relationships manually with full control, let AI create entities and relationships automatically, or blend both approaches with AI suggestions for manual approval 
- **Hybrid Retrieval System** - Search combining vector similarity, full-text search, and graph traversal for highly relevant results
+- **Hybrid Retrieval System** - Search combining vector similarity & full-text search
 - **Scratchpad Feature** - Quickly capture thoughts and convert them to permanent content when ready
 - **Visual Graph Explorer** - Interactive D3-based navigation of your knowledge entities and connections
 - **Multi-Format Support** - Ingest text, URLs, PDFs, audio files, and images into your knowledge base
@@ -121,7 +121,7 @@ fastembed_cache_dir: "/var/lib/minne/fastembed"  # optional override, defaults t
 - **Frontend:** HTML with HTMX and minimal JavaScript for interactivity
 - **Database:** SurrealDB (graph, document, and vector search)
 - **AI Integration:** OpenAI-compatible API with structured outputs
- **Web Processing:** Headless Chrome for robust webpage content extraction
+- **Web Processing:** Embedded Servo engine (servo-fetch) for webpage content extraction + PDFium for PDF rendering
 ## Configuration
@@ -172,7 +172,7 @@ cd minne
 docker compose up -d
 ```
-The included `docker-compose.yml` handles SurrealDB and Chromium dependencies automatically.
+The included `docker-compose.yml` handles SurrealDB automatically.
 ### 2. Nix
@@ -180,13 +180,13 @@ The included `docker-compose.yml` handles SurrealDB and Chromium dependencies au
 nix run 'github:perstarkse/minne#main'
 ```
-This fetches Minne and all dependencies, including Chromium.
+This fetches Minne and all dependencies.
 ### 3. Pre-built Binaries
 Download binaries for Windows, macOS, and Linux from the [GitHub Releases](https://github.com/perstarkse/minne/releases/latest).
-**Requirements:** You'll need to provide SurrealDB and Chromium separately.
+**Requirements:** You'll need to provide SurrealDB separately.
 ### 4. Build from Source
@@ -196,7 +196,7 @@ cd minne
 cargo run --release --bin main
 ```
-**Requirements:** SurrealDB and Chromium must be installed and accessible in your PATH.
+**Requirements:** SurrealDB must be installed and accessible in your PATH.
 ## Application Architecture
@@ -20,3 +20,8 @@ futures = { workspace = true }
 axum_typed_multipart = { workspace = true} 
 common = { path = "../common" }
 [dev-dependencies]
 common = { path = "../common", features = ["test-utils"] }
 tower = "0.5"
 uuid = { workspace = true }
@@ -11,31 +11,3 @@ pub struct ApiState {
    pub config: AppConfig,
    pub storage: StorageManager,
 }
 impl ApiState {
    pub async fn new(
        config: &AppConfig,
        storage: StorageManager,
    ) -> Result<Self, Box<dyn std::error::Error>> {
        let surreal_db_client = Arc::new(
            SurrealDbClient::new(
                &config.surrealdb_address,
                &config.surrealdb_username,
                &config.surrealdb_password,
                &config.surrealdb_namespace,
                &config.surrealdb_database,
            )
            .await?,
        );
        surreal_db_client.apply_migrations().await?;
        let app_state = Self {
            db: surreal_db_client.clone(),
            config: config.clone(),
            storage,
        };
        Ok(app_state)
    }
 }
@@ -7,36 +7,38 @@ use common::error::AppError;
 use serde::Serialize;
 use thiserror::Error;
-#[derive(Error, Debug, Serialize, Clone)]
+#[derive(Error, Debug)]
-pub enum ApiError {
+pub enum ApiErr {
-    #[error("Internal server error")]
+    #[error("internal server error")]
    InternalError(String),
-    #[error("Validation error: {0}")]
+    #[error("validation error: {0}")]
    ValidationError(String),
-    #[error("Not found: {0}")]
+    #[error("not found: {0}")]
    NotFound(String),
-    #[error("Unauthorized: {0}")]
+    #[error("unauthorized: {0}")]
    Unauthorized(String),
    #[error("payload too large: {0}")]
    PayloadTooLarge(String),
 }
-impl From<AppError> for ApiError {
+impl From<AppError> for ApiErr {
    fn from(err: AppError) -> Self {
        match err {
            AppError::Database(_) | AppError::OpenAI(_) => {
                tracing::error!("Internal error: {:?}", err);
                Self::InternalError("Internal server error".to_string())
            }
            AppError::NotFound(msg) => Self::NotFound(msg),
            AppError::Validation(msg) => Self::ValidationError(msg),
            AppError::Auth(msg) => Self::Unauthorized(msg),
-            _ => Self::InternalError("Internal server error".to_string()),
+            other => {
                tracing::error!("internal API error: {other:?}");
                Self::InternalError("Internal server error".to_string())
            }
        }
    }
 }
-impl IntoResponse for ApiError {
+impl IntoResponse for ApiErr {
    fn into_response(self) -> Response {
        let (status, error_response) = match self {
            Self::InternalError(message) => (
@@ -67,6 +69,13 @@ impl IntoResponse for ApiError {
                    status: "error".to_string(),
                },
            ),
            Self::PayloadTooLarge(message) => (
                StatusCode::PAYLOAD_TOO_LARGE,
                ErrorResponse {
                    error: message,
                    status: "error".to_string(),
                },
            ),
        };
        (status, Json(error_response)).into_response()
@@ -84,6 +93,7 @@ mod tests {
    use super::*;
    use common::error::AppError;
    use std::fmt::Debug;
    use std::io;
    // Helper to check status code
    fn assert_status_code<T: IntoResponse + Debug>(response: T, expected_status: StatusCode) {
@@ -95,43 +105,58 @@ mod tests {
    fn test_app_error_to_api_error_conversion() {
        // Test NotFound error conversion
        let not_found = AppError::NotFound("resource not found".to_string());
-        let api_error = ApiError::from(not_found);
+        let api_error = ApiErr::from(not_found);
-        assert!(matches!(api_error, ApiError::NotFound(msg) if msg == "resource not found"));
+        assert!(matches!(api_error, ApiErr::NotFound(msg) if msg == "resource not found"));
        // Test Validation error conversion
        let validation = AppError::Validation("invalid input".to_string());
-        let api_error = ApiError::from(validation);
+        let api_error = ApiErr::from(validation);
-        assert!(matches!(api_error, ApiError::ValidationError(msg) if msg == "invalid input"));
+        assert!(matches!(api_error, ApiErr::ValidationError(msg) if msg == "invalid input"));
        // Test Auth error conversion
        let auth = AppError::Auth("unauthorized".to_string());
-        let api_error = ApiError::from(auth);
+        let api_error = ApiErr::from(auth);
-        assert!(matches!(api_error, ApiError::Unauthorized(msg) if msg == "unauthorized"));
+        assert!(matches!(api_error, ApiErr::Unauthorized(msg) if msg == "unauthorized"));
        // Test for internal errors - create a mock error that doesn't require surrealdb
-        let internal_error =
+        let internal_error = AppError::Io(io::Error::other("io error"));
-            AppError::Io(std::io::Error::new(std::io::ErrorKind::Other, "io error"));
+        let api_error = ApiErr::from(internal_error);
-        let api_error = ApiError::from(internal_error);
+        assert!(matches!(
-        assert!(matches!(api_error, ApiError::InternalError(_)));
+            api_error,
            ApiErr::InternalError(msg) if msg == "Internal server error"
        ));
    }
    #[test]
    fn test_app_error_internal_error_is_sanitized() {
        let api_error = ApiErr::from(AppError::internal("db password incorrect"));
        assert!(matches!(
            api_error,
            ApiErr::InternalError(msg) if msg == "Internal server error"
        ));
    }
    #[test]
    fn test_api_error_response_status_codes() {
        // Test internal error status
-        let error = ApiError::InternalError("server error".to_string());
+        let error = ApiErr::InternalError("server error".to_string());
        assert_status_code(error, StatusCode::INTERNAL_SERVER_ERROR);
        // Test not found status
-        let error = ApiError::NotFound("not found".to_string());
+        let error = ApiErr::NotFound("not found".to_string());
        assert_status_code(error, StatusCode::NOT_FOUND);
        // Test validation error status
-        let error = ApiError::ValidationError("invalid input".to_string());
+        let error = ApiErr::ValidationError("invalid input".to_string());
        assert_status_code(error, StatusCode::BAD_REQUEST);
        // Test unauthorized status
-        let error = ApiError::Unauthorized("not allowed".to_string());
+        let error = ApiErr::Unauthorized("not allowed".to_string());
        assert_status_code(error, StatusCode::UNAUTHORIZED);
        // Test payload too large status
        let error = ApiErr::PayloadTooLarge("too big".to_string());
        assert_status_code(error, StatusCode::PAYLOAD_TOO_LARGE);
    }
    // Alternative approach that doesn't try to parse the response body
@@ -139,15 +164,15 @@ mod tests {
    fn test_error_messages() {
        // For validation errors
        let message = "invalid data format";
-        let error = ApiError::ValidationError(message.to_string());
+        let error = ApiErr::ValidationError(message.to_string());
        // Check that the error itself contains the message
-        assert_eq!(error.to_string(), format!("Validation error: {}", message));
+        assert_eq!(error.to_string(), format!("validation error: {message}"));
        // For not found errors
        let message = "user not found";
-        let error = ApiError::NotFound(message.to_string());
+        let error = ApiErr::NotFound(message.to_string());
-        assert_eq!(error.to_string(), format!("Not found: {}", message));
+        assert_eq!(error.to_string(), format!("not found: {message}"));
    }
    // Alternative approach for internal error test
@@ -156,11 +181,11 @@ mod tests {
        // Create a sensitive error message
        let sensitive_info = "db password incorrect";
-        // Create ApiError with sensitive info
+        // Create ApiErr with sensitive info
-        let api_error = ApiError::InternalError(sensitive_info.to_string());
+        let api_error = ApiErr::InternalError(sensitive_info.to_string());
        // Check the error message is correctly set
-        assert_eq!(api_error.to_string(), "Internal server error");
+        assert_eq!(api_error.to_string(), "internal server error");
        // Also verify correct status code
        assert_status_code(api_error, StatusCode::INTERNAL_SERVER_ERROR);
@@ -6,7 +6,7 @@ use axum::{
    Router,
 };
 use middleware_api_auth::api_auth;
-use routes::{categories::get_categories, ingress::ingest_data, liveness::live, readiness::ready};
+use routes::{categories::list, ingest::handle, liveness::live, readiness::ready};
 pub mod api_state;
 pub mod error;
@@ -26,9 +26,13 @@ where
    // Protected API endpoints (require auth)
    let protected = Router::new()
-        .route("/ingress", post(ingest_data))
+        .route(
-        .route("/categories", get(get_categories))
+            "/ingest",
-        .layer(DefaultBodyLimit::max(1024 * 1024 * 1024))
+            post(handle).layer(DefaultBodyLimit::max(
                app_state.config.ingest_max_body_bytes,
            )),
        )
        .route("/categories", get(list))
        .route_layer(from_fn_with_state(app_state.clone(), api_auth));
    public.merge(protected)
@@ -6,26 +6,26 @@ use axum::{
 use common::storage::types::user::User;
-use crate::{api_state::ApiState, error::ApiError};
+use crate::{api_state::ApiState, error::ApiErr};
 pub async fn api_auth(
    State(state): State<ApiState>,
    mut request: Request,
    next: Next,
-) -> Result<Response, ApiError> {
+) -> Result<Response, ApiErr> {
    let api_key = extract_api_key(&request)
-        .ok_or_else(|| ApiError::Unauthorized("You have to be authenticated".to_string()))?;
+        .ok_or_else(|| ApiErr::Unauthorized("You have to be authenticated".to_string()))?;
-    let user = User::find_by_api_key(&api_key, &state.db).await?;
+    let user = User::find_by_api_key(api_key, &state.db).await?;
    let user =
-        user.ok_or_else(|| ApiError::Unauthorized("You have to be authenticated".to_string()))?;
+        user.ok_or_else(|| ApiErr::Unauthorized("You have to be authenticated".to_string()))?;
    request.extensions_mut().insert(user);
    Ok(next.run(request).await)
 }
-fn extract_api_key(request: &Request) -> Option<String> {
+fn extract_api_key(request: &Request) -> Option<&str> {
    request
        .headers()
        .get("X-API-Key")
@@ -35,7 +35,67 @@ fn extract_api_key(request: &Request) -> Option<String> {
                .headers()
                .get("Authorization")
                .and_then(|v| v.to_str().ok())
-                .and_then(|auth| auth.strip_prefix("Bearer ").map(str::trim))
+                .and_then(|auth| auth.strip_prefix("Bearer "))
                .map(str::trim)
        })
-        .map(String::from)
+}
 #[cfg(test)]
 #[allow(clippy::expect_used)]
 mod tests {
    use axum::{
        body::Body,
        http::{HeaderValue, Request},
    };

    use super::extract_api_key;

    /// Builds an empty-bodied `GET /` request carrying the given header pairs.
    fn request_with_headers(headers: &[(&str, &str)]) -> Request<Body> {
        headers
            .iter()
            .fold(
                Request::builder().method("GET").uri("/"),
                |partial, (name, value)| partial.header(*name, *value),
            )
            .body(Body::empty())
            .expect("test request")
    }

    /// The key is read from `X-API-Key` when that header is present.
    #[test]
    fn extract_api_key_from_x_api_key_header() {
        let req = request_with_headers(&[("X-API-Key", "sk_test_key")]);
        let found = extract_api_key(&req);
        assert_eq!(found, Some("sk_test_key"));
    }

    /// A `Bearer` authorization header yields the token without the prefix.
    #[test]
    fn extract_api_key_from_bearer_authorization() {
        let req = request_with_headers(&[("Authorization", "Bearer sk_bearer_key")]);
        let found = extract_api_key(&req);
        assert_eq!(found, Some("sk_bearer_key"));
    }

    /// When both headers are sent, `X-API-Key` wins.
    #[test]
    fn extract_api_key_prefers_x_api_key_over_authorization() {
        let both_headers = [
            ("X-API-Key", "sk_header"),
            ("Authorization", "Bearer sk_bearer"),
        ];
        let req = request_with_headers(&both_headers);
        assert_eq!(extract_api_key(&req), Some("sk_header"));
    }

    /// No relevant header means no key.
    #[test]
    fn extract_api_key_returns_none_when_missing() {
        let req = request_with_headers(&[]);
        assert_eq!(extract_api_key(&req), None);
    }

    /// Authorization schemes other than `Bearer` are ignored.
    #[test]
    fn extract_api_key_rejects_non_bearer_authorization() {
        let req = request_with_headers(&[("Authorization", "Basic abc")]);
        assert_eq!(extract_api_key(&req), None);
    }

    /// A header value that is not valid text is treated as absent.
    #[test]
    fn extract_api_key_rejects_invalid_header_values() {
        // 0xFF is accepted as a header byte but cannot be read back as a string.
        let opaque_value = HeaderValue::from_bytes(&[0xFF]).expect("invalid header");
        let mut req = request_with_headers(&[]);
        req.headers_mut().insert("X-API-Key", opaque_value);
        assert_eq!(extract_api_key(&req), None);
    }
 }
@@ -1,12 +1,12 @@
 use axum::{extract::State, response::IntoResponse, Extension, Json};
 use common::storage::types::user::User;
-use crate::{api_state::ApiState, error::ApiError};
+use crate::{api_state::ApiState, error::ApiErr};
-pub async fn get_categories(
+pub async fn list(
    State(state): State<ApiState>,
    Extension(user): Extension<User>,
-) -> Result<impl IntoResponse, ApiError> {
+) -> Result<impl IntoResponse, ApiErr> {
    let categories = User::get_user_categories(&user.id, &state.db).await?;
    Ok(Json(categories))
@@ -0,0 +1,79 @@
 use axum::{extract::State, http::StatusCode, response::IntoResponse, Extension, Json};
 use axum_typed_multipart::{FieldData, TryFromMultipart, TypedMultipart};
 use common::{
    error::AppError,
    storage::types::{
        file_info::FileInfo, ingestion_payload::IngestionPayload, ingestion_task::IngestionTask,
        user::User,
    },
    utils::ingest_limits::{validate_ingest_input, IngestValidationError},
 };
 use futures::{future::try_join_all, TryFutureExt};
 use serde_json::json;
 use tempfile::NamedTempFile;
 use tracing::info;
 use crate::{api_state::ApiState, error::ApiErr};
 /// Multipart form fields accepted by the ingest handler.
 #[derive(Debug, TryFromMultipart)]
 pub struct Params {
    /// Optional `content` form field.
    pub content: Option<String>,
    /// Required `context` form field.
    pub context: String,
    /// Required `category` form field.
    pub category: String,
    // Per-field limit of 20000000 (presumably bytes — confirm against the
    // axum_typed_multipart docs). `default` presumably lets a request omit
    // `files` entirely — TODO confirm.
    #[form_data(limit = "20000000")]
    #[form_data(default)]
    /// Uploaded files, each backed by a named temporary file.
    pub files: Vec<FieldData<NamedTempFile>>,
 }
 /// Accepts a multipart ingest request for the authenticated user.
 ///
 /// Steps, in order: validate the input against the configured ingest limits,
 /// log request metadata, store every uploaded file, build the ingestion
 /// payloads and persist one ingestion task per payload.
 ///
 /// # Errors
 /// * `ApiErr::PayloadTooLarge` / `ApiErr::ValidationError` when validation
 ///   rejects the input.
 /// * Any error from file storage, payload creation or task persistence is
 ///   propagated through `?`.
 ///
 /// On success responds with `200 OK` and `{ "status": "success" }`.
 pub async fn handle(
    State(state): State<ApiState>,
    Extension(user): Extension<User>,
    TypedMultipart(input): TypedMultipart<Params>,
 ) -> Result<impl IntoResponse, ApiErr> {
    let user_id = user.id;

    // Translate validation failures into their API-level counterparts.
    validate_ingest_input(
        &state.config,
        input.content.as_deref(),
        &input.context,
        &input.category,
        input.files.len(),
    )
    .map_err(|rejection| match rejection {
        IngestValidationError::PayloadTooLarge(message) => ApiErr::PayloadTooLarge(message),
        IngestValidationError::BadRequest(message) => ApiErr::ValidationError(message),
    })?;

    // Only sizes and flags are logged, never the submitted text itself.
    let has_content = input
        .content
        .as_deref()
        .map(str::trim)
        .is_some_and(|text| !text.is_empty());
    info!(
        user_id = %user_id,
        has_content,
        content_len = input.content.as_deref().map_or(0, str::len),
        context_len = input.context.len(),
        category_len = input.category.len(),
        file_count = input.files.len(),
        "Received ingest request"
    );

    // Store all uploads concurrently; the first failure aborts the request.
    let uploads = input.files.into_iter().map(|file| {
        FileInfo::new_with_storage(file, &state.db, &user_id, &state.storage)
            .map_err(AppError::from)
    });
    let file_infos = try_join_all(uploads).await?;

    let payloads = IngestionPayload::create_ingestion_payload(
        input.content,
        input.context,
        input.category,
        file_infos,
        user_id.clone(),
    )?;
    IngestionTask::create_all_and_add_to_db(payloads, &user_id, &state.db).await?;

    let success = Json(json!({ "status": "success" }));
    Ok((StatusCode::OK, success))
 }
@@ -1,56 +0,0 @@
 use axum::{extract::State, http::StatusCode, response::IntoResponse, Extension, Json};
 use axum_typed_multipart::{FieldData, TryFromMultipart, TypedMultipart};
 use common::{
    error::AppError,
    storage::types::{
        file_info::FileInfo, ingestion_payload::IngestionPayload, ingestion_task::IngestionTask,
        user::User,
    },
 };
 use futures::{future::try_join_all, TryFutureExt};
 use serde_json::json;
 use tempfile::NamedTempFile;
 use tracing::info;
 use crate::{api_state::ApiState, error::ApiError};
 /// Multipart form fields accepted by `ingest_data`.
 #[derive(Debug, TryFromMultipart)]
 pub struct IngestParams {
    /// Optional `content` form field.
    pub content: Option<String>,
    /// Required `context` form field.
    pub context: String,
    /// Required `category` form field.
    pub category: String,
    // Per-field limit of 10000000 (presumably bytes — confirm against the
    // axum_typed_multipart docs).
    #[form_data(limit = "10000000")] // Adjust limit as needed
    #[form_data(default)]
    /// Uploaded files, each backed by a named temporary file.
    pub files: Vec<FieldData<NamedTempFile>>,
 }
 /// Accepts a multipart ingest request for the authenticated user.
 ///
 /// Stores every uploaded file, builds the ingestion payloads and persists one
 /// ingestion task per payload.
 ///
 /// # Errors
 /// Any error from file storage, payload creation or task persistence is
 /// propagated through `?`.
 ///
 /// On success responds with `200 OK` and `{ "status": "success" }`.
 pub async fn ingest_data(
    State(state): State<ApiState>,
    Extension(user): Extension<User>,
    TypedMultipart(input): TypedMultipart<IngestParams>,
 ) -> Result<impl IntoResponse, ApiError> {
    // Log sizes only: dumping the whole form with `{:?}` would write the
    // user's submitted text into the application log.
    info!(
        user_id = %user.id,
        content_len = input.content.as_ref().map_or(0, String::len),
        context_len = input.context.len(),
        category_len = input.category.len(),
        file_count = input.files.len(),
        "Received ingest request"
    );

    // Store all uploads concurrently; the first failure aborts the request.
    let file_infos = try_join_all(input.files.into_iter().map(|file| {
        FileInfo::new_with_storage(file, &state.db, &user.id, &state.storage)
            .map_err(AppError::from)
    }))
    .await?;

    let payloads = IngestionPayload::create_ingestion_payload(
        input.content,
        input.context,
        input.category,
        file_infos,
        user.id.as_str(),
    )?;

    // One task per payload, created concurrently.
    try_join_all(
        payloads
            .into_iter()
            .map(|object| IngestionTask::create_and_add_to_db(object, user.id.clone(), &state.db)),
    )
    .await?;

    Ok((StatusCode::OK, Json(json!({ "status": "success" }))))
 }
@@ -1,4 +1,4 @@
 pub mod categories;
-pub mod ingress;
+pub mod ingest;
 pub mod liveness;
 pub mod readiness;
@@ -1,5 +1,6 @@
 use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
 use serde_json::json;
 use tracing::error;
 use crate::api_state::ApiState;
@@ -13,13 +14,15 @@ pub async fn ready(State(state): State<ApiState>) -> impl IntoResponse {
                "checks": { "db": "ok" }
            })),
        ),
-        Err(e) => (
+        Err(e) => {
-            StatusCode::SERVICE_UNAVAILABLE,
+            error!("readiness check failed: {e:?}");
-            Json(json!({
+            (
-                "status": "error",
+                StatusCode::SERVICE_UNAVAILABLE,
-                "checks": { "db": "fail" },
+                Json(json!({
-                "reason": e.to_string()
+                    "status": "error",
-            })),
+                    "checks": { "db": "fail" }
-        ),
+                })),
            )
        }
    }
 }
@@ -0,0 +1,159 @@
 #![allow(clippy::expect_used)]
 use std::sync::Arc;
 use api_router::{api_routes_v1, api_state::ApiState};
 use axum::{
    body::{to_bytes, Body},
    http::{Request, StatusCode},
    Router,
 };
 use common::{
    storage::{db::SurrealDbClient, store::StorageManager, types::user::User},
    utils::config::{AppConfig, StorageKind},
 };
 use tower::ServiceExt;
 /// Builds the v1 API router on top of an in-memory database and in-memory
 /// storage, returning the router together with a handle to the database so
 /// tests can seed data.
 async fn build_test_app() -> (Router, Arc<SurrealDbClient>) {
    // A random database name is generated for every call.
    let database = uuid::Uuid::new_v4().to_string();
    let client = SurrealDbClient::memory("api_router_test", &database)
        .await
        .expect("in-memory db");
    let db = Arc::new(client);
    db.apply_migrations()
        .await
        .expect("migrations should apply");

    let config = AppConfig {
        storage: StorageKind::Memory,
        ..Default::default()
    };
    let storage = StorageManager::new(&config).await.expect("storage manager");

    let state = ApiState {
        db: Arc::clone(&db),
        config,
        storage,
    };
    (api_routes_v1(&state).with_state(state), db)
 }
 /// Reads the whole response body and returns it as a UTF-8 string,
 /// panicking if the body cannot be read or is not valid UTF-8.
 async fn response_body(response: axum::response::Response) -> String {
    let raw = to_bytes(response.into_body(), usize::MAX).await;
    let bytes = raw.expect("response body");
    String::from_utf8(bytes.to_vec()).expect("utf-8 body")
 }
 /// `/live` answers `200 OK` with an "ok" status without any credentials.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn live_probe_is_public() {
    let (app, _db) = build_test_app().await;
    let request = Request::builder()
        .uri("/live")
        .body(Body::empty())
        .expect("live request");

    let response = app.oneshot(request).await.expect("live response");

    assert_eq!(response.status(), StatusCode::OK);
    let body = response_body(response).await;
    assert!(body.contains("\"status\":\"ok\""));
 }
 /// `/ready` answers `200 OK` without credentials, reports the database check
 /// as ok, and does not include a `reason` in the body.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn ready_probe_is_public_and_reports_db_ok() {
    let (app, _db) = build_test_app().await;
    let request = Request::builder()
        .uri("/ready")
        .body(Body::empty())
        .expect("ready request");

    let response = app.oneshot(request).await.expect("ready response");
    assert_eq!(response.status(), StatusCode::OK);

    let body = response_body(response).await;
    // Either rendering of the db check is accepted.
    let reports_db_ok = ["\"checks\":{\"db\":\"ok\"}", "\"db\":\"ok\""]
        .iter()
        .any(|needle| body.contains(*needle));
    assert!(reports_db_ok);
    assert!(!body.contains("reason"));
 }
 /// A request to `/categories` without any API key is answered with 401.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn protected_route_requires_api_key() {
    let (app, _db) = build_test_app().await;
    let anonymous = Request::builder()
        .uri("/categories")
        .body(Body::empty())
        .expect("categories request");

    let response = app.oneshot(anonymous).await.expect("categories response");

    assert_eq!(response.status(), StatusCode::UNAUTHORIZED);
 }
 /// A request to `/categories` with a key that matches no user is answered
 /// with 401.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn protected_route_rejects_invalid_api_key() {
    let (app, _db) = build_test_app().await;
    let with_bad_key = Request::builder()
        .uri("/categories")
        .header("X-API-Key", "sk_invalid")
        .body(Body::empty())
        .expect("categories request");

    let response = app
        .oneshot(with_bad_key)
        .await
        .expect("categories response");

    assert_eq!(response.status(), StatusCode::UNAUTHORIZED);
 }
 /// A user with a freshly issued API key can call `/categories` and gets 200.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn authenticated_user_can_list_categories() {
    let (app, db) = build_test_app().await;

    // Seed a user and issue an API key for it.
    let account = User::create_new(
        String::from("api_router_test@example.com"),
        String::from("test_password"),
        &db,
        String::from("UTC"),
        String::from("system"),
    )
    .await
    .expect("test user");
    let api_key = User::set_api_key(&account.id, &db).await.expect("api key");

    let authenticated = Request::builder()
        .uri("/categories")
        .header("X-API-Key", api_key)
        .body(Body::empty())
        .expect("categories request");
    let response = app
        .oneshot(authenticated)
        .await
        .expect("categories response");

    assert_eq!(response.status(), StatusCode::OK);
 }
@@ -16,7 +16,7 @@ tracing = { workspace = true }
 anyhow = { workspace = true }
 thiserror = { workspace = true }
 serde_json = { workspace = true }
-surrealdb = { workspace = true, features = ["kv-mem"] }
+surrealdb = { workspace = true }
 async-openai = { workspace = true }
 futures = { workspace = true }
 tempfile = { workspace = true }
@@ -45,7 +45,11 @@ tokio-retry = { workspace = true }
 object_store = { workspace = true }
 bytes = { workspace = true }
 state-machines = { workspace = true }
 fastembed = { workspace = true }
 [features]
-test-utils = []
+test-utils = ["surrealdb/kv-mem"]
 [dev-dependencies]
 surrealdb = { workspace = true, features = ["kv-mem"] }
@@ -14,8 +14,11 @@ CREATE system_settings:current CONTENT {
    query_model: "gpt-4o-mini",
    processing_model: "gpt-4o-mini",
    embedding_model: "text-embedding-3-small",
    voice_processing_model: "whisper-1",
    image_processing_model: "gpt-4o-mini",
    image_processing_prompt: "Analyze this image and respond based on its primary content:\n - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.\n - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.\n - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a Text: heading.\n\n Respond directly with the analysis.",
    embedding_dimensions: 1536,
    query_system_prompt: "You are a knowledgeable assistant with access to a specialized knowledge base. You will be provided with relevant knowledge entities from the database as context. Each knowledge entity contains a name, description, and type, representing different concepts, ideas, and information.\nYour task is to:\n1. Carefully analyze the provided knowledge entities in the context\n2. Answer user questions based on this information\n3. Provide clear, concise, and accurate responses\n4. When referencing information, briefly mention which knowledge entity it came from\n5. If the provided context doesn't contain enough information to answer the question confidently, clearly state this\n6. If only partial information is available, explain what you can answer and what information is missing\n7. Avoid making assumptions or providing information not supported by the context\n8. Output the references to the documents. Use the UUIDs and make sure they are correct!\nRemember:\n- Be direct and honest about the limitations of your knowledge\n- Cite the relevant knowledge entities when providing information, but only provide the UUIDs in the reference array\n- If you need to combine information from multiple entities, explain how they connect\n- Don't speculate beyond what's provided in the context\nExample response formats:\n\"Based on [Entity Name], [answer...]\"\n\"I found relevant information in multiple entries: [explanation...]\"\n\"I apologize, but the provided context doesn't contain information about [topic]\"",
-    ingestion_system_prompt: "You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.\nThe JSON should have the following structure:\n{\n\"knowledge_entities\": [\n{\n\"key\": \"unique-key-1\",\n\"name\": \"Entity Name\",\n\"description\": \"A detailed description of the entity.\",\n\"entity_type\": \"TypeOfEntity\"\n},\n// More entities...\n],\n\"relationships\": [\n{\n\"type\": \"RelationshipType\",\n\"source\": \"unique-key-1 or UUID from existing database\",\n\"target\": \"unique-key-1 or UUID from existing database\"\n},\n// More relationships...\n]\n}\nGuidelines:\n1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.\n2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.\n3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.\n4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.\n5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity\"\n6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.\n7. Only create relationships between existing KnowledgeEntities.\n8. 
Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.\n9. A new relationship MUST include a newly created KnowledgeEntity."
+    ingestion_system_prompt: "You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.\nThe JSON should have the following structure:\n{\n\"knowledge_entities\": [\n{\n\"key\": \"unique-key-1\",\n\"name\": \"Entity Name\",\n\"description\": \"A detailed description of the entity.\",\n\"entity_type\": \"TypeOfEntity\"\n},\n// More entities...\n],\n\"relationships\": [\n{\n\"type\": \"RelationshipType\",\n\"source\": \"unique-key-1 or UUID from existing database\",\n\"target\": \"unique-key-1 or UUID from existing database\"\n},\n// More relationships...\n]\n}\nGuidelines:\n1. Do NOT generate any IDs or UUIDs. Use a unique `key` for each knowledge entity.\n2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.\n3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.\n4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.\n5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.\n6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.\n7. Only create relationships between existing KnowledgeEntities.\n8. 
Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.\n9. A new relationship MUST include a newly created KnowledgeEntity."
 };
 END;
@@ -0,0 +1,2 @@
 -- Runtime-managed: text_content FTS indexes now created at startup via the shared Surreal helper.
 -- This migration is intentionally left as a no-op to avoid heavy index builds during migration.
@@ -0,0 +1 @@
 -- No-op: legacy `job` table was superseded by `ingestion_task`; kept for migration order compatibility.
@@ -0,0 +1 @@
 -- Runtime-managed: FTS indexes now built at startup; migration retained as a no-op.
@@ -0,0 +1,18 @@
 -- Remove HNSW indexes from base tables (now created at runtime on *_embedding tables)
 REMOVE INDEX IF EXISTS idx_embedding_entities ON knowledge_entity;
 REMOVE INDEX IF EXISTS idx_embedding_chunks ON text_chunk;
 -- Remove FTS indexes (now created at runtime via indexes.rs)
 REMOVE INDEX IF EXISTS text_content_fts_text_idx ON text_content;
 REMOVE INDEX IF EXISTS text_content_fts_category_idx ON text_content;
 REMOVE INDEX IF EXISTS text_content_fts_context_idx ON text_content;
 REMOVE INDEX IF EXISTS text_content_fts_file_name_idx ON text_content;
 REMOVE INDEX IF EXISTS text_content_fts_url_idx ON text_content;
 REMOVE INDEX IF EXISTS text_content_fts_url_title_idx ON text_content;
 REMOVE INDEX IF EXISTS knowledge_entity_fts_name_idx ON knowledge_entity;
 REMOVE INDEX IF EXISTS knowledge_entity_fts_description_idx ON knowledge_entity;
 REMOVE INDEX IF EXISTS text_chunk_fts_chunk_idx ON text_chunk;
 -- Remove legacy analyzers (recreated at runtime with updated configuration)
 REMOVE ANALYZER IF EXISTS app_default_fts_analyzer;
 REMOVE ANALYZER IF EXISTS app_en_fts_analyzer;
@@ -0,0 +1,23 @@
 -- Move chunk/entity embeddings to dedicated tables for index efficiency.
 -- Text chunk embeddings table
 DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;
 DEFINE FIELD IF NOT EXISTS created_at ON text_chunk_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS user_id ON text_chunk_embedding TYPE string;
 DEFINE FIELD IF NOT EXISTS source_id ON text_chunk_embedding TYPE string;
 DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record<text_chunk>;
 DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array<float>;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;
 -- Knowledge entity embeddings table
 DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;
 DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;
 DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;
 DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array<float>;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;
@@ -0,0 +1,23 @@
 -- Copy embeddings from base tables to dedicated tables
 -- This runs BEFORE the field removal migration
 -- Only rows whose embedding is set and non-empty are copied.
 -- CREATE is given no explicit record id, so the new rows get generated ids.
 FOR $chunk IN (SELECT * FROM text_chunk WHERE embedding != NONE AND array::len(embedding) > 0) {
    -- One text_chunk_embedding row per chunk, linked back through chunk_id.
    CREATE text_chunk_embedding CONTENT {
        chunk_id: $chunk.id,
        embedding: $chunk.embedding,
        user_id: $chunk.user_id,
        source_id: $chunk.source_id,
        created_at: $chunk.created_at,
        updated_at: $chunk.updated_at
    };
 };
 FOR $entity IN (SELECT * FROM knowledge_entity WHERE embedding != NONE AND array::len(embedding) > 0) {
    -- One knowledge_entity_embedding row per entity, linked back through entity_id.
    -- Unlike the chunk copy above, no source_id is written here.
    CREATE knowledge_entity_embedding CONTENT {
        entity_id: $entity.id,
        embedding: $entity.embedding,
        user_id: $entity.user_id,
        created_at: $entity.created_at,
        updated_at: $entity.updated_at
    };
 };
@@ -0,0 +1,3 @@
 -- Drop legacy embedding fields from base tables; embeddings now live in *_embedding tables.
 REMOVE FIELD IF EXISTS embedding ON TABLE text_chunk;
 REMOVE FIELD IF EXISTS embedding ON TABLE knowledge_entity;
@@ -0,0 +1,8 @@
 -- Add embedding_backend field to system_settings for visibility of active backend
 DEFINE FIELD IF NOT EXISTS embedding_backend ON system_settings TYPE option<string>;
 -- Set default to 'openai' for existing installs to preserve backward compatibility
 UPDATE system_settings:current SET
    embedding_backend = 'openai'
 WHERE embedding_backend == NONE;
@@ -0,0 +1,97 @@
 -- Enforce SCHEMAFULL on all tables and define missing fields
 -- 1. Define missing fields for ingestion_task (the table that replaced the legacy job table)
 DEFINE TABLE OVERWRITE ingestion_task SCHEMAFULL;
 -- Core Fields
 DEFINE FIELD IF NOT EXISTS id ON ingestion_task TYPE record<ingestion_task>;
 DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime DEFAULT time::now();
 DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime DEFAULT time::now();
 DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;
 -- State Machine Fields
 DEFINE FIELD IF NOT EXISTS state ON ingestion_task TYPE string ASSERT $value IN ['Pending', 'Reserved', 'Processing', 'Succeeded', 'Failed', 'Cancelled', 'DeadLetter'];
 DEFINE FIELD IF NOT EXISTS attempts ON ingestion_task TYPE int DEFAULT 0;
 DEFINE FIELD IF NOT EXISTS max_attempts ON ingestion_task TYPE int DEFAULT 3;
 DEFINE FIELD IF NOT EXISTS scheduled_at ON ingestion_task TYPE datetime DEFAULT time::now();
 DEFINE FIELD IF NOT EXISTS locked_at ON ingestion_task TYPE option<datetime>;
 DEFINE FIELD IF NOT EXISTS lease_duration_secs ON ingestion_task TYPE int DEFAULT 300;
 DEFINE FIELD IF NOT EXISTS worker_id ON ingestion_task TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS error_code ON ingestion_task TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS error_message ON ingestion_task TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS last_error_at ON ingestion_task TYPE option<datetime>;
 DEFINE FIELD IF NOT EXISTS priority ON ingestion_task TYPE int DEFAULT 0;
 -- Content Payload (IngestionPayload Enum)
 DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;
 DEFINE FIELD IF NOT EXISTS content.Url ON ingestion_task TYPE option<object>;
 DEFINE FIELD IF NOT EXISTS content.Text ON ingestion_task TYPE option<object>;
 DEFINE FIELD IF NOT EXISTS content.File ON ingestion_task TYPE option<object>;
 -- Content: Url Variant
 DEFINE FIELD IF NOT EXISTS content.Url.url ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Url.context ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Url.category ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Url.user_id ON ingestion_task TYPE string;
 -- Content: Text Variant
 DEFINE FIELD IF NOT EXISTS content.Text.text ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Text.context ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Text.category ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.Text.user_id ON ingestion_task TYPE string;
 -- Content: File Variant
 DEFINE FIELD IF NOT EXISTS content.File.context ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.category ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.user_id ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info ON ingestion_task TYPE object;
 -- Content: File.file_info (FileInfo Struct)
 DEFINE FIELD IF NOT EXISTS content.File.file_info.id ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.created_at ON ingestion_task TYPE datetime;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.updated_at ON ingestion_task TYPE datetime;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.sha256 ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.path ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.file_name ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.mime_type ON ingestion_task TYPE string;
 DEFINE FIELD IF NOT EXISTS content.File.file_info.user_id ON ingestion_task TYPE string;
 -- 2. Enforce SCHEMAFULL on all other tables
 DEFINE TABLE OVERWRITE analytics SCHEMAFULL;
 DEFINE TABLE OVERWRITE conversation SCHEMAFULL;
 DEFINE TABLE OVERWRITE file SCHEMAFULL;
 DEFINE TABLE OVERWRITE knowledge_entity SCHEMAFULL;
 DEFINE TABLE OVERWRITE message SCHEMAFULL;
 DEFINE TABLE OVERWRITE relates_to SCHEMAFULL TYPE RELATION;
 DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;
 DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;
 DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
 DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
 DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
 DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
 DEFINE TABLE OVERWRITE scratchpad SCHEMAFULL;
 DEFINE TABLE OVERWRITE system_settings SCHEMAFULL;
 DEFINE TABLE OVERWRITE text_chunk SCHEMAFULL;
 -- text_content becomes SCHEMAFULL here, so its fields are declared explicitly right after the table definition
 DEFINE TABLE OVERWRITE text_content SCHEMAFULL;
 DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
 DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
 DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;
 DEFINE TABLE OVERWRITE user SCHEMAFULL;
@@ -0,0 +1 @@
 -- Adds a `theme` string field to `user`; the DEFAULT clause supplies "system" when no value is given.
 DEFINE FIELD IF NOT EXISTS theme ON user TYPE string DEFAULT "system";
@@ -0,0 +1,3 @@
 -- Per-user deduplication: same SHA256 may exist for different users.
 -- Drop file_sha256_idx and enforce uniqueness on the (user_id, sha256) pair instead.
 REMOVE INDEX IF EXISTS file_sha256_idx ON file;
 DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;
@@ -0,0 +1,33 @@
 -- Harden knowledge entity embeddings and graph storage invariants.
 -- Adds a source_id field to the embedding rows (filled in by the backfill below).
 DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;
 -- Backfill denormalized source_id from the linked entity.
 -- Only rows whose source_id is NONE or the empty string are visited.
 FOR $emb IN (SELECT * FROM knowledge_entity_embedding WHERE source_id = NONE OR source_id = '') {
    -- Look up the record referenced by entity_id and keep the first result row.
    LET $entity = (SELECT source_id FROM $emb.entity_id)[0];
    -- Rows whose lookup yields NONE are left unchanged.
    IF $entity != NONE {
        UPDATE $emb.id SET source_id = $entity.source_id;
    }
 };
 -- Re-key embeddings so record id matches entity id (stable 1:1 identity).
 FOR $emb IN (SELECT * FROM knowledge_entity_embedding) {
    -- Canonical id = knowledge_entity_embedding:<key of the linked entity>.
    LET $entity_key = record::id($emb.entity_id);
    LET $canonical = type::thing('knowledge_entity_embedding', $entity_key);
    -- Rows already stored under the canonical id are skipped.
    IF $emb.id != $canonical {
        -- Write the row's fields under the canonical id, then delete the old record.
        UPSERT $canonical CONTENT {
            entity_id: $emb.entity_id,
            embedding: $emb.embedding,
            user_id: $emb.user_id,
            source_id: $emb.source_id,
            created_at: $emb.created_at,
            updated_at: $emb.updated_at
        };
        DELETE $emb.id;
    }
 };
 -- Drop and redefine the entity_id index so that it carries the UNIQUE constraint.
 REMOVE INDEX IF EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;
 -- Lookup indexes: embeddings by source_id, entities by (user_id, source_id).
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;
@@ -0,0 +1,21 @@
 -- Harden text chunk embeddings storage invariants.
 -- Re-key embeddings so record id matches chunk id (stable 1:1 identity).
 FOR $emb IN (SELECT * FROM text_chunk_embedding) {
    -- Canonical id = text_chunk_embedding:<key of the linked chunk>.
    LET $chunk_key = record::id($emb.chunk_id);
    LET $canonical = type::thing('text_chunk_embedding', $chunk_key);
    -- Rows already stored under the canonical id are skipped.
    IF $emb.id != $canonical {
        -- Write the row's fields under the canonical id, then delete the old record.
        UPSERT $canonical CONTENT {
            chunk_id: $emb.chunk_id,
            embedding: $emb.embedding,
            user_id: $emb.user_id,
            source_id: $emb.source_id,
            created_at: $emb.created_at,
            updated_at: $emb.updated_at
        };
        DELETE $emb.id;
    }
 };
 -- Drop and redefine the chunk_id index so that it carries the UNIQUE constraint.
 REMOVE INDEX IF EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
@@ -0,0 +1,8 @@
 -- Align persisted embedding settings when FastEmbed is the recorded backend but the model
 -- name is still the OpenAI migration default (invalid for FastEmbed `from_str`).
 -- The WHERE clause limits the rewrite to that exact backend/model pair; any other
 -- configuration on system_settings:current is left untouched.
 UPDATE system_settings:current SET
    embedding_model = 'Xenova/bge-small-en-v1.5',
    embedding_dimensions = 384
 WHERE embedding_backend = 'fastembed'
    AND embedding_model = 'text-embedding-3-small';
@@ -0,0 +1,5 @@
 -- Track scheduled runtime index rebuild state on the system_settings singleton.
 -- All three fields are optional, so existing settings rows remain valid without them.
 -- NOTE(review): lease_owner / lease_expires_at presumably let one instance claim a rebuild
 -- at a time - confirm against the scheduler code.
 DEFINE FIELD IF NOT EXISTS last_index_rebuild_at ON system_settings TYPE option<datetime>;
 DEFINE FIELD IF NOT EXISTS index_rebuild_lease_owner ON system_settings TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS index_rebuild_lease_expires_at ON system_settings TYPE option<datetime>;
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -242,7 +242,7 @@\n\n # Defines the schema for the 'text_content' table.\n\n-DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n@@ -254,10 +254,24 @@\n DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;\n # UrlInfo is a struct, store as object\n DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;\n+DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;\n+\n DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;\n DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n+# FileInfo fields\n+DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;\n+\n # Indexes based on query patterns\n DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\n","events":null}
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -28,6 +28,7 @@\n # Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)\n DEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY\n+DEFINE INDEX IF NOT EXISTS conversation_user_updated_at_idx ON conversation FIELDS user_id, updated_at; # For sidebar conversation projection ORDER BY\n\n # Defines the schema for the 'file' table (used by FileInfo).\n\n","events":null}
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -45,9 +45,8 @@\n DEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;\n\n-# Indexes based on usage (get_by_sha, potentially user lookups)\n-# Using UNIQUE based on the logic in FileInfo::new to prevent duplicates\n-DEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;\n+# Indexes based on usage (get_by_sha scoped by user_id, user lookups)\n+DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;\n DEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;\n\n # Defines the schema for the 'ingestion_task' table (used by IngestionTask).\n","events":null}
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -68,7 +68,7 @@\n\n # Defines the schema for the 'knowledge_entity' table.\n\n-DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;\n@@ -90,6 +90,7 @@\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n@@ -102,6 +103,7 @@\n DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;\n DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;\n+DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;\n\n -- Custom fields\n DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;\n@@ -109,8 +111,9 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id;\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;\n+DEFINE INDEX IF NOT 
EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;\n\n # Defines the schema for the 'message' table.\n\n@@ -135,19 +138,17 @@\n # Defines the 'relates_to' edge table for KnowledgeRelationships.\n # Edges connect nodes, in this case knowledge_entity records.\n\n-# Define the edge table itself, enforcing connections between knowledge_entity records\n-# SCHEMAFULL requires all fields to be defined, maybe start with SCHEMALESS if metadata might vary\n-DEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n+DEFINE TABLE IF NOT EXISTS relates_to SCHEMAFULL TYPE RELATION FROM knowledge_entity TO knowledge_entity;\n+\n+DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;\n+DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;\n\n-# Define the metadata field within the edge\n # RelationshipMetadata is a struct, store as object\n DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;\n+DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n+DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n+DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n\n-# Optionally, define fields within the metadata object for stricter schema (requires SCHEMAFULL on table)\n-# DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;\n-# DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;\n-# DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;\n-\n # Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n","events":null}
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -237,7 +237,7 @@\n\n -- Indexes\n -- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;\n-DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id;\n+DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;\n\n","events":null}
@@ -0,0 +1 @@
 {"schemas":"--- original\n+++ modified\n@@ -201,6 +201,10 @@\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_backend ON system_settings TYPE option<string>;\n+DEFINE FIELD IF NOT EXISTS last_index_rebuild_at ON system_settings TYPE option<datetime>;\n+DEFINE FIELD IF NOT EXISTS index_rebuild_lease_owner ON system_settings TYPE option<string>;\n+DEFINE FIELD IF NOT EXISTS index_rebuild_lease_expires_at ON system_settings TYPE option<datetime>;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}
@@ -13,3 +13,4 @@ DEFINE FIELD IF NOT EXISTS title ON conversation TYPE string;
 # Add indexes based on query patterns (get_complete_conversation ownership check, get_user_conversations)
 DEFINE INDEX IF NOT EXISTS conversation_user_id_idx ON conversation FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS conversation_created_at_idx ON conversation FIELDS created_at; # For get_user_conversations ORDER BY
 DEFINE INDEX IF NOT EXISTS conversation_user_updated_at_idx ON conversation FIELDS user_id, updated_at; # For sidebar conversation projection ORDER BY
@@ -13,7 +13,6 @@ DEFINE FIELD IF NOT EXISTS file_name ON file TYPE string;
 DEFINE FIELD IF NOT EXISTS mime_type ON file TYPE string;
 DEFINE FIELD IF NOT EXISTS user_id ON file TYPE string;
-# Indexes based on usage (get_by_sha, potentially user lookups)
+# Indexes based on usage (get_by_sha scoped by user_id, user lookups)
-# Using UNIQUE based on the logic in FileInfo::new to prevent duplicates
+DEFINE INDEX IF NOT EXISTS file_user_sha256_idx ON file FIELDS user_id, sha256 UNIQUE;
-DEFINE INDEX IF NOT EXISTS file_sha256_idx ON file FIELDS sha256 UNIQUE;
 DEFINE INDEX IF NOT EXISTS file_user_id_idx ON file FIELDS user_id;
@@ -1,6 +1,6 @@
 # Defines the schema for the 'knowledge_entity' table.
-DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;
+DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMAFULL;
 # Standard fields
 DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;
@@ -15,16 +15,13 @@ DEFINE FIELD IF NOT EXISTS entity_type ON knowledge_entity TYPE string;
 # metadata is Option<serde_json::Value>, store as object
 DEFINE FIELD IF NOT EXISTS metadata ON knowledge_entity TYPE option<object>;
 # Define embedding as a standard array of floats for schema definition
 DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity TYPE array<float>;
 # The specific vector nature is handled by the index definition below
 DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity TYPE string;
-# Indexes based on build_indexes and query patterns
+-- Indexes based on build_indexes and query patterns
-# The INDEX definition correctly specifies the vector properties
+-- HNSW index now defined on knowledge_entity_embedding table for better memory usage
-DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;
+-- DEFINE INDEX IF NOT EXISTS idx_embedding_entities ON knowledge_entity FIELDS embedding HNSW DIMENSION 1536;
-DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_user_source_idx ON knowledge_entity FIELDS user_id, source_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;
@@ -0,0 +1,20 @@
 -- Defines the schema for the 'knowledge_entity_embedding' table.
 -- Separate table to optimize HNSW index creation memory usage
 DEFINE TABLE IF NOT EXISTS knowledge_entity_embedding SCHEMAFULL;
 -- Standard fields
 DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS user_id ON knowledge_entity_embedding TYPE string;
 DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity_embedding TYPE string;
 -- Custom fields
 -- entity_id is a record link to the knowledge_entity this embedding belongs to.
 DEFINE FIELD IF NOT EXISTS entity_id ON knowledge_entity_embedding TYPE record<knowledge_entity>;
 DEFINE FIELD IF NOT EXISTS embedding ON knowledge_entity_embedding TYPE array<float>;
 -- Indexes
 -- The HNSW vector index is intentionally left commented out in this schema file.
 -- NOTE(review): presumably it is created at runtime with the configured dimension - confirm.
 -- DEFINE INDEX IF NOT EXISTS idx_embedding_knowledge_entity_embedding ON knowledge_entity_embedding FIELDS embedding HNSW DIMENSION 1536;
 -- UNIQUE: at most one embedding row per entity_id.
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_entity_id_idx ON knowledge_entity_embedding FIELDS entity_id UNIQUE;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_user_id_idx ON knowledge_entity_embedding FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_embedding_source_id_idx ON knowledge_entity_embedding FIELDS source_id;
@@ -0,0 +1,17 @@
 # Defines the 'relates_to' edge table for KnowledgeRelationships.
 # Edges connect nodes, in this case knowledge_entity records.
 # SCHEMAFULL relation table: both endpoints are restricted to knowledge_entity.
 DEFINE TABLE IF NOT EXISTS relates_to SCHEMAFULL TYPE RELATION FROM knowledge_entity TO knowledge_entity;
 # in / out are the edge endpoints, declared explicitly as knowledge_entity record links.
 DEFINE FIELD IF NOT EXISTS in ON relates_to TYPE record<knowledge_entity>;
 DEFINE FIELD IF NOT EXISTS out ON relates_to TYPE record<knowledge_entity>;
 # RelationshipMetadata is a struct, store as object
 DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
 # Nested metadata keys, each declared individually as a string.
 DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
 DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
 DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
 # Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)
 DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;
 DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;
@@ -14,3 +14,7 @@ DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;
 DEFINE FIELD IF NOT EXISTS embedding_backend ON system_settings TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS last_index_rebuild_at ON system_settings TYPE option<datetime>;
 DEFINE FIELD IF NOT EXISTS index_rebuild_lease_owner ON system_settings TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS index_rebuild_lease_expires_at ON system_settings TYPE option<datetime>;
@@ -10,14 +10,8 @@ DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE datetime;
 DEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;
 DEFINE FIELD IF NOT EXISTS chunk ON text_chunk TYPE string;
 # Define embedding as a standard array of floats for schema definition
 DEFINE FIELD IF NOT EXISTS embedding ON text_chunk TYPE array<float>;
 # The specific vector nature is handled by the index definition below
 DEFINE FIELD IF NOT EXISTS user_id ON text_chunk TYPE string;
 # Indexes based on build_indexes and query patterns (delete_by_source_id)
 # The INDEX definition correctly specifies the vector properties
 DEFINE INDEX IF NOT EXISTS idx_embedding_chunks ON text_chunk FIELDS embedding HNSW DIMENSION 1536;
 DEFINE INDEX IF NOT EXISTS text_chunk_source_id_idx ON text_chunk FIELDS source_id;
 DEFINE INDEX IF NOT EXISTS text_chunk_user_id_idx ON text_chunk FIELDS user_id;
@@ -0,0 +1,20 @@
 -- Defines the schema for the 'text_chunk_embedding' table.
 -- Separate table to optimize HNSW index creation memory usage
 DEFINE TABLE IF NOT EXISTS text_chunk_embedding SCHEMAFULL;
 -- Standard fields
 DEFINE FIELD IF NOT EXISTS created_at ON text_chunk_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk_embedding TYPE datetime;
 DEFINE FIELD IF NOT EXISTS user_id ON text_chunk_embedding TYPE string;
 DEFINE FIELD IF NOT EXISTS source_id ON text_chunk_embedding TYPE string;
 -- Custom fields
 -- chunk_id is a record link to the text_chunk this embedding belongs to.
 DEFINE FIELD IF NOT EXISTS chunk_id ON text_chunk_embedding TYPE record<text_chunk>;
 DEFINE FIELD IF NOT EXISTS embedding ON text_chunk_embedding TYPE array<float>;
 -- Indexes
 -- The HNSW vector index is intentionally left commented out in this schema file.
 -- NOTE(review): presumably it is created at runtime with the configured dimension - confirm.
 -- DEFINE INDEX IF NOT EXISTS idx_embedding_text_chunk_embedding ON text_chunk_embedding FIELDS embedding HNSW DIMENSION 1536;
 -- UNIQUE: at most one embedding row per chunk_id.
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_chunk_id_idx ON text_chunk_embedding FIELDS chunk_id UNIQUE;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_user_id_idx ON text_chunk_embedding FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS text_chunk_embedding_source_id_idx ON text_chunk_embedding FIELDS source_id;
@@ -1,6 +1,6 @@
 # Defines the schema for the 'text_content' table.
-DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;
+DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;
 # Standard fields
 DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;
@@ -12,10 +12,24 @@ DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;
 # UrlInfo is a struct, store as object
 DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;
 DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;
 DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;
 # FileInfo fields
 DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;
 DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;
 DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;
 # Indexes based on query patterns
 DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;
 DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;
@@ -1,27 +0,0 @@
 DEFINE ANALYZER IF NOT EXISTS app_default_fts_analyzer
    TOKENIZERS class
    FILTERS lowercase, ascii; 
 DEFINE INDEX IF NOT EXISTS text_content_fts_text_idx ON TABLE text_content
    FIELDS text
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
 DEFINE INDEX IF NOT EXISTS text_content_fts_category_idx ON TABLE text_content
    FIELDS category
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
 DEFINE INDEX IF NOT EXISTS text_content_fts_context_idx ON TABLE text_content
    FIELDS context
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
 DEFINE INDEX IF NOT EXISTS text_content_fts_file_name_idx ON TABLE text_content
    FIELDS file_info.file_name
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
 DEFINE INDEX IF NOT EXISTS text_content_fts_url_idx ON TABLE text_content
    FIELDS url_info.url
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
 DEFINE INDEX IF NOT EXISTS text_content_fts_url_title_idx ON TABLE text_content
    FIELDS url_info.title
    SEARCH ANALYZER app_default_fts_analyzer BM25 HIGHLIGHTS;
@@ -1 +0,0 @@
 REMOVE TABLE job;
@@ -1,17 +0,0 @@
 -- Add FTS indexes for searching name and description on entities
 DEFINE ANALYZER IF NOT EXISTS app_en_fts_analyzer
    TOKENIZERS class
    FILTERS lowercase, ascii, snowball(english);
 DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_name_idx ON TABLE knowledge_entity
    FIELDS name
    SEARCH ANALYZER app_en_fts_analyzer BM25;
 DEFINE INDEX IF NOT EXISTS knowledge_entity_fts_description_idx ON TABLE knowledge_entity
    FIELDS description
    SEARCH ANALYZER app_en_fts_analyzer BM25;
 DEFINE INDEX IF NOT EXISTS text_chunk_fts_chunk_idx ON TABLE text_chunk
    FIELDS chunk
    SEARCH ANALYZER app_en_fts_analyzer BM25;
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -98,7 +98,7 @@\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at; # For get_latest_knowledge_entities\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n # Defines the schema for the 'message' table.\n\n@@ -157,6 +157,8 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n\n","events":null}
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -51,23 +51,23 @@\n\n # Defines the schema for the 'ingestion_task' table (used by IngestionTask).\n\n-DEFINE TABLE IF NOT EXISTS job SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON job TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON job TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;\n+DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;\n\n # Custom fields from the IngestionTask struct\n # IngestionPayload is complex, store as object\n-DEFINE FIELD IF NOT EXISTS content ON job TYPE object;\n+DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\n # IngestionTaskStatus can hold data (InProgress), store as object\n-DEFINE FIELD IF NOT EXISTS status ON job TYPE object;\n-DEFINE FIELD IF NOT EXISTS user_id ON job TYPE string;\n+DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\n+DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;\n\n # Indexes explicitly defined in build_indexes and useful for get_unfinished_tasks\n-DEFINE INDEX IF NOT EXISTS idx_job_status ON job FIELDS status;\n-DEFINE INDEX IF NOT EXISTS idx_job_user ON job FIELDS user_id;\n-DEFINE INDEX IF NOT EXISTS idx_job_created ON job FIELDS created_at;\n+DEFINE INDEX IF NOT EXISTS idx_ingestion_task_status ON ingestion_task FIELDS status;\n+DEFINE INDEX IF NOT EXISTS idx_ingestion_task_user ON ingestion_task FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS idx_ingestion_task_created ON ingestion_task FIELDS created_at;\n\n # Defines the schema for the 'knowledge_entity' table.\n\n","events":null}
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -57,10 +57,7 @@\n DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;\n DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;\n\n-# Custom fields from the IngestionTask struct\n-# IngestionPayload is complex, store as object\n DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\n-# IngestionTaskStatus can hold data (InProgress), store as object\n DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\n DEFINE FIELD IF NOT EXISTS user_id ON ingestion_task TYPE string;\n\n@@ -157,10 +154,12 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -160,6 +160,7 @@\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -18,8 +18,8 @@\n DEFINE TABLE IF NOT EXISTS conversation SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON conversation TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON conversation TYPE datetime;\n\n # Custom fields from the Conversation struct\n DEFINE FIELD IF NOT EXISTS user_id ON conversation TYPE string;\n@@ -34,8 +34,8 @@\n DEFINE TABLE IF NOT EXISTS file SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON file TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON file TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON file TYPE datetime;\n\n # Custom fields from the FileInfo struct\n DEFINE FIELD IF NOT EXISTS sha256 ON file TYPE string;\n@@ -54,8 +54,8 @@\n DEFINE TABLE IF NOT EXISTS ingestion_task SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON ingestion_task TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON ingestion_task TYPE datetime;\n\n DEFINE FIELD IF NOT EXISTS content ON ingestion_task TYPE object;\n DEFINE FIELD IF NOT EXISTS status ON ingestion_task TYPE object;\n@@ -71,8 +71,8 @@\n DEFINE TABLE IF NOT EXISTS knowledge_entity SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON knowledge_entity TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON knowledge_entity TYPE datetime;\n\n # Custom fields from the KnowledgeEntity struct\n DEFINE FIELD IF NOT EXISTS source_id ON knowledge_entity TYPE 
string;\n@@ -102,8 +102,8 @@\n DEFINE TABLE IF NOT EXISTS message SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON message TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON message TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON message TYPE datetime;\n\n # Custom fields from the Message struct\n DEFINE FIELD IF NOT EXISTS conversation_id ON message TYPE string;\n@@ -167,8 +167,8 @@\n DEFINE TABLE IF NOT EXISTS text_chunk SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON text_chunk TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON text_chunk TYPE datetime;\n\n # Custom fields from the TextChunk struct\n DEFINE FIELD IF NOT EXISTS source_id ON text_chunk TYPE string;\n@@ -191,8 +191,8 @@\n DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON text_content TYPE datetime;\n\n # Custom fields from the TextContent struct\n DEFINE FIELD IF NOT EXISTS text ON text_content TYPE string;\n@@ -215,8 +215,8 @@\n DEFINE TABLE IF NOT EXISTS user SCHEMALESS;\n\n # Standard fields\n-DEFINE FIELD IF NOT EXISTS created_at ON user TYPE string;\n-DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE string;\n+DEFINE FIELD IF NOT EXISTS created_at ON user TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON user TYPE datetime;\n\n # Custom fields from the User struct\n DEFINE FIELD IF NOT EXISTS email ON user TYPE string;\n","events":null}
@@ -1 +0,0 @@
 {"schemas":"--- original\n+++ modified\n@@ -137,6 +137,30 @@\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;\n DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;\n\n+# Defines the schema for the 'scratchpad' table.\n+\n+DEFINE TABLE IF NOT EXISTS scratchpad SCHEMALESS;\n+\n+# Standard fields from stored_object! macro\n+DEFINE FIELD IF NOT EXISTS created_at ON scratchpad TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS updated_at ON scratchpad TYPE datetime;\n+\n+# Custom fields from the Scratchpad struct\n+DEFINE FIELD IF NOT EXISTS user_id ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS title ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS content ON scratchpad TYPE string;\n+DEFINE FIELD IF NOT EXISTS last_saved_at ON scratchpad TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS is_dirty ON scratchpad TYPE bool DEFAULT false;\n+DEFINE FIELD IF NOT EXISTS is_archived ON scratchpad TYPE bool DEFAULT false;\n+DEFINE FIELD IF NOT EXISTS archived_at ON scratchpad TYPE option<datetime>;\n+DEFINE FIELD IF NOT EXISTS ingested_at ON scratchpad TYPE option<datetime>;\n+\n+# Indexes based on query patterns\n+DEFINE INDEX IF NOT EXISTS scratchpad_user_idx ON scratchpad FIELDS user_id;\n+DEFINE INDEX IF NOT EXISTS scratchpad_user_archived_idx ON scratchpad FIELDS user_id, is_archived;\n+DEFINE INDEX IF NOT EXISTS scratchpad_updated_idx ON scratchpad FIELDS updated_at;\n+DEFINE INDEX IF NOT EXISTS scratchpad_archived_idx ON scratchpad FIELDS archived_at;\n+\n DEFINE TABLE OVERWRITE script_migration SCHEMAFULL\n     PERMISSIONS\n         FOR select FULL\n","events":null}
@@ -1,19 +0,0 @@
 # Defines the 'relates_to' edge table for KnowledgeRelationships.
 # Edges connect nodes, in this case knowledge_entity records.
 # Define the edge table itself, enforcing connections between knowledge_entity records
 # SCHEMAFULL requires all fields to be defined, maybe start with SCHEMALESS if metadata might vary
 DEFINE TABLE IF NOT EXISTS relates_to SCHEMALESS TYPE RELATION FROM knowledge_entity TO knowledge_entity;
 # Define the metadata field within the edge
 # RelationshipMetadata is a struct, store as object
 DEFINE FIELD IF NOT EXISTS metadata ON relates_to TYPE object;
 # Optionally, define fields within the metadata object for stricter schema (requires SCHEMAFULL on table)
 # DEFINE FIELD IF NOT EXISTS metadata.user_id ON relates_to TYPE string;
 # DEFINE FIELD IF NOT EXISTS metadata.source_id ON relates_to TYPE string;
 # DEFINE FIELD IF NOT EXISTS metadata.relationship_type ON relates_to TYPE string;
 # Add indexes based on query patterns (delete_relationships_by_source_id, get_knowledge_relationships)
 DEFINE INDEX IF NOT EXISTS relates_to_metadata_source_id_idx ON relates_to FIELDS metadata.source_id;
 DEFINE INDEX IF NOT EXISTS relates_to_metadata_user_id_idx ON relates_to FIELDS metadata.user_id;
@@ -4,37 +4,128 @@ use tokio::task::JoinError;
 use crate::storage::types::file_info::FileError;
 /// Errors from embedding provider operations.
 ///
 /// Each variant's `#[error(...)]` string is the message rendered by `Display`.
 #[allow(clippy::module_name_repetitions)]
 #[derive(Error, Debug)]
 pub enum EmbeddingError {
    /// Error from the OpenAI client, stored behind a `Box`.
    #[error("openai error: {0}")]
    OpenAI(Box<OpenAIError>),
    /// FastEmbed failure, carried as its rendered message.
    #[error("fastembed error: {0}")]
    FastEmbed(String),
    /// A tokio task could not be joined; converted automatically via `#[from]`.
    #[error("task join error: {0}")]
    Join(#[from] JoinError),
    /// The mutex guarding the fastembed model was poisoned; carries the rendered message.
    #[error("fastembed model mutex poisoned: {0}")]
    MutexPoisoned(String),
    /// No embedding data was received.
    #[error("no embedding data received")]
    NoData,
    /// Embedding configuration problem, described by the message.
    #[error("embedding configuration error: {0}")]
    Config(String),
    /// The named fastembed model is not recognised.
    #[error("unknown fastembed model: {0}")]
    UnknownModel(String),
 }
 /// Converts an [`OpenAIError`] into [`EmbeddingError::OpenAI`], boxing the source error.
 impl From<OpenAIError> for EmbeddingError {
    fn from(err: OpenAIError) -> Self {
        let boxed = Box::new(err);
        Self::OpenAI(boxed)
    }
 }
 impl EmbeddingError {
    /// Builds [`EmbeddingError::FastEmbed`] from any displayable error,
    /// keeping only its rendered message.
    pub(crate) fn fastembed(err: impl std::fmt::Display) -> Self {
        let message = err.to_string();
        Self::FastEmbed(message)
    }
    /// Builds [`EmbeddingError::MutexPoisoned`] from any displayable error,
    /// keeping only its rendered message.
    pub(crate) fn mutex_poisoned(err: impl std::fmt::Display) -> Self {
        let message = err.to_string();
        Self::MutexPoisoned(message)
    }
 }
 // Core internal errors
 #[allow(clippy::module_name_repetitions)]
 #[derive(Error, Debug)]
 pub enum AppError {
-    #[error("Database error: {0}")]
+    #[error("database error: {0}")]
-    Database(#[from] surrealdb::Error),
+    Database(Box<surrealdb::Error>),
-    #[error("OpenAI error: {0}")]
+    #[error("openai error: {0}")]
-    OpenAI(#[from] OpenAIError),
+    OpenAI(Box<OpenAIError>),
-    #[error("File error: {0}")]
+    #[error("embedding error: {0}")]
    Embedding(#[from] EmbeddingError),
    #[error("file error: {0}")]
    File(#[from] FileError),
-    #[error("Not found: {0}")]
+    #[error("not found: {0}")]
    NotFound(String),
-    #[error("Validation error: {0}")]
+    #[error("validation error: {0}")]
    Validation(String),
-    #[error("Authorization error: {0}")]
+    #[error("authorization error: {0}")]
    Auth(String),
-    #[error("LLM parsing error: {0}")]
+    #[error("llm parsing error: {0}")]
    LLMParsing(String),
-    #[error("Task join error: {0}")]
+    #[error("task join error: {0}")]
    Join(#[from] JoinError),
-    #[error("Graph mapper error: {0}")]
+    #[error("graph mapper error: {0}")]
    GraphMapper(String),
-    #[error("IoError: {0}")]
+    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
-    #[error("Reqwest error: {0}")]
+    #[error("reqwest error: {0}")]
-    Reqwest(#[from] reqwest::Error),
+    Reqwest(Box<reqwest::Error>),
-    #[error("Anyhow error: {0}")]
+    #[error("storage error: {0}")]
-    Anyhow(#[from] anyhow::Error),
+    Storage(Box<object_store::Error>),
-    #[error("Ingestion Processing error: {0}")]
+    #[error("ingestion processing error: {0}")]
    Processing(String),
-    #[error("DOM smoothie error: {0}")]
+    #[error("dom smoothie error: {0}")]
-    DomSmoothie(#[from] dom_smoothie::ReadabilityError),
+    DomSmoothie(Box<dom_smoothie::ReadabilityError>),
-    #[error("Internal service error: {0}")]
+    #[error("internal service error: {0}")]
    InternalError(String),
 }
 /// Converts a SurrealDB error into [`AppError::Database`].
 ///
 /// The source error is boxed so the variant only stores a pointer.
 impl From<surrealdb::Error> for AppError {
    fn from(err: surrealdb::Error) -> Self {
        Self::Database(Box::from(err))
    }
 }
 /// Converts an OpenAI client error into [`AppError::OpenAI`].
 ///
 /// The source error is boxed so the variant only stores a pointer.
 impl From<OpenAIError> for AppError {
    fn from(err: OpenAIError) -> Self {
        Self::OpenAI(Box::from(err))
    }
 }
 /// Converts a `reqwest` error into [`AppError::Reqwest`].
 ///
 /// The source error is boxed so the variant only stores a pointer.
 impl From<reqwest::Error> for AppError {
    fn from(err: reqwest::Error) -> Self {
        Self::Reqwest(Box::from(err))
    }
 }
 /// Converts an `object_store` error into [`AppError::Storage`].
 ///
 /// The source error is boxed so the variant only stores a pointer.
 impl From<object_store::Error> for AppError {
    fn from(err: object_store::Error) -> Self {
        Self::Storage(Box::from(err))
    }
 }
 /// Converts a `dom_smoothie` readability error into [`AppError::DomSmoothie`].
 ///
 /// The source error is boxed so the variant only stores a pointer.
 impl From<dom_smoothie::ReadabilityError> for AppError {
    fn from(err: dom_smoothie::ReadabilityError) -> Self {
        Self::DomSmoothie(Box::from(err))
    }
 }
 impl AppError {
    /// Creates an [`AppError::InternalError`] whose message is the `Display`
    /// rendering of `msg`.
    #[must_use]
    pub fn internal(msg: impl std::fmt::Display) -> Self {
        let rendered = msg.to_string();
        Self::InternalError(rendered)
    }
 }
 #[cfg(test)]
 mod tests {
    use super::AppError;
    /// Guards against `AppError` growing past 64 bytes.
    #[test]
    fn app_error_is_reasonably_sized() {
        let actual = std::mem::size_of::<AppError>();
        assert!(actual <= 64, "AppError is {actual} bytes");
    }
 }
@@ -1,3 +1,8 @@
 #![allow(clippy::doc_markdown)]
 //! Shared utilities and storage helpers for the workspace crates.
 pub mod error;
 pub mod storage;
 pub mod utils;
 #[cfg(any(test, feature = "test-utils"))]
 pub mod test_utils;
@@ -1,35 +1,33 @@
-use super::types::StoredObject;
+use super::types::{EmbeddingRecord, HasEmbedding, StoredObject};
 use crate::error::AppError;
 use axum_session::{SessionConfig, SessionError, SessionStore};
 use axum_session_surreal::SessionSurrealPool;
 use futures::Stream;
 use include_dir::{include_dir, Dir};
 use serde::de::DeserializeOwned;
 use serde::Serialize;
 use std::{ops::Deref, sync::Arc};
 use surrealdb::{
    engine::any::{connect, Any},
-    opt::auth::Root,
+    opt::auth::{Namespace, Root},
    Error, Notification, Surreal,
 };
 use surrealdb_migrations::MigrationRunner;
 use tracing::debug;
-static MIGRATIONS_DIR: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/");
+/// Embedded SurrealDB project root (`migrations/`, `schemas/`, `.surrealdb`).
 static MIGRATIONS_DIR: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/db");
 /// Cloneable wrapper around a SurrealDB connection using the `Any` engine.
 #[derive(Clone)]
 pub struct SurrealDbClient {
    /// The underlying SurrealDB connection; public, so it can be queried directly.
    pub client: Surreal<Any>,
 }
 /// Accessor trait for types that hold a shared [`SurrealDbClient`].
 #[allow(clippy::module_name_repetitions)]
 pub trait ProvidesDb {
    /// Returns a reference to the shared database client.
    fn db(&self) -> &Arc<SurrealDbClient>;
 }
 impl SurrealDbClient {
    /// # Initialize a new database client
    ///
    /// # Arguments
    ///
    /// # Returns
    /// * `SurrealDbClient` initialized
    pub async fn new(
        address: &str,
        username: &str,
@@ -39,15 +37,33 @@ impl SurrealDbClient {
    ) -> Result<Self, Error> {
        let db = connect(address).await?;
-        // Sign in to database
+        if !address.starts_with("mem://") {
-        db.signin(Root { username, password }).await?;
+            db.signin(Root { username, password }).await?;
        }
        // Set namespace
        db.use_ns(namespace).use_db(database).await?;
        Ok(SurrealDbClient { client: db })
    }
    pub async fn new_with_namespace_user(
        address: &str,
        namespace: &str,
        username: &str,
        password: &str,
        database: &str,
    ) -> Result<Self, Error> {
        let db = connect(address).await?;
        db.signin(Namespace {
            namespace,
            username,
            password,
        })
        .await?;
        db.use_ns(namespace).use_db(database).await?;
        Ok(SurrealDbClient { client: db })
    }
    pub async fn create_session_store(
        &self,
    ) -> Result<SessionStore<SessionSurrealPool<Any>>, SessionError> {
@@ -55,67 +71,44 @@ impl SurrealDbClient {
        SessionStore::new(
            Some(self.client.clone().into()),
            SessionConfig::default()
-                .with_table_name("test_session_table")
+                .with_table_name("session")
                .with_secure(true),
        )
        .await
    }
    /// Applies all pending database migrations found in the embedded MIGRATIONS_DIR.
    ///
    /// This function should be called during application startup, after connecting to
    /// the database and selecting the appropriate namespace and database, but before
    /// the application starts performing operations that rely on the schema.
    pub async fn apply_migrations(&self) -> Result<(), AppError> {
        debug!("Applying migrations");
        MigrationRunner::new(&self.client)
            .load_files(&MIGRATIONS_DIR)
            .up()
            .await
-            .map_err(|e| AppError::InternalError(e.to_string()))?;
+            .map_err(AppError::internal)?;
        Ok(())
    }
    /// Rebuilds the embedding and full-text indexes listed below inside a
    /// single transaction.
    ///
    /// Every statement uses `IF EXISTS`, so it names indexes that may or may
    /// not be defined.
    ///
    /// NOTE(review): the query response is not passed through `.check()`, so
    /// per-statement failures may go unreported — confirm this is intentional.
    pub async fn rebuild_indexes(&self) -> Result<(), Error> {
        const REBUILD_SQL: &str = r#"
            BEGIN TRANSACTION;
            REBUILD INDEX IF EXISTS idx_embedding_chunks ON text_chunk;
            REBUILD INDEX IF EXISTS idx_embedding_entities ON knowledge_entity;
            REBUILD INDEX IF EXISTS text_content_fts_idx ON text_content;
            REBUILD INDEX IF EXISTS knowledge_entity_fts_name_idx ON knowledge_entity;
            REBUILD INDEX IF EXISTS knowledge_entity_fts_description_idx ON knowledge_entity;
            REBUILD INDEX IF EXISTS text_chunk_fts_chunk_idx ON text_chunk;
            COMMIT TRANSACTION;
        "#;
        debug!("Rebuilding indexes");
        self.client.query(REBUILD_SQL).await?;
        Ok(())
    }
    /// Operation to store an object in SurrealDB, requires the struct to implement StoredObject
    ///
    /// # Arguments
    /// * `item` - The item to be stored
    ///
    /// # Returns
    /// * `Result` - Item or Error
    pub async fn store_item<T>(&self, item: T) -> Result<Option<T>, Error>
    where
        T: StoredObject + Send + Sync + 'static,
    {
        self.client
-            .create((T::table_name(), item.get_id()))
+            .create((T::table_name(), item.id()))
            .content(item)
            .await
    }
    /// Upserts `item` under the record key `(T::table_name(), item.id())`,
    /// using the item itself as the record content.
    ///
    /// # Arguments
    /// * `item` - The item to insert or overwrite
    ///
    /// # Returns
    /// * `Result<Option<T>, Error>` - The stored item or Error
    pub async fn upsert_item<T>(&self, item: T) -> Result<Option<T>, Error>
    where
        T: StoredObject + Send + Sync + 'static,
    {
        // Copy the id out first: `item` is moved into `.content(..)` below.
        let record_key = (T::table_name(), item.id().to_string());
        self.client.upsert(record_key).content(item).await
    }
    /// Operation to retrieve all objects from a certain table, requires the struct to implement StoredObject
    ///
    /// # Returns
    /// * `Result` - Vec<T> or Error
    pub async fn get_all_stored_items<T>(&self) -> Result<Vec<T>, Error>
    where
        T: for<'de> StoredObject,
@@ -123,13 +116,6 @@ impl SurrealDbClient {
        self.client.select(T::table_name()).await
    }
    /// Operation to retrieve a single object by its ID, requires the struct to implement StoredObject
    ///
    /// # Arguments
    /// * `id` - The ID of the item to retrieve
    ///
    /// # Returns
    /// * `Result<Option<T>, Error>` - The found item or Error
    pub async fn get_item<T>(&self, id: &str) -> Result<Option<T>, Error>
    where
        T: for<'de> StoredObject,
@@ -137,13 +123,6 @@ impl SurrealDbClient {
        self.client.select((T::table_name(), id)).await
    }
    /// Operation to delete a single object by its ID, requires the struct to implement StoredObject
    ///
    /// # Arguments
    /// * `id` - The ID of the item to delete
    ///
    /// # Returns
    /// * `Result<Option<T>, Error>` - The deleted item or Error
    pub async fn delete_item<T>(&self, id: &str) -> Result<Option<T>, Error>
    where
        T: for<'de> StoredObject,
@@ -151,10 +130,6 @@ impl SurrealDbClient {
        self.client.delete((T::table_name(), id)).await
    }
    /// Operation to listen to a table for updates, requires the struct to implement StoredObject
    ///
    /// # Returns
    /// * `Result<impl Stream<Item = Result<Notification<T>, Error>>, Error>` - Stream of live notifications or Error
    pub async fn listen<T>(
        &self,
    ) -> Result<impl Stream<Item = Result<Notification<T>, Error>>, Error>
@@ -163,6 +138,156 @@ impl SurrealDbClient {
    {
        self.client.select(T::table_name()).live().await
    }
    /// Atomically store an entity and its embedding vector in a single
    /// SurrealDB transaction.
    ///
    /// Issues a `CREATE` for the entity row and an `UPSERT` for the linked
    /// embedding record; both rows use the entity's id.  The embedding
    /// dimension is validated against `embedding_dimensions` before the
    /// query is issued.
    ///
    /// NOTE(review): the entity statement is `CREATE`, not `UPSERT`, so an
    /// already-existing entity id is expected to make the transaction fail
    /// rather than overwrite the row — confirm which behavior is intended.
    ///
    /// # Arguments
    /// * `entity` - The entity to store; also supplies the id, `source_id` and `user_id` of the embedding record
    /// * `embedding` - The embedding vector to store alongside the entity
    /// * `embedding_dimensions` - Dimension the vector is validated against
    ///
    /// # Returns
    /// * `Result<(), AppError>` - `Ok(())` on success; a validation, query, or statement error otherwise
    pub async fn store_with_embedding<E>(
        &self,
        entity: E,
        embedding: Vec<f32>,
        embedding_dimensions: usize,
    ) -> Result<(), AppError>
    where
        E: HasEmbedding + Serialize + Send + Sync + 'static,
        <E as HasEmbedding>::Embedding: Serialize + Send + Sync,
    {
        // Validate before building anything so no query is sent on a mismatch.
        E::Embedding::validate_dimension(&embedding, embedding_dimensions)?;
        let entity_id = entity.id().to_string();
        let emb = <E as HasEmbedding>::Embedding::new(
            &entity_id,
            entity.source_id().to_string(),
            embedding,
            entity.user_id().to_string(),
            E::table_name(),
        );
        // Table names are interpolated; the id and both payloads are bound parameters.
        let sql = format!(
            "
            BEGIN TRANSACTION;
              CREATE type::thing('{et}', $id) CONTENT $entity;
              UPSERT type::thing('{emt}', $id) CONTENT $emb;
            COMMIT TRANSACTION;
            ",
            et = E::table_name(),
            emt = <E as HasEmbedding>::Embedding::table_name(),
        );
        self.client
            .query(sql)
            .bind(("id", entity_id))
            .bind(("entity", entity))
            .bind(("emb", emb))
            .await?
            // `check` turns a failed statement in the response into an error.
            .check()?;
        Ok(())
    }
    /// Removes every embedding row and every entity row whose `source_id`
    /// equals the given value.
    ///
    /// The two `DELETE` statements are wrapped in `BEGIN TRANSACTION` /
    /// `COMMIT TRANSACTION`; the embedding table is cleared first, then the
    /// entity table.
    ///
    /// # Arguments
    /// * `source_id` - Value matched against the `source_id` field of both tables
    ///
    /// # Returns
    /// * `Result<(), AppError>` - `Ok(())` on success; a query or statement error otherwise
    pub async fn delete_by_source_id<E>(&self, source_id: &str) -> Result<(), AppError>
    where
        E: HasEmbedding,
        E::Embedding: Send + Sync,
    {
        let embedding_table = E::Embedding::table_name();
        let entity_table = E::table_name();
        let drop_embeddings =
            format!("DELETE FROM {embedding_table} WHERE source_id = $source_id;");
        let drop_entities = format!("DELETE FROM {entity_table} WHERE source_id = $source_id;");
        let response = self
            .client
            .query("BEGIN TRANSACTION;")
            .query(drop_embeddings)
            .query(drop_entities)
            .query("COMMIT TRANSACTION;")
            .bind(("source_id", source_id.to_owned()))
            .await?;
        response.check()?;
        Ok(())
    }
    /// Cosine-similarity search over the embedding table of `Emb`, returning
    /// the linked entities of type `E`.
    ///
    /// The query filters by `user_id`, applies the `<|take,100|>` nearest-
    /// neighbour operator on `embedding`, orders by descending score, limits
    /// to `take` rows, and uses `FETCH` on the link field so the linked
    /// entity comes back in the same query.  Rows whose link did not resolve
    /// to an entity are logged with `tracing::warn!` and left out of the
    /// result.
    ///
    /// # Arguments
    /// * `take` - Maximum number of rows requested
    /// * `query_embedding` - Vector compared against stored embeddings
    /// * `user_id` - Only embeddings with this `user_id` are considered
    ///
    /// # Returns
    /// * `Result<Vec<(E, f32)>, AppError>` - `(entity, score)` pairs in the order returned by the query
    pub async fn vector_search<E, Emb>(
        &self,
        take: usize,
        query_embedding: &[f32],
        user_id: &str,
    ) -> Result<Vec<(E, f32)>, AppError>
    where
        E: StoredObject + DeserializeOwned + Clone + Send + Sync,
        Emb: EmbeddingRecord + Send + Sync,
    {
        // Row shape shared by both embedding tables: the link column may be
        // named `entity_id` or `chunk_id`, so both are accepted as aliases.
        #[derive(serde::Deserialize)]
        struct FetchRow<Ent> {
            score: f32,
            #[serde(alias = "entity_id", alias = "chunk_id")]
            entity: Option<Ent>,
        }
        let link_field = Emb::link_field();
        let emb_table = Emb::table_name();
        let sql = format!(
            r#"
            SELECT
                {link_field},
                vector::similarity::cosine(embedding, $embedding) AS score
            FROM {emb_table}
            WHERE user_id = $user_id
              AND embedding <|{take},100|> $embedding
            ORDER BY score DESC
            LIMIT {take}
            FETCH {link_field}
            "#
        );
        let rows: Vec<FetchRow<E>> = self
            .client
            .query(sql)
            .bind(("embedding", query_embedding.to_vec()))
            .bind(("user_id", user_id.to_string()))
            .await?
            .check()?
            .take(0)?;
        let hits: Vec<(E, f32)> = rows
            .into_iter()
            .filter_map(|row| {
                let FetchRow { score, entity } = row;
                if entity.is_none() {
                    tracing::warn!(
                        "Vector search hit orphaned {} row with missing {link_field}",
                        Emb::table_name()
                    );
                }
                entity.map(|found| (found, score))
            })
            .collect();
        Ok(hits)
    }
 }
 impl Deref for SurrealDbClient {
@@ -175,41 +300,29 @@ impl Deref for SurrealDbClient {
 #[cfg(any(test, feature = "test-utils"))]
 impl SurrealDbClient {
    /// Opens a `mem://` SurrealDB connection and selects the given namespace
    /// and database. No sign-in is performed.
    pub async fn memory(namespace: &str, database: &str) -> Result<Self, Error> {
        let client = connect("mem://").await?;
        client.use_ns(namespace).use_db(database).await?;
        Ok(Self { client })
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use crate::stored_object;
    use anyhow::{self, Context};
-    use super::*;
+    use crate::test_utils::setup_test_db;
    use uuid::Uuid;
    stored_object!(Dummy, "dummy", {
        name: String
    });
    #[tokio::test]
-    async fn test_initialization_and_crud() {
+    async fn test_initialization_and_crud() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string(); // ensures isolation per test run
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Call your initialization
        db.apply_migrations()
            .await
            .expect("Failed to initialize schema");
        // Test basic CRUD
        let dummy = Dummy {
            id: "abc".to_string(),
            name: "first".to_string(),
@@ -217,49 +330,95 @@ mod tests {
            updated_at: Utc::now(),
        };
-        // Store
+        let stored = db
-        let stored = db.store_item(dummy.clone()).await.expect("Failed to store");
+            .store_item(dummy.clone())
            .await
            .with_context(|| "Failed to store".to_string())?;
        assert!(stored.is_some());
        // Read
        let fetched = db
            .get_item::<Dummy>(&dummy.id)
            .await
-            .expect("Failed to fetch");
+            .with_context(|| "Failed to fetch".to_string())?;
        assert_eq!(fetched, Some(dummy.clone()));
        // Read all
        let all = db
            .get_all_stored_items::<Dummy>()
            .await
-            .expect("Failed to fetch all");
+            .with_context(|| "Failed to fetch all".to_string())?;
        assert!(all.contains(&dummy));
        // Delete
        let deleted = db
            .delete_item::<Dummy>(&dummy.id)
            .await
-            .expect("Failed to delete");
+            .with_context(|| "Failed to delete".to_string())?;
        assert_eq!(deleted, Some(dummy));
        // After delete, should not be present
        let fetch_post = db
            .get_item::<Dummy>("abc")
            .await
-            .expect("Failed fetch post delete");
+            .with_context(|| "Failed fetch post delete".to_string())?;
        assert!(fetch_post.is_none());
        Ok(())
    }
    #[tokio::test]
-    async fn test_applying_migrations() {
+    async fn upsert_item_overwrites_existing_records() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        let mut dummy = Dummy {
            id: "abc".to_string(),
            name: "first".to_string(),
            created_at: Utc::now(),
            updated_at: Utc::now(),
        };
        db.store_item(dummy.clone())
            .await
            .with_context(|| "Failed to store initial record".to_string())?;
        dummy.name = "updated".to_string();
        let upserted = db
            .upsert_item(dummy.clone())
            .await
            .with_context(|| "Failed to upsert record".to_string())?;
        assert!(upserted.is_some());
        let fetched: Option<Dummy> = db
            .get_item(&dummy.id)
            .await
            .with_context(|| "fetch after upsert".to_string())?;
        let fetched =
            fetched.ok_or_else(|| anyhow::anyhow!("Expected record to exist after upsert"))?;
        assert_eq!(fetched.name, "updated");
        let new_record = Dummy {
            id: "def".to_string(),
            name: "brand-new".to_string(),
            created_at: Utc::now(),
            updated_at: Utc::now(),
        };
        db.upsert_item(new_record.clone())
            .await
            .with_context(|| "Failed to upsert new record".to_string())?;
        let fetched_new: Option<Dummy> = db
            .get_item(&new_record.id)
            .await
            .with_context(|| "fetch inserted via upsert".to_string())?;
        assert_eq!(fetched_new, Some(new_record));
        Ok(())
    }
    #[tokio::test]
    async fn test_applying_migrations() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        db.apply_migrations()
            .await
-            .expect("Failed to build indexes");
+            .with_context(|| "Failed to build indexes".to_string())?;
        Ok(())
    }
 }
@@ -1,3 +1,4 @@
 pub mod db;
 pub mod indexes;
 pub mod store;
 pub mod types;
@@ -1,4 +1,5 @@
-use crate::storage::types::{file_info::deserialize_flexible_id, user::User, StoredObject};
+use crate::storage::types::{user::User, StoredObject};
 use crate::utils::serde_helpers::deserialize_flexible_id;
 use serde::{Deserialize, Serialize};
 use crate::{error::AppError, storage::db::SurrealDbClient};
@@ -16,61 +17,78 @@ impl StoredObject for Analytics {
        "analytics"
    }
-    fn get_id(&self) -> &str {
+    fn id(&self) -> &str {
        &self.id
    }
 }
 impl Analytics {
    const RECORD_ID: &'static str = "current";
    /// Ensures the singleton analytics record exists (idempotent).
    ///
    /// Production databases are also seeded by `20250503_215025_initial_setup.surql`;
    /// this uses an atomic `UPSERT` for tests and recovery.
    pub async fn ensure_initialized(db: &SurrealDbClient) -> Result<Self, AppError> {
-        let analytics = db.get_item::<Self>("current").await?;
+        let analytics: Option<Self> = db
-
+            .client
-        if analytics.is_none() {
+            .query(
-            let created_analytics = Analytics {
+                "UPSERT type::thing('analytics', $id) SET visitors = visitors ?? 0, page_loads = page_loads ?? 0 RETURN AFTER",
-                id: "current".to_string(),
+            )
-                visitors: 0,
+            .bind(("id", Self::RECORD_ID))
-                page_loads: 0,
+            .await?
-            };
+            .take(0)?;
            let stored: Option<Self> = db.store_item(created_analytics).await?;
            return stored.ok_or(AppError::Validation(
                "Failed to initialize analytics".into(),
            ));
        }
        analytics.ok_or(AppError::Validation(
-            "Failed to initialize analytics".into(),
+            "failed to initialize analytics".into(),
        ))
    }
    pub async fn get_current(db: &SurrealDbClient) -> Result<Self, AppError> {
        let analytics: Option<Self> = db.get_item("current").await?;
-        analytics.ok_or(AppError::NotFound("Analytics not found".into()))
+        analytics.ok_or(AppError::NotFound("analytics not found".into()))
    }
    pub async fn increment_visitors(db: &SurrealDbClient) -> Result<Self, AppError> {
        let updated: Option<Self> = db
            .client
-            .query("UPDATE type::thing('analytics', 'current') SET visitors += 1 RETURN AFTER")
+            .query(
                "UPSERT type::thing('analytics', $id) SET visitors = (visitors ?? 0) + 1, page_loads = page_loads ?? 0 RETURN AFTER",
            )
            .bind(("id", Self::RECORD_ID))
            .await?
            .take(0)?;
-        updated.ok_or(AppError::Validation("Failed to update analytics".into()))
+        updated.ok_or(AppError::Validation("failed to update analytics".into()))
    }
    /// Records one page load without counting a new visitor.
    ///
    /// Delegates to [`Self::record_page_view`] with `is_new_visitor = false`.
    pub async fn increment_page_loads(db: &SurrealDbClient) -> Result<Self, AppError> {
        Self::record_page_view(db, false).await
    }
    /// Records a page view, optionally counting the visitor as new.
    pub async fn record_page_view(
        db: &SurrealDbClient,
        is_new_visitor: bool,
    ) -> Result<Self, AppError> {
        let visitor_delta = i64::from(is_new_visitor);
        let updated: Option<Self> = db
            .client
-            .query("UPDATE type::thing('analytics', 'current') SET page_loads += 1 RETURN AFTER")
+            .query(
                "UPSERT type::thing('analytics', $id) SET page_loads = (page_loads ?? 0) + 1, visitors = (visitors ?? 0) + $visitor_delta RETURN AFTER",
            )
            .bind(("id", Self::RECORD_ID))
            .bind(("visitor_delta", visitor_delta))
            .await?
            .take(0)?;
-        updated.ok_or(AppError::Validation("Failed to update analytics".into()))
+        updated.ok_or(AppError::Validation("failed to update analytics".into()))
    }
    pub async fn get_users_amount(db: &SurrealDbClient) -> Result<i64, AppError> {
        // We need to use a direct query for COUNT aggregation
        #[derive(Debug, Deserialize)]
        struct CountResult {
            /// Total user count.
            count: i64,
        }
@@ -81,14 +99,17 @@ impl Analytics {
            .await?
            .take(0)?;
-        Ok(result.map(|r| r.count).unwrap_or(0))
+        Ok(result.map_or(0, |r| r.count))
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use super::*;
    use crate::stored_object;
    use crate::test_utils::setup_test_db;
    use anyhow::{self};
    use uuid::Uuid;
    stored_object!(TestUser, "user", {
@@ -98,18 +119,11 @@ mod tests {
    });
    #[tokio::test]
-    async fn test_analytics_initialization() {
+    async fn test_analytics_initialization() -> anyhow::Result<()> {
        // Setup in-memory database for testing
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Test initialization of analytics
-        let analytics = Analytics::ensure_initialized(&db)
+        let analytics = Analytics::ensure_initialized(&db).await?;
            .await
            .expect("Failed to initialize analytics");
        // Verify initial state after initialization
        assert_eq!(analytics.id, "current");
@@ -117,159 +131,169 @@ mod tests {
        assert_eq!(analytics.visitors, 0);
        // Test idempotency - ensure calling it again doesn't change anything
-        let analytics_again = Analytics::ensure_initialized(&db)
+        let analytics_again = Analytics::ensure_initialized(&db).await?;
            .await
            .expect("Failed to get analytics after initialization");
        assert_eq!(analytics.id, analytics_again.id);
        assert_eq!(analytics.page_loads, analytics_again.page_loads);
        assert_eq!(analytics.visitors, analytics_again.visitors);
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_current_analytics() {
+    async fn test_get_current_analytics() -> anyhow::Result<()> {
        // Setup in-memory database for testing
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Initialize analytics
-        Analytics::ensure_initialized(&db)
+        Analytics::ensure_initialized(&db).await?;
            .await
            .expect("Failed to initialize analytics");
        // Test get_current method
-        let analytics = Analytics::get_current(&db)
+        let analytics = Analytics::get_current(&db).await?;
            .await
            .expect("Failed to get current analytics");
        assert_eq!(analytics.id, "current");
        assert_eq!(analytics.page_loads, 0);
        assert_eq!(analytics.visitors, 0);
        Ok(())
    }
    #[tokio::test]
-    async fn test_increment_visitors() {
+    async fn test_increment_visitors() -> anyhow::Result<()> {
        // Setup in-memory database for testing
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Initialize analytics
-        Analytics::ensure_initialized(&db)
+        Analytics::ensure_initialized(&db).await?;
            .await
            .expect("Failed to initialize analytics");
        // Test increment_visitors method
-        let analytics = Analytics::increment_visitors(&db)
+        let analytics = Analytics::increment_visitors(&db).await?;
            .await
            .expect("Failed to increment visitors");
        assert_eq!(analytics.visitors, 1);
        assert_eq!(analytics.page_loads, 0);
        // Increment again and check
-        let analytics = Analytics::increment_visitors(&db)
+        let analytics = Analytics::increment_visitors(&db).await?;
            .await
            .expect("Failed to increment visitors again");
        assert_eq!(analytics.visitors, 2);
        assert_eq!(analytics.page_loads, 0);
        Ok(())
    }
    #[tokio::test]
-    async fn test_increment_page_loads() {
+    async fn test_increment_page_loads() -> anyhow::Result<()> {
        // Setup in-memory database for testing
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Initialize analytics
-        Analytics::ensure_initialized(&db)
+        Analytics::ensure_initialized(&db).await?;
            .await
            .expect("Failed to initialize analytics");
        // Test increment_page_loads method
-        let analytics = Analytics::increment_page_loads(&db)
+        let analytics = Analytics::increment_page_loads(&db).await?;
            .await
            .expect("Failed to increment page loads");
        assert_eq!(analytics.visitors, 0);
        assert_eq!(analytics.page_loads, 1);
        // Increment again and check
-        let analytics = Analytics::increment_page_loads(&db)
+        let analytics = Analytics::increment_page_loads(&db).await?;
            .await
            .expect("Failed to increment page loads again");
        assert_eq!(analytics.visitors, 0);
        assert_eq!(analytics.page_loads, 2);
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_users_amount() {
+    async fn test_get_users_amount() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string()).await?;
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Test with no users
-        let count = Analytics::get_users_amount(&db)
+        let count = Analytics::get_users_amount(&db).await?;
            .await
            .expect("Failed to get users amount");
        assert_eq!(count, 0);
        // Create a few test users
        for i in 0..3 {
            let user = TestUser {
-                id: format!("user{}", i),
+                id: format!("user{i}"),
-                email: format!("user{}@example.com", i),
+                email: format!("user{i}@example.com"),
                password: "password".to_string(),
-                user_id: format!("uid{}", i),
+                user_id: format!("uid{i}"),
                created_at: Utc::now(),
                updated_at: Utc::now(),
            };
-            db.store_item(user)
+            db.store_item(user).await?;
                .await
                .expect("Failed to create test user");
        }
        // Test users amount after adding users
-        let count = Analytics::get_users_amount(&db)
+        let count = Analytics::get_users_amount(&db).await?;
            .await
            .expect("Failed to get users amount after adding users");
        assert_eq!(count, 3);
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_current_nonexistent() {
+    async fn test_increment_visitors_without_prior_init() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
-        let namespace = "test_ns";
+        let analytics = Analytics::increment_visitors(&db).await?;
-        let database = &Uuid::new_v4().to_string();
+        assert_eq!(analytics.visitors, 1);
-        let db = SurrealDbClient::memory(namespace, database)
+        assert_eq!(analytics.page_loads, 0);
            .await
            .expect("Failed to start in-memory surrealdb");
        Ok(())
    }
    /// Calls `increment_page_loads` without this test calling
    /// `ensure_initialized` first and expects one page load and zero visitors.
    #[tokio::test]
    async fn test_increment_page_loads_without_prior_init() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        // The increment is the very first analytics call made by this test.
        let snapshot = Analytics::increment_page_loads(&client).await?;
        assert_eq!(snapshot.page_loads, 1);
        assert_eq!(snapshot.visitors, 0);
        Ok(())
    }
    /// Interleaves visitor and page-load increments and checks after every
    /// step that only the targeted counter moved.
    #[tokio::test]
    async fn test_visitor_and_page_load_increments_are_independent() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        // Step 1: first visitor, page loads untouched.
        let step_one = Analytics::increment_visitors(&client).await?;
        assert_eq!(step_one.visitors, 1);
        assert_eq!(step_one.page_loads, 0);
        // Step 2: first page load, visitor count carried over.
        let step_two = Analytics::increment_page_loads(&client).await?;
        assert_eq!(step_two.visitors, 1);
        assert_eq!(step_two.page_loads, 1);
        // Step 3: second visitor, page-load count carried over.
        let step_three = Analytics::increment_visitors(&client).await?;
        assert_eq!(step_three.visitors, 2);
        assert_eq!(step_three.page_loads, 1);
        Ok(())
    }
    /// `record_page_view` with the flag set to `true` bumps both counters;
    /// with `false` only the page-load counter advances.
    #[tokio::test]
    async fn test_record_page_view() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        // Flag `true`: both visitors and page loads go from 0 to 1.
        let with_new_visitor = Analytics::record_page_view(&client, true).await?;
        assert_eq!(with_new_visitor.visitors, 1);
        assert_eq!(with_new_visitor.page_loads, 1);
        // Flag `false`: visitors stay at 1 while page loads reach 2.
        let without_new_visitor = Analytics::record_page_view(&client, false).await?;
        assert_eq!(without_new_visitor.visitors, 1);
        assert_eq!(without_new_visitor.page_loads, 2);
        Ok(())
    }
    #[tokio::test]
    async fn test_get_current_nonexistent() -> anyhow::Result<()> {
        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string()).await?;
        // Don't initialize analytics and try to get it
        let result = Analytics::get_current(&db).await;
        assert!(result.is_err());
-        if let Err(err) = result {
+        match result {
-            match err {
+            Ok(_) => anyhow::bail!("Expected NotFound error, got success"),
-                AppError::NotFound(_) => {
+            Err(AppError::NotFound(_)) => {}
-                    // Expected error
+            Err(err) => anyhow::bail!("Expected NotFound error, got: {err:?}"),
                }
                _ => panic!("Expected NotFound error, got: {:?}", err),
            }
        }
        Ok(())
    }
 }
@@ -10,7 +10,57 @@ stored_object!(Conversation, "conversation", {
    title: String
 });
 /// Reduced view of a conversation holding only its id and title.
 ///
 /// Returned by `Conversation::get_user_sidebar_conversations`.
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 #[allow(clippy::module_name_repetitions)]
 pub struct SidebarConversation {
    /// Conversation id as a plain string. Deserialized through
    /// `deserialize_sidebar_id`, which accepts a string value or a map that
    /// parses as a `surrealdb::sql::Thing`.
    #[serde(deserialize_with = "deserialize_sidebar_id")]
    pub id: String,
    /// Conversation title.
    pub title: String,
 }
 struct SidebarIdVisitor;
 impl<'de> serde::de::Visitor<'de> for SidebarIdVisitor {
    type Value = String;
    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("a string id or a SurrealDB Thing")
    }
    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(value.to_string())
    }
    fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        Ok(value)
    }
    fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::MapAccess<'de>,
    {
        let thing = <surrealdb::sql::Thing as serde::Deserialize>::deserialize(
            serde::de::value::MapAccessDeserializer::new(map),
        )?;
        Ok(thing.id.to_raw())
    }
 }
 /// `deserialize_with` helper for the `id` field of `SidebarConversation`.
 ///
 /// Passes `SidebarIdVisitor` to `deserialize_any`, so the input format decides
 /// whether the visitor sees a string or a map.
 fn deserialize_sidebar_id<'de, D: serde::Deserializer<'de>>(
    deserializer: D,
 ) -> Result<String, D::Error> {
    deserializer.deserialize_any(SidebarIdVisitor)
 }
 impl Conversation {
    #[must_use]
    pub fn new(user_id: String, title: String) -> Self {
        let now = Utc::now();
        Self {
@@ -30,7 +80,7 @@ impl Conversation {
        let conversation: Conversation = db
            .get_item(conversation_id)
            .await?
-            .ok_or_else(|| AppError::NotFound("Conversation not found".to_string()))?;
+            .ok_or_else(|| AppError::NotFound("conversation not found".to_string()))?;
        if conversation.user_id != user_id {
            return Err(AppError::Auth(
@@ -38,10 +88,15 @@ impl Conversation {
            ));
        }
-        let messages:Vec<Message> = db.client.
+        let messages: Vec<Message> = db
-            query("SELECT * FROM type::table($table_name) WHERE conversation_id = $conversation_id ORDER BY updated_at").
+            .client
-            bind(("table_name", Message::table_name())).
+            .query(
-            bind(("conversation_id", conversation_id.to_string()))
+                "SELECT * FROM type::table($message_table) WHERE conversation_id = $conversation_id AND type::thing($conversation_table, $conversation_id).user_id = $user_id ORDER BY updated_at",
            )
            .bind(("message_table", Message::table_name()))
            .bind(("conversation_table", Self::table_name()))
            .bind(("conversation_id", conversation_id.to_string()))
            .bind(("user_id", user_id.to_string()))
            .await?
            .take(0)?;
@@ -56,7 +111,7 @@ impl Conversation {
        // First verify ownership by getting conversation user_id
        let conversation: Option<Conversation> = db.get_item(id).await?;
        let conversation =
-            conversation.ok_or_else(|| AppError::NotFound("Conversation not found".to_string()))?;
+            conversation.ok_or_else(|| AppError::NotFound("conversation not found".to_string()))?;
        if conversation.user_id != user_id {
            return Err(AppError::Auth(
@@ -64,7 +119,7 @@ impl Conversation {
            ));
        }
-        let _updated: Option<Self> = db
+        let updated: Option<Self> = db
            .update((Self::table_name(), id))
            .patch(PatchOp::replace("/title", new_title.to_string()))
            .patch(PatchOp::replace(
@@ -73,82 +128,107 @@ impl Conversation {
            ))
            .await?;
        if updated.is_none() {
            return Err(AppError::NotFound("conversation not found".to_string()));
        }
        Ok(())
    }
    /// Loads the id/title projection of every conversation owned by `user_id`,
    /// ordered by `updated_at` descending.
    ///
    /// # Errors
    /// Propagates any failure from running the query or from taking and
    /// deserializing the first result set.
    pub async fn get_user_sidebar_conversations(
        user_id: &str,
        db: &SurrealDbClient,
    ) -> Result<Vec<SidebarConversation>, AppError> {
        // NOTE(review): `updated_at` is selected although `SidebarConversation`
        // has no such field — presumably so it can be used in ORDER BY; confirm.
        let mut response = db
            .client
            .query(
                "SELECT id, title, updated_at FROM type::table($table_name) WHERE user_id = $user_id ORDER BY updated_at DESC",
            )
            .bind(("table_name", Self::table_name()))
            .bind(("user_id", user_id.to_string()))
            .await?;
        let sidebar_items: Vec<SidebarConversation> = response.take(0)?;
        Ok(sidebar_items)
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use crate::storage::types::message::MessageRole;
    use crate::test_utils::setup_test_db;
    use anyhow::{self, Context};
    use super::*;
-    #[tokio::test]
+    const MESSAGE_QUERY_FOR_OWNER: &str = "SELECT * FROM type::table($message_table) WHERE conversation_id = $conversation_id AND type::thing($conversation_table, $conversation_id).user_id = $user_id ORDER BY updated_at";
-    async fn test_create_conversation() {
+
-        // Setup in-memory database for testing
+    async fn fetch_messages_for_owner(
-        let namespace = "test_ns";
+        db: &SurrealDbClient,
-        let database = &Uuid::new_v4().to_string();
+        conversation_id: &str,
-        let db = SurrealDbClient::memory(namespace, database)
+        user_id: &str,
-            .await
+    ) -> Result<Vec<Message>, AppError> {
-            .expect("Failed to start in-memory surrealdb");
+        db.client
            .query(MESSAGE_QUERY_FOR_OWNER)
            .bind(("message_table", Message::table_name()))
            .bind(("conversation_table", Conversation::table_name()))
            .bind(("conversation_id", conversation_id.to_string()))
            .bind(("user_id", user_id.to_string()))
            .await?
            .take(0)
            .map_err(AppError::from)
    }
    #[tokio::test]
    async fn test_create_conversation() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        // Create a new conversation
        let user_id = "test_user";
        let title = "Test Conversation";
        let conversation = Conversation::new(user_id.to_string(), title.to_string());
        // Verify conversation properties
        assert_eq!(conversation.user_id, user_id);
        assert_eq!(conversation.title, title);
        assert!(!conversation.id.is_empty());
        // Store the conversation
        let result = db.store_item(conversation.clone()).await;
        assert!(result.is_ok());
        // Verify it can be retrieved
        let retrieved: Option<Conversation> = db
            .get_item(&conversation.id)
            .await
-            .expect("Failed to retrieve conversation");
+            .with_context(|| "Failed to retrieve conversation".to_string())?;
        assert!(retrieved.is_some());
-        let retrieved = retrieved.unwrap();
+        let retrieved =
            retrieved.ok_or_else(|| anyhow::anyhow!("Expected conversation to exist"))?;
        assert_eq!(retrieved.id, conversation.id);
        assert_eq!(retrieved.user_id, user_id);
        assert_eq!(retrieved.title, title);
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_complete_conversation_not_found() {
+    async fn test_get_complete_conversation_not_found() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Try to get a conversation that doesn't exist
        let result =
            Conversation::get_complete_conversation("nonexistent_id", "test_user", &db).await;
        assert!(result.is_err());
        match result {
-            Err(AppError::NotFound(_)) => { /* expected error */ }
+            Err(AppError::NotFound(_)) => {}
-            _ => panic!("Expected NotFound error"),
+            _ => anyhow::bail!("Expected NotFound error"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_complete_conversation_unauthorized() {
+    async fn test_get_complete_conversation_unauthorized() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Create and store a conversation for user_id_1
        let user_id_1 = "user_1";
        let conversation =
            Conversation::new(user_id_1.to_string(), "Private Conversation".to_string());
@@ -156,27 +236,24 @@ mod tests {
        db.store_item(conversation)
            .await
-            .expect("Failed to store conversation");
+            .with_context(|| "Failed to store conversation".to_string())?;
        // Try to access with a different user
        let user_id_2 = "user_2";
        let result =
            Conversation::get_complete_conversation(&conversation_id, user_id_2, &db).await;
        assert!(result.is_err());
        match result {
-            Err(AppError::Auth(_)) => { /* expected error */ }
+            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected Auth error"),
+            _ => anyhow::bail!("Expected Auth error"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_patch_title_success() {
+    async fn test_patch_title_success() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        let user_id = "user_1";
        let original_title = "Original Title";
@@ -185,49 +262,42 @@ mod tests {
        db.store_item(conversation)
            .await
-            .expect("Failed to store conversation");
+            .with_context(|| "Failed to store conversation".to_string())?;
        let new_title = "Updated Title";
        // Patch title successfully
        let result = Conversation::patch_title(&conversation_id, user_id, new_title, &db).await;
        assert!(result.is_ok());
        // Retrieve from DB to verify
        let updated_conversation = db
            .get_item::<Conversation>(&conversation_id)
            .await
-            .expect("Failed to get conversation")
+            .with_context(|| "Failed to get conversation".to_string())?
-            .expect("Conversation missing");
+            .ok_or_else(|| anyhow::anyhow!("Conversation missing"))?;
        assert_eq!(updated_conversation.title, new_title);
        assert_eq!(updated_conversation.user_id, user_id);
        Ok(())
    }
    #[tokio::test]
-    async fn test_patch_title_not_found() {
+    async fn test_patch_title_not_found() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Try to patch non-existing conversation
        let result = Conversation::patch_title("nonexistent", "user_x", "New Title", &db).await;
        assert!(result.is_err());
        match result {
            Err(AppError::NotFound(_)) => {}
-            _ => panic!("Expected NotFound error"),
+            _ => anyhow::bail!("Expected NotFound error"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_patch_title_unauthorized() {
+    async fn test_patch_title_unauthorized() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        let owner_id = "owner";
        let other_user_id = "intruder";
@@ -236,38 +306,119 @@ mod tests {
        db.store_item(conversation)
            .await
-            .expect("Failed to store conversation");
+            .with_context(|| "Failed to store conversation".to_string())?;
        // Attempt patch with unauthorized user
        let result =
            Conversation::patch_title(&conversation_id, other_user_id, "Hacked Title", &db).await;
        assert!(result.is_err());
        match result {
            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected Auth error"),
+            _ => anyhow::bail!("Expected Auth error"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_complete_conversation_with_messages() {
+    async fn test_get_user_sidebar_conversations_filters_and_orders_by_updated_at_desc() {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await.expect("setup_test_db");
-        let namespace = "test_ns";
+
-        let database = &Uuid::new_v4().to_string();
+        let user_id = "sidebar_user";
-        let db = SurrealDbClient::memory(namespace, database)
+        let other_user_id = "other_user";
-            .await
+        let base = Utc::now();
-            .expect("Failed to start in-memory surrealdb");
+
        let mut oldest = Conversation::new(user_id.to_string(), "Oldest".to_string());
        oldest.updated_at = base - chrono::Duration::minutes(30);
        let mut newest = Conversation::new(user_id.to_string(), "Newest".to_string());
        newest.updated_at = base - chrono::Duration::minutes(5);
        let mut middle = Conversation::new(user_id.to_string(), "Middle".to_string());
        middle.updated_at = base - chrono::Duration::minutes(15);
        let mut other_user = Conversation::new(other_user_id.to_string(), "Other".to_string());
        other_user.updated_at = base;
        db.store_item(oldest.clone())
            .await
            .expect("Failed to store oldest conversation");
        db.store_item(newest.clone())
            .await
            .expect("Failed to store newest conversation");
        db.store_item(middle.clone())
            .await
            .expect("Failed to store middle conversation");
        db.store_item(other_user)
            .await
            .expect("Failed to store other-user conversation");
        let sidebar_items = Conversation::get_user_sidebar_conversations(user_id, &db)
            .await
            .expect("Failed to get sidebar conversations");
        assert_eq!(sidebar_items.len(), 3);
        let s0 = sidebar_items.first().expect("expected 3 items");
        let s1 = sidebar_items.get(1).expect("expected 3 items");
        let s2 = sidebar_items.get(2).expect("expected 3 items");
        assert_eq!(s0.id, newest.id);
        assert_eq!(s0.title, "Newest");
        assert_eq!(s1.id, middle.id);
        assert_eq!(s1.title, "Middle");
        assert_eq!(s2.id, oldest.id);
        assert_eq!(s2.title, "Oldest");
    }
    /// After `patch_title` on the older conversation, the sidebar must list
    /// that conversation first and show its new title.
    #[tokio::test]
    async fn test_sidebar_projection_reflects_patch_title_and_updated_at_reorder() {
        let client = setup_test_db().await.expect("setup_test_db");
        let owner = "sidebar_patch_user";
        let now = Utc::now();
        // Two conversations with explicit timestamps: `older` is 20 minutes old,
        // `newer` is 10 minutes old, so `newer` leads the sidebar initially.
        let mut older = Conversation::new(owner.to_string(), "First".to_string());
        older.updated_at = now - chrono::Duration::minutes(20);
        let mut newer = Conversation::new(owner.to_string(), "Second".to_string());
        newer.updated_at = now - chrono::Duration::minutes(10);
        for (conversation, failure) in [
            (older.clone(), "Failed to store first conversation"),
            (newer.clone(), "Failed to store second conversation"),
        ] {
            client.store_item(conversation).await.expect(failure);
        }
        let listing_before = Conversation::get_user_sidebar_conversations(owner, &client)
            .await
            .expect("Failed to get sidebar conversations before patch");
        let head_before = listing_before.first().expect("expected at least 1 item");
        assert_eq!(head_before.id, newer.id);
        Conversation::patch_title(&older.id, owner, "First (renamed)", &client)
            .await
            .expect("Failed to patch conversation title");
        let listing_after = Conversation::get_user_sidebar_conversations(owner, &client)
            .await
            .expect("Failed to get sidebar conversations after patch");
        let head_after = listing_after.first().expect("expected at least 1 item");
        assert_eq!(head_after.id, older.id);
        assert_eq!(head_after.title, "First (renamed)");
    }
    #[tokio::test]
    async fn test_get_complete_conversation_with_messages() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        // Create and store a conversation for user_id_1
        let user_id_1 = "user_1";
        let conversation = Conversation::new(user_id_1.to_string(), "Conversation".to_string());
        let conversation_id = conversation.id.clone();
        db.store_item(conversation)
            .await
-            .expect("Failed to store conversation");
+            .with_context(|| "Failed to store conversation".to_string())?;
        // Create messages
        let message1 = Message::new(
            conversation_id.clone(),
            MessageRole::User,
@@ -287,46 +438,190 @@ mod tests {
            None,
        );
        // Store messages
        db.store_item(message1)
            .await
-            .expect("Failed to store message1");
+            .with_context(|| "Failed to store message1".to_string())?;
        db.store_item(message2)
            .await
-            .expect("Failed to store message2");
+            .with_context(|| "Failed to store message2".to_string())?;
        db.store_item(message3)
            .await
-            .expect("Failed to store message3");
+            .with_context(|| "Failed to store message3".to_string())?;
        // Retrieve the complete conversation
        let result =
            Conversation::get_complete_conversation(&conversation_id, user_id_1, &db).await;
        assert!(result.is_ok(), "Failed to retrieve complete conversation");
-        let (retrieved_conversation, messages) = result.unwrap();
+        let (retrieved_conversation, retrieved_messages) =
            result.with_context(|| "Failed to retrieve complete conversation".to_string())?;
        // Verify conversation data
        assert_eq!(retrieved_conversation.id, conversation_id);
        assert_eq!(retrieved_conversation.user_id, user_id_1);
        assert_eq!(retrieved_conversation.title, "Conversation");
-        // Verify messages
+        assert_eq!(retrieved_messages.len(), 3);
        assert_eq!(messages.len(), 3);
-        // Verify messages are sorted by updated_at
+        let message_contents: Vec<&str> = retrieved_messages
-        let message_contents: Vec<&str> = messages.iter().map(|m| m.content.as_str()).collect();
+            .iter()
            .map(|m| m.content.as_str())
            .collect();
        assert!(message_contents.contains(&"Hello, AI!"));
        assert!(message_contents.contains(&"Hello, human! How can I help you today?"));
        assert!(message_contents.contains(&"Tell me about Rust programming."));
        // Make sure we can't access with different user
        let user_id_2 = "user_2";
        let unauthorized_result =
            Conversation::get_complete_conversation(&conversation_id, user_id_2, &db).await;
        assert!(unauthorized_result.is_err());
        match unauthorized_result {
-            Err(AppError::Auth(_)) => { /* expected error */ }
+            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected Auth error"),
+            _ => anyhow::bail!("Expected Auth error"),
        }
        Ok(())
    }
    /// A JSON document whose `id` is an ordinary string deserializes with the
    /// id taken verbatim.
    #[test]
    fn test_sidebar_conversation_deserializes_plain_string_id() {
        let raw = r#"{"id":"conv-plain","title":"My chat"}"#;
        let parsed = serde_json::from_str::<SidebarConversation>(raw)
            .expect("valid sidebar conversation json");
        assert_eq!(parsed.id, "conv-plain");
        assert_eq!(parsed.title, "My chat");
    }
    /// Stores one conversation and checks that the sidebar query returns it
    /// with the same id string and title that were stored.
    #[tokio::test]
    async fn test_sidebar_conversation_deserializes_id_from_db_record() {
        let client = setup_test_db().await.expect("setup_test_db");
        let owner = "sidebar_owner";
        let stored = Conversation::new(owner.to_string(), "Sidebar title".to_string());
        // Remember the id before the conversation is moved into the store call.
        let stored_id = stored.id.clone();
        client
            .store_item(stored)
            .await
            .expect("Failed to store conversation");
        let listing = Conversation::get_user_sidebar_conversations(owner, &client)
            .await
            .expect("Failed to load sidebar");
        assert_eq!(listing.len(), 1);
        let only = listing.first().expect("expected one sidebar item");
        assert_eq!(only.id, stored_id);
        assert_eq!(only.title, "Sidebar title");
    }
    /// Runs the owner-scoped message query for the owner and for another user:
    /// the owner gets the stored message, the other user gets nothing.
    #[tokio::test]
    async fn test_message_query_filters_by_owner_user_id_in_sql() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        let owner = "owner_user";
        let intruder = "intruder_user";
        let private_conversation = Conversation::new(owner.to_string(), "Private".to_string());
        let conversation_id = private_conversation.id.clone();
        client.store_item(private_conversation).await?;
        let secret = Message::new(
            conversation_id.clone(),
            MessageRole::User,
            "secret message".to_string(),
            None,
        );
        client.store_item(secret).await?;
        // Owner: exactly the one stored message comes back.
        let visible_to_owner = fetch_messages_for_owner(&client, &conversation_id, owner).await?;
        assert_eq!(visible_to_owner.len(), 1);
        let owner_message = visible_to_owner.first().expect("expected owner message");
        assert_eq!(owner_message.content, "secret message");
        // Non-owner: same conversation id, different user id, empty result.
        let visible_to_intruder =
            fetch_messages_for_owner(&client, &conversation_id, intruder).await?;
        assert!(
            visible_to_intruder.is_empty(),
            "SQL owner filter must not return messages for a non-owner user_id"
        );
        Ok(())
    }
    /// Stores two messages with explicit `updated_at` values and expects
    /// `get_complete_conversation` to return the older one first.
    #[tokio::test]
    async fn test_get_complete_conversation_orders_messages_by_updated_at() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        let owner = "order_user";
        let ordered = Conversation::new(owner.to_string(), "Ordered".to_string());
        let conversation_id = ordered.id.clone();
        client.store_item(ordered).await?;
        let now = Utc::now();
        // Older message: 20 minutes in the past.
        let mut earlier = Message::new(
            conversation_id.clone(),
            MessageRole::User,
            "first".to_string(),
            None,
        );
        earlier.updated_at = now - chrono::Duration::minutes(20);
        // Newer message: 5 minutes in the past.
        let mut later = Message::new(
            conversation_id.clone(),
            MessageRole::AI,
            "second".to_string(),
            None,
        );
        later.updated_at = now - chrono::Duration::minutes(5);
        client.store_item(earlier).await?;
        client.store_item(later).await?;
        let (_, fetched) =
            Conversation::get_complete_conversation(&conversation_id, owner, &client).await?;
        assert_eq!(fetched.len(), 2);
        let head = fetched.first().expect("expected first message");
        assert_eq!(head.content, "first");
        let tail = fetched.get(1).expect("expected second message");
        assert_eq!(tail.content, "second");
        Ok(())
    }
    /// Stores a conversation, deletes it, and expects a later `patch_title`
    /// by the same owner to fail with `AppError::NotFound`.
    #[tokio::test]
    async fn test_patch_title_not_found_when_conversation_deleted() -> anyhow::Result<()> {
        let client = setup_test_db().await?;
        let owner = "owner";
        let doomed = Conversation::new(owner.to_string(), "To delete".to_string());
        let conversation_id = doomed.id.clone();
        client.store_item(doomed).await?;
        client.delete_item::<Conversation>(&conversation_id).await?;
        let outcome = Conversation::patch_title(&conversation_id, owner, "New title", &client).await;
        assert!(outcome.is_err());
        // Any result other than NotFound (including success) fails the test.
        match outcome {
            Err(AppError::NotFound(_)) => Ok(()),
            other => anyhow::bail!("expected NotFound, got {other:?}"),
        }
    }
    /// `Conversation::new` must produce a non-empty id, a `created_at` that
    /// lies between the instants sampled around the call, an `updated_at`
    /// equal to `created_at`, and the given user id and title.
    ///
    /// This is a plain synchronous test: nothing here awaits, so no async
    /// runtime is needed.
    #[test]
    fn test_conversation_new_initializes_timestamps_and_id() {
        let before = Utc::now();
        let conversation = Conversation::new("user".to_string(), "Title".to_string());
        let after = Utc::now();
        assert!(!conversation.id.is_empty());
        // Check each bound on its own so a failure names the violated side.
        assert!(
            conversation.created_at >= before,
            "created_at is earlier than the instant sampled before construction"
        );
        assert!(
            conversation.created_at <= after,
            "created_at is later than the instant sampled after construction"
        );
        assert_eq!(conversation.created_at, conversation.updated_at);
        assert_eq!(conversation.user_id, "user");
        assert_eq!(conversation.title, "Title");
    }
 }
@@ -1,3 +1,4 @@
 #![allow(clippy::result_large_err)]
 use crate::{error::AppError, storage::types::file_info::FileInfo};
 use serde::{Deserialize, Serialize};
 use tracing::info;
@@ -25,77 +26,150 @@ pub enum IngestionPayload {
    },
 }
 impl Default for IngestionPayload {
    /// Builds a `Text` payload whose every field is an empty string. It serves as
    /// a cheap placeholder once the real content has been moved out of a task
    /// (see [`crate::storage::types::ingestion_task::IngestionTask::take_content`]).
    fn default() -> Self {
        // `String::new` does not allocate, so the placeholder is free to build.
        let blank = String::new;
        Self::Text {
            text: blank(),
            context: blank(),
            category: blank(),
            user_id: blank(),
        }
    }
 }
 /// Shared ingest metadata moved or cloned into each payload variant.
 ///
 /// `create_ingestion_payload` keeps one instance in an `Option`: payloads that
 /// are followed by another payload clone the fields, and the final payload
 /// takes them by value.
 struct IngestFields {
    /// Value written to the `context` field of every emitted payload.
    context: String,
    /// Value written to the `category` field of every emitted payload.
    category: String,
    /// Value written to the `user_id` field of every emitted payload.
    user_id: String,
 }
 /// Outcome of inspecting the optional ingest content before any file payloads
 /// are built.
 #[derive(Debug)]
 enum ParsedContent {
    /// No URL or text payload should be appended.
    Skip,
    /// Content that parsed as a URL, kept in its string form.
    Url(String),
    /// Content that did not parse as a URL and is kept as plain text.
    Text(String),
 }
 impl ParsedContent {
    /// Reports whether a URL or text payload will be appended after the files,
    /// i.e. whether this value is anything other than [`ParsedContent::Skip`].
    #[must_use]
    fn follows(&self) -> bool {
        match self {
            Self::Skip => false,
            Self::Url(_) | Self::Text(_) => true,
        }
    }
 }
 impl IngestionPayload {
    /// Creates ingestion payloads from the provided content, context, and files.
    ///
-    /// # Arguments
+    /// Files are emitted first. When both files and content are present, shared
-    /// * `content` - Optional textual content to be ingressed
+    /// metadata is cloned per file; otherwise the last file-only payload moves
-    /// * `context` - context for processing the ingress content
+    /// `context`, `category`, and `user_id` without cloning.
    /// * `category` - Category to classify the ingressed content
    /// * `files` - Vector of `FileInfo` objects containing information about uploaded files
    /// * `user_id` - Identifier of the user performing the ingress operation
    ///
-    /// # Returns
+    /// # Errors
-    /// * `Result<Vec<IngestionPayload>, AppError>` - On success, returns a vector of ingress objects
+    ///
-    ///   (one per file/content type). On failure, returns an `AppError`.
+    /// Returns [`AppError::NotFound`] when no valid files or content are provided.
    #[allow(clippy::similar_names)]
    pub fn create_ingestion_payload(
        content: Option<String>,
        context: String,
        category: String,
        files: Vec<FileInfo>,
-        user_id: &str,
+        user_id: String,
    ) -> Result<Vec<IngestionPayload>, AppError> {
-        // Initialize list
+        let parsed = Self::parse_content(content);
-        let mut object_list = Vec::new();
+        let content_follows = parsed.follows();
        let file_count = files.len();
        #[allow(clippy::arithmetic_side_effects)]
        let capacity = file_count + usize::from(content_follows);
        let mut object_list = Vec::with_capacity(capacity);
        let mut fields = Some(IngestFields {
            context,
            category,
            user_id,
        });
-        // Create a IngestionPayload from content if it exists, checking for URL or text
+        for (index, file) in files.into_iter().enumerate() {
-        if let Some(input_content) = content {
+            let is_last_file = index.saturating_add(1) == file_count;
-            match Url::parse(&input_content) {
+            if content_follows || !is_last_file {
-                Ok(url) => {
+                let Some(shared) = fields.as_ref() else {
-                    info!("Detected URL: {}", url);
+                    return Err(AppError::internal("shared ingest fields consumed early"));
-                    object_list.push(IngestionPayload::Url {
+                };
-                        url: url.to_string(),
+                object_list.push(Self::File {
-                        context: context.clone(),
+                    file_info: file,
-                        category: category.clone(),
+                    context: shared.context.clone(),
-                        user_id: user_id.into(),
+                    category: shared.category.clone(),
-                    });
+                    user_id: shared.user_id.clone(),
-                }
+                });
-                Err(_) => {
+            } else {
-                    if input_content.len() > 2 {
+                let Some(shared) = fields.take() else {
-                        info!("Treating input as plain text");
+                    return Err(AppError::internal("shared ingest fields missing for file"));
-                        object_list.push(IngestionPayload::Text {
+                };
-                            text: input_content.to_string(),
+                object_list.push(Self::File {
-                            context: context.clone(),
+                    file_info: file,
-                            category: category.clone(),
+                    context: shared.context,
-                            user_id: user_id.into(),
+                    category: shared.category,
-                        });
+                    user_id: shared.user_id,
-                    }
+                });
                }
            }
        }
-        for file in files {
+        if let ParsedContent::Url(url) = parsed {
-            object_list.push(IngestionPayload::File {
+            info!("Detected URL: {url}");
-                file_info: file,
+            let Some(shared) = fields.take() else {
-                context: context.clone(),
+                return Err(AppError::internal("shared ingest fields missing for url"));
-                category: category.clone(),
+            };
-                user_id: user_id.into(),
+            object_list.push(Self::Url {
-            })
+                url,
                context: shared.context,
                category: shared.category,
                user_id: shared.user_id,
            });
        } else if let ParsedContent::Text(text) = parsed {
            info!("Treating input as plain text");
            let Some(shared) = fields.take() else {
                return Err(AppError::internal("shared ingest fields missing for text"));
            };
            object_list.push(Self::Text {
                text,
                context: shared.context,
                category: shared.category,
                user_id: shared.user_id,
            });
        }
        // If no objects are constructed, we return Err
        if object_list.is_empty() {
            return Err(AppError::NotFound(
-                "No valid content or files provided".into(),
+                "no valid content or files provided".into(),
            ));
        }
        Ok(object_list)
    }
    /// Classifies the optional ingest content.
    ///
    /// Missing content, or content of two bytes or fewer, becomes
    /// [`ParsedContent::Skip`]. Anything longer is a [`ParsedContent::Url`] when
    /// `Url::parse` accepts it, and [`ParsedContent::Text`] otherwise.
    fn parse_content(content: Option<String>) -> ParsedContent {
        match content {
            // `len()` counts bytes, so the threshold is a byte length.
            Some(raw) if raw.len() > 2 => match Url::parse(&raw) {
                Ok(parsed_url) => ParsedContent::Url(parsed_url.to_string()),
                Err(_) => ParsedContent::Text(raw),
            },
            _ => ParsedContent::Skip,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use anyhow::{self, Context};
    use chrono::Utc;
    use super::*;
@@ -124,24 +198,23 @@ mod tests {
    }
    #[test]
-    fn test_create_ingestion_payload_with_url() {
+    fn test_create_ingestion_payload_with_url() -> anyhow::Result<()> {
        let url = "https://example.com";
        let context = "Process this URL";
        let category = "websites";
        let user_id = "user123";
        let files = vec![];
        let result = IngestionPayload::create_ingestion_payload(
            Some(url.to_string()),
            context.to_string(),
            category.to_string(),
-            files,
+            vec![],
-            user_id,
+            user_id.to_string(),
        )
-        .unwrap();
+        .with_context(|| "create_ingestion_payload".to_string())?;
        assert_eq!(result.len(), 1);
-        match &result[0] {
+        match result.first().context("expected one result")? {
            IngestionPayload::Url {
                url: payload_url,
                context: payload_context,
@@ -149,34 +222,34 @@ mod tests {
                user_id: payload_user_id,
            } => {
                // URL parser may normalize the URL by adding a trailing slash
-                assert!(payload_url == &url.to_string() || payload_url == &format!("{}/", url));
+                assert!(payload_url == &url.to_string() || payload_url == &format!("{url}/"));
                assert_eq!(payload_context, &context);
                assert_eq!(payload_category, &category);
                assert_eq!(payload_user_id, &user_id);
            }
-            _ => panic!("Expected Url variant"),
+            _ => anyhow::bail!("Expected Url variant"),
        }
        Ok(())
    }
    #[test]
-    fn test_create_ingestion_payload_with_text() {
+    fn test_create_ingestion_payload_with_text() -> anyhow::Result<()> {
        let text = "This is some text content";
        let context = "Process this text";
        let category = "notes";
        let user_id = "user123";
        let files = vec![];
        let result = IngestionPayload::create_ingestion_payload(
            Some(text.to_string()),
            context.to_string(),
            category.to_string(),
-            files,
+            vec![],
-            user_id,
+            user_id.to_string(),
        )
-        .unwrap();
+        .with_context(|| "create_ingestion_payload".to_string())?;
        assert_eq!(result.len(), 1);
-        match &result[0] {
+        match result.first().context("expected one result")? {
            IngestionPayload::Text {
                text: payload_text,
                context: payload_context,
@@ -188,12 +261,13 @@ mod tests {
                assert_eq!(payload_category, category);
                assert_eq!(payload_user_id, user_id);
            }
-            _ => panic!("Expected Text variant"),
+            _ => anyhow::bail!("Expected Text variant"),
        }
        Ok(())
    }
    #[test]
-    fn test_create_ingestion_payload_with_file() {
+    fn test_create_ingestion_payload_with_file() -> anyhow::Result<()> {
        let context = "Process this file";
        let category = "documents";
        let user_id = "user123";
@@ -204,36 +278,36 @@ mod tests {
        };
        let file_info: FileInfo = mock_file.into();
-        let files = vec![file_info.clone()];
+        let file_id = file_info.id.clone();
        let result = IngestionPayload::create_ingestion_payload(
            None,
            context.to_string(),
            category.to_string(),
-            files,
+            vec![file_info],
-            user_id,
+            user_id.to_string(),
-        )
+        )?;
        .unwrap();
        assert_eq!(result.len(), 1);
-        match &result[0] {
+        match result.first().context("expected one result")? {
            IngestionPayload::File {
                file_info: payload_file_info,
                context: payload_context,
                category: payload_category,
                user_id: payload_user_id,
            } => {
-                assert_eq!(payload_file_info.id, file_info.id);
+                assert_eq!(payload_file_info.id, file_id);
                assert_eq!(payload_context, context);
                assert_eq!(payload_category, category);
                assert_eq!(payload_user_id, user_id);
            }
-            _ => panic!("Expected File variant"),
+            _ => anyhow::bail!("Expected File variant"),
        }
        Ok(())
    }
    #[test]
-    fn test_create_ingestion_payload_with_url_and_file() {
+    fn test_create_ingestion_payload_with_url_and_file() -> anyhow::Result<()> {
        let url = "https://example.com";
        let context = "Process this data";
        let category = "mixed";
@@ -245,88 +319,207 @@ mod tests {
        };
        let file_info: FileInfo = mock_file.into();
-        let files = vec![file_info.clone()];
+        let file_id = file_info.id.clone();
        let result = IngestionPayload::create_ingestion_payload(
            Some(url.to_string()),
            context.to_string(),
            category.to_string(),
-            files,
+            vec![file_info],
-            user_id,
+            user_id.to_string(),
-        )
+        )?;
        .unwrap();
        assert_eq!(result.len(), 2);
-        // Check first item is URL
+        // Check first item is File (files processed first to minimize clones)
-        match &result[0] {
+        match result.first().context("expected first item")? {
            IngestionPayload::Url {
                url: payload_url, ..
            } => {
                // URL parser may normalize the URL by adding a trailing slash
                assert!(payload_url == &url.to_string() || payload_url == &format!("{}/", url));
            }
            _ => panic!("Expected first item to be Url variant"),
        }
        // Check second item is File
        match &result[1] {
            IngestionPayload::File {
                file_info: payload_file_info,
                ..
            } => {
-                assert_eq!(payload_file_info.id, file_info.id);
+                assert_eq!(payload_file_info.id, file_id);
            }
-            _ => panic!("Expected second item to be File variant"),
+            _ => anyhow::bail!("Expected first item to be File variant"),
        }
        // Check second item is URL
        match result.get(1).context("expected second item")? {
            IngestionPayload::Url {
                url: payload_url, ..
            } => {
                // URL parser may normalize the URL by adding a trailing slash
                assert!(payload_url == &url.to_string() || payload_url == &format!("{url}/"));
            }
            _ => anyhow::bail!("Expected second item to be Url variant"),
        }
        Ok(())
    }
    #[test]
-    fn test_create_ingestion_payload_empty_input() {
+    fn test_create_ingestion_payload_empty_input() -> anyhow::Result<()> {
        let context = "Process something";
        let category = "empty";
        let user_id = "user123";
-        let files = vec![];
+
        let result = IngestionPayload::create_ingestion_payload(
            None,
            context.to_string(),
            category.to_string(),
            vec![],
            user_id.to_string(),
        );
        assert!(result.is_err());
        match result {
            Err(AppError::NotFound(msg)) => {
                assert_eq!(msg, "no valid content or files provided");
            }
            _ => anyhow::bail!("Expected NotFound error"),
        }
        Ok(())
    }
    /// Empty text with no files yields no payloads, which must surface as
    /// `AppError::NotFound` with the documented message.
    #[test]
    fn test_create_ingestion_payload_with_empty_text() -> anyhow::Result<()> {
        let text = ""; // Empty text
        let outcome = IngestionPayload::create_ingestion_payload(
            Some(text.to_string()),
            "Process this".to_string(),
            "notes".to_string(),
            vec![],
            "user123".to_string(),
        );
        assert!(outcome.is_err());
        let Err(AppError::NotFound(msg)) = outcome else {
            anyhow::bail!("Expected NotFound error");
        };
        assert_eq!(msg, "no valid content or files provided");
        Ok(())
    }
    /// One file plus plain-text content yields a `File` payload followed by a
    /// `Text` payload, each carrying the shared metadata.
    #[test]
    fn test_create_ingestion_payload_with_file_and_text() -> anyhow::Result<()> {
        let (text, context, category, user_id) = ("plain notes", "ctx", "cat", "user123");
        let mock = MockFileInfo {
            id: "file1".to_string(),
        };
        let file_info: FileInfo = mock.into();

        let payloads = IngestionPayload::create_ingestion_payload(
            Some(text.to_string()),
            context.to_string(),
            category.to_string(),
            vec![file_info],
            user_id.to_string(),
        )?;
        assert_eq!(payloads.len(), 2);
        let first = payloads.first().expect("expected first payload");
        let second = payloads.get(1).expect("expected second payload");

        // Files are emitted first, so the file payload must lead.
        let IngestionPayload::File {
            file_info: payload_file,
            context: file_context,
            ..
        } = first
        else {
            anyhow::bail!("expected File then Text");
        };
        let IngestionPayload::Text {
            text: payload_text,
            context: text_context,
            category: text_category,
            user_id: text_user_id,
        } = second
        else {
            anyhow::bail!("expected File then Text");
        };

        assert_eq!(payload_file.id, "file1");
        assert_eq!(file_context, context);
        assert_eq!(payload_text, text);
        assert_eq!(text_context, context);
        assert_eq!(text_category, category);
        assert_eq!(text_user_id, user_id);
        Ok(())
    }
    /// Content of two bytes is skipped, so a single file alongside it yields
    /// exactly one `File` payload carrying the shared metadata.
    #[test]
    fn test_create_ingestion_payload_short_content_with_file_only_yields_file() -> anyhow::Result<()>
    {
        let (context, category, user_id) = ("ctx", "cat", "user123");
        let mock = MockFileInfo {
            id: "file1".to_string(),
        };
        let only_file: FileInfo = mock.into();

        let payloads = IngestionPayload::create_ingestion_payload(
            Some("ab".to_string()),
            context.to_string(),
            category.to_string(),
            vec![only_file],
            user_id.to_string(),
        )?;
        assert_eq!(payloads.len(), 1);

        let sole_payload = payloads.first().context("expected one file payload")?;
        let IngestionPayload::File {
            file_info: payload_file,
            context: payload_context,
            category: payload_category,
            user_id: payload_user_id,
        } = sole_payload
        else {
            anyhow::bail!("expected File variant only");
        };
        assert_eq!(payload_file.id, "file1");
        assert_eq!(payload_context, context);
        assert_eq!(payload_category, category);
        assert_eq!(payload_user_id, user_id);
        Ok(())
    }
    #[test]
    fn test_create_ingestion_payload_two_files_without_content() -> anyhow::Result<()> {
        let context = "ctx";
        let category = "cat";
        let user_id = "user123";
        let files = vec![
            MockFileInfo {
                id: "file1".to_string(),
            }
            .into(),
            MockFileInfo {
                id: "file2".to_string(),
            }
            .into(),
        ];
        let result = IngestionPayload::create_ingestion_payload(
            None,
            context.to_string(),
            category.to_string(),
            files,
-            user_id,
+            user_id.to_string(),
-        );
+        )?;
-        assert!(result.is_err());
+        assert_eq!(result.len(), 2);
-        match result {
+        assert!(matches!(
-            Err(AppError::NotFound(msg)) => {
+            result.first(),
-                assert_eq!(msg, "No valid content or files provided");
+            Some(IngestionPayload::File { .. })
-            }
+        ));
-            _ => panic!("Expected NotFound error"),
+        assert!(matches!(result.get(1), Some(IngestionPayload::File { .. })));
-        }
+        Ok(())
    }
    #[test]
    fn test_create_ingestion_payload_with_empty_text() {
        let text = ""; // Empty text
        let context = "Process this";
        let category = "notes";
        let user_id = "user123";
        let files = vec![];
        let result = IngestionPayload::create_ingestion_payload(
            Some(text.to_string()),
            context.to_string(),
            category.to_string(),
            files,
            user_id,
        );
        assert!(result.is_err());
        match result {
            Err(AppError::NotFound(msg)) => {
                assert_eq!(msg, "No valid content or files provided");
            }
            _ => panic!("Expected NotFound error"),
        }
    }
 }
@@ -1,6 +1,7 @@
-use std::time::Duration;
+use std::{sync::Arc, time::Duration};
 use chrono::Duration as ChronoDuration;
 use futures::future::try_join_all;
 use state_machines::state_machine;
 use surrealdb::sql::Datetime as SurrealDatetime;
 use uuid::Uuid;
@@ -13,7 +14,7 @@ pub const MAX_ATTEMPTS: u32 = 3;
 pub const DEFAULT_LEASE_SECS: i64 = 300;
 pub const DEFAULT_PRIORITY: i32 = 0;
-#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
+#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
 pub enum TaskState {
    #[serde(rename = "Pending")]
    #[default]
@@ -33,6 +34,7 @@ pub enum TaskState {
 }
 impl TaskState {
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            TaskState::Pending => "Pending",
@@ -45,6 +47,7 @@ impl TaskState {
        }
    }
    #[must_use]
    pub fn is_terminal(&self) -> bool {
        matches!(
            self,
@@ -52,6 +55,7 @@ impl TaskState {
        )
    }
    #[must_use]
    pub fn display_label(&self) -> &'static str {
        match self {
            TaskState::Pending => "Pending",
@@ -65,12 +69,16 @@ impl TaskState {
    }
 }
 /// Information about an error that occurred during task processing.
 ///
 /// `code` is optional while `message` is always present; the derived `Default`
 /// yields `code: None` and an empty `message`.
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq, Default)]
 pub struct TaskErrorInfo {
    /// Machine-readable error code (e.g., `"pipeline_error"`).
    pub code: Option<String>,
    /// Human-readable error description.
    pub message: String,
 }
 /// Internal events that drive the task state machine transitions.
 #[derive(Debug, Clone, Copy)]
 enum TaskTransition {
    StartProcessing,
@@ -82,7 +90,7 @@ enum TaskTransition {
 }
 impl TaskTransition {
-    fn as_str(&self) -> &'static str {
+    fn as_str(self) -> &'static str {
        match self {
            TaskTransition::StartProcessing => "start_processing",
            TaskTransition::Succeed => "succeed",
@@ -132,34 +140,20 @@ mod lifecycle {
    /// Builds a lifecycle machine typed as `Pending`, carrying a unit `()` context.
    pub(super) fn pending() -> TaskLifecycleMachine<(), Pending> {
        TaskLifecycleMachine::new(())
    }
    pub(super) fn reserved() -> TaskLifecycleMachine<(), Reserved> {
        pending()
            .reserve()
            .expect("reserve transition from Pending should exist")
    }
    pub(super) fn processing() -> TaskLifecycleMachine<(), Processing> {
        reserved()
            .start_processing()
            .expect("start_processing transition from Reserved should exist")
    }
    pub(super) fn failed() -> TaskLifecycleMachine<(), Failed> {
        processing()
            .fail()
            .expect("fail transition from Processing should exist")
    }
 }
-fn invalid_transition(state: &TaskState, event: TaskTransition) -> AppError {
+fn invalid_transition(state: TaskState, event: TaskTransition) -> AppError {
    AppError::Validation(format!(
-        "Invalid task transition: {} -> {}",
+        "invalid task transition: {} -> {}",
        state.as_str(),
        event.as_str()
    ))
 }
 /// Produces the owned worker id to bind into a query: a copy of the given id,
 /// or an empty string when no worker id is set.
 fn worker_id_for_bind(worker_id: Option<&String>) -> String {
    match worker_id {
        Some(id) => id.clone(),
        None => String::new(),
    }
 }
 stored_object!(IngestionTask, "ingestion_task", {
    content: IngestionPayload,
    state: TaskState,
@@ -188,6 +182,7 @@ stored_object!(IngestionTask, "ingestion_task", {
 });
 impl IngestionTask {
    #[must_use]
    pub fn new(content: IngestionPayload, user_id: String) -> Self {
        let now = chrono::Utc::now();
@@ -211,33 +206,85 @@ impl IngestionTask {
        }
    }
    /// Returns `true` while the attempts made so far are still below the task's
    /// `max_attempts` limit.
    #[must_use]
    pub fn can_retry(&self) -> bool {
        self.max_attempts > self.attempts
    }
-    pub fn lease_duration(&self) -> Duration {
+    /// Moves the payload out of the task, leaving an empty placeholder behind.
-        Duration::from_secs(self.lease_duration_secs.max(0) as u64)
+    ///
    /// The task's `content` is only needed while driving the pipeline; the
    /// terminal `user_id`, `state`, and bookkeeping fields are stored separately,
    /// so replacing it with the default placeholder avoids cloning large payloads.
    #[must_use]
    pub fn take_content(&mut self) -> IngestionPayload {
        std::mem::take(&mut self.content)
    }
    /// Returns the task's lease length as a [`Duration`].
    ///
    /// Negative `lease_duration_secs` values are clamped to zero, and a value
    /// that cannot be represented as `u64` also falls back to zero.
    #[must_use]
    pub fn lease_duration(&self) -> Duration {
        let clamped_secs = self.lease_duration_secs.max(0);
        let secs = u64::try_from(clamped_secs).unwrap_or(0);
        Duration::from_secs(secs)
    }
    /// Create a new task and immediately persist it to the database.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Database` if the store operation fails.
    /// Returns `AppError::internal` if the database returns no stored record.
    pub async fn create_and_add_to_db(
        content: IngestionPayload,
-        user_id: String,
+        user_id: impl AsRef<str>,
        db: &SurrealDbClient,
    ) -> Result<IngestionTask, AppError> {
-        let task = Self::new(content, user_id);
+        let task = Self::new(content, user_id.as_ref().to_string());
-        db.store_item(task.clone()).await?;
+        db.store_item(task)
-        Ok(task)
+            .await?
            .ok_or_else(|| AppError::internal("ingestion task store returned no record"))
    }
    /// Creates one task per payload and persists them all concurrently (one
    /// `CREATE` per payload).
    ///
    /// Intended for ingests that produce several payloads (files plus URL/text).
    /// For a single payload, call [`Self::create_and_add_to_db`] instead. An empty
    /// `contents` returns an empty vector without touching the database.
    ///
    /// # Errors
    ///
    /// Returns the first [`AppError`] from any failed store, same as [`try_join_all`].
    pub async fn create_all_and_add_to_db(
        contents: Vec<IngestionPayload>,
        user_id: impl AsRef<str>,
        db: &SurrealDbClient,
    ) -> Result<Vec<IngestionTask>, AppError> {
        if contents.is_empty() {
            return Ok(Vec::new());
        }

        // Each future owns its own handles, so none of them borrows from this frame.
        let shared_user_id = Arc::new(user_id.as_ref().to_string());
        let owned_db = db.clone();
        let pending_stores = contents.into_iter().map(|payload| {
            let task_user_id = Arc::clone(&shared_user_id);
            let task_db = owned_db.clone();
            async move {
                Self::create_and_add_to_db(payload, task_user_id.as_ref(), &task_db).await
            }
        });

        try_join_all(pending_stores).await
    }
    /// Claim the next ready task for processing.
    ///
    /// Atomically reserves a task by transitioning it from a candidate state to `Reserved`.
    /// Returns `Ok(None)` if no task is ready to claim.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Database` if the update query fails.
    pub async fn claim_next_ready(
        db: &SurrealDbClient,
        worker_id: &str,
        now: chrono::DateTime<chrono::Utc>,
        lease_duration: Duration,
    ) -> Result<Option<IngestionTask>, AppError> {
        debug_assert!(lifecycle::pending().reserve().is_ok());
        debug_assert!(lifecycle::failed().reserve().is_ok());
        const CLAIM_QUERY: &str = r#"
            UPDATE (
                SELECT * FROM type::table($table)
@@ -267,6 +314,11 @@ impl IngestionTask {
            RETURN *;
        "#;
        debug_assert!(lifecycle::pending().reserve().is_ok());
        debug_assert!(lifecycle::pending().reserve().is_ok_and(|m| m
            .start_processing()
            .is_ok_and(|m| m.fail().is_ok_and(|m| m.reserve().is_ok()))));
        let mut result = db
            .client
            .query(CLAIM_QUERY)
@@ -291,13 +343,22 @@ impl IngestionTask {
            .bind(("reserved_state", TaskState::Reserved.as_str()))
            .bind(("now", SurrealDatetime::from(now)))
            .bind(("worker_id", worker_id.to_string()))
-            .bind(("lease_secs", lease_duration.as_secs() as i64))
+            .bind((
                "lease_secs",
                i64::try_from(lease_duration.as_secs()).unwrap_or(i64::MAX),
            ))
            .await?;
        let task: Option<IngestionTask> = result.take(0)?;
        Ok(task)
    }
    /// Transition this task from `Reserved` to `Processing`.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is not in `Reserved` state
    /// or belongs to a different worker. Returns `AppError::Database` on DB failure.
    pub async fn mark_processing(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
        const START_PROCESSING_QUERY: &str = r#"
            UPDATE type::thing($table, $id)
@@ -309,6 +370,7 @@ impl IngestionTask {
        "#;
        let now = chrono::Utc::now();
        let worker_id = worker_id_for_bind(self.worker_id.as_ref());
        let mut result = db
            .client
            .query(START_PROCESSING_QUERY)
@@ -317,13 +379,19 @@ impl IngestionTask {
            .bind(("processing", TaskState::Processing.as_str()))
            .bind(("reserved", TaskState::Reserved.as_str()))
            .bind(("now", SurrealDatetime::from(now)))
-            .bind(("worker_id", self.worker_id.clone().unwrap_or_default()))
+            .bind(("worker_id", worker_id))
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::StartProcessing))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::StartProcessing))
    }
    /// Transition this task from `Processing` to `Succeeded`.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is not in `Processing` state
    /// or belongs to a different worker. Returns `AppError::Database` on DB failure.
    pub async fn mark_succeeded(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
        const COMPLETE_QUERY: &str = r#"
            UPDATE type::thing($table, $id)
@@ -340,6 +408,7 @@ impl IngestionTask {
        "#;
        let now = chrono::Utc::now();
        let worker_id = worker_id_for_bind(self.worker_id.as_ref());
        let mut result = db
            .client
            .query(COMPLETE_QUERY)
@@ -348,23 +417,27 @@ impl IngestionTask {
            .bind(("succeeded", TaskState::Succeeded.as_str()))
            .bind(("processing", TaskState::Processing.as_str()))
            .bind(("now", SurrealDatetime::from(now)))
-            .bind(("worker_id", self.worker_id.clone().unwrap_or_default()))
+            .bind(("worker_id", worker_id))
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::Succeed))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Succeed))
    }
    /// Transition this task from `Processing` to `Failed`.
    ///
    /// The task will be rescheduled for retry after `retry_delay`.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is not in `Processing` state
    /// or belongs to a different worker. Returns `AppError::Database` on DB failure.
    pub async fn mark_failed(
        &self,
        error: TaskErrorInfo,
        retry_delay: Duration,
        db: &SurrealDbClient,
    ) -> Result<IngestionTask, AppError> {
        let now = chrono::Utc::now();
        let retry_at = now
            + ChronoDuration::from_std(retry_delay).unwrap_or_else(|_| ChronoDuration::seconds(30));
        const FAIL_QUERY: &str = r#"
            UPDATE type::thing($table, $id)
            SET state = $failed,
@@ -379,6 +452,15 @@ impl IngestionTask {
            RETURN *;
        "#;
        let now = chrono::Utc::now();
        let retry_at = now
            .checked_add_signed(
                ChronoDuration::from_std(retry_delay)
                    .unwrap_or_else(|_| ChronoDuration::seconds(30)),
            )
            .unwrap_or(now);
        let worker_id = worker_id_for_bind(self.worker_id.as_ref());
        let mut result = db
            .client
            .query(FAIL_QUERY)
@@ -390,13 +472,19 @@ impl IngestionTask {
            .bind(("retry_at", SurrealDatetime::from(retry_at)))
            .bind(("error_code", error.code.clone()))
            .bind(("error_message", error.message.clone()))
-            .bind(("worker_id", self.worker_id.clone().unwrap_or_default()))
+            .bind(("worker_id", worker_id))
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::Fail))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Fail))
    }
    /// Transition this task from `Failed` to `DeadLetter`.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is not in `Failed` state.
    /// Returns `AppError::Database` on DB failure.
    pub async fn mark_dead_letter(
        &self,
        error: TaskErrorInfo,
@@ -430,9 +518,15 @@ impl IngestionTask {
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::DeadLetter))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::DeadLetter))
    }
    /// Transition this task to `Cancelled` from any non-terminal state.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is in a terminal state.
    /// Returns `AppError::Database` on DB failure.
    pub async fn mark_cancelled(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
        const CANCEL_QUERY: &str = r#"
            UPDATE type::thing($table, $id)
@@ -463,9 +557,15 @@ impl IngestionTask {
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::Cancel))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Cancel))
    }
    /// Release a reserved task back to `Pending` state.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` if the task is not in `Reserved` state.
    /// Returns `AppError::Database` on DB failure.
    pub async fn release(&self, db: &SurrealDbClient) -> Result<IngestionTask, AppError> {
        const RELEASE_QUERY: &str = r#"
            UPDATE type::thing($table, $id)
@@ -489,9 +589,14 @@ impl IngestionTask {
            .await?;
        let updated: Option<IngestionTask> = result.take(0)?;
-        updated.ok_or_else(|| invalid_transition(&self.state, TaskTransition::Release))
+        updated.ok_or_else(|| invalid_transition(self.state, TaskTransition::Release))
    }
    /// Retrieve all non-terminal tasks across active states.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Database` if the query fails.
    pub async fn get_unfinished_tasks(
        db: &SurrealDbClient,
    ) -> Result<Vec<IngestionTask>, AppError> {
@@ -520,8 +625,12 @@ impl IngestionTask {
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use anyhow::{self, Context};
    use super::*;
    use crate::storage::types::ingestion_payload::IngestionPayload;
    use crate::test_utils::setup_test_db;
    fn create_payload(user_id: &str) -> IngestionPayload {
        IngestionPayload::Text {
@@ -532,16 +641,12 @@ mod tests {
        }
    }
-    async fn memory_db() -> SurrealDbClient {
+    async fn memory_db() -> anyhow::Result<SurrealDbClient> {
-        let namespace = "test_ns";
+        setup_test_db().await
        let database = Uuid::new_v4().to_string();
        SurrealDbClient::memory(namespace, &database)
            .await
            .expect("in-memory surrealdb")
    }
    #[tokio::test]
-    async fn test_new_task_defaults() {
+    async fn test_new_task_defaults() -> anyhow::Result<()> {
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload.clone(), user_id.to_string());
@@ -553,73 +658,140 @@ mod tests {
        assert_eq!(task.max_attempts, MAX_ATTEMPTS);
        assert!(task.locked_at.is_none());
        assert!(task.worker_id.is_none());
        Ok(())
    }
    /// `take_content` must hand back the stored payload and leave the default
    /// payload behind on the task.
    #[test]
    fn test_take_content_moves_payload_and_leaves_default() {
        let owner = "user123";
        let expected = create_payload(owner);
        let mut task = IngestionTask::new(expected.clone(), owner.to_owned());
        let extracted = task.take_content();
        assert_eq!(extracted, expected);
        assert_eq!(task.content, IngestionPayload::default());
    }
    #[tokio::test]
-    async fn test_create_and_store_task() {
+    async fn test_create_all_and_add_to_db_empty() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let tasks = IngestionTask::create_all_and_add_to_db(vec![], "user123", &db).await?;
        assert!(tasks.is_empty());
        Ok(())
    }
    /// Storing two payloads in one call yields two distinct tasks, each of which
    /// can be read back in the `Pending` state for the same user.
    #[tokio::test]
    async fn test_create_all_and_add_to_db_stores_multiple() -> anyhow::Result<()> {
        let db = memory_db().await?;
        let user_id = "user123";
        let second_payload = IngestionPayload::Text {
            text: String::from("second payload"),
            context: String::from("ctx"),
            category: String::from("cat"),
            user_id: String::from(user_id),
        };
        let payloads = vec![create_payload(user_id), second_payload];
        let created = IngestionTask::create_all_and_add_to_db(payloads, user_id, &db).await?;
        assert_eq!(created.len(), 2);
        let first = created.first().expect("expected first task");
        let second = created.get(1).expect("expected second task");
        assert_ne!(first.id, second.id);
        for expected in &created {
            // Each created task must be retrievable by its own id.
            let lookup: Option<IngestionTask> =
                db.get_item::<IngestionTask>(&expected.id).await?;
            let persisted =
                lookup.with_context(|| format!("task {} should exist", expected.id))?;
            assert_eq!(persisted.id, expected.id);
            assert_eq!(persisted.state, TaskState::Pending);
            assert_eq!(persisted.user_id, user_id);
        }
        Ok(())
    }
    #[tokio::test]
    async fn test_create_and_store_task() -> anyhow::Result<()> {
        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let created =
            IngestionTask::create_and_add_to_db(payload.clone(), user_id.to_string(), &db)
                .await
-                .expect("store");
+                .with_context(|| "store".to_string())?;
        let stored: Option<IngestionTask> = db
            .get_item::<IngestionTask>(&created.id)
            .await
-            .expect("fetch");
+            .with_context(|| "fetch".to_string())?;
-        let stored = stored.expect("task exists");
+        let stored = stored.with_context(|| "task exists".to_string())?;
        assert_eq!(stored.id, created.id);
        assert_eq!(stored.state, TaskState::Pending);
        assert_eq!(stored.attempts, 0);
        Ok(())
    }
    #[tokio::test]
-    async fn test_claim_and_transition() {
+    async fn test_claim_and_transition() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload, user_id.to_string());
-        db.store_item(task.clone()).await.expect("store");
+        db.store_item(task.clone())
            .await
            .with_context(|| "store".to_string())?;
        let worker_id = "worker-1";
        let now = chrono::Utc::now();
        let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_secs(60))
            .await
-            .expect("claim");
+            .with_context(|| "claim".to_string())?
            .with_context(|| "task claimed".to_string())?;
        let claimed = claimed.expect("task claimed");
        assert_eq!(claimed.state, TaskState::Reserved);
        assert_eq!(claimed.worker_id.as_deref(), Some(worker_id));
-        let processing = claimed.mark_processing(&db).await.expect("processing");
+        let processing = claimed
            .mark_processing(&db)
            .await
            .with_context(|| "processing".to_string())?;
        assert_eq!(processing.state, TaskState::Processing);
-        let succeeded = processing.mark_succeeded(&db).await.expect("succeeded");
+        let succeeded = processing
            .mark_succeeded(&db)
            .await
            .with_context(|| "succeeded".to_string())?;
        assert_eq!(succeeded.state, TaskState::Succeeded);
        assert!(succeeded.worker_id.is_none());
        assert!(succeeded.locked_at.is_none());
        Ok(())
    }
    #[tokio::test]
-    async fn test_fail_and_dead_letter() {
+    async fn test_fail_and_dead_letter() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload, user_id.to_string());
-        db.store_item(task.clone()).await.expect("store");
+        db.store_item(task.clone())
            .await
            .with_context(|| "store".to_string())?;
        let worker_id = "worker-dead";
        let now = chrono::Utc::now();
        let claimed = IngestionTask::claim_next_ready(&db, worker_id, now, Duration::from_secs(60))
            .await
-            .expect("claim")
+            .with_context(|| "claim".to_string())?
-            .expect("claimed");
+            .with_context(|| "claimed".to_string())?;
-        let processing = claimed.mark_processing(&db).await.expect("processing");
+        let processing = claimed
            .mark_processing(&db)
            .await
            .with_context(|| "processing".to_string())?;
        let error_info = TaskErrorInfo {
            code: Some("pipeline_error".into()),
@@ -629,7 +801,7 @@ mod tests {
        let failed = processing
            .mark_failed(error_info.clone(), Duration::from_secs(30), &db)
            .await
-            .expect("failed update");
+            .with_context(|| "failed update".to_string())?;
        assert_eq!(failed.state, TaskState::Failed);
        assert_eq!(failed.error_message.as_deref(), Some("failed"));
        assert!(failed.worker_id.is_none());
@@ -639,24 +811,26 @@ mod tests {
        let dead = failed
            .mark_dead_letter(error_info.clone(), &db)
            .await
-            .expect("dead letter");
+            .with_context(|| "dead letter".to_string())?;
        assert_eq!(dead.state, TaskState::DeadLetter);
        assert_eq!(dead.error_message.as_deref(), Some("failed"));
        Ok(())
    }
    #[tokio::test]
-    async fn test_mark_processing_requires_reservation() {
+    async fn test_mark_processing_requires_reservation() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload.clone(), user_id.to_string());
-        db.store_item(task.clone()).await.expect("store");
+        db.store_item(task.clone())
        let err = task
            .mark_processing(&db)
            .await
-            .expect_err("processing should fail without reservation");
+            .with_context(|| "store".to_string())?;
        let Err(err) = task.mark_processing(&db).await else {
            anyhow::bail!("processing should fail without reservation")
        };
        match err {
            AppError::Validation(message) => {
@@ -665,20 +839,23 @@ mod tests {
                    "unexpected message: {message}"
                );
            }
-            other => panic!("expected validation error, got {other:?}"),
+            other => anyhow::bail!("expected validation error, got {other:?}"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_mark_failed_requires_processing() {
+    async fn test_mark_failed_requires_processing() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload.clone(), user_id.to_string());
-        db.store_item(task.clone()).await.expect("store");
+        db.store_item(task.clone())
            .await
            .with_context(|| "store".to_string())?;
-        let err = task
+        let Err(err) = task
            .mark_failed(
                TaskErrorInfo {
                    code: None,
@@ -688,7 +865,9 @@ mod tests {
                &db,
            )
            .await
-            .expect_err("failing should require processing state");
+        else {
            anyhow::bail!("failing should require processing state")
        };
        match err {
            AppError::Validation(message) => {
@@ -697,23 +876,25 @@ mod tests {
                    "unexpected message: {message}"
                );
            }
-            other => panic!("expected validation error, got {other:?}"),
+            other => anyhow::bail!("expected validation error, got {other:?}"),
        }
        Ok(())
    }
    #[tokio::test]
-    async fn test_release_requires_reservation() {
+    async fn test_release_requires_reservation() -> anyhow::Result<()> {
-        let db = memory_db().await;
+        let db = memory_db().await?;
        let user_id = "user123";
        let payload = create_payload(user_id);
        let task = IngestionTask::new(payload.clone(), user_id.to_string());
-        db.store_item(task.clone()).await.expect("store");
+        db.store_item(task.clone())
        let err = task
            .release(&db)
            .await
-            .expect_err("release should require reserved state");
+            .with_context(|| "store".to_string())?;
        let Err(err) = task.release(&db).await else {
            anyhow::bail!("release should require reserved state")
        };
        match err {
            AppError::Validation(message) => {
@@ -722,7 +903,8 @@ mod tests {
                    "unexpected message: {message}"
                );
            }
-            other => panic!("expected validation error, got {other:?}"),
+            other => anyhow::bail!("expected validation error, got {other:?}"),
        }
        Ok(())
    }
 }
@@ -0,0 +1,423 @@
 use std::collections::HashMap;
 use surrealdb::RecordId;
 use crate::{
    error::AppError,
    storage::{db::SurrealDbClient, types::EmbeddingRecord},
    stored_object,
 };
 // Embedding row linked to a knowledge entity, declared through
 // `stored_object!` with the table name "knowledge_entity_embedding".
 // NOTE(review): the struct literal in `new` also sets `id`, `created_at` and
 // `updated_at`, so the macro presumably injects those fields — confirm
 // against the macro definition.
 stored_object!(KnowledgeEntityEmbedding, "knowledge_entity_embedding", {
    // Record id of the entity this embedding row is linked to.
    entity_id: RecordId,
    // The embedding vector itself.
    embedding: Vec<f32>,
    /// Denormalized source id for bulk deletes
    source_id: String,
    /// Denormalized user id for query scoping
    user_id: String
 });
 impl EmbeddingRecord for KnowledgeEntityEmbedding {
    /// Name of the field that links an embedding row to its entity.
    fn link_field() -> &'static str {
        "entity_id"
    }
    /// Name of the index associated with this embedding table.
    fn index_name() -> &'static str {
        "idx_embedding_knowledge_entity_embedding"
    }
    /// Denormalized source id stored on the row.
    fn source_id(&self) -> &str {
        self.source_id.as_str()
    }
    /// Denormalized user id stored on the row.
    fn user_id(&self) -> &str {
        self.user_id.as_str()
    }
    /// The stored embedding vector.
    fn embedding(&self) -> &[f32] {
        self.embedding.as_slice()
    }
    /// Build an embedding row for the entity `entity_id` in `entity_table`.
    ///
    /// The row's own `id` is the entity id, and `created_at` / `updated_at`
    /// share a single timestamp taken at construction time.
    fn new(
        entity_id: &str,
        source_id: String,
        embedding: Vec<f32>,
        user_id: String,
        entity_table: &str,
    ) -> Self {
        let linked_entity = RecordId::from_table_key(entity_table, entity_id);
        let timestamp = Utc::now();
        Self {
            id: entity_id.to_string(),
            created_at: timestamp,
            updated_at: timestamp,
            entity_id: linked_entity,
            embedding,
            source_id,
            user_id,
        }
    }
 }
 impl KnowledgeEntityEmbedding {
    /// Get embeddings for multiple entities in batch
    pub async fn get_by_entity_ids(
        entity_ids: &[RecordId],
        db: &SurrealDbClient,
    ) -> Result<HashMap<String, Vec<f32>>, AppError> {
        if entity_ids.is_empty() {
            return Ok(HashMap::new());
        }
        let query = format!(
            "SELECT * FROM {} WHERE entity_id INSIDE $entity_ids",
            Self::table_name()
        );
        let mut result = db
            .client
            .query(query)
            .bind(("entity_ids", entity_ids.to_vec()))
            .await
            .map_err(AppError::from)?;
        let embeddings: Vec<Self> = result.take(0).map_err(AppError::from)?;
        Ok(embeddings
            .into_iter()
            .map(|e| (e.entity_id.key().to_string(), e.embedding))
            .collect())
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use super::*;
    use crate::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
    use crate::test_utils::{prepare_knowledge_entity_test_db, setup_test_db};
    use anyhow::{self, Context};
    use chrono::Utc;
    use surrealdb::Value as SurrealValue;
    /// Build a `Document`-typed `KnowledgeEntity` fixture with a caller-chosen id,
    /// source id and user id; name and description are fixed placeholder strings.
    fn build_knowledge_entity_with_id(
        key: &str,
        source_id: &str,
        user_id: &str,
    ) -> KnowledgeEntity {
        let id = key.to_string();
        let created_at = Utc::now();
        let updated_at = Utc::now();
        KnowledgeEntity {
            id,
            created_at,
            updated_at,
            source_id: source_id.to_string(),
            name: String::from("Test entity"),
            description: String::from("Desc"),
            entity_type: KnowledgeEntityType::Document,
            metadata: None,
            user_id: user_id.to_string(),
        }
    }
    /// The embedding row reuses the entity key as its own `id`.
    #[test]
    fn new_uses_entity_id_as_record_id() {
        let entity_key = "entity-abc";
        let vector = vec![0.1, 0.2];
        let record = KnowledgeEntityEmbedding::new(
            entity_key,
            String::from("source-1"),
            vector,
            String::from("user-1"),
            KnowledgeEntity::table_name(),
        );
        assert_eq!(record.id, entity_key);
    }
    /// A three-element vector checked against an expected dimension of 2 must be
    /// rejected with a validation error.
    #[test]
    fn validate_dimension_rejects_mismatch() {
        let outcome = KnowledgeEntityEmbedding::validate_dimension(&[0.1, 0.2, 0.3], 2);
        let rejection = outcome.expect_err("expected dimension mismatch");
        assert!(matches!(rejection, AppError::Validation(_)));
    }
    /// Storing an entity with its embedding makes the embedding retrievable by the
    /// entity's record id, with every stored field intact.
    #[tokio::test]
    async fn test_create_and_get_by_entity_id() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let user_id = "user_ke";
        let entity_key = "entity-1";
        let source_id = "source-ke";
        let embedding_vec = vec![0.11_f32, 0.22, 0.33];
        let entity = build_knowledge_entity_with_id(entity_key, source_id, user_id);
        KnowledgeEntity::store_with_embedding(entity.clone(), embedding_vec.clone(), 3, &db)
            .await
            .context("Failed to store entity with embedding")?;
        let entity_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity.id);
        let lookup = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity_rid)
            .await
            .context("Failed to get embedding by entity_id")?;
        let fetched = lookup.context("Expected embedding to exist")?;
        assert_eq!(fetched.id, entity_key);
        assert_eq!(fetched.user_id, user_id);
        assert_eq!(fetched.source_id, source_id);
        assert_eq!(fetched.entity_id, entity_rid);
        assert_eq!(fetched.embedding, embedding_vec);
        Ok(())
    }
    /// Deleting by record id removes an embedding that was present beforehand.
    #[tokio::test]
    async fn test_delete_by_entity_id() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let entity = build_knowledge_entity_with_id("entity-delete", "source-del", "user_ke");
        KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.5_f32, 0.6, 0.7], 3, &db)
            .await
            .context("Failed to store entity with embedding")?;
        let entity_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity.id);
        // Sanity check: the embedding exists before the delete.
        let before = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity_rid)
            .await
            .context("Failed to get embedding before delete")?;
        assert!(before.is_some());
        KnowledgeEntityEmbedding::delete_by_record_id(&db, &entity_rid)
            .await
            .context("Failed to delete by entity_id")?;
        let remaining = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity_rid)
            .await
            .context("Failed to get embedding after delete")?;
        assert!(remaining.is_none());
        Ok(())
    }
    /// `store_with_embedding` persists both the entity itself and a matching
    /// embedding row keyed by the entity id.
    #[tokio::test]
    async fn test_store_with_embedding_creates_entity_and_embedding() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let user_id = "user_store";
        let source_id = "source_store";
        let embedding = vec![0.2_f32, 0.3, 0.4];
        let entity = build_knowledge_entity_with_id("entity-store", source_id, user_id);
        KnowledgeEntity::store_with_embedding(entity.clone(), embedding.clone(), 3, &db)
            .await
            .context("Failed to store entity with embedding")?;
        let stored_entity: Option<KnowledgeEntity> = db
            .get_item(&entity.id)
            .await
            .context("Failed to get entity")?;
        assert!(stored_entity.is_some());
        let entity_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity.id);
        let stored_embedding = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity_rid)
            .await
            .context("Failed to fetch embedding")?
            .context("Expected embedding to exist")?;
        assert_eq!(stored_embedding.id, entity.id);
        assert_eq!(stored_embedding.user_id, user_id);
        assert_eq!(stored_embedding.source_id, source_id);
        assert_eq!(stored_embedding.entity_id, entity_rid);
        Ok(())
    }
    /// A two-element vector stored against an expected dimension of 3 must fail
    /// with a validation error.
    #[tokio::test]
    async fn test_store_with_embedding_rejects_wrong_dimension() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let entity = build_knowledge_entity_with_id("entity-dim", "source-dim", "user-dim");
        let too_short = vec![0.1, 0.2];
        let outcome = KnowledgeEntity::store_with_embedding(entity, too_short, 3, &db).await;
        assert!(matches!(outcome, Err(AppError::Validation(_))));
        Ok(())
    }
    /// Deleting by source id removes every embedding for that source and leaves
    /// embeddings belonging to other sources untouched.
    #[tokio::test]
    async fn test_delete_by_source_id() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let user_id = "user_ke";
        let source_id = "shared-ke";
        let other_source = "other-ke";
        let entity1 = build_knowledge_entity_with_id("entity-s1", source_id, user_id);
        let entity2 = build_knowledge_entity_with_id("entity-s2", source_id, user_id);
        let entity_other = build_knowledge_entity_with_id("entity-other", other_source, user_id);
        // Store all three fixtures in the same order as they were built.
        let fixtures = [
            (entity1.clone(), vec![1.0_f32, 1.1, 1.2]),
            (entity2.clone(), vec![2.0_f32, 2.1, 2.2]),
            (entity_other.clone(), vec![3.0_f32, 3.1, 3.2]),
        ];
        for (entity, vector) in fixtures {
            KnowledgeEntity::store_with_embedding(entity, vector, 3, &db)
                .await
                .context("Failed to store entity with embedding")?;
        }
        let entity1_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity1.id);
        let entity2_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity2.id);
        let other_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity_other.id);
        KnowledgeEntityEmbedding::delete_by_source_id(source_id, &db)
            .await
            .context("Failed to delete by source_id")?;
        let first_after = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity1_rid)
            .await
            .context("get entity1 embedding after delete")?;
        assert!(first_after.is_none());
        let second_after = KnowledgeEntityEmbedding::get_by_record_id(&db, &entity2_rid)
            .await
            .context("get entity2 embedding after delete")?;
        assert!(second_after.is_none());
        let other_after = KnowledgeEntityEmbedding::get_by_record_id(&db, &other_rid)
            .await
            .context("get other embedding after delete")?;
        assert!(other_after.is_some());
        Ok(())
    }
    /// After redefining the index with dimension 16, the table info must report
    /// an index definition containing the new dimension and cosine distance.
    #[tokio::test]
    async fn test_redefine_hnsw_index_updates_dimension() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        KnowledgeEntityEmbedding::redefine_hnsw_index(&db, 16)
            .await
            .context("failed to redefine index")?;
        let mut info_res = db
            .client
            .query("INFO FOR TABLE knowledge_entity_embedding;")
            .await
            .context("info query failed")?;
        let info: SurrealValue = info_res.take(0).context("failed to take info result")?;
        let info_json: serde_json::Value =
            serde_json::to_value(info).context("failed to convert info to json")?;
        // Walk Object -> indexes -> Object -> <index name> -> Strand; a missing
        // step falls back to the empty string so the assertions below fail
        // with a readable message instead of an early panic.
        let idx_sql = info_json
            .pointer("/Object/indexes/Object/idx_embedding_knowledge_entity_embedding/Strand")
            .and_then(serde_json::Value::as_str)
            .unwrap_or_default();
        assert!(
            idx_sql.contains("DIMENSION 16"),
            "expected index definition to contain new dimension, got: {idx_sql}"
        );
        assert!(
            idx_sql.contains("DIST COSINE"),
            "expected index definition to use cosine distance, got: {idx_sql}"
        );
        Ok(())
    }
    /// Selecting `entity_id` with `FETCH entity_id` should return the linked
    /// entity in place of the record id.
    #[tokio::test]
    async fn test_fetch_entity_via_record_id() -> anyhow::Result<()> {
        // Shape of one result row once `entity_id` has been fetched.
        #[derive(Deserialize)]
        struct Row {
            entity_id: KnowledgeEntity,
        }
        let db = prepare_knowledge_entity_test_db(3).await?;
        let user_id = "user_ke";
        let entity_key = "entity-fetch";
        let entity = build_knowledge_entity_with_id(entity_key, "source-fetch", user_id);
        KnowledgeEntity::store_with_embedding(entity.clone(), vec![0.7_f32, 0.8, 0.9], 3, &db)
            .await
            .context("Failed to store entity with embedding")?;
        let entity_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity.id);
        let mut response = db
            .client
            .query(
                "SELECT entity_id FROM knowledge_entity_embedding WHERE entity_id = $id FETCH entity_id;",
            )
            .bind(("id", entity_rid.clone()))
            .await
            .context("failed to fetch embedding with FETCH")?;
        let rows: Vec<Row> = response
            .take(0)
            .context("failed to deserialize fetch rows")?;
        assert_eq!(rows.len(), 1);
        let only_row = rows.first().context("Expected at least one result")?;
        let fetched_entity = &only_row.entity_id;
        assert_eq!(fetched_entity.id, entity_key);
        assert_eq!(fetched_entity.name, "Test entity");
        assert_eq!(fetched_entity.user_id, user_id);
        Ok(())
    }
    /// Upserting a second embedding for the same entity must replace the existing
    /// row rather than add another one.
    ///
    /// Every fallible step, including the final "first row" lookup, reports
    /// failure through the returned `anyhow::Result` instead of panicking, in
    /// line with the other async tests in this module.
    #[tokio::test]
    async fn test_upsert_replaces_existing_embedding_row() -> anyhow::Result<()> {
        let db = prepare_knowledge_entity_test_db(3).await?;
        let user_id = "user-upsert";
        let source_id = "source-upsert";
        let entity = build_knowledge_entity_with_id("entity-upsert", source_id, user_id);
        KnowledgeEntity::store_with_embedding(entity.clone(), vec![1.0_f32, 0.0, 0.0], 3, &db)
            .await
            .context("initial store")?;
        // Same entity id, different vector: this row shares the original's id.
        let replacement = KnowledgeEntityEmbedding::new(
            &entity.id,
            source_id.to_owned(),
            vec![0.0, 1.0, 0.0],
            user_id.to_owned(),
            KnowledgeEntity::table_name(),
        );
        db.upsert_item(replacement)
            .await
            .context("upsert replacement embedding")?;
        let entity_rid = RecordId::from_table_key(KnowledgeEntity::table_name(), &entity.id);
        let rows: Vec<KnowledgeEntityEmbedding> = db
            .client
            .query(format!(
                "SELECT * FROM {} WHERE entity_id = $entity_id",
                KnowledgeEntityEmbedding::table_name()
            ))
            .bind(("entity_id", entity_rid))
            .await
            .context("count embeddings")?
            .take(0)
            .context("take embeddings")?;
        assert_eq!(rows.len(), 1);
        // Propagate a missing row as an error rather than panicking.
        let row = rows.first().context("expected one embedding row")?;
        assert_eq!(row.id, entity.id);
        assert_eq!(row.embedding, vec![0.0, 1.0, 0.0]);
        Ok(())
    }
 }
@@ -1,4 +1,5 @@
-use crate::storage::types::file_info::deserialize_flexible_id;
+use crate::storage::types::user::User;
 use crate::utils::serde_helpers::deserialize_flexible_id;
 use crate::{error::AppError, storage::db::SurrealDbClient};
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
@@ -21,6 +22,7 @@ pub struct KnowledgeRelationship {
 }
 impl KnowledgeRelationship {
    #[must_use]
    pub fn new(
        in_: String,
        out: String,
@@ -39,36 +41,69 @@ impl KnowledgeRelationship {
            },
        }
    }
    pub async fn store_relationship(&self, db_client: &SurrealDbClient) -> Result<(), AppError> {
        let query = format!(
            r#"RELATE knowledge_entity:`{}`->relates_to:`{}`->knowledge_entity:`{}`
            SET
                metadata.user_id = '{}',
                metadata.source_id = '{}',
                metadata.relationship_type = '{}'"#,
            self.in_,
            self.id,
            self.out,
            self.metadata.user_id,
            self.metadata.source_id,
            self.metadata.relationship_type
        );
-        db_client.query(query).await?;
+    pub async fn store_relationship(self, db_client: &SurrealDbClient) -> Result<(), AppError> {
        User::get_and_validate_knowledge_entity(&self.in_, &self.metadata.user_id, db_client)
            .await?;
        User::get_and_validate_knowledge_entity(&self.out, &self.metadata.user_id, db_client)
            .await?;
        let Self {
            id,
            in_,
            out,
            metadata:
                RelationshipMetadata {
                    user_id,
                    source_id,
                    relationship_type,
                },
        } = self;
        db_client
            .client
            .query(
                r#"BEGIN TRANSACTION;
                LET $in_entity = type::thing('knowledge_entity', $in_id);
                LET $out_entity = type::thing('knowledge_entity', $out_id);
                LET $relation = type::thing('relates_to', $rel_id);
                DELETE type::thing('relates_to', $rel_id);
                RELATE $in_entity->$relation->$out_entity SET
                    metadata.user_id = $user_id,
                    metadata.source_id = $source_id,
                    metadata.relationship_type = $relationship_type;
                COMMIT TRANSACTION;"#,
            )
            .bind(("rel_id", id))
            .bind(("in_id", in_))
            .bind(("out_id", out))
            .bind(("user_id", user_id))
            .bind(("source_id", source_id))
            .bind(("relationship_type", relationship_type))
            .await
            .map_err(AppError::from)?
            .check()
            .map_err(AppError::from)?;
        Ok(())
    }
    pub async fn delete_relationships_by_source_id(
        source_id: &str,
        user_id: &str,
        db_client: &SurrealDbClient,
    ) -> Result<(), AppError> {
-        let query = format!(
+        db_client
-            "DELETE knowledge_entity -> relates_to WHERE metadata.source_id = '{}'",
+            .client
-            source_id
+            .query(
-        );
+                "DELETE FROM relates_to WHERE metadata.source_id = $source_id AND metadata.user_id = $user_id",
-
+            )
-        db_client.query(query).await?;
+            .bind(("source_id", source_id.to_owned()))
            .bind(("user_id", user_id.to_owned()))
            .await
            .map_err(AppError::from)?
            .check()
            .map_err(AppError::from)?;
        Ok(())
    }
@@ -78,48 +113,71 @@ impl KnowledgeRelationship {
        user_id: &str,
        db_client: &SurrealDbClient,
    ) -> Result<(), AppError> {
-        let mut authorized_result = db_client
+        let mut delete_result = db_client
-            .query(format!(
+            .client
-                "SELECT * FROM relates_to WHERE id = relates_to:`{}` AND metadata.user_id = '{}'",
+            .query(
-                id, user_id
+                "DELETE type::thing('relates_to', $id) WHERE metadata.user_id = $user_id RETURN BEFORE;",
            )
            .bind(("id", id.to_owned()))
            .bind(("user_id", user_id.to_owned()))
            .await
            .map_err(AppError::from)?;
        let deleted: Vec<KnowledgeRelationship> = delete_result.take(0).map_err(AppError::from)?;
        if !deleted.is_empty() {
            return Ok(());
        }
        let mut exists_result = db_client
            .client
            .query("SELECT * FROM type::thing('relates_to', $id)")
            .bind(("id", id.to_owned()))
            .await
            .map_err(AppError::from)?;
        let existing: Option<KnowledgeRelationship> =
            exists_result.take(0).map_err(AppError::from)?;
        if existing.is_some() {
            Err(AppError::Auth(
                "Not authorized to delete relationship".into(),
            ))
            .await?;
        let authorized: Vec<KnowledgeRelationship> = authorized_result.take(0).unwrap_or_default();
        if authorized.is_empty() {
            let mut exists_result = db_client
                .query(format!("SELECT * FROM relates_to:`{}`", id))
                .await?;
            let existing: Option<KnowledgeRelationship> = exists_result.take(0)?;
            if existing.is_some() {
                Err(AppError::Auth(
                    "Not authorized to delete relationship".into(),
                ))
            } else {
                Err(AppError::NotFound(format!("Relationship {} not found", id)))
            }
        } else {
-            db_client
+            Err(AppError::NotFound(format!("Relationship {id} not found")))
                .query(format!("DELETE relates_to:`{}`", id))
                .await?;
            Ok(())
        }
    }
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use super::*;
    use crate::storage::types::knowledge_entity::{KnowledgeEntity, KnowledgeEntityType};
    use anyhow::{self, Context};
-    // Helper function to create a test knowledge entity for the relationship tests
+    use crate::test_utils::setup_test_db;
-    async fn create_test_entity(name: &str, db_client: &SurrealDbClient) -> String {
+
    /// Test helper: fetches the `relates_to` record with the given id.
    ///
    /// The id is passed as the bound parameter `$id` and turned into a record
    /// reference with `type::thing('relates_to', $id)`, so it is never
    /// interpolated into the query text.
    ///
    /// Returns the first statement's result deserialized as
    /// `Option<KnowledgeRelationship>`.
    ///
    /// # Panics
    /// Panics (via `expect`) if the query fails or if the result cannot be
    /// taken/deserialized.
    async fn get_relationship_by_id(
        relationship_id: &str,
        db_client: &SurrealDbClient,
    ) -> Option<KnowledgeRelationship> {
        let mut result = db_client
            .client
            .query("SELECT * FROM type::thing('relates_to', $id)")
            .bind(("id", relationship_id.to_owned()))
            .await
            .expect("relationship query by id failed");
        // Index 0 = result of the single SELECT statement above.
        result.take(0).expect("failed to take relationship by id")
    }
    async fn create_test_entity(
        name: &str,
        user_id: &str,
        db_client: &SurrealDbClient,
    ) -> anyhow::Result<String> {
        let source_id = "source123".to_string();
-        let description = format!("Description for {}", name);
+        let description = format!("Description for {name}");
        let entity_type = KnowledgeEntityType::Document;
        let embedding = vec![0.1, 0.2, 0.3];
        let user_id = "user123".to_string();
        let entity = KnowledgeEntity::new(
            source_id,
@@ -127,19 +185,20 @@ mod tests {
            description,
            entity_type,
            None,
-            embedding,
+            user_id.to_string(),
            user_id,
        );
        let stored: Option<KnowledgeEntity> = db_client
            .store_item(entity)
            .await
-            .expect("Failed to store entity");
+            .with_context(|| "Failed to store entity".to_string())?;
-        stored.unwrap().id
+        stored
            .ok_or_else(|| anyhow::anyhow!("Expected stored entity to return Some"))
            .map(|e| e.id)
    }
    #[tokio::test]
-    async fn test_relationship_creation() {
+    async fn test_relationship_creation() -> anyhow::Result<()> {
        let in_id = "entity1".to_string();
        let out_id = "entity2".to_string();
        let user_id = "user123".to_string();
@@ -154,103 +213,157 @@ mod tests {
            relationship_type.clone(),
        );
        // Verify fields are correctly set
        assert_eq!(relationship.in_, in_id);
        assert_eq!(relationship.out, out_id);
        assert_eq!(relationship.metadata.user_id, user_id);
        assert_eq!(relationship.metadata.source_id, source_id);
        assert_eq!(relationship.metadata.relationship_type, relationship_type);
        assert!(!relationship.id.is_empty());
        Ok(())
    }
    #[tokio::test]
-    async fn test_store_relationship() {
+    async fn test_store_and_verify_by_source_id() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
-        let namespace = "test_ns";
+        let user_id = "user123";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
-        // Create two entities to relate
+        let entity1_id = create_test_entity("Entity 1", user_id, &db).await?;
-        let entity1_id = create_test_entity("Entity 1", &db).await;
+        let entity2_id = create_test_entity("Entity 2", user_id, &db).await?;
        let entity2_id = create_test_entity("Entity 2", &db).await;
        // Create relationship
        let user_id = "user123".to_string();
        let source_id = "source123".to_string();
        let relationship_type = "references".to_string();
        let relationship = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity2_id.clone(),
-            user_id.clone(),
+            user_id.to_string(),
            source_id.clone(),
            relationship_type,
        );
        let relationship_id = relationship.id.clone();
        // Store the relationship
        relationship
            .store_relationship(&db)
            .await
-            .expect("Failed to store relationship");
+            .with_context(|| "Failed to store relationship".to_string())?;
-        // Query to verify the relationship exists by checking for relationships with our source_id
+        let persisted = get_relationship_by_id(&relationship_id, &db)
-        // This approach is more reliable than trying to look up by ID
+            .await
-        let check_query = format!(
+            .expect("Relationship should be retrievable by id");
-            "SELECT * FROM relates_to WHERE metadata.source_id = '{}'",
+        assert_eq!(persisted.in_, entity1_id);
-            source_id
+        assert_eq!(persisted.out, entity2_id);
-        );
+        assert_eq!(persisted.metadata.user_id, user_id);
-        let mut check_result = db.query(check_query).await.expect("Check query failed");
+        assert_eq!(persisted.metadata.source_id, source_id);
        let mut check_result = db
            .query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
            .bind(("source_id", source_id.clone()))
            .await
            .expect("Check query failed");
        let check_results: Vec<KnowledgeRelationship> = check_result.take(0).unwrap_or_default();
-        // Just verify that a relationship was created
+        assert_eq!(
-        assert!(
+            check_results.len(),
-            !check_results.is_empty(),
+            1,
-            "Relationship should exist in the database"
+            "Expected one relationship for source_id"
        );
        Ok(())
    }
    #[tokio::test]
-    async fn test_delete_relationship_by_id() {
+    async fn test_store_relationship_rejects_foreign_entity() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
-        let namespace = "test_ns";
+
-        let database = &Uuid::new_v4().to_string();
+        let owner_entity = create_test_entity("Owner entity", "owner-user", &db).await?;
-        let db = SurrealDbClient::memory(namespace, database)
+        let other_entity = create_test_entity("Other entity", "other-user", &db).await?;
        let relationship = KnowledgeRelationship::new(
            owner_entity,
            other_entity,
            "owner-user".to_string(),
            "source123".to_string(),
            "references".to_string(),
        );
        let result = relationship.store_relationship(&db).await;
        assert!(matches!(result, Err(AppError::Auth(_))));
        Ok(())
    }
    #[tokio::test]
    async fn test_store_relationship_resists_query_injection() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        let user_id = "user123";
        let entity1_id = create_test_entity("Entity 1", user_id, &db).await?;
        let entity2_id = create_test_entity("Entity 2", user_id, &db).await?;
        let relationship = KnowledgeRelationship::new(
            entity1_id,
            entity2_id,
            user_id.to_string(),
            "source123'; DELETE FROM relates_to; --".to_string(),
            "references'; UPDATE user SET admin = true; --".to_string(),
        );
        let relationship_id = relationship.id.clone();
        relationship
            .store_relationship(&db)
            .await
-            .expect("Failed to start in-memory surrealdb");
+            .expect("store relationship should safely handle quote-containing values");
-        // Create two entities to relate
+        let mut res = db
-        let entity1_id = create_test_entity("Entity 1", &db).await;
+            .client
-        let entity2_id = create_test_entity("Entity 2", &db).await;
+            .query("SELECT * FROM relates_to WHERE id = type::thing('relates_to', $id)")
            .bind(("id", relationship_id))
            .await
            .expect("query relationship by id failed");
        let rows: Vec<KnowledgeRelationship> = res.take(0).expect("take rows");
        assert_eq!(rows.len(), 1);
        let row = rows.first().expect("expected 1 row");
        assert_eq!(
            row.metadata.source_id,
            "source123'; DELETE FROM relates_to; --"
        );
        Ok(())
    }
    #[tokio::test]
    async fn test_store_and_delete_relationship() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        let user_id = "user123";
        let entity1_id = create_test_entity("Entity 1", user_id, &db).await?;
        let entity2_id = create_test_entity("Entity 2", user_id, &db).await?;
        // Create relationship
        let user_id = "user123".to_string();
        let source_id = "source123".to_string();
        let relationship_type = "references".to_string();
        let relationship = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity2_id.clone(),
-            user_id.clone(),
+            user_id.to_string(),
            source_id.clone(),
            relationship_type,
        );
        let relationship_id = relationship.id.clone();
        // Store the relationship
        relationship
            .store_relationship(&db)
            .await
-            .expect("Failed to store relationship");
+            .with_context(|| "Failed to store relationship".to_string())?;
        // Ensure relationship exists before deletion attempt
        let mut existing_before_delete = db
-            .query(format!(
+            .query("SELECT * FROM relates_to WHERE metadata.user_id = $user_id AND metadata.source_id = $source_id")
-                "SELECT * FROM relates_to WHERE metadata.user_id = '{}' AND metadata.source_id = '{}'",
+            .bind(("user_id", user_id.to_string()))
-                user_id, source_id
+            .bind(("source_id", source_id.clone()))
            ))
            .await
-            .expect("Query failed");
+            .with_context(|| "Query failed".to_string())?;
        let before_results: Vec<KnowledgeRelationship> =
            existing_before_delete.take(0).unwrap_or_default();
        assert!(
@@ -258,59 +371,52 @@ mod tests {
            "Relationship should exist before deletion"
        );
-        // Delete the relationship by ID
+        KnowledgeRelationship::delete_relationship_by_id(&relationship_id, user_id, &db)
        KnowledgeRelationship::delete_relationship_by_id(&relationship.id, &user_id, &db)
            .await
-            .expect("Failed to delete relationship by ID");
+            .with_context(|| "Failed to delete relationship by ID".to_string())?;
        // Query to verify the relationship was deleted
        let mut result = db
-            .query(format!(
+            .query("SELECT * FROM relates_to WHERE metadata.user_id = $user_id AND metadata.source_id = $source_id")
-                "SELECT * FROM relates_to WHERE metadata.user_id = '{}' AND metadata.source_id = '{}'",
+            .bind(("user_id", user_id.to_string()))
-                user_id, source_id
+            .bind(("source_id", source_id))
            ))
            .await
-            .expect("Query failed");
+            .with_context(|| "Query failed".to_string())?;
        let results: Vec<KnowledgeRelationship> = result.take(0).unwrap_or_default();
        // Verify the relationship no longer exists
        assert!(results.is_empty(), "Relationship should be deleted");
        Ok(())
    }
    #[tokio::test]
-    async fn test_delete_relationship_by_id_unauthorized() {
+    async fn test_delete_relationship_by_id_unauthorized() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
-        let database = &Uuid::new_v4().to_string();
+        let owner_user_id = "owner-user";
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
-        let entity1_id = create_test_entity("Entity 1", &db).await;
+        let entity1_id = create_test_entity("Entity 1", owner_user_id, &db).await?;
-        let entity2_id = create_test_entity("Entity 2", &db).await;
+        let entity2_id = create_test_entity("Entity 2", owner_user_id, &db).await?;
        let owner_user_id = "owner-user".to_string();
        let source_id = "source123".to_string();
        let relationship = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity2_id.clone(),
-            owner_user_id.clone(),
+            owner_user_id.to_string(),
            source_id,
            "references".to_string(),
        );
        let relationship_id = relationship.id.clone();
        relationship
            .store_relationship(&db)
            .await
-            .expect("Failed to store relationship");
+            .with_context(|| "Failed to store relationship".to_string())?;
        let mut before_attempt = db
-            .query(format!(
+            .query("SELECT * FROM relates_to WHERE metadata.user_id = $user_id")
-                "SELECT * FROM relates_to WHERE metadata.user_id = '{}'",
+            .bind(("user_id", owner_user_id.to_string()))
                owner_user_id
            ))
            .await
-            .expect("Query failed");
+            .with_context(|| "Query failed".to_string())?;
        let before_results: Vec<KnowledgeRelationship> = before_attempt.take(0).unwrap_or_default();
        assert!(
            !before_results.is_empty(),
@@ -318,7 +424,7 @@ mod tests {
        );
        let result = KnowledgeRelationship::delete_relationship_by_id(
-            &relationship.id,
+            &relationship_id,
            "different-user",
            &db,
        )
@@ -326,48 +432,42 @@ mod tests {
        match result {
            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected authorization error when deleting someone else's relationship"),
+            _ => anyhow::bail!(
                "Expected authorization error when deleting someone else's relationship"
            ),
        }
        let mut after_attempt = db
-            .query(format!(
+            .query("SELECT * FROM relates_to WHERE metadata.user_id = $user_id")
-                "SELECT * FROM relates_to WHERE metadata.user_id = '{}'",
+            .bind(("user_id", owner_user_id.to_string()))
                owner_user_id
            ))
            .await
-            .expect("Query failed");
+            .with_context(|| "Query failed".to_string())?;
        let results: Vec<KnowledgeRelationship> = after_attempt.take(0).unwrap_or_default();
        assert!(
            !results.is_empty(),
            "Relationship should still exist after unauthorized delete attempt"
        );
        Ok(())
    }
    #[tokio::test]
-    async fn test_delete_relationships_by_source_id() {
+    async fn test_store_relationship_exists() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
-        let namespace = "test_ns";
+        let user_id = "user123";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
-        // Create entities to relate
+        let entity1_id = create_test_entity("Entity 1", user_id, &db).await?;
-        let entity1_id = create_test_entity("Entity 1", &db).await;
+        let entity2_id = create_test_entity("Entity 2", user_id, &db).await?;
-        let entity2_id = create_test_entity("Entity 2", &db).await;
+        let entity3_id = create_test_entity("Entity 3", user_id, &db).await?;
        let entity3_id = create_test_entity("Entity 3", &db).await;
        // Create relationships with the same source_id
        let user_id = "user123".to_string();
        let source_id = "source123".to_string();
        let different_source_id = "different_source".to_string();
        // Create two relationships with the same source_id
        let relationship1 = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity2_id.clone(),
-            user_id.clone(),
+            user_id.to_string(),
            source_id.clone(),
            "references".to_string(),
        );
@@ -375,77 +475,170 @@ mod tests {
        let relationship2 = KnowledgeRelationship::new(
            entity2_id.clone(),
            entity3_id.clone(),
-            user_id.clone(),
+            user_id.to_string(),
            source_id.clone(),
            "contains".to_string(),
        );
        // Create a relationship with a different source_id
        let different_relationship = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity3_id.clone(),
-            user_id.clone(),
+            user_id.to_string(),
            different_source_id.clone(),
            "mentions".to_string(),
        );
        let relationship1_id = relationship1.id.clone();
        let relationship2_id = relationship2.id.clone();
        let different_relationship_id = different_relationship.id.clone();
        // Store all relationships
        relationship1
            .store_relationship(&db)
            .await
-            .expect("Failed to store relationship 1");
+            .with_context(|| "Failed to store relationship 1".to_string())?;
        relationship2
            .store_relationship(&db)
            .await
-            .expect("Failed to store relationship 2");
+            .with_context(|| "Failed to store relationship 2".to_string())?;
        different_relationship
            .store_relationship(&db)
            .await
-            .expect("Failed to store different relationship");
+            .with_context(|| "Failed to store different relationship".to_string())?;
-        // Delete relationships by source_id
+        let mut before_delete = db
-        KnowledgeRelationship::delete_relationships_by_source_id(&source_id, &db)
+            .query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
            .bind(("source_id", source_id.clone()))
            .await
-            .expect("Failed to delete relationships by source_id");
+            .expect("before delete query failed");
        let before_delete_rows: Vec<KnowledgeRelationship> =
            before_delete.take(0).unwrap_or_default();
        assert_eq!(before_delete_rows.len(), 2);
-        // Query to verify the relationships with source_id were deleted
+        let mut before_delete_different = db
-        let query1 = format!("SELECT * FROM relates_to WHERE id = '{}'", relationship1.id);
+            .query("SELECT * FROM relates_to WHERE metadata.source_id = $source_id")
-        let query2 = format!("SELECT * FROM relates_to WHERE id = '{}'", relationship2.id);
+            .bind(("source_id", different_source_id.clone()))
-        let different_query = format!(
+            .await
-            "SELECT * FROM relates_to WHERE id = '{}'",
+            .expect("before delete different query failed");
-            different_relationship.id
+        let before_delete_different_rows: Vec<KnowledgeRelationship> =
            before_delete_different.take(0).unwrap_or_default();
        assert_eq!(before_delete_different_rows.len(), 1);
        KnowledgeRelationship::delete_relationships_by_source_id(&source_id, user_id, &db)
            .await
            .with_context(|| "Failed to delete relationships by source_id".to_string())?;
        let result1 = get_relationship_by_id(&relationship1_id, &db).await;
        let result2 = get_relationship_by_id(&relationship2_id, &db).await;
        let different_result = get_relationship_by_id(&different_relationship_id, &db).await;
        assert!(result1.is_none(), "Relationship 1 should be deleted");
        assert!(result2.is_none(), "Relationship 2 should be deleted");
        let remaining =
            different_result.expect("Relationship with different source_id should remain");
        assert_eq!(remaining.metadata.source_id, different_source_id);
        Ok(())
    }
    /// Checks that `delete_relationships_by_source_id` only removes the
    /// relationships of the user it is called for.
    ///
    /// Two relationships are stored with the same source id
    /// (`"shared-source"`), one with `user-a` and one with `user-b` as the
    /// relationship's user. After deleting by source id for `user-a`, the
    /// `user-a` relationship must be gone and the `user-b` one must still be
    /// retrievable by id.
    #[tokio::test]
    async fn test_delete_relationships_by_source_id_scoped_to_user() -> anyhow::Result<()> {
        let db = setup_test_db().await?;
        let user_a = "user-a";
        let user_b = "user-b";
        let shared_source = "shared-source";
        // Two entities per user so each relationship links entities created
        // with the same user id as the relationship itself.
        let a1 = create_test_entity("A1", user_a, &db).await?;
        let a2 = create_test_entity("A2", user_a, &db).await?;
        let b1 = create_test_entity("B1", user_b, &db).await?;
        let b2 = create_test_entity("B2", user_b, &db).await?;
        // Both relationships deliberately share the same source id and type;
        // only the user id differs.
        let rel_a = KnowledgeRelationship::new(
            a1,
            a2,
            user_a.to_string(),
            shared_source.to_string(),
            "references".to_string(),
        );
        let rel_b = KnowledgeRelationship::new(
            b1,
            b2,
            user_b.to_string(),
            shared_source.to_string(),
            "references".to_string(),
        );
        // Keep the ids so the records can be looked up after the delete.
        let owner_relationship_id = rel_a.id.clone();
        let other_relationship_id = rel_b.id.clone();
        rel_a.store_relationship(&db).await?;
        rel_b.store_relationship(&db).await?;
        // Delete on behalf of user_a only.
        KnowledgeRelationship::delete_relationships_by_source_id(shared_source, user_a, &db)
            .await?;
        // user_a's relationship is removed...
        assert!(get_relationship_by_id(&owner_relationship_id, &db)
            .await
            .is_none());
        // ...while user_b's relationship with the same source id survives.
        assert!(get_relationship_by_id(&other_relationship_id, &db)
            .await
            .is_some());
        Ok(())
    }
    #[tokio::test]
    async fn test_delete_relationships_by_source_id_resists_query_injection() -> anyhow::Result<()>
    {
        let db = setup_test_db().await?;
        let user_id = "user123";
        let entity1_id = create_test_entity("Entity 1", user_id, &db).await?;
        let entity2_id = create_test_entity("Entity 2", user_id, &db).await?;
        let entity3_id = create_test_entity("Entity 3", user_id, &db).await?;
        let safe_relationship = KnowledgeRelationship::new(
            entity1_id.clone(),
            entity2_id.clone(),
            user_id.to_string(),
            "safe_source".to_string(),
            "references".to_string(),
        );
-        let mut result1 = db.query(query1).await.expect("Query 1 failed");
+        let other_relationship = KnowledgeRelationship::new(
-        let results1: Vec<KnowledgeRelationship> = result1.take(0).unwrap_or_default();
+            entity2_id,
-
+            entity3_id,
-        let mut result2 = db.query(query2).await.expect("Query 2 failed");
+            user_id.to_string(),
-        let results2: Vec<KnowledgeRelationship> = result2.take(0).unwrap_or_default();
+            "other_source".to_string(),
-
+            "contains".to_string(),
        let mut different_result = db
            .query(different_query)
            .await
            .expect("Different query failed");
        let _different_results: Vec<KnowledgeRelationship> =
            different_result.take(0).unwrap_or_default();
        // Verify relationships with the source_id are deleted
        assert!(results1.is_empty(), "Relationship 1 should be deleted");
        assert!(results2.is_empty(), "Relationship 2 should be deleted");
        // For the relationship with different source ID, we need to check differently
        // Let's just verify we have a relationship where the source_id matches different_source_id
        let check_query = format!(
            "SELECT * FROM relates_to WHERE metadata.source_id = '{}'",
            different_source_id
        );
-        let mut check_result = db.query(check_query).await.expect("Check query failed");
+        let safe_relationship_id = safe_relationship.id.clone();
-        let check_results: Vec<KnowledgeRelationship> = check_result.take(0).unwrap_or_default();
+        let other_relationship_id = other_relationship.id.clone();
-        // Verify the relationship with a different source_id still exists
+        safe_relationship
            .store_relationship(&db)
            .await
            .expect("store safe relationship");
        other_relationship
            .store_relationship(&db)
            .await
            .expect("store other relationship");
        KnowledgeRelationship::delete_relationships_by_source_id(
            "safe_source' OR 1=1 --",
            user_id,
            &db,
        )
        .await
        .expect("delete call should succeed");
        let remaining_safe = get_relationship_by_id(&safe_relationship_id, &db).await;
        let remaining_other = get_relationship_by_id(&other_relationship_id, &db).await;
        assert!(remaining_safe.is_some(), "Safe relationship should remain");
        assert!(
-            !check_results.is_empty(),
+            remaining_other.is_some(),
-            "Relationship with different source_id should still exist"
+            "Other relationship should remain"
        );
        Ok(())
    }
 }
@@ -1,8 +1,12 @@
 #![allow(clippy::module_name_repetitions)]
 use uuid::Uuid;
 use std::fmt;
 use std::fmt::Write;
 use crate::stored_object;
-#[derive(Deserialize, Debug, Clone, Serialize, PartialEq)]
+#[derive(Deserialize, Debug, Clone, Copy, Serialize, PartialEq)]
 pub enum MessageRole {
    User,
    AI,
@@ -17,6 +21,7 @@ stored_object!(Message, "message", {
 });
 impl Message {
    #[must_use]
    pub fn new(
        conversation_id: String,
        role: MessageRole,
@@ -53,22 +58,31 @@ impl fmt::Display for Message {
 }
 // helper function to format a vector of messages
 #[must_use]
 pub fn format_history(history: &[Message]) -> String {
-    history
+    let estimated: usize = history
        .iter()
-        .map(|msg| format!("{}", msg))
+        .map(|m| m.content.len().saturating_add(10))
-        .collect::<Vec<String>>()
+        .sum();
-        .join("\n")
+    let mut out = String::with_capacity(estimated);
    for (i, msg) in history.iter().enumerate() {
        if i > 0 {
            out.push('\n');
        }
        let _ = write!(out, "{msg}");
    }
    out
 }
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use super::*;
-    use crate::storage::db::SurrealDbClient;
+    use crate::test_utils::setup_test_db;
    use anyhow::{self, Context};
    #[tokio::test]
-    async fn test_message_creation() {
+    async fn test_message_creation() -> anyhow::Result<()> {
        // Test basic message creation
        let conversation_id = "test_conversation";
        let content = "This is a test message";
        let role = MessageRole::User;
@@ -76,29 +90,24 @@ mod tests {
        let message = Message::new(
            conversation_id.to_string(),
-            role.clone(),
+            role,
            content.to_string(),
            references.clone(),
        );
        // Verify message properties
        assert_eq!(message.conversation_id, conversation_id);
        assert_eq!(message.content, content);
        assert_eq!(message.role, role);
        assert_eq!(message.references, references);
        assert!(!message.id.is_empty());
        Ok(())
    }
    #[tokio::test]
-    async fn test_message_persistence() {
+    async fn test_message_persistence() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
        let namespace = "test_ns";
        let database = &uuid::Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        // Create and store a message
        let conversation_id = "test_conversation";
        let message = Message::new(
            conversation_id.to_string(),
@@ -108,39 +117,37 @@ mod tests {
        );
        let message_id = message.id.clone();
        // Store the message
        db.store_item(message.clone())
            .await
-            .expect("Failed to store message");
+            .with_context(|| "Failed to store message".to_string())?;
        // Retrieve the message
        let retrieved: Option<Message> = db
            .get_item(&message_id)
            .await
-            .expect("Failed to retrieve message");
+            .with_context(|| "Failed to retrieve message".to_string())?;
-        assert!(retrieved.is_some());
+        let retrieved = retrieved.ok_or_else(|| anyhow::anyhow!("Expected message to exist"))?;
        let retrieved = retrieved.unwrap();
        // Verify retrieved properties match original
        assert_eq!(retrieved.id, message.id);
        assert_eq!(retrieved.conversation_id, message.conversation_id);
        assert_eq!(retrieved.role, message.role);
        assert_eq!(retrieved.content, message.content);
        assert_eq!(retrieved.references, message.references);
        Ok(())
    }
    #[tokio::test]
-    async fn test_message_role_display() {
+    async fn test_message_role_display() -> anyhow::Result<()> {
        // Test the Display implementation for MessageRole
        assert_eq!(format!("{}", MessageRole::User), "User");
        assert_eq!(format!("{}", MessageRole::AI), "AI");
        assert_eq!(format!("{}", MessageRole::System), "System");
        Ok(())
    }
    #[tokio::test]
-    async fn test_message_display() {
+    async fn test_message_display() -> anyhow::Result<()> {
        // Test the Display implementation for Message
        let message = Message {
            id: "test_id".to_string(),
            created_at: Utc::now(),
@@ -151,12 +158,13 @@ mod tests {
            references: None,
        };
-        assert_eq!(format!("{}", message), "User: Hello world");
+        assert_eq!(format!("{message}"), "User: Hello world");
        Ok(())
    }
    #[tokio::test]
-    async fn test_format_history() {
+    async fn test_format_history() -> anyhow::Result<()> {
        // Create a vector of messages
        let messages = vec![
            Message {
                id: "1".to_string(),
@@ -178,10 +186,10 @@ mod tests {
            },
        ];
        // Format the history
        let formatted = format_history(&messages);
        // Verify the formatting
        assert_eq!(formatted, "User: Hello\nAI: Hi there!");
        Ok(())
    }
 }
@@ -1,3 +1,5 @@
 #![allow(clippy::unsafe_derive_deserialize)]
 #![allow(async_fn_in_trait)]
 use serde::{Deserialize, Serialize};
 pub mod analytics;
 pub mod conversation;
@@ -5,112 +7,164 @@ pub mod file_info;
 pub mod ingestion_payload;
 pub mod ingestion_task;
 pub mod knowledge_entity;
 pub mod knowledge_entity_embedding;
 pub mod knowledge_relationship;
 pub mod message;
 pub mod scratchpad;
 pub mod system_prompts;
 pub mod system_settings;
 pub mod text_chunk;
 pub mod text_chunk_embedding;
 pub mod text_content;
 pub mod user;
 /// A serializable record type that is bound to a named table and exposes a
 /// string id.
 pub trait StoredObject: Serialize + for<'de> Deserialize<'de> {
    /// Name of the table used for this type (interpolated into the `FROM`
    /// clause of queries built elsewhere in this module).
    fn table_name() -> &'static str;
    /// String id of this record.
-    fn get_id(&self) -> &str;
+    fn id(&self) -> &str;
 }
 /// An entity that has an associated embedding record for vector search.
 pub trait HasEmbedding: StoredObject {
    /// The embedding record type paired with this entity.
    type Embedding: EmbeddingRecord;
    /// Source id of this entity.
    /// NOTE(review): presumably the same value that is denormalised onto the
    /// paired embedding record — confirm against implementors.
    fn source_id(&self) -> &str;
    /// Id of the user associated with this entity.
    /// NOTE(review): presumably the owning user — confirm against implementors.
    fn user_id(&self) -> &str;
 }
 /// An embedding record linked to a `HasEmbedding` entity.
 ///
 /// Implementors supply the link-field/index metadata, the accessors and the
 /// constructor; the trait provides default helpers for dimension validation,
 /// HNSW index redefinition, and lookup/deletion by link record or by
 /// `source_id`.
 pub trait EmbeddingRecord: StoredObject {
    /// The field name in the embedding table that links back to the entity
    /// (e.g. `"entity_id"` or `"chunk_id"`). Used in FETCH and WHERE clauses.
    fn link_field() -> &'static str;
    /// The HNSW index name (e.g. `"idx_embedding_knowledge_entity_embedding"`).
    fn index_name() -> &'static str;
    /// Source id stored on this record — presumably the `source_id` passed to
    /// `new`; confirm against implementors.
    fn source_id(&self) -> &str;
    /// User id stored on this record — presumably the `user_id` passed to
    /// `new`; confirm against implementors.
    fn user_id(&self) -> &str;
    /// The embedding vector as a slice of `f32` components.
    fn embedding(&self) -> &[f32];
    /// Construct a new embedding record.
    ///
    /// * `id` – shared record id (same as the entity id).
    /// * `source_id` – denormalised source id for bulk deletes.
    /// * `embedding` – the embedding vector.
    /// * `user_id` – denormalised user id for query scoping.
    /// * `entity_table` – the entity's table name (used to build the link `RecordId`).
    fn new(
        id: &str,
        source_id: String,
        embedding: Vec<f32>,
        user_id: String,
        entity_table: &str,
    ) -> Self;
    /// Validate that an embedding vector matches the expected dimension.
    ///
    /// # Errors
    ///
    /// Returns `AppError::Validation` (with both lengths in the message) when
    /// `embedding.len()` differs from `expected`.
    fn validate_dimension(embedding: &[f32], expected: usize) -> Result<(), crate::error::AppError>
    where
        Self: Sized,
    {
        if embedding.len() != expected {
            return Err(crate::error::AppError::Validation(format!(
                "embedding dimension mismatch: got {}, expected {expected}",
                embedding.len()
            )));
        }
        Ok(())
    }
    /// Recreate the HNSW vector index with a new dimension.
    ///
    /// This drops and recreates the index inside a transaction.
    ///
    /// # Errors
    ///
    /// Propagates any error from sending the query or from `check()` on the
    /// response.
    async fn redefine_hnsw_index(
        db: &crate::storage::db::SurrealDbClient,
        dimension: usize,
    ) -> Result<(), crate::error::AppError>
    where
        Self: Sized,
    {
        // The SQL is generated from this type's index name, table name and the
        // requested dimension.
        let query = crate::storage::indexes::hnsw_index_redefine_transaction_sql(
            Self::index_name(),
            Self::table_name(),
            dimension,
        );
        // NOTE(review): `check()` presumably turns per-statement failures into
        // an error that `await?` alone would not report — confirm against the
        // surrealdb `Response::check` documentation.
        db.client.query(query).await?.check()?;
        Ok(())
    }
    /// Fetch a single embedding record by its link `RecordId`.
    ///
    /// Runs a `SELECT * ... LIMIT 1` against this type's table, matching the
    /// link field against `rid`, and returns the result of the first statement.
    ///
    /// # Errors
    ///
    /// Propagates any error from sending the query or from taking the result.
    async fn get_by_record_id(
        db: &crate::storage::db::SurrealDbClient,
        rid: &surrealdb::RecordId,
    ) -> Result<Option<Self>, crate::error::AppError>
    where
        // NOTE(review): the `DeserializeOwned` bound looks redundant with
        // `StoredObject`'s `for<'de> Deserialize<'de>` supertrait — confirm
        // before removing.
        Self: Sized + serde::de::DeserializeOwned,
    {
        // Table and field names are interpolated from the trait's static
        // strings; only `rid` is passed as a bound parameter.
        let query = format!(
            "SELECT * FROM {} WHERE {} = $rid LIMIT 1",
            Self::table_name(),
            Self::link_field(),
        );
        let mut result = db.client.query(query).bind(("rid", rid.clone())).await?;
        Ok(result.take(0)?)
    }
    /// Delete an embedding record by its link `RecordId`.
    ///
    /// # Errors
    ///
    /// Propagates any error from sending the query or from `check()` on the
    /// response.
    async fn delete_by_record_id(
        db: &crate::storage::db::SurrealDbClient,
        rid: &surrealdb::RecordId,
    ) -> Result<(), crate::error::AppError>
    where
        Self: Sized,
    {
        // Same construction as the lookup: static names interpolated, `rid`
        // bound as a parameter.
        let query = format!(
            "DELETE FROM {} WHERE {} = $rid",
            Self::table_name(),
            Self::link_field(),
        );
        db.client
            .query(query)
            .bind(("rid", rid.clone()))
            .await?
            .check()?;
        Ok(())
    }
    /// Delete all embedding records with a given `source_id`.
    ///
    /// Note that `source_id` precedes `db` in this signature, unlike the other
    /// database helpers on this trait, which take `db` first.
    ///
    /// # Errors
    ///
    /// Propagates any error from sending the query or from `check()` on the
    /// response.
    async fn delete_by_source_id(
        source_id: &str,
        db: &crate::storage::db::SurrealDbClient,
    ) -> Result<(), crate::error::AppError>
    where
        Self: Sized,
    {
        let query = format!(
            "DELETE FROM {} WHERE source_id = $source_id",
            Self::table_name(),
        );
        // The id is copied into an owned `String` before being bound.
        db.client
            .query(query)
            .bind(("source_id", source_id.to_owned()))
            .await?
            .check()?;
        Ok(())
    }
 }
 #[macro_export]
 macro_rules! stored_object {
-    ($name:ident, $table:expr, {$($(#[$attr:meta])* $field:ident: $ty:ty),*}) => {
+    ($(#[$struct_attr:meta])* $name:ident, $table:expr, {$($(#[$field_attr:meta])* $field:ident: $ty:ty),*}) => {
-        use serde::{Deserialize, Deserializer, Serialize};
+        use serde::{Deserialize, Serialize};
        use surrealdb::sql::Thing;
        use $crate::storage::types::StoredObject;
-        use serde::de::{self, Visitor};
+        #[allow(unused_imports)]
-        use std::fmt;
+        use $crate::utils::serde_helpers::{
            deserialize_flexible_id, serialize_datetime, deserialize_datetime,
            serialize_option_datetime, deserialize_option_datetime,
        };
        use chrono::{DateTime, Utc };
-        struct FlexibleIdVisitor;
+        $(#[$struct_attr])*
        impl<'de> Visitor<'de> for FlexibleIdVisitor {
            type Value = String;
            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string or a Thing")
            }
            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(value.to_string())
            }
            fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(value)
            }
            fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
            where
                A: de::MapAccess<'de>,
            {
                // Try to deserialize as Thing
                let thing = Thing::deserialize(de::value::MapAccessDeserializer::new(map))?;
                Ok(thing.id.to_raw())
            }
        }
        pub fn deserialize_flexible_id<'de, D>(deserializer: D) -> Result<String, D::Error>
        where
            D: Deserializer<'de>,
        {
            deserializer.deserialize_any(FlexibleIdVisitor)
        }
        fn serialize_datetime<S>(date: &DateTime<Utc>, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            Into::<surrealdb::sql::Datetime>::into(*date).serialize(serializer)
        }
        fn deserialize_datetime<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let dt = surrealdb::sql::Datetime::deserialize(deserializer)?;
            Ok(DateTime::<Utc>::from(dt))
        }
        #[allow(dead_code)]
        fn serialize_option_datetime<S>(
            date: &Option<DateTime<Utc>>,
            serializer: S,
        ) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            match date {
                Some(dt) => serializer
                    .serialize_some(&Into::<surrealdb::sql::Datetime>::into(*dt)),
                None => serializer.serialize_none(),
            }
        }
        #[allow(dead_code)]
        fn deserialize_option_datetime<'de, D>(
            deserializer: D,
        ) -> Result<Option<DateTime<Utc>>, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let value = Option::<surrealdb::sql::Datetime>::deserialize(deserializer)?;
            Ok(value.map(DateTime::<Utc>::from))
        }
        #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
        pub struct $name {
            #[serde(deserialize_with = "deserialize_flexible_id")]
@@ -119,7 +173,7 @@ macro_rules! stored_object {
            pub created_at: DateTime<Utc>,
            #[serde(serialize_with = "serialize_datetime", deserialize_with = "deserialize_datetime", default)]
            pub updated_at: DateTime<Utc>,
-            $( $(#[$attr])* pub $field: $ty),*
+            $( $(#[$field_attr])* pub $field: $ty),*
        }
        impl StoredObject for $name {
@@ -127,7 +181,7 @@ macro_rules! stored_object {
                $table
            }
-            fn get_id(&self) -> &str {
+            fn id(&self) -> &str {
                &self.id
            }
        }
@@ -28,6 +28,7 @@ stored_object!(Scratchpad, "scratchpad", {
 });
 impl Scratchpad {
    #[must_use]
    pub fn new(user_id: String, title: String) -> Self {
        let now = ChronoUtc::now();
        Self {
@@ -78,7 +79,7 @@ impl Scratchpad {
        let scratchpad: Option<Scratchpad> = db.get_item(id).await?;
        let scratchpad =
-            scratchpad.ok_or_else(|| AppError::NotFound("Scratchpad not found".to_string()))?;
+            scratchpad.ok_or_else(|| AppError::NotFound("scratchpad not found".to_string()))?;
        if scratchpad.user_id != user_id {
            return Err(AppError::Auth(
@@ -216,20 +217,15 @@ impl Scratchpad {
 #[cfg(test)]
 mod tests {
    #![allow(clippy::expect_used, clippy::must_use_candidate)]
    use anyhow::{self, Context};
    use super::*;
    use crate::test_utils::setup_test_db;
    #[tokio::test]
-    async fn test_create_scratchpad() {
+    async fn test_create_scratchpad() -> anyhow::Result<()> {
-        // Setup in-memory database for testing
+        let db = setup_test_db().await?;
        let namespace = "test_ns";
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        // Create a new scratchpad
        let user_id = "test_user";
@@ -254,29 +250,20 @@ mod tests {
        let retrieved: Option<Scratchpad> = db
            .get_item(&scratchpad.id)
            .await
-            .expect("Failed to retrieve scratchpad");
+            .with_context(|| "Failed to retrieve scratchpad".to_string())?;
-        assert!(retrieved.is_some());
+        let retrieved = retrieved.with_context(|| "expected scratchpad to exist".to_string())?;
        let retrieved = retrieved.unwrap();
        assert_eq!(retrieved.id, scratchpad.id);
        assert_eq!(retrieved.user_id, user_id);
        assert_eq!(retrieved.title, title);
        assert!(!retrieved.is_archived);
        assert!(retrieved.archived_at.is_none());
        assert!(retrieved.ingested_at.is_none());
        Ok(())
    }
    #[tokio::test]
-    async fn test_get_by_user() {
+    async fn test_get_by_user() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let user_id = "test_user";
@@ -288,19 +275,30 @@ mod tests {
        // Store them
        let scratchpad1_id = scratchpad1.id.clone();
        let scratchpad2_id = scratchpad2.id.clone();
-        db.store_item(scratchpad1).await.unwrap();
+        db.store_item(scratchpad1)
-        db.store_item(scratchpad2).await.unwrap();
+            .await
-        db.store_item(scratchpad3).await.unwrap();
+            .with_context(|| "store scratchpad1".to_string())?;
        db.store_item(scratchpad2)
            .await
            .with_context(|| "store scratchpad2".to_string())?;
        db.store_item(scratchpad3)
            .await
            .with_context(|| "store scratchpad3".to_string())?;
        // Archive one of the user's scratchpads
        Scratchpad::archive(&scratchpad2_id, user_id, &db, false)
            .await
-            .unwrap();
+            .with_context(|| "archive".to_string())?;
        // Get scratchpads for user_id
-        let user_scratchpads = Scratchpad::get_by_user(user_id, &db).await.unwrap();
+        let user_scratchpads = Scratchpad::get_by_user(user_id, &db)
            .await
            .with_context(|| "get_by_user".to_string())?;
        assert_eq!(user_scratchpads.len(), 1);
-        assert_eq!(user_scratchpads[0].id, scratchpad1_id);
+        assert_eq!(
            user_scratchpads.first().map(|s| &s.id),
            Some(&scratchpad1_id)
        );
        // Verify they belong to the user
        for scratchpad in &user_scratchpads {
@@ -309,177 +307,155 @@ mod tests {
        let archived = Scratchpad::get_archived_by_user(user_id, &db)
            .await
-            .unwrap();
+            .with_context(|| "get_archived_by_user".to_string())?;
        assert_eq!(archived.len(), 1);
-        assert_eq!(archived[0].id, scratchpad2_id);
+        assert_eq!(archived.first().map(|s| &s.id), Some(&scratchpad2_id));
-        assert!(archived[0].is_archived);
+        assert!(archived.first().is_some_and(|s| s.is_archived));
-        assert!(archived[0].ingested_at.is_none());
+        assert!(archived.first().is_some_and(|s| s.ingested_at.is_none()));
        Ok(())
    }
    // Archives a stored scratchpad and then restores it, checking the
    // `is_archived`, `archived_at` and `ingested_at` fields after each step.
    #[tokio::test]
-    async fn test_archive_and_restore() {
+    async fn test_archive_and_restore() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        // NOTE(review): the unmarked setup lines down to `apply_migrations` use
        // `namespace`, which is only bound on the removed line above —
        // presumably they are removed lines whose `-` marker was lost in this
        // rendering; confirm against the real file.
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let user_id = "test_user";
        let scratchpad = Scratchpad::new(user_id.to_string(), "Test".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        // Presumably the trailing `true` marks the scratchpad as ingested: the
        // assertions below expect `ingested_at` to be set.
        let archived = Scratchpad::archive(&scratchpad_id, user_id, &db, true)
            .await
-            .expect("Failed to archive");
+            .with_context(|| "Failed to archive".to_string())?;
        assert!(archived.is_archived);
        assert!(archived.archived_at.is_some());
        assert!(archived.ingested_at.is_some());
        // Restoring is expected to clear the archive flag and both timestamps.
        let restored = Scratchpad::restore(&scratchpad_id, user_id, &db)
            .await
-            .expect("Failed to restore");
+            .with_context(|| "Failed to restore".to_string())?;
        assert!(!restored.is_archived);
        assert!(restored.archived_at.is_none());
        assert!(restored.ingested_at.is_none());
        Ok(())
    }
    // Updating the content as the owning user must return a record that
    // carries the new content and has `is_dirty` cleared.
    #[tokio::test]
-    async fn test_update_content() {
+    async fn test_update_content() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        // NOTE(review): the unmarked setup lines down to `apply_migrations` use
        // `namespace`, which is only bound on the removed line above —
        // presumably they are removed lines whose `-` marker was lost in this
        // rendering; confirm against the real file.
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let user_id = "test_user";
        let scratchpad = Scratchpad::new(user_id.to_string(), "Test".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        let new_content = "Updated content";
        let updated = Scratchpad::update_content(&scratchpad_id, user_id, new_content, &db)
            .await
-            .unwrap();
+            .with_context(|| "update_content".to_string())?;
        assert_eq!(updated.content, new_content);
        assert!(!updated.is_dirty);
        Ok(())
    }
    // A user other than the owner must get `AppError::Auth` when trying to
    // update a scratchpad's content.
    #[tokio::test]
-    async fn test_update_content_unauthorized() {
+    async fn test_update_content_unauthorized() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        // NOTE(review): the unmarked setup lines down to `apply_migrations` use
        // `namespace`, which is only bound on the removed line above —
        // presumably they are removed lines whose `-` marker was lost in this
        // rendering; confirm against the real file.
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let owner_id = "owner";
        let other_user = "other_user";
        let scratchpad = Scratchpad::new(owner_id.to_string(), "Test".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        // The result is kept un-propagated so its error variant can be inspected.
        let result = Scratchpad::update_content(&scratchpad_id, other_user, "Hacked", &db).await;
        assert!(result.is_err());
        // Any outcome other than an `Auth` error fails the test.
        match result {
            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected Auth error"),
+            _ => anyhow::bail!("Expected Auth error"),
        }
        Ok(())
    }
    // Deleting as the owning user must succeed, after which the record can no
    // longer be fetched by id.
    #[tokio::test]
-    async fn test_delete_scratchpad() {
+    async fn test_delete_scratchpad() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        // NOTE(review): the unmarked setup lines down to `apply_migrations` use
        // `namespace`, which is only bound on the removed line above —
        // presumably they are removed lines whose `-` marker was lost in this
        // rendering; confirm against the real file.
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let user_id = "test_user";
        let scratchpad = Scratchpad::new(user_id.to_string(), "Test".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        // Delete should succeed
        let result = Scratchpad::delete(&scratchpad_id, user_id, &db).await;
        assert!(result.is_ok());
        // Verify it's gone
-        let retrieved: Option<Scratchpad> = db.get_item(&scratchpad_id).await.unwrap();
+        let retrieved: Option<Scratchpad> = db
            .get_item(&scratchpad_id)
            .await
            .with_context(|| "get_item".to_string())?;
        assert!(retrieved.is_none());
        Ok(())
    }
    // A user other than the owner must get `AppError::Auth` when deleting, and
    // the scratchpad must still be retrievable afterwards.
    #[tokio::test]
-    async fn test_delete_unauthorized() {
+    async fn test_delete_unauthorized() -> anyhow::Result<()> {
-        let namespace = "test_ns";
+        let db = setup_test_db().await?;
        // NOTE(review): the unmarked setup lines down to `apply_migrations` use
        // `namespace`, which is only bound on the removed line above —
        // presumably they are removed lines whose `-` marker was lost in this
        // rendering; confirm against the real file.
        let database = &Uuid::new_v4().to_string();
        let db = SurrealDbClient::memory(namespace, database)
            .await
            .expect("Failed to start in-memory surrealdb");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let owner_id = "owner";
        let other_user = "other_user";
        let scratchpad = Scratchpad::new(owner_id.to_string(), "Test".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        // The result is kept un-propagated so its error variant can be inspected.
        let result = Scratchpad::delete(&scratchpad_id, other_user, &db).await;
        assert!(result.is_err());
        // Any outcome other than an `Auth` error fails the test.
        match result {
            Err(AppError::Auth(_)) => {}
-            _ => panic!("Expected Auth error"),
+            _ => anyhow::bail!("Expected Auth error"),
        }
        // Verify it still exists
-        let retrieved: Option<Scratchpad> = db.get_item(&scratchpad_id).await.unwrap();
+        let retrieved: Option<Scratchpad> = db
            .get_item(&scratchpad_id)
            .await
            .with_context(|| "get_item".to_string())?;
        assert!(retrieved.is_some());
        Ok(())
    }
    #[tokio::test]
-    async fn test_timezone_aware_scratchpad_conversion() {
+    async fn test_timezone_aware_scratchpad_conversion() -> anyhow::Result<()> {
-        let db = SurrealDbClient::memory("test_ns", &Uuid::new_v4().to_string())
+        let db = setup_test_db().await?;
            .await
            .expect("Failed to create test database");
        db.apply_migrations()
            .await
            .expect("Failed to apply migrations");
        let user_id = "test_user_123";
        let scratchpad =
            Scratchpad::new(user_id.to_string(), "Test Timezone Scratchpad".to_string());
        let scratchpad_id = scratchpad.id.clone();
-        db.store_item(scratchpad).await.unwrap();
+        db.store_item(scratchpad)
            .await
            .with_context(|| "store scratchpad".to_string())?;
        let retrieved = Scratchpad::get_by_id(&scratchpad_id, user_id, &db)
            .await
-            .unwrap();
+            .with_context(|| "get_by_id".to_string())?;
        // Test that datetime fields are preserved and can be used for timezone formatting
        assert!(retrieved.created_at.timestamp() > 0);
@@ -493,10 +469,17 @@ mod tests {
        // Archive the scratchpad to test optional datetime handling
        let archived = Scratchpad::archive(&scratchpad_id, user_id, &db, false)
            .await
-            .unwrap();
+            .with_context(|| "archive".to_string())?;
        assert!(archived.archived_at.is_some());
-        assert!(archived.archived_at.unwrap().timestamp() > 0);
+        assert!(
            archived
                .archived_at
                .with_context(|| "expected archived_at".to_string())?
                .timestamp()
                > 0
        );
        assert!(archived.ingested_at.is_none());
        Ok(())
    }
 }
@@ -1,4 +1,4 @@
-pub static DEFAULT_QUERY_SYSTEM_PROMPT: &str = r#"You are a knowledgeable assistant with access to a specialized knowledge base. You will be provided with relevant knowledge entities from the database as context. Each knowledge entity contains a name, description, and type, representing different concepts, ideas, and information.
+pub const DEFAULT_QUERY_SYSTEM_PROMPT: &str = r#"You are a knowledgeable assistant with access to a specialized knowledge base. You will be provided with relevant knowledge entities from the database as context. Each knowledge entity contains a name, description, and type, representing different concepts, ideas, and information.
 Your task is to:
 1. Carefully analyze the provided knowledge entities in the context
@@ -20,7 +20,7 @@ Example response formats:
 "I found relevant information in multiple entries: [explanation...]"
 "I apologize, but the provided context doesn't contain information about [topic]""#;
-pub static DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT: &str = r#"You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.
+pub const DEFAULT_INGRESS_ANALYSIS_SYSTEM_PROMPT: &str = r#"You are an AI assistant. You will receive a text content, along with user context and a category. Your task is to provide a structured JSON object representing the content in a graph format suitable for a graph database. You will also be presented with some existing knowledge_entities from the database, do not replicate these! Your task is to create meaningful knowledge entities from the submitted content. Try and infer as much as possible from the users context and category when creating these. If the user submits a large content, create more general entities. If the user submits a narrow and precise content, try and create precise knowledge entities.
 The JSON should have the following structure:
@@ -49,13 +49,13 @@ Guidelines:
 2. Each KnowledgeEntity should have a unique `key`, a meaningful `name`, and a descriptive `description`.
 3. Define the type of each KnowledgeEntity using the following categories: Idea, Project, Document, Page, TextSnippet.
 4. Establish relationships between entities using types like RelatedTo, RelevantTo, SimilarTo.
-5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity"
+5. Use the `source` key to indicate the originating entity and the `target` key to indicate the related entity.
 6. You will be presented with a few existing KnowledgeEntities that are similar to the current ones. They will have an existing UUID. When creating relationships to these entities, use their UUID.
 7. Only create relationships between existing KnowledgeEntities.
 8. Entities that exist already in the database should NOT be created again. If there is only a minor overlap, skip creating a new entity.
 9. A new relationship MUST include a newly created KnowledgeEntity."#;
-pub static DEFAULT_IMAGE_PROCESSING_PROMPT: &str = r#"Analyze this image and respond based on its primary content:
+pub const DEFAULT_IMAGE_PROCESSING_PROMPT: &str = r#"Analyze this image and respond based on its primary content:
 - If the image is mainly text (document, screenshot, sign), transcribe the text verbatim.
 - If the image is mainly visual (photograph, art, landscape), provide a concise description of the scene.
 - For hybrid images (diagrams, ads), briefly describe the visual, then transcribe the text under a "Text:" heading.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Per Stark	bddb603bd9	release: 1.0.4	2026-06-23 10:32:38 +02:00
Per Stark	aee9136f1e	ci: cache-nix-action preffered magic cache doesnt work due to size	2026-06-23 08:01:04 +02:00
Per Stark	253ac9819d	ci: disable determinate nix flakehub	2026-06-23 07:36:13 +02:00
Per Stark	511e42a078	ci: nix flake fixes	2026-06-22 19:26:53 +02:00
Per Stark	d8e839bf46	refactor: harmonized nix ci alignment	2026-06-22 15:29:30 +02:00
Per Stark	588e616baf	refactor: replace headless_chrome with lighter alternatives	2026-06-21 21:43:33 +02:00
Per Stark	87e6fa14b2	chore: dependabot deps update chore: openai version bump	2026-06-20 11:16:43 +02:00
Per Stark	09e545816e	chore: git-hooks rustfmt and clippy	2026-06-20 10:10:29 +02:00
Per Stark	01ef1bcb7a	refactor: extract generic ensure_fts_index helper	2026-06-19 08:10:49 +02:00
Per Stark	530cd0a8f1	refactor: consolidate test database setup in common/src/storage	2026-06-19 08:10:38 +02:00
Per Stark	3b20adc50f	refactor: split knowledge-graph monolith and extract rubberbanding logic	2026-06-19 08:10:38 +02:00
Per Stark	b3d42d2586	chore: technical maintenance, reduced duplication	2026-06-18 14:58:13 +02:00
Per Stark	fb51a8b55f	evals: eval crate overhaul, simplification and performance improvements	2026-06-17 19:23:11 +02:00
Per Stark	adc04d8c6d	perf: batch entity embeddings during ingest and expand retry tests. Entity enrichment now uses embed_batch like chunks; the unused entity_embedding_concurrency knob is removed and ingest retry paths gain test coverage.	2026-06-12 18:40:36 +02:00
Per Stark	1013035731	fix: atomic ingestion persist with task reclaim and shared cleanup. One transaction per task replaces prior artifact rows; workers skip the pipeline when content already exists, eval seeding reuses persist_artifacts, and deletes clear graph children via shared SQL.	2026-06-12 16:27:07 +02:00
Per Stark	cf69cb7b05	fix: don't close modal on inner HTMX requests. changelog	2026-06-12 15:09:59 +02:00
Per Stark	ead17530bd	fix: schedule nightly index rebuild on worker and skip per-ingest rebuild. Ingest relies on SurrealDB incremental index maintenance; the worker runs native REBUILD INDEX on a configurable interval with lease state on system_settings.	2026-06-12 15:01:53 +02:00
Per Stark	4e8a58fff1	fix: load embedding dimensions once per persist and trim vector search select.	2026-06-12 13:54:51 +02:00
Per Stark	28e8ede478	release: 1.0.3 fix: load ort-version via bash script on all release runners, toolchain harmonization	2026-06-12 12:42:40 +02:00
Per Stark	00453fdcbe	chore: bump to 1.0.3 and harmonize onnx runtime version across nix, ci, and docker.	2026-06-12 09:11:55 +02:00
Per Stark	c53ec8c0a1	fix: arc-share retrieved chunks, centralize entity embeddings, and trim hot-path clones.	2026-06-06 23:05:53 +02:00
Per Stark	60cf63292a	fix: replaced several instances if cloning, reduced allocations	2026-06-06 19:45:18 +02:00
Per Stark	ac0d34bfbd	fix: leaner error handling by boxing large variants	2026-06-06 07:59:57 +02:00
Per Stark	4e20da538d	feat: configure FastEmbed model in config and admin, with restart to apply Expose fastembed_model in config and a model dropdown on Admin → Models. Persist dimension from the chosen model, require restart to load it, and align legacy OpenAI default settings so fresh local-embedding installs start cleanly.	2026-06-04 21:51:57 +02:00
Per Stark	15c9f18f6e	feat: pool fastembed, batch embeddings, and reconcile embedding config on startup	2026-06-04 21:51:57 +02:00
Per Stark	7b850769c9	fix: html-router modals and add insta snapshot tests. Avoid nested forms in the scratchpad editor, centralize modal lifecycle in modal.js, return HTMX partials from archive, and add template compile plus layout snapshots.	2026-06-03 20:20:43 +02:00
Per Stark	2a28243213	feat: can now choose search result types	2026-06-01 14:37:19 +02:00
Per Stark	b22c351785	fix: knowledge entity suggestions simplification	2026-05-31 20:23:40 +02:00
Per Stark	3897345ab3	chore: ingestion-pipeline refactor, sort technical debt, rustfmt	2026-05-31 19:48:41 +02:00
Per Stark	5c2d2e24d3	chore: refactor retrieval pipeline to chunk-first RRF with derived entities and slimmer eval surface. Collapse the multi-strategy entity engine into one benchmarked chunk retrieval path, derive entities from retrieved chunks, and update consumers, docs, and clippy fixes across the workspace.	2026-05-30 22:19:08 +02:00
Per Stark	c70141de35	chore: harden api-router errors and add router integration tests while slimming html handlers.	2026-05-30 15:18:12 +02:00
Per Stark	2aa92b6ad7	chore: improve html-router auth, caching, and analytics while centralizing search labels in common. small fix	2026-05-29 15:03:55 +02:00
Per Stark	d3443d4153	chore: centralize embedding errors, retrieval strategy, and test DB helpers. Replace anyhow in embedding production code with EmbeddingError, move RetrievalStrategy into common config, and deduplicate Surreal test setup via common::test_utils.	2026-05-29 14:44:23 +02:00
Per Stark	e3bb2935d0	chore: harden common storage bootstrap and slim embedded db assets Unify embedding config, build providers from system settings, and fail startup when index builds error or time out. Move Surreal assets under common/db so embeds exclude crate source, and read storage via streams.	2026-05-29 14:44:23 +02:00
Per Stark	93d11b66eb	test: cover system settings sync, validation, and ingestion prompts Add tests for embedding provider sync, patch isolation, typed backend serde, and DB-backed ingestion prompts.	2026-05-29 14:44:23 +02:00
Per Stark	125b856c49	chore: harden system settings and unify prompt usage Validate settings updates, use typed embedding backends, and route ingestion through DB-stored prompts so admin edits take effect.	2026-05-29 14:44:23 +02:00
Per Stark	bc41a619ce	chore: move serde helpers to common utils Relocate SurrealDB serde helpers out of storage types so they can be reused broadly, and align retrieval-pipeline test setup with configured embedding dimensions.	2026-05-29 14:44:23 +02:00
Per Stark	ba8c36da1e	chore: harden text chunk embeddings and text content storage Align text chunk embedding identity with knowledge entities (chunk id as record id, UNIQUE chunk_id index, dimension validation), make cascade deletes transactional, and improve text content patch/search reliability with tests.	2026-05-29 14:44:23 +02:00
Per Stark	5724f11dc1	chore: harden knowledge graph storage and clear common clippy warnings Enforce stable 1:1 entity embeddings, relationship endpoint auth, and user-scoped deletes; align schemas/migrations and resolve common crate clippy findings.	2026-05-29 14:44:23 +02:00
Per Stark	189adb1a5f	chore: harden analytics, conversation access, and per-user file dedup Use UPSERT for analytics counters, enforce message ownership in SQL, return NotFound when patch_title updates nothing, scope file dedup by user_id with a composite unique index, and expand tests for auth, ordering, and edge cases.	2026-05-29 14:44:23 +02:00
Per Stark	97beb91710	chore: optimize ingest payloads and add parallel task batch store Parse content before building file payloads to move shared metadata when possible, add create_all_and_add_to_db for concurrent stores, and extend tests for batch persistence and payload edge cases.	2026-05-29 14:44:23 +02:00
Per Stark	85336d77a3	chore: harden common errors, fastembed blocking, and ingest ownership Run FastEmbed inference on spawn_blocking, propagate Surreal take failures, add AppError::internal and typed ingest/embedding parse errors, and take owned file lists in ingestion payload construction.	2026-05-29 14:44:23 +02:00
Per Stark	9d5e7cd794	chore: improved error handling	2026-05-28 19:58:14 +02:00
Per Stark	30bb59f243	chore: rename get_id to id, add doc comments, pre-allocate format_history	2026-05-27 18:06:16 +02:00
Per Stark	224a7db451	chore: lowercase all error messages and add # Errors doc sections - Fix err-lowercase-msg: normalize all #[error(...)] display strings to lowercase (AppError, FileError, ApiErr) and update affected tests - Fix err-doc-errors: add # Errors sections to 25+ fallible public functions across db.rs, store.rs, embedding.rs, indexes.rs, ingestion_task.rs, and ingest_limits.rs	2026-05-27 14:59:48 +02:00
Per Stark	4579725130	chore: resolve remaining uninlined_format_args clippy warnings	2026-05-27 14:34:37 +02:00
Per Stark	0b08801c90	chore: fix and reduce clippy allows in knowledge_entity.rs - rm duplicate 'document' match arm (match_same_arms) - .get(0) -> .first() (get_first) - for entity in all_entities.iter() -> &all_entities (explicit_iter_loop) - 2x error!("{}", err_msg) -> error!("{err_msg}") (uninlined_format_args) - 2x test format!()/assert!() positional -> inlined (uninlined_format_args) - removed 6 now-unnecessary allow attributes	2026-05-27 14:28:08 +02:00
Per Stark	45d13230a6	chore: add must_use to 27 non-Result public functions - constructors: KnowledgeEntity, TextChunk, Scratchpad, IngestionTask, Conversation, KnowledgeRelationship, Message, TextContent, KnowledgeEntityEmbedding, TextChunkEmbedding - accessors: Theme::as_str, Theme::initial_theme, TaskState::as_str, TaskState::display_label, StorageManager::backend_kind, StorageManager::local_base_path, EmbeddingProvider::backend_label, EmbeddingProvider::dimension, EmbeddingProvider::model_code - queries: TaskState::is_terminal, IngestionTask::can_retry, KnowledgeEntityType::variants, StorageManager::resolve_local_path, resolve_base_dir, IngestionTask::lease_duration - helpers: Message::format_history - builders: StorageManager::with_backend	2026-05-27 14:23:56 +02:00
Per Stark	0acdba4f54	fix: replace manual embedding serialization with serde_json - replaced write!() loops with serde_json::to_string in 4 re-embedding methods - standardized SQL building to use write!() with proper error propagation - eliminates manual f32 vector string building (memory waste + loop risk)	2026-05-27 14:13:19 +02:00
Per Stark	9609880cff	fix: revoke_api_key sets NONE, remove unused bind, lowercase error msgs - fix bug where revoke_api_key set literal 'test_string_nullish' instead of NONE - remove unused table_name bind in update_timezone - lowercase ~16 error messages across 4 crates	2026-05-27 13:56:32 +02:00
Per Stark	31d585b59f	chore: removed anyhow from apperror for improved error handling	2026-05-27 13:33:02 +02:00
Per Stark	890a4b381d	chore: index slicing and lowercase errors	2026-05-27 12:41:26 +02:00
Per Stark	2d630e2af9	chore: tightening and removing super fn	2026-05-27 11:23:39 +02:00
Per Stark	9ec11e1f79	chore: clippy and nix fmt	2026-05-27 11:23:08 +02:00
Per Stark	c60db0fb56	perf: avoid small own clones and intermediate Vec allocations - Derive Copy on 6 small enums (MessageRole, TaskState, StorageKind, EmbeddingBackend, PdfIngestMode, KnowledgeEntityType) - Change create_ingestion_payload files param from Vec<FileInfo> to &[FileInfo] - Remove 5 intermediate Vec allocations (4 embedding serialization + 1 format_history) using write! loop - Remove 7 unnecessary .clone() calls exposed by Copy derive	2026-05-27 10:28:08 +02:00
Per Stark	f5f0454904	fix: html-router dependency of json-stream-parser	2026-05-27 09:59:26 +02:00
Per Stark	18aadab8ee	refactor: json-stream-parser aligned to clippy standard	2026-05-27 09:07:38 +02:00
Per Stark	414d2f5b34	chore: additional clippy fixes after rebasing	2026-05-27 07:37:18 +02:00
Per Stark	293440b0ee	fix: pin surrealdb	2026-05-26 20:21:40 +02:00
Per Stark	041d9bd81f	clippy: evaluations crate	2026-05-26 20:21:25 +02:00
Per Stark	b4383bb227	perf: pre-allocate collections with known capacity in hot paths - Use with_capacity for chunk_by_source, results, per_entity_traces, and selected_chunks in assemble() where bound is known - Pre-allocate tokens/terms vectors in normalize_fts_query and extract_keywords based on input length - Pre-allocate neighbor_ids, seen, and ordered in graph expansion based on relationship count	2026-05-26 20:21:25 +02:00
Per Stark	6c7b586fc5	perf: offload blocking calls to spawn_blocking - Move headless_chrome PDF rasterization from async context to spawn_blocking, keeping tokio worker threads responsive. - Switch RerankerPool from tokio::sync::Mutex to std::sync::Mutex and run TextRerank::rerank inside spawn_blocking, since the rerank call is CPU-bound with no .await points.	2026-05-26 20:21:25 +02:00
Per Stark	1927149ce9	lint: inherit workspace clippy config in json-stream-parser and evaluations Both crates were missing the [lints] workspace = true directive, bypassing workspace clippy rules (unwrap_used, expect_used, etc.).	2026-05-26 20:21:25 +02:00
Per Stark	a52dc802de	refactor: simplify and improve testing for initialization	2026-05-26 20:21:24 +02:00
Per Stark	000852c94c	clippy: adhere to pedantic clippy, uniform test error handling	2026-05-26 20:21:13 +02:00
Per Stark	6a5d631287	chore: remove unused clap dep and fix test_session_table name - Remove clap dependency from retrieval-pipeline (RetrievalStrategy already has FromStr/Display; evaluations uses clap directly) - Rename session table from test_session_table to session	2026-05-26 20:14:29 +02:00
Per Stark	b965c5a2e6	refactor: replace Box<dyn Error> with anyhow::Result - ingestion_pipeline::run_worker_loop returns anyhow::Result<()> - api_router::ApiState::new returns anyhow::Result<Self> - html_router::HtmlState::new_with_resources is infallible, returns Self - main/server/worker binary entry points return anyhow::Result<()>	2026-05-26 20:14:11 +02:00
Per Stark	79e46e9c09	refactor: extract serde helpers from stored_object! macro Move FlexibleIdVisitor, deserialize_flexible_id, and four datetime serde helpers from repeating inside every macro expansion into a shared common/src/storage/types/serde_helpers.rs module. 14 macro invocations × 6 items = ~84 fewer redundant function definitions. Fragile cross-module imports (file_info::deserialize_flexible_id etc.) are updated to point to the canonical module.	2026-05-26 20:12:54 +02:00
Per Stark	f22a1e5ba4	chore: devenv inconsistency, spawn server manually in dev	2026-02-15 18:31:43 +01:00
Per Stark	4d237ff6d9	release: 1.0.2	2026-02-15 11:57:04 +01:00
Per Stark	eb928cdb0e	test: minio to devenv, improved testing s3 and relationships	2026-02-15 08:52:56 +01:00
Per Stark	1490852a09	chore: dep updates & kv-mem separation to test feature docker builder update	2026-02-15 08:51:48 +01:00
Per Stark	b0b01182d7	test: add admin auth integration coverage	2026-02-14 23:11:35 +01:00
Per Stark	679308aa1d	feat: caching chat history & dto	2026-02-14 19:43:34 +01:00
Per Stark	f93c06b347	fix: harden html responses and cache chat sidebar data Use strict template response handling and sanitized template user context, then add an in-process conversation archive cache with mutation-driven invalidation for chat sidebar renders.	2026-02-14 17:47:14 +01:00
Per Stark	a3f207beb1	fix: simplified admin checking	2026-02-13 23:04:01 +01:00
Per Stark	e07199adfc	fix: name harmonization of endpoints & ingestion security hardening	2026-02-13 22:36:00 +01:00
Per Stark	f22cac891c	fix: redact ingestion payload logs and update changelog	2026-02-13 12:06:18 +01:00
Per Stark	b89171d934	fix: parameterize storage-layer queries and add injection tests	2026-02-12 21:42:46 +01:00
Per Stark	0133eead63	fix: border in navigation	2026-02-12 20:39:36 +01:00
Per Stark	e5d2b6605f	fix: browser back navigation from chat windows addenum	2026-02-12 20:32:06 +01:00
Per Stark	bbad91d55b	fix: references bug fix	2026-02-11 22:02:40 +01:00
Per Stark	96846ad664	release: 1.0.1	2026-02-11 15:39:28 +01:00
Per Stark	269bcec659	docs: updated domain name	2026-02-11 15:17:03 +01:00
Per Stark	7c738c4b30	fix: gracefully handle old users	2026-02-11 07:50:19 +01:00
Per Stark	cb88127fcb	docs: updated readme	2026-01-18 18:48:53 +01:00
Per Stark	49e1fbd985	dev: devenv processes	2026-01-18 18:45:30 +01:00
Per Stark	f2fa5bbbcc	fix: edge case when deleting content nit	2026-01-18 18:45:21 +01:00
Per Stark	a3bc6fba98	design: better dark mode	2026-01-17 23:31:05 +01:00
Per Stark	ece744d5a0	refactor: additional responsibilities to middleware, simplified handlers fix	2026-01-17 21:07:25 +01:00
Per Stark	a9fda67209	theme: obsidian-prism	2026-01-17 08:45:47 +01:00
Per Stark	fa7f407306	feat: s3 storage backend	2026-01-16 23:38:47 +01:00
Per Stark	b25cfb4633	feat: add user theme preference - Add theme field to User model (common) - Create migration for theme field - Add theme selection to Account Settings (html-router) - Implement server-side theme rendering in base template - Update JS for system/preference theme handling - Remove header theme toggle for authenticated users	2026-01-16 13:54:07 +01:00
Per Stark	0df2b9810c	docs: addenum	2026-01-14 22:24:23 +01:00
Per Stark	354dc727c1	refactor: extendable templates refactor: simplification refactor: simplification	2026-01-13 22:18:00 +01:00
Per Stark	037057d108	fix: allow for multiple templates directories	2026-01-12 21:25:12 +01:00
Per Stark	9f17c6c2b0	fix: updating models in admin view	2026-01-12 21:01:53 +01:00
Per Stark	17f252e630	release: 1.0.0 fix: cargo dist	2026-01-11 20:35:01 +01:00
Per Stark	db43be1606	fix: schemafull and textcontent	2026-01-02 15:41:22 +01:00
Per Stark	8e8370b080	docs: more complete and correct	2025-12-24 23:36:58 +01:00
Per Stark	84695fa0cc	chore: wording	2025-12-22 23:03:33 +01:00
Per Stark	654add98bc	fix: never block fts, rely on rrf	2025-12-22 22:56:57 +01:00
Per Stark	244ec0ea25	fix: migrating embeddings to new dimensions changing order	2025-12-22 22:39:14 +01:00
Per Stark	d8416ac711	fix: ordering of index creation	2025-12-22 21:59:35 +01:00
Per Stark	f9f48d1046	docs: evaluations instructions and readme refactoring	2025-12-22 18:55:47 +01:00
Per Stark	30b8a65377	fix: migrations schemafull	2025-12-22 18:32:08 +01:00
Per Stark	04faa38ee6	fix: admin page sorted	2025-12-21 21:35:52 +01:00
Per Stark	cdc62dda30	Merge branch 'main' into benchmarks	2025-12-20 23:09:16 +01:00
Per Stark	ab8ff8b07a	changelog	2025-12-20 23:03:06 +01:00
Per Stark	79ea007b0a	tidying stuff up, dto for search	2025-12-20 22:30:31 +01:00
Per Stark	a5bc72aedf	passed wide smoke check	2025-12-10 13:54:08 +01:00
Per Stark	2e2ea0c4ff	faster index creation	2025-12-09 21:32:23 +01:00
Per Stark	a090a8c76e	retrieval simplfied	2025-12-09 20:35:42 +01:00
Per Stark	a8d10f265c	benchmarks: fin	2025-12-08 21:57:53 +01:00
Per Stark	0cb1abc6db	beir-rff	2025-12-08 20:39:12 +01:00
Per Stark	d1a6d9abdf	dataset: beir	2025-12-04 17:50:35 +01:00
Per Stark	d3fa3be3e5	retrieval: hybrid search, linear fusion	2025-12-04 12:48:59 +01:00
Per Stark	dd881efbf9	benchmarks: ready for hybrid revised	2025-12-03 11:38:07 +01:00
Per Stark	2939e4c2a4	fix: removed stale embeddings handler	2025-11-29 20:07:48 +01:00
Per Stark	1039ec32a4	fix: all tests now in sync	2025-11-29 18:59:08 +01:00
Per Stark	cb906c5b53	ndcg fix	2025-11-29 16:24:09 +01:00
Per Stark	08b1612fcb	refactored to clap, mrr and ndcg	2025-11-28 21:26:51 +01:00
Per Stark	67004c9646	fix: index creation at init	2025-11-26 21:49:20 +01:00
Per Stark	030f0fc17d	evals: v3, ebeddings at the side additional indexes	2025-11-26 15:15:10 +01:00
Per Stark	226b2db43a	retrieval-pipeline: v1	2025-11-19 12:58:27 +01:00
Per Stark	6f88d87e74	fix: add dockerfile changes related to retrieval-pipeline	2025-11-18 22:51:48 +01:00
Per Stark	bd519ab269	benchmarks: v2 Minor refactor	2025-11-18 22:51:06 +01:00
Per Stark	f535df7e61	retrieval-pipeline: v0	2025-11-18 22:46:35 +01:00
Per Stark	6b7befbd04	upsert relationship and creation	2025-11-18 21:18:09 +01:00
Per Stark	0eda65b07e	benchmarks: v1 Benchmarking ingestion, retrieval precision and performance	2025-11-18 11:50:15 +01:00
		`@@ -0,0 +1,2 @@`
							`[alias]`
							`eval = "run -p evaluations --release --"`
		`@@ -0,0 +1,2 @@`
							`-- Runtime-managed: text_content FTS indexes now created at startup via the shared Surreal helper.`
							`-- This migration is intentionally left as a no-op to avoid heavy index builds during migration.`
		`@@ -0,0 +1 @@`
							-- No-op: legacy `job` table was superseded by `ingestion_task`; kept for migration order compatibility.
		`@@ -0,0 +1 @@`
							`-- Runtime-managed: FTS indexes now built at startup; migration retained as a no-op.`
		`@@ -0,0 +1 @@`
							`DEFINE FIELD IF NOT EXISTS theme ON user TYPE string DEFAULT "system";`
		`@@ -0,0 +1 @@`
							{"schemas":"--- original\n+++ modified\n@@ -242,7 +242,7 @@\n\n # Defines the schema for the 'text_content' table.\n\n-DEFINE TABLE IF NOT EXISTS text_content SCHEMALESS;\n+DEFINE TABLE IF NOT EXISTS text_content SCHEMAFULL;\n\n # Standard fields\n DEFINE FIELD IF NOT EXISTS created_at ON text_content TYPE datetime;\n@@ -254,10 +254,24 @@\n DEFINE FIELD IF NOT EXISTS file_info ON text_content TYPE option<object>;\n # UrlInfo is a struct, store as object\n DEFINE FIELD IF NOT EXISTS url_info ON text_content TYPE option<object>;\n+DEFINE FIELD IF NOT EXISTS url_info.url ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.title ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS url_info.image_id ON text_content TYPE string;\n+\n DEFINE FIELD IF NOT EXISTS context ON text_content TYPE option<string>;\n DEFINE FIELD IF NOT EXISTS category ON text_content TYPE string;\n DEFINE FIELD IF NOT EXISTS user_id ON text_content TYPE string;\n\n+# FileInfo fields\n+DEFINE FIELD IF NOT EXISTS file_info.id ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.created_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.updated_at ON text_content TYPE datetime;\n+DEFINE FIELD IF NOT EXISTS file_info.sha256 ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.path ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.file_name ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.mime_type ON text_content TYPE string;\n+DEFINE FIELD IF NOT EXISTS file_info.user_id ON text_content TYPE string;\n+\n # Indexes based on query patterns\n DEFINE INDEX IF NOT EXISTS text_content_user_id_idx ON text_content FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS text_content_created_at_idx ON text_content FIELDS created_at;\n","events":null}
		`@@ -1 +0,0 @@`
			{"schemas":"--- original\n+++ modified\n@@ -98,7 +98,7 @@\n DEFINE INDEX IF NOT EXISTS knowledge_entity_user_id_idx ON knowledge_entity FIELDS user_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_source_id_idx ON knowledge_entity FIELDS source_id;\n DEFINE INDEX IF NOT EXISTS knowledge_entity_entity_type_idx ON knowledge_entity FIELDS entity_type;\n-DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at; # For get_latest_knowledge_entities\n+DEFINE INDEX IF NOT EXISTS knowledge_entity_created_at_idx ON knowledge_entity FIELDS created_at;\n\n # Defines the schema for the 'message' table.\n\n@@ -157,6 +157,8 @@\n DEFINE FIELD IF NOT EXISTS require_email_verification ON system_settings TYPE bool;\n DEFINE FIELD IF NOT EXISTS query_model ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS processing_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_model ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS embedding_dimensions ON system_settings TYPE int;\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n\n","events":null}
		`@@ -1 +0,0 @@`
			`{"schemas":"--- original\n+++ modified\n@@ -160,6 +160,7 @@\n DEFINE FIELD IF NOT EXISTS query_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS ingestion_system_prompt ON system_settings TYPE string;\n DEFINE FIELD IF NOT EXISTS image_processing_prompt ON system_settings TYPE string;\n+DEFINE FIELD IF NOT EXISTS voice_processing_model ON system_settings TYPE string;\n\n # Defines the schema for the 'text_chunk' table.\n\n","events":null}`