8 Commits

- Per Stark, 380c900c86 (2025-11-01 21:26:06 +01:00): release: 0.2.6
    dist update
    fix new workflow
    fix
    mkdir
    moved to dist
    fix only dir
    dont verify sha files
    fix verify ci part
    fix
    no checking anymore
- Per Stark, a99e5ada8b (2025-10-31 13:40:06 +01:00): Merge pull request #5 from josephleee/patch-1
    Update README.md
- Per Stark, b0deabaf3f (2025-10-31 13:38:11 +01:00): release: 0.2.6
- Joseph, a8f0d9fa88 (2025-10-30 16:12:39 +09:00): Update README.md
    KaraKeep url is deprecated. link to origin github url
- Per Stark, 56a1dfddb8 (2025-10-29 12:04:39 +01:00): fix: updated docker container for reranking
- Per Stark, 863b921fb4 (2025-10-28 22:28:45 +01:00): fix: updated nix build to work with reranking deps
- Per Stark, 72578296db (2025-10-27 13:05:10 +01:00): feat: reranking with fastembed added
- Per Stark, a0e9387c76 (2025-10-24 23:34:18 +02:00): docs: updated readme
35 changed files with 2108 additions and 652 deletions

.github/build-setup.yml (new file)

@@ -0,0 +1,49 @@
- name: Prepare lib dir
run: mkdir -p lib
# Linux
- name: Fetch ONNX Runtime (Linux)
if: runner.os == 'Linux'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-x64-${ORT_VER}.tgz" ;;
aarch64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-aarch64-${ORT_VER}.tgz" ;;
*) echo "Unsupported arch $ARCH"; exit 1 ;;
esac
curl -fsSL -o ort.tgz "$URL"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime.so* lib/
# macOS
- name: Fetch ONNX Runtime (macOS)
if: runner.os == 'macOS'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
tar -xzf ort.tgz
# copy the main dylib; rename to stable name if needed
cp -v onnxruntime-*/lib/libonnxruntime*.dylib lib/
# optional: ensure a stable name
if [ ! -f lib/libonnxruntime.dylib ]; then
cp -v lib/libonnxruntime*.dylib lib/libonnxruntime.dylib
fi
# Windows
- name: Fetch ONNX Runtime (Windows)
if: runner.os == 'Windows'
shell: pwsh
env:
ORT_VER: 1.22.0
run: |
$url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
Invoke-WebRequest $url -OutFile ort.zip
Expand-Archive ort.zip -DestinationPath ort
$dll = Get-ChildItem -Recurse -Path ort -Filter onnxruntime.dll | Select-Object -First 1
Copy-Item $dll.FullName lib\onnxruntime.dll


@@ -1,44 +1,8 @@
# This file was autogenerated by dist: https://opensource.axo.dev/cargo-dist/
#
# Copyright 2022-2024, axodotdev
# SPDX-License-Identifier: MIT or Apache-2.0
#
# CI that:
#
# * checks for a Git Tag that looks like a release
# * builds artifacts with dist (archives, installers, hashes)
# * uploads those artifacts to temporary workflow zip
# * on success, uploads the artifacts to a GitHub Release
#
# Note that the GitHub Release will be created with a generated
# title/body based on your changelogs.
name: Release
permissions:
"contents": "write"
"packages": "write"
contents: write
packages: write
# This task will run whenever you push a git tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where
# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION
# must be a Cargo-style SemVer Version (must have at least major.minor.patch).
#
# If PACKAGE_NAME is specified, then the announcement will be for that
# package (erroring out if it doesn't have the given version or isn't dist-able).
#
# If PACKAGE_NAME isn't specified, then the announcement will be for all
# (dist-able) packages in the workspace with that version (this mode is
# intended for workspaces with only one dist-able package, or with all dist-able
# packages versioned/released in lockstep).
#
# If you push multiple tags at once, separate instances of this workflow will
# spin up, creating an independent announcement for each one. However, GitHub
# will hard limit this to 3 tags per commit, as it will assume more tags is a
# mistake.
#
# If there's a prerelease-style suffix to the version, then the release(s)
# will be marked as a prerelease.
on:
pull_request:
push:
@@ -46,9 +10,8 @@ on:
- '**[0-9]+.[0-9]+.[0-9]+*'
jobs:
# Run 'dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ !github.event.pull_request && github.ref_name || '' }}
@@ -60,52 +23,36 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.28.0/cargo-dist-installer.sh | sh"
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.0/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
# (PRs run on the *source* but secrets are usually on the *target* -- that's *good*
# but also really annoying to build CI around when it needs secrets to work right.)
- id: plan
run: |
dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "dist ran successfully"
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
echo "manifest=$(jq -c . plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: Upload dist-manifest.json
uses: actions/upload-artifact@v4
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
# Build and packages all the platform-specific things
build-local-artifacts:
name: build-local-artifacts (${{ join(matrix.targets, ', ') }})
# Let the initial task tell us to not run (currently very blunt)
needs:
- plan
needs: [plan]
if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
strategy:
fail-fast: false
# Target platforms/runners are computed by dist in create-release.
# Each member of the matrix has the following arguments:
#
# - runner: the github runner
# - dist-args: cli flags to pass to dist
# - install-dist: expression to run to install dist on the runner
#
# Typically there will be:
# - 1 "global" task that builds universal installers
# - N "local" tasks that build each platform's binaries and platform-specific installers
matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
runs-on: ${{ matrix.runner }}
container: ${{ matrix.container && matrix.container.image || null }}
@@ -114,11 +61,12 @@ jobs:
BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
steps:
- name: enable windows longpaths
run: |
git config --global core.longpaths true
run: git config --global core.longpaths true
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Rust non-interactively if not already installed
if: ${{ matrix.container }}
run: |
@@ -126,37 +74,103 @@ jobs:
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install dist
run: ${{ matrix.install_dist.run }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
# ===== BEGIN: Injected ORT staging for cargo-dist bundling =====
- run: echo "=== BUILD-SETUP START ==="
# Unix shells
- name: Prepare lib dir (Unix)
if: runner.os != 'Windows'
shell: bash
run: |
mkdir -p lib
rm -f lib/*
# Windows PowerShell
- name: Prepare lib dir (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
New-Item -ItemType Directory -Force -Path lib | Out-Null
# remove contents if any
Get-ChildItem -Path lib -Force | Remove-Item -Force -Recurse -ErrorAction SilentlyContinue
- name: Fetch ONNX Runtime (Linux)
if: runner.os == 'Linux'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-x64-${ORT_VER}.tgz" ;;
aarch64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-aarch64-${ORT_VER}.tgz" ;;
*) echo "Unsupported arch $ARCH"; exit 1 ;;
esac
curl -fsSL -o ort.tgz "$URL"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime.so* lib/
# normalize to stable name if needed
[ -f lib/libonnxruntime.so ] || cp -v lib/libonnxruntime.so.* lib/libonnxruntime.so
- name: Fetch ONNX Runtime (macOS)
if: runner.os == 'macOS'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime*.dylib lib/
[ -f lib/libonnxruntime.dylib ] || cp -v lib/libonnxruntime*.dylib lib/libonnxruntime.dylib
- name: Fetch ONNX Runtime (Windows)
if: runner.os == 'Windows'
shell: pwsh
env:
ORT_VER: 1.22.0
run: |
$url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
Invoke-WebRequest $url -OutFile ort.zip
Expand-Archive ort.zip -DestinationPath ort
$dll = Get-ChildItem -Recurse -Path ort -Filter onnxruntime.dll | Select-Object -First 1
Copy-Item $dll.FullName lib\onnxruntime.dll
- run: |
echo "=== BUILD-SETUP END ==="
echo "lib/ contents:"
ls -l lib || dir lib
# ===== END: Injected ORT staging =====
- name: Install dependencies
run: |
${{ matrix.packages_install }}
- name: Build artifacts
run: |
# Actually do builds and make zips and whatnot
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
- id: cargo-dist
name: Post-build
# We force bash here just because github makes it really hard to get values up
# to "real" actions without writing to env-vars, and writing to env-vars has
# inconsistent syntax between shell and powershell.
shell: bash
run: |
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
@@ -167,16 +181,16 @@ jobs:
build_and_push_docker_image:
name: Build and Push Docker Image
runs-on: ubuntu-latest
needs: [plan]
if: ${{ needs.plan.outputs.publishing == 'true' }}
needs: [plan]
if: ${{ needs.plan.outputs.publishing == 'true' }}
permissions:
contents: read # Permission to checkout the repository
packages: write # Permission to push Docker image to GHCR
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive # Matches your other checkout steps
submodules: recursive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -185,33 +199,28 @@ jobs:
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }} # User triggering the workflow
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository }}
# This action automatically uses the Git tag as the Docker image tag.
# For example, a Git tag 'v1.2.3' will result in Docker tag 'ghcr.io/owner/repo:v1.2.3'.
images: ghcr.io/${{ github.repository }}
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha # Enable Docker layer caching from GitHub Actions cache
cache-to: type=gha,mode=max # Enable Docker layer caching to GitHub Actions cache
cache-from: type=gha
cache-to: type=gha,mode=max
# Build and package all the platform-agnostic(ish) things
build-global-artifacts:
needs:
- plan
- build-local-artifacts
runs-on: "ubuntu-22.04"
needs: [plan, build-local-artifacts]
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
@@ -219,92 +228,90 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: cargo-dist
shell: bash
run: |
dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
echo "dist ran successfully"
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: artifacts-build-global
path: |
${{ steps.cargo-dist.outputs.paths }}
${{ env.BUILD_MANIFEST_NAME }}
# Determines if we should publish/announce
host:
needs:
- plan
- build-local-artifacts
- build-global-artifacts
# Only run if we're "publishing", and only if local and global didn't fail (skipped is fine)
needs: [plan, build-local-artifacts, build-global-artifacts]
if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: host
shell: bash
run: |
dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
echo "artifacts uploaded and released successfully"
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
echo "manifest=$(jq -c . dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: Upload dist-manifest.json
uses: actions/upload-artifact@v4
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
- name: Download GitHub Artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: artifacts
merge-multiple: true
- name: Cleanup
run: |
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
run: rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
env:
PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
@@ -312,20 +319,13 @@ jobs:
ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
announce:
needs:
- plan
- host
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
needs: [plan, host]
if: ${{ always() && needs.host.result == 'success' }}
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:


@@ -1,6 +1,9 @@
# Changelog
## Unreleased
## Version 0.2.6 (2025-10-29)
- Added an opt-in FastEmbed-based reranking stage behind `reranking_enabled`. It improves retrieval accuracy by re-scoring hybrid results.
## Version 0.2.5 (2025-10-24)
- Added manual knowledge entity creation flows using a modal, with the option for suggested relationships
- Scratchpad feature, with the ability to convert scratchpads to content.

Cargo.lock (generated)

File diff suppressed because it is too large.


@@ -56,18 +56,56 @@ base64 = "0.22.1"
object_store = { version = "0.11.2" }
bytes = "1.7.1"
state-machines = "0.2.0"
fastembed = { version = "5.2.0", default-features = false, features = ["hf-hub-native-tls", "ort-load-dynamic"] }
[profile.dist]
inherits = "release"
lto = "thin"
[workspace.lints.clippy]
perf = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }
# Performance-focused lints
perf = { level = "warn", priority = -1 }
vec_init_then_push = "warn"
large_stack_frames = "warn"
redundant_allocation = "warn"
single_char_pattern = "warn"
string_extend_chars = "warn"
format_in_format_args = "warn"
slow_vector_initialization = "warn"
inefficient_to_string = "warn"
implicit_clone = "warn"
redundant_clone = "warn"
needless_question_mark = "allow"
single_call_fn = "allow"
# Security-focused lints
integer_arithmetic = "warn"
indexing_slicing = "warn"
unwrap_used = "warn"
expect_used = "warn"
panic = "warn"
unimplemented = "warn"
todo = "warn"
# Async/Network lints
async_yields_async = "warn"
await_holding_invalid_state = "warn"
rc_buffer = "warn"
# Maintainability-focused lints
cargo = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
clone_on_ref_ptr = "warn"
float_cmp = "warn"
manual_string_new = "warn"
uninlined_format_args = "warn"
unused_self = "warn"
must_use_candidate = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "warn"
module_name_repetitions = "warn"
wildcard_dependencies = "warn"
missing_docs_in_private_items = "warn"
# Allow noisy lints that don't add value for this project
manual_must_use = "allow"
needless_raw_string_hashes = "allow"
multiple_bound_locations = "allow"
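
As a concrete illustration of the security-focused group above, `indexing_slicing` flags direct slice indexing that can panic at runtime. A minimal sketch of the warned pattern and a conforming rewrite (hypothetical helper, not part of this diff):

```rust
fn first_byte(bytes: &[u8]) -> Option<u8> {
    // `bytes[0]` would trip clippy::indexing_slicing and can panic on empty input;
    // `first()` surfaces the empty case as None instead.
    bytes.first().copied()
}
```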


@@ -1,7 +1,10 @@
# === Builder Stage ===
FROM clux/muslrust:1.86.0-stable as builder
# === Builder ===
FROM rust:1.86-bookworm AS builder
WORKDIR /usr/src/minne
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config clang cmake git && rm -rf /var/lib/apt/lists/*
# Cache deps
COPY Cargo.toml Cargo.lock ./
RUN mkdir -p api-router common composite-retrieval html-router ingestion-pipeline json-stream-parser main worker
COPY api-router/Cargo.toml ./api-router/
@@ -11,43 +14,38 @@ COPY html-router/Cargo.toml ./html-router/
COPY ingestion-pipeline/Cargo.toml ./ingestion-pipeline/
COPY json-stream-parser/Cargo.toml ./json-stream-parser/
COPY main/Cargo.toml ./main/
RUN cargo build --release --bin main --features ingestion-pipeline/docker || true
# Build with the MUSL target
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker || true
# Copy the rest of the source code
# Build
COPY . .
RUN cargo build --release --bin main --features ingestion-pipeline/docker
# Build the final application binary with the MUSL target
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker
# === Runtime ===
FROM debian:bookworm-slim
# === Runtime Stage ===
FROM alpine:latest
# Chromium + runtime deps + OpenMP for ORT
RUN apt-get update && apt-get install -y --no-install-recommends \
chromium libnss3 libasound2 libgbm1 libxshmfence1 \
ca-certificates fonts-dejavu fonts-noto-color-emoji \
libgomp1 libstdc++6 curl \
&& rm -rf /var/lib/apt/lists/*
RUN apk update && apk add --no-cache \
chromium \
nss \
freetype \
harfbuzz \
ca-certificates \
ttf-freefont \
font-noto-emoji \
&& \
rm -rf /var/cache/apk/*
# ONNX Runtime (CPU). Change if you bump ort.
ARG ORT_VERSION=1.22.0
RUN mkdir -p /opt/onnxruntime && \
curl -fsSL -o /tmp/ort.tgz \
"https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
tar -xzf /tmp/ort.tgz -C /opt/onnxruntime --strip-components=1 && rm /tmp/ort.tgz
ENV CHROME_BIN=/usr/bin/chromium-browser \
CHROME_PATH=/usr/lib/chromium/ \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
ENV CHROME_BIN=/usr/bin/chromium \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
ORT_DYLIB_PATH=/opt/onnxruntime/lib/libonnxruntime.so
# Create a non-root user to run the application
RUN adduser -D -h /home/appuser appuser
WORKDIR /home/appuser
# Non-root
RUN useradd -m appuser
USER appuser
WORKDIR /home/appuser
# Copy the compiled binary from the builder stage (note the target path)
COPY --from=builder /usr/src/minne/target/x86_64-unknown-linux-musl/release/main /usr/local/bin/main
COPY --from=builder /usr/src/minne/target/release/main /usr/local/bin/main
EXPOSE 3000
# EXPOSE 8000-9000
CMD ["main"]

README.md

@@ -12,196 +12,142 @@
To test _Minne_ out, try [this](https://minne-demo.stark.pub) read-only demo deployment to view and explore the functionality.
## Noteworthy Features
- **Search & Chat Interface** - Find content or knowledge instantly with full-text search, or use the chat mode and conversational AI to find and reason about content
- **Manual and AI-assisted connections** - Build entities and relationships manually with full control, let AI create entities and relationships automatically, or blend both approaches with AI suggestions for manual approval
- **Hybrid Retrieval System** - Search combining vector similarity, full-text search, and graph traversal for highly relevant results
- **Scratchpad Feature** - Quickly capture thoughts and convert them to permanent content when ready
- **Visual Graph Explorer** - Interactive D3-based navigation of your knowledge entities and connections
- **Multi-Format Support** - Ingest text, URLs, PDFs, audio files, and images into your knowledge base
- **Performance Focus** - Built with Rust and server-side rendering for speed and efficiency
- **Self-Hosted & Privacy-Focused** - Full control over your data, and compatible with any OpenAI-compatible API that supports structured outputs
## The "Why" Behind Minne
For a while I've been fascinated by Zettelkasten-style PKM systems. While tools like Logseq and Obsidian are excellent, I found the manual linking process to be a hindrance for me. I also wanted a centralized storage and easy access across devices.
For a while I've been fascinated by personal knowledge management systems. I wanted something that made it incredibly easy to capture content - snippets of text, URLs, and other media - while automatically discovering connections between ideas. But I also wanted to maintain control over my knowledge structure.
While developing Minne, I discovered [KaraKeep](https://karakeep.com/) (formerly Hoarder), which is an excellent application in a similar space that you probably want to check out! However, if you're interested in a PKM that builds an automatic network between related concepts using AI, offers search and the **possibility to chat with your knowledge resource**, and provides a blend of manual and AI-driven organization, then Minne might be worth testing.
Traditional tools like Logseq and Obsidian are excellent, but the manual linking process often became a hindrance. Meanwhile, fully automated systems sometimes miss important context or create relationships I wouldn't have chosen myself.
## Core Philosophy & Features
So I built Minne to offer the best of both worlds: effortless content capture with AI-assisted relationship discovery, but with the flexibility to manually curate, edit, or override any connections. You can let AI handle the heavy lifting of extracting entities and finding relationships, take full control yourself, or use a hybrid approach where AI suggests connections that you can approve or modify.
Minne is designed to make it incredibly easy to save snippets of text, URLs, and other content (limited, pending demand). Simply send content along with a category tag. Minne then ingests this, leveraging AI to create relevant nodes and relationships within its graph database, alongside your manual categorization. This graph backend allows for discoverable connections between your pieces of knowledge.
While developing Minne, I discovered [KaraKeep](https://github.com/karakeep-app/karakeep) (formerly Hoarder), which is an excellent application in a similar space that you probably want to check out! However, if you're interested in a PKM that offers both intelligent automation and manual curation, with the ability to chat with your knowledge base, then Minne might be worth testing.
You can converse with your knowledge base through an LLM-powered chat interface (via an OpenAI-compatible API, like Ollama or others). For those who like to see the bigger picture, Minne also includes a feature to visually explore your knowledge graph.
## Table of Contents
You may pick and choose between the models used, and have the possibility to change the prompts to your liking. There is also the option to change the embedding length, making it easy to test another embedding model.
- [Quick Start](#quick-start)
- [Features in Detail](#features-in-detail)
- [Configuration](#configuration)
- [Tech Stack](#tech-stack)
- [Application Architecture](#application-architecture)
- [AI Configuration](#ai-configuration--model-selection)
- [Roadmap](#roadmap)
- [Development](#development)
- [Contributing](#contributing)
- [License](#license)
The application is built for speed and efficiency using Rust with a Server-Side Rendered (SSR) frontend (HTMX and minimal JavaScript). It's fully responsive, offering a complete mobile interface for reading, editing, and managing your content, including the graph database itself. **PWA (Progressive Web App) support** means you can "install" Minne to your device for a native-like experience. For quick capture on the go on iOS, a [**Shortcut**](https://www.icloud.com/shortcuts/e433fbd7602f4e2eaa70dca162323477) makes sending content to your Minne instance a breeze.
## Quick Start
A hybrid retrieval layer blends embeddings, full-text search, and graph signals to surface the best context when augmenting chat responses and when analyzing new content during ingestion.
The fastest way to get Minne running is with Docker Compose:
Minne is open source (AGPL), self-hostable, and can be deployed flexibly: via Nix, Docker Compose, pre-built binaries, or by building from source. It can run as a single `main` binary or as separate `server` and `worker` processes for optimized resource allocation.
```bash
# Clone the repository
git clone https://github.com/perstarkse/minne.git
cd minne
# Start Minne and its database
docker compose up -d
# Access at http://localhost:3000
```
**Required Setup:**
- Replace `your_openai_api_key_here` in `docker-compose.yml` with your actual API key
- Configure `OPENAI_BASE_URL` if using a custom AI provider (like Ollama)
For detailed installation options, see [Configuration](#configuration).
## Features in Detail
### Search vs. Chat mode
**Search** - Use when you know roughly what you're looking for. Full-text search finds items quickly by matching your query terms.
**Chat Mode** - Use when you want to explore concepts, find connections, or reason about your knowledge. The AI analyzes your query and finds relevant context across your entire knowledge base.
### Content Processing
Minne automatically processes content you save:
1. **Web scraping** extracts readable text from URLs
2. **Text analysis** identifies key concepts and relationships
3. **Graph creation** builds connections between related content
4. **Embedding generation** enables semantic search capabilities
### Visual Knowledge Graph
Explore your knowledge as an interactive network with flexible curation options:
**Manual Curation** - Create knowledge entities and relationships yourself with full control over your graph structure
**AI Automation** - Let AI automatically extract entities and discover relationships from your content
**Hybrid Approach** - Get AI-suggested relationships and entities that you can manually review, edit, or approve
The graph visualization shows:
- Knowledge entities as nodes (manually created or AI-extracted)
- Relationships as connections (manually defined, AI-discovered, or suggested)
- Interactive navigation for discovery and editing
### Optional FastEmbed Reranking
Minne ships with an opt-in reranking stage powered by [fastembed-rs](https://github.com/Anush008/fastembed-rs). When enabled, the hybrid retrieval results are rescored with a lightweight cross-encoder before being returned to chat or ingestion flows. In practice this often means more relevant results, boosting answer quality and downstream enrichment.
⚠️ **Resource notes**
- Enabling reranking downloads and caches ~1.1GB of model data on first startup (cached under `<data_dir>/fastembed/reranker` by default).
- Initialization takes longer while warming the cache, and each query consumes extra CPU. The default pool size (2) is tuned for a single-user setup, but a pool size of 1 can work as well.
- The feature is disabled by default. Set `reranking_enabled: true` (or `RERANKING_ENABLED=true`) if you're comfortable with the additional footprint.
Example configuration:
```yaml
reranking_enabled: true
reranking_pool_size: 2
fastembed_cache_dir: "/var/lib/minne/fastembed" # optional override; defaults to <data_dir>/fastembed/reranker
```
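
For developers curious how such a rerank pass looks in code, here is a minimal sketch using fastembed-rs directly. It assumes the crate's `TextRerank`/`RerankInitOptions` API and the `BGERerankerBase` model; Minne's actual pool and lease wiring lives in the `composite-retrieval` crate and may differ.

```rust
use fastembed::{RerankInitOptions, RerankerModel, TextRerank};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Downloads and caches the model on first use (see the resource notes above).
    let reranker = TextRerank::try_new(RerankInitOptions::new(RerankerModel::BGERerankerBase))?;

    let query = "rust async concurrency";
    let candidates = vec![
        "Notes on tokio task scheduling",
        "Grocery list for the weekend",
        "Async/await patterns in Rust",
    ];

    // Re-score the hybrid-retrieval candidates against the query with the cross-encoder.
    for hit in reranker.rerank(query, candidates, true, None)? {
        println!("{:.3}  {:?}", hit.score, hit.document);
    }
    Ok(())
}
```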
## Tech Stack
- **Backend:** Rust. Server-Side Rendering (SSR). Axum. Minijinja for templating.
- **Frontend:** HTML. HTMX and plain JavaScript for interactivity.
- **Database:** SurrealDB
- **AI Integration:** OpenAI API compatible endpoint (for chat and content processing), with support for structured outputs.
- **Web Content Processing:** Relies on a Chromium instance for robust webpage fetching/rendering.
## Prerequisites
- **For Docker/Nix:** Docker or Nix installed. These methods handle SurrealDB and Chromium dependencies.
- **For Binaries/Source:**
- A running SurrealDB instance.
- Chromium (or a compatible Chrome browser) installed and accessible in your `PATH`.
- Git (if cloning and building from source).
- Rust toolchain (if building from source).
## Getting Started
You have several options to get Minne up and running:
### 1. Nix (Recommended for ease of dependency management)
If you have Nix installed, you can run Minne directly:
```bash
nix run 'github:perstarkse/minne#main'
```
This command will fetch Minne and its dependencies (including Chromium) and run the `main` (combined server/worker) application.
### 2. Docker Compose (Recommended for containerized environments)
This is a great way to manage Minne and its SurrealDB dependency together.
1. Clone the repository (or just save the `docker-compose.yml` below).
1. Create a `docker-compose.yml` file:
```yaml
version: "3.8"
services:
minne:
image: ghcr.io/perstarkse/minne:latest # Pulls the latest pre-built image
# Or, to build from local source:
# build: .
container_name: minne_app
ports:
- "3000:3000" # Exposes Minne on port 3000
environment:
# These are examples, ensure they match your SurrealDB setup below
# and your actual OpenAI key.
SURREALDB_ADDRESS: "ws://surrealdb:8000"
SURREALDB_USERNAME: "root_user" # Default from SurrealDB service below
SURREALDB_PASSWORD: "root_password" # Default from SurrealDB service below
SURREALDB_DATABASE: "minne_db"
SURREALDB_NAMESPACE: "minne_ns"
OPENAI_API_KEY: "your_openai_api_key_here" # IMPORTANT: Replace with your actual key
#OPENAI_BASE_URL: "your_ollama_address" # Uncomment this and change it to override the default openai base url
HTTP_PORT: 3000
DATA_DIR: "/data" # Data directory inside the container
RUST_LOG: "minne=info,tower_http=info" # Example logging level
volumes:
- ./minne_data:/data # Persists Minne's data (e.g., scraped content) on the host
depends_on:
- surrealdb
networks:
- minne-net
# Waits for SurrealDB to be ready before starting Minne
command: >
sh -c "
echo 'Waiting for SurrealDB to start...' &&
# Adjust sleep time if SurrealDB takes longer to initialize in your environment
until nc -z surrealdb 8000; do echo 'Waiting for SurrealDB...'; sleep 2; done &&
echo 'SurrealDB is up, starting Minne application...' &&
/usr/local/bin/main
"
# For separate server/worker:
# command: /usr/local/bin/server # or /usr/local/bin/worker
surrealdb:
image: surrealdb/surrealdb:latest
container_name: minne_surrealdb
ports:
# Exposes SurrealDB on port 8000 (primarily for direct access/debugging if needed,
# not strictly required for Minne if only accessed internally by the minne service)
- "127.0.0.1:8000:8000" # Bind to localhost only for SurrealDB by default
volumes:
# Persists SurrealDB data on the host in a 'surreal_database' folder
- ./surreal_database:/database
command: >
start
--log info # Consider 'debug' for troubleshooting
--user root_user
--pass root_password
file:/database/minne_v1.db # Using file-based storage for simplicity
networks:
- minne-net
volumes:
minne_data: {} # Defines a named volume for Minne data (can be managed by Docker)
surreal_database: {} # Defines a named volume for SurrealDB data
networks:
minne-net:
driver: bridge
```
1. Run:
```bash
docker compose up -d
```
Minne will be accessible at `http://localhost:3000`.
### 3. Pre-built Binaries (GitHub Releases)
Binaries for Windows, macOS, and Linux (combined `main` version) are available on the [GitHub Releases page](https://github.com/perstarkse/minne/releases/latest).
1. Download the appropriate binary for your system.
1. **You will need to provide and run SurrealDB and have Chromium installed and accessible in your PATH separately.**
1. Set the required [Configuration](#configuration) environment variables or use a `config.yaml`.
1. Run the executable.
### 4. Build from Source
1. Clone the repository:
```bash
git clone https://github.com/perstarkse/minne.git
cd minne
```
1. **You will need to provide and run SurrealDB and have Chromium installed and accessible in your PATH separately.**
1. Set the required [Configuration](#configuration) environment variables or use a `config.yaml`.
1. Build and run:
- For the combined `main` binary:
```bash
cargo run --release --bin main
```
- For the `server` binary:
```bash
cargo run --release --bin server
```
- For the `worker` binary (if you want to run it separately):
```bash
cargo run --release --bin worker
```
The compiled binaries will be in `target/release/`.
- **Backend:** Rust with Axum framework and Server-Side Rendering (SSR)
- **Frontend:** HTML with HTMX and minimal JavaScript for interactivity
- **Database:** SurrealDB (graph, document, and vector search)
- **AI Integration:** OpenAI-compatible API with structured outputs
- **Web Processing:** Headless Chrome for robust webpage content extraction
## Configuration
Minne can be configured using environment variables or a `config.yaml` file placed in the working directory where you run the application. Environment variables take precedence over `config.yaml`.
Minne can be configured using environment variables or a `config.yaml` file. Environment variables take precedence over `config.yaml`.
**Required Configuration:**
### Required Configuration
- `SURREALDB_ADDRESS`: WebSocket address of your SurrealDB instance (e.g., `ws://127.0.0.1:8000` or `ws://surrealdb:8000` for Docker).
- `SURREALDB_USERNAME`: Username for SurrealDB (e.g., `root_user`).
- `SURREALDB_PASSWORD`: Password for SurrealDB (e.g., `root_password`).
- `SURREALDB_DATABASE`: Database name in SurrealDB (e.g., `minne_db`).
- `SURREALDB_NAMESPACE`: Namespace in SurrealDB (e.g., `minne_ns`).
- `OPENAI_API_KEY`: Your API key for OpenAI compatible endpoint (e.g., `sk-YourActualOpenAIKeyGoesHere`).
- `HTTP_PORT`: Port for the Minne server to listen on (Default: `3000`).
- `SURREALDB_ADDRESS`: WebSocket address of your SurrealDB instance (e.g., `ws://127.0.0.1:8000`)
- `SURREALDB_USERNAME`: Username for SurrealDB (e.g., `root_user`)
- `SURREALDB_PASSWORD`: Password for SurrealDB (e.g., `root_password`)
- `SURREALDB_DATABASE`: Database name in SurrealDB (e.g., `minne_db`)
- `SURREALDB_NAMESPACE`: Namespace in SurrealDB (e.g., `minne_ns`)
- `OPENAI_API_KEY`: Your API key for OpenAI compatible endpoint
- `HTTP_PORT`: Port for the Minne server (Default: `3000`)
**Optional Configuration:**
### Optional Configuration
- `RUST_LOG`: Controls logging level (e.g., `minne=info,tower_http=debug`).
- `DATA_DIR`: Directory to store local data like fetched webpage content (e.g., `./data`).
- `OPENAI_BASE_URL`: Base URL to a OpenAI API provider, such as Ollama.
- `RUST_LOG`: Controls logging level (e.g., `minne=info,tower_http=debug`)
- `DATA_DIR`: Directory to store local data (e.g., `./data`)
- `OPENAI_BASE_URL`: Base URL for custom AI providers (like Ollama)
- `RERANKING_ENABLED` / `reranking_enabled`: Set to `true` to enable the FastEmbed reranking stage (default `false`)
- `RERANKING_POOL_SIZE` / `reranking_pool_size`: Maximum concurrent reranker workers (defaults to `2`)
- `FASTEMBED_CACHE_DIR` / `fastembed_cache_dir`: Directory for cached FastEmbed models (defaults to `<data_dir>/fastembed/reranker`)
- `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` / `fastembed_show_download_progress`: Show model download progress when warming the cache (default `true`)
**Example `config.yaml`:**
### Example config.yaml
```yaml
surrealdb_address: "ws://127.0.0.1:8000"
@@ -215,66 +161,105 @@ http_port: 3000
# rust_log: "info"
```
## Application Architecture (Binaries)
## Installation Options
Minne offers flexibility in deployment:
### 1. Docker Compose (Recommended)
- **`main`**: A combined binary running both server (API, web UI) and worker (background tasks) in one process. Ideal for simpler setups.
- **`server`**: Runs only the server component.
- **`worker`**: Runs only the worker component, suitable for deployment on a machine with more resources for intensive tasks.
```bash
# Clone and run
git clone https://github.com/perstarkse/minne.git
cd minne
docker compose up -d
```
This modularity allows scaling and resource optimization. The `main` binary or the Docker Compose setup (using `main`) is sufficient for most users.
The included `docker-compose.yml` handles SurrealDB and Chromium dependencies automatically.
### 2. Nix
```bash
nix run 'github:perstarkse/minne#main'
```
This fetches Minne and all dependencies, including Chromium.
### 3. Pre-built Binaries
Download binaries for Windows, macOS, and Linux from the [GitHub Releases](https://github.com/perstarkse/minne/releases/latest).
**Requirements:** You'll need to provide SurrealDB and Chromium separately.
### 4. Build from Source
```bash
git clone https://github.com/perstarkse/minne.git
cd minne
cargo run --release --bin main
```
**Requirements:** SurrealDB and Chromium must be installed and accessible in your PATH.
## Application Architecture
Minne offers flexible deployment options:
- **`main`**: Combined server and worker in one process (recommended for most users)
- **`server`**: Web interface and API only
- **`worker`**: Background processing only (for resource optimization)
## Usage
Once Minne is running:
Once Minne is running at `http://localhost:3000`:
1. Access the web interface at `http://localhost:3000` (or your configured port).
1. On iOS, consider setting up the [Minne iOS Shortcut](https://www.icloud.com/shortcuts/9aa960600ec14329837ba4169f57a166) for effortless content sending. **Add the shortcut, replace the [insert_url] and the [insert_api_key] snippets**.
1. Add notes, URLs, **audio files**, and explore your growing knowledge graph.
1. Engage with the chat interface to query your saved content.
1. Try the experimental visual graph explorer to see connections.
1. **Web Interface**: Full-featured experience for desktop and mobile
2. **iOS Shortcut**: Use the [Minne iOS Shortcut](https://www.icloud.com/shortcuts/e433fbd7602f4e2eaa70dca162323477) for quick content capture
3. **Content Types**: Save notes, URLs, audio files, and more
4. **Knowledge Graph**: Explore automatic connections between your content
5. **Chat Interface**: Query your knowledge base conversationally
## AI Configuration & Model Selection
Minne relies on an OpenAI-compatible API for processing content, generating graph relationships, and powering the chat feature.
### Setting Up AI Providers
**Environment Variables / `config.yaml` keys:**
Minne uses OpenAI-compatible APIs. Configure via environment variables or `config.yaml`:
- `OPENAI_API_KEY` (required): Your API key for the chosen AI provider.
- `OPENAI_BASE_URL` (optional): Use this to override the default OpenAI API URL (`https://api.openai.com/v1`). This is essential for using local models via services like Ollama, or other API providers.
- **Example for Ollama:** `http://<your-ollama-ip>:11434/v1`
- `OPENAI_API_KEY` (required): Your API key
- `OPENAI_BASE_URL` (optional): Custom provider URL (e.g., Ollama: `http://localhost:11434/v1`)
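
Under the hood, requests to this endpoint go through an OpenAI-compatible client. A sketch of how a custom base URL is typically wired with the async-openai crate (illustrative; not necessarily Minne's exact construction):

```rust
use async_openai::{config::OpenAIConfig, Client};

fn make_client(api_key: &str, base_url: Option<&str>) -> Client<OpenAIConfig> {
    // Mirrors the OPENAI_API_KEY / OPENAI_BASE_URL settings above.
    let mut cfg = OpenAIConfig::new().with_api_key(api_key);
    if let Some(url) = base_url {
        cfg = cfg.with_api_base(url); // e.g. http://localhost:11434/v1 for Ollama
    }
    Client::with_config(cfg)
}
```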
### Changing Models
### Model Selection
Once you have configured the `OPENAI_BASE_URL` to point to your desired provider, you can select the specific models Minne should use.
1. Navigate to the `/admin` page in your Minne instance.
1. The page will list the models available from your configured endpoint. You can select different models for processing content and for chat.
1. **Important:** For content processing, Minne relies on structured outputs (function calling). The model and provider you select for this task **must** support this feature.
1. **Embedding Dimensions:** If you change the embedding model, you **must** update the "Embedding Dimensions" setting in the admin panel to match the output dimensions of your new model (e.g., `text-embedding-3-small` uses 1536, `nomic-embed-text` uses 768). Mismatched dimensions will cause errors. Some newer models accept a dimensions argument; for these, setting the dimensions to any supported value should work.
1. Access the `/admin` page in your Minne instance
2. Select models for content processing and chat from your configured provider
3. **Content Processing Requirements**: The model must support structured outputs
4. **Embedding Dimensions**: Update this setting when changing embedding models (e.g., 1536 for `text-embedding-3-small`, 768 for `nomic-embed-text`)
## Roadmap
I've developed Minne primarily for my own use, but having been in the self-hosted space for a long time and benefited from the efforts of others, I thought I'd share it with the community. Feature requests are welcome.
The roadmap as of now is:
Current development focus:
~~- Handle uploaded images wisely.~~
~~- An updated explorer of the graph database.~~
- A TUI frontend which opens your system default editor for improved writing and document management.
- TUI frontend with system editor integration
- Enhanced reranking for improved retrieval recall
- Additional content type support
## Contributing
Contributions are welcome! Whether it's bug reports, feature suggestions, documentation improvements, or code contributions, please feel free to open an issue or submit a pull request.
Feature requests and contributions are welcome!
## Development
Run tests with
```rust
```bash
# Run tests
cargo test
# Development build
cargo build
# Comprehensive linting
cargo clippy --workspace --all-targets --all-features
```
There is currently a variety of unit tests for commonly used functions. Additional tests, especially integration tests, would be very welcome.
The codebase includes extensive unit tests. Integration tests and additional contributions are welcome.
## Contributing
I've developed Minne primarily for my own use, but having been in the self-hosted space for a long time and benefited from the efforts of others, I thought I'd share it with the community. Feature requests are welcome.
## License
Minne is licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**. See the [LICENSE](LICENSE) file for details. This means if you run a modified version of Minne as a network service, you must also offer the source code of that modified version to its users.
Minne is licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**. See the [LICENSE](LICENSE) file for details.


@@ -214,6 +214,7 @@ mod tests {
openai_base_url: "..".into(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}


@@ -270,12 +270,29 @@ impl FileInfo {
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::config::{PdfIngestMode::LlmFirst, StorageKind};
use crate::utils::config::{AppConfig, PdfIngestMode::LlmFirst, StorageKind};
use axum::http::HeaderMap;
use axum_typed_multipart::FieldMetadata;
use std::io::Write;
use tempfile::NamedTempFile;
fn test_config(data_dir: &str) -> AppConfig {
AppConfig {
data_dir: data_dir.to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}
/// Creates a test temporary file with the given content
fn create_test_file(content: &[u8], file_name: &str) -> FieldData<NamedTempFile> {
let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
@@ -314,19 +331,7 @@ mod tests {
// Create a FileInfo instance with data_dir in /tmp
let user_id = "test_user";
let config = AppConfig {
data_dir: "/tmp/minne_test_data".to_string(), // Using /tmp which is typically on a different filesystem
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("/tmp/minne_test_data");
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)
@@ -375,19 +380,7 @@ mod tests {
// Create a FileInfo instance with data_dir in /tmp
let user_id = "test_user";
let config = AppConfig {
data_dir: "/tmp/minne_test_data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("/tmp/minne_test_data");
// Store the original file
let original_file_info = FileInfo::new(field_data, &db, user_id, &config)
@@ -432,19 +425,7 @@ mod tests {
// Create a FileInfo instance
let user_id = "test_user";
let config = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("./data");
let file_info = FileInfo::new(field_data, &db, user_id, &config).await;
// We can't fully test persistence to disk in unit tests,
@@ -490,19 +471,7 @@ mod tests {
let file_name = "original.txt";
let user_id = "test_user";
let config = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("./data");
let field_data1 = create_test_file(content, file_name);
let original_file_info = FileInfo::new(field_data1, &db, user_id, &config)
@@ -655,19 +624,7 @@ mod tests {
// Create and persist a test file via FileInfo::new
let user_id = "user123";
let cfg = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "".to_string(),
surrealdb_address: "".to_string(),
surrealdb_username: "".to_string(),
surrealdb_password: "".to_string(),
surrealdb_namespace: "".to_string(),
surrealdb_database: "".to_string(),
http_port: 0,
openai_base_url: "".to_string(),
storage: crate::utils::config::StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let cfg = test_config("./data");
let temp = create_test_file(b"test content", "test_file.txt");
let file_info = FileInfo::new(temp, &db, user_id, &cfg)
.await
@@ -710,19 +667,7 @@ mod tests {
let result = FileInfo::delete_by_id(
"nonexistent_id",
&db,
&AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "".to_string(),
surrealdb_address: "".to_string(),
surrealdb_username: "".to_string(),
surrealdb_password: "".to_string(),
surrealdb_namespace: "".to_string(),
surrealdb_database: "".to_string(),
http_port: 0,
openai_base_url: "".to_string(),
storage: crate::utils::config::StorageKind::Local,
pdf_ingest_mode: LlmFirst,
},
&test_config("./data"),
)
.await;
@@ -813,19 +758,7 @@ mod tests {
// Create a FileInfo instance with a custom data directory
let user_id = "test_user";
let custom_data_dir = "/tmp/minne_custom_data_dir";
let config = AppConfig {
data_dir: custom_data_dir.to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config(custom_data_dir);
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)


@@ -1,5 +1,6 @@
use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;
#[derive(Clone, Deserialize, Debug)]
#[serde(rename_all = "lowercase")]
@@ -42,6 +43,16 @@ pub struct AppConfig {
pub storage: StorageKind,
#[serde(default = "default_pdf_ingest_mode")]
pub pdf_ingest_mode: PdfIngestMode,
#[serde(default = "default_reranking_enabled")]
pub reranking_enabled: bool,
#[serde(default)]
pub reranking_pool_size: Option<usize>,
#[serde(default)]
pub fastembed_cache_dir: Option<String>,
#[serde(default)]
pub fastembed_show_download_progress: Option<bool>,
#[serde(default)]
pub fastembed_max_length: Option<usize>,
}
fn default_data_dir() -> String {
@@ -52,7 +63,66 @@ fn default_base_url() -> String {
"https://api.openai.com/v1".to_string()
}
fn default_reranking_enabled() -> bool {
false
}
pub fn ensure_ort_path() {
// Respect an explicit override from the environment.
if env::var_os("ORT_DYLIB_PATH").is_some() {
return;
}
// Otherwise probe next to the current executable for the runtime staged
// by the release workflow (the lib/ directory beside the binary).
if let Ok(mut exe) = env::current_exe() {
exe.pop();
if cfg!(target_os = "windows") {
for p in [
exe.join("onnxruntime.dll"),
exe.join("lib").join("onnxruntime.dll"),
] {
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
return;
}
}
}
let name = if cfg!(target_os = "macos") {
"libonnxruntime.dylib"
} else {
"libonnxruntime.so"
};
let p = exe.join("lib").join(name);
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
}
}
}
impl Default for AppConfig {
fn default() -> Self {
Self {
openai_api_key: String::new(),
surrealdb_address: String::new(),
surrealdb_username: String::new(),
surrealdb_password: String::new(),
surrealdb_namespace: String::new(),
surrealdb_database: String::new(),
data_dir: default_data_dir(),
http_port: 0,
openai_base_url: default_base_url(),
storage: default_storage_kind(),
pdf_ingest_mode: default_pdf_ingest_mode(),
reranking_enabled: default_reranking_enabled(),
reranking_pool_size: None,
fastembed_cache_dir: None,
fastembed_show_download_progress: None,
fastembed_max_length: None,
}
}
}
pub fn get_config() -> Result<AppConfig, ConfigError> {
ensure_ort_path();
let config = Config::builder()
.add_source(File::with_name("config").required(false))
.add_source(Environment::default())
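
The net effect of `ensure_ort_path` is a simple precedence: an explicit `ORT_DYLIB_PATH` always wins, otherwise the probe falls back to the `lib/` directory staged next to the binary by the release workflow. A short sketch, assuming the module path `common::utils::config` used elsewhere in this diff (the `/opt/...` path is purely illustrative):

```rust
use std::env;

fn main() {
    // With an explicit override, ensure_ort_path() returns immediately.
    env::set_var("ORT_DYLIB_PATH", "/opt/onnxruntime/lib/libonnxruntime.so");
    common::utils::config::ensure_ort_path();

    // Without the override, the probe checks <exe_dir>/lib/libonnxruntime.{so,dylib}
    // (or onnxruntime.dll beside the exe on Windows) and sets the variable if found.
    println!("ORT_DYLIB_PATH = {:?}", env::var_os("ORT_DYLIB_PATH"));
}
```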


@@ -19,6 +19,7 @@ surrealdb = { workspace = true }
futures = { workspace = true }
async-openai = { workspace = true }
uuid = { workspace = true }
fastembed = { workspace = true }
common = { path = "../common", features = ["test-utils"] }
state-machines = { workspace = true }


@@ -8,19 +8,14 @@ use async_openai::{
};
use common::{
error::AppError,
storage::{
db::SurrealDbClient,
types::{
message::{format_history, Message},
system_settings::SystemSettings,
},
storage::types::{
message::{format_history, Message},
system_settings::SystemSettings,
},
};
use serde::Deserialize;
use serde_json::Value;
use crate::{retrieve_entities, retrieved_entities_to_json};
use super::answer_retrieval_helper::get_query_response_schema;
#[derive(Debug, Deserialize)]
@@ -36,53 +31,12 @@ pub struct LLMResponseFormat {
pub references: Vec<Reference>,
}
/// Orchestrates query processing and returns an answer with references
///
/// Takes a query and uses the provided clients to generate an answer with supporting references.
///
/// # Arguments
///
/// * `surreal_db_client` - Client for `SurrealDB` interactions
/// * `openai_client` - Client for `OpenAI` API calls
/// * `query` - The user's query string
/// * `user_id` - The user's id
///
/// # Returns
///
/// Returns a tuple of the answer and its references, or an API error
#[derive(Debug)]
pub struct Answer {
pub content: String,
pub references: Vec<String>,
}
pub async fn get_answer_with_references(
surreal_db_client: &SurrealDbClient,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
query: &str,
user_id: &str,
) -> Result<Answer, AppError> {
let entities = retrieve_entities(surreal_db_client, openai_client, query, user_id).await?;
let settings = SystemSettings::get_current(surreal_db_client).await?;
let entities_json = retrieved_entities_to_json(&entities);
let user_message = create_user_message(&entities_json, query);
let request = create_chat_request(user_message, &settings)?;
let response = openai_client.chat().create(request).await?;
let llm_response = process_llm_response(response).await?;
Ok(Answer {
content: llm_response.answer,
references: llm_response
.references
.into_iter()
.map(|r| r.reference)
.collect(),
})
}
pub fn create_user_message(entities_json: &Value, query: &str) -> String {
format!(
r"


@@ -3,6 +3,7 @@ pub mod answer_retrieval_helper;
pub mod fts;
pub mod graph;
pub mod pipeline;
pub mod reranking;
pub mod scoring;
pub mod vector;
@@ -13,6 +14,7 @@ use common::{
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
},
};
use reranking::RerankerLease;
use tracing::instrument;
pub use pipeline::{retrieved_entities_to_json, RetrievalConfig, RetrievalTuning};
@@ -39,6 +41,7 @@ pub async fn retrieve_entities(
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
input_text: &str,
user_id: &str,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
pipeline::run_pipeline(
db_client,
@@ -46,6 +49,7 @@ pub async fn retrieve_entities(
input_text,
user_id,
RetrievalConfig::default(),
reranker,
)
.await
}
@@ -142,6 +146,7 @@ mod tests {
"Rust concurrency async tasks",
user_id,
RetrievalConfig::default(),
None,
)
.await
.expect("Hybrid retrieval failed");
@@ -232,6 +237,7 @@ mod tests {
"Rust concurrency async tasks",
user_id,
RetrievalConfig::default(),
None,
)
.await
.expect("Hybrid retrieval failed");


@@ -17,6 +17,9 @@ pub struct RetrievalTuning {
pub graph_score_decay: f32,
pub graph_seed_min_score: f32,
pub graph_vector_inheritance: f32,
pub rerank_blend_weight: f32,
pub rerank_scores_only: bool,
pub rerank_keep_top: usize,
}
impl Default for RetrievalTuning {
@@ -36,6 +39,9 @@ impl Default for RetrievalTuning {
graph_score_decay: 0.75,
graph_seed_min_score: 0.4,
graph_vector_inheritance: 0.6,
rerank_blend_weight: 0.65,
rerank_scores_only: false,
rerank_keep_top: 8,
}
}
}
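
The three new fields feed the blending step in `apply_rerank_results` further down: `rerank_blend_weight` mixes the original fused score with the normalized reranker score, `rerank_scores_only` drops the fused score entirely, and `rerank_keep_top` truncates the candidate list afterwards. A minimal sketch of the blend, assuming both scores are already normalized to [0, 1]:

```rust
// Sketch of the per-candidate blend; names mirror the tuning fields.
fn blend(original_fused: f32, rerank_normalized: f32, weight: f32, scores_only: bool) -> f32 {
    let w = if scores_only { 1.0 } else { weight.clamp(0.0, 1.0) };
    (original_fused * (1.0 - w) + rerank_normalized * w).clamp(0.0, 1.0)
}

// With the default weight of 0.65: blend(0.4, 0.9, 0.65, false) == 0.14 + 0.585 == 0.725,
// i.e. 65% reranker signal, 35% original hybrid score.
```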


@@ -4,7 +4,7 @@ mod state;
pub use config::{RetrievalConfig, RetrievalTuning};
use crate::RetrievedEntity;
use crate::{reranking::RerankerLease, RetrievedEntity};
use async_openai::Client;
use common::{error::AppError, storage::db::SurrealDbClient};
use tracing::info;
@@ -16,6 +16,7 @@ pub async fn run_pipeline(
input_text: &str,
user_id: &str,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
let machine = state::ready();
let input_chars = input_text.chars().count();
@@ -35,11 +36,13 @@ pub async fn run_pipeline(
input_text.to_owned(),
user_id.to_owned(),
config,
reranker,
);
let machine = stages::embed(machine, &mut ctx).await?;
let machine = stages::collect_candidates(machine, &mut ctx).await?;
let machine = stages::expand_graph(machine, &mut ctx).await?;
let machine = stages::attach_chunks(machine, &mut ctx).await?;
let machine = stages::rerank(machine, &mut ctx).await?;
let results = stages::assemble(machine, &mut ctx)?;
Ok(results)
@@ -53,6 +56,7 @@ pub async fn run_pipeline_with_embedding(
input_text: &str,
user_id: &str,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
let machine = state::ready();
let mut ctx = stages::PipelineContext::with_embedding(
@@ -62,11 +66,13 @@ pub async fn run_pipeline_with_embedding(
input_text.to_owned(),
user_id.to_owned(),
config,
reranker,
);
let machine = stages::embed(machine, &mut ctx).await?;
let machine = stages::collect_candidates(machine, &mut ctx).await?;
let machine = stages::expand_graph(machine, &mut ctx).await?;
let machine = stages::attach_chunks(machine, &mut ctx).await?;
let machine = stages::rerank(machine, &mut ctx).await?;
let results = stages::assemble(machine, &mut ctx)?;
Ok(results)


@@ -7,6 +7,7 @@ use common::{
},
utils::embedding::generate_embedding,
};
use fastembed::RerankResult;
use futures::{stream::FuturesUnordered, StreamExt};
use state_machines::core::GuardError;
use std::collections::{HashMap, HashSet};
@@ -15,6 +16,7 @@ use tracing::{debug, instrument, warn};
use crate::{
fts::find_items_by_fts,
graph::{find_entities_by_relationship_by_id, find_entities_by_source_ids},
reranking::RerankerLease,
scoring::{
clamp_unit, fuse_scores, merge_scored_by_id, min_max_normalize, sort_by_fused_desc,
FusionWeights, Scored,
@@ -27,6 +29,7 @@ use super::{
config::RetrievalConfig,
state::{
CandidatesLoaded, ChunksAttached, Embedded, GraphExpanded, HybridRetrievalMachine, Ready,
Reranked,
},
};
@@ -41,6 +44,7 @@ pub struct PipelineContext<'a> {
pub chunk_candidates: HashMap<String, Scored<TextChunk>>,
pub filtered_entities: Vec<Scored<KnowledgeEntity>>,
pub chunk_values: Vec<Scored<TextChunk>>,
pub reranker: Option<RerankerLease>,
}
impl<'a> PipelineContext<'a> {
@@ -50,6 +54,7 @@ impl<'a> PipelineContext<'a> {
input_text: String,
user_id: String,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Self {
Self {
db_client,
@@ -62,6 +67,7 @@ impl<'a> PipelineContext<'a> {
chunk_candidates: HashMap::new(),
filtered_entities: Vec::new(),
chunk_values: Vec::new(),
reranker,
}
}
@@ -73,8 +79,16 @@ impl<'a> PipelineContext<'a> {
input_text: String,
user_id: String,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Self {
let mut ctx = Self::new(db_client, openai_client, input_text, user_id, config);
let mut ctx = Self::new(
db_client,
openai_client,
input_text,
user_id,
config,
reranker,
);
ctx.query_embedding = Some(query_embedding);
ctx
}
@@ -327,9 +341,58 @@ pub async fn attach_chunks(
}
#[instrument(level = "trace", skip_all)]
pub fn assemble(
pub async fn rerank(
machine: HybridRetrievalMachine<(), ChunksAttached>,
ctx: &mut PipelineContext<'_>,
) -> Result<HybridRetrievalMachine<(), Reranked>, AppError> {
let mut applied = false;
if let Some(reranker) = ctx.reranker.as_ref() {
if ctx.filtered_entities.len() > 1 {
let documents = build_rerank_documents(ctx, ctx.config.tuning.max_chunks_per_entity);
if documents.len() > 1 {
match reranker.rerank(&ctx.input_text, documents).await {
Ok(results) if !results.is_empty() => {
apply_rerank_results(ctx, results);
applied = true;
}
Ok(_) => {
debug!("Reranker returned no results; retaining original ordering");
}
Err(err) => {
warn!(
error = %err,
"Reranking failed; continuing with original ordering"
);
}
}
} else {
debug!(
document_count = documents.len(),
"Skipping reranking stage; insufficient document context"
);
}
} else {
debug!("Skipping reranking stage; less than two entities available");
}
} else {
debug!("No reranker lease provided; skipping reranking stage");
}
if applied {
debug!("Applied reranking adjustments to candidate ordering");
}
machine
.rerank()
.map_err(|(_, guard)| map_guard_error("rerank", guard))
}
#[instrument(level = "trace", skip_all)]
pub fn assemble(
machine: HybridRetrievalMachine<(), Reranked>,
ctx: &mut PipelineContext<'_>,
) -> Result<Vec<RetrievedEntity>, AppError> {
debug!("Assembling final retrieved entities");
let tuning = &ctx.config.tuning;
@@ -561,6 +624,113 @@ async fn enrich_chunks_from_entities(
Ok(())
}
fn build_rerank_documents(ctx: &PipelineContext<'_>, max_chunks_per_entity: usize) -> Vec<String> {
if ctx.filtered_entities.is_empty() {
return Vec::new();
}
let mut chunk_by_source: HashMap<&str, Vec<&Scored<TextChunk>>> = HashMap::new();
for chunk in &ctx.chunk_values {
chunk_by_source
.entry(chunk.item.source_id.as_str())
.or_default()
.push(chunk);
}
ctx.filtered_entities
.iter()
.map(|entity| {
let mut doc = format!(
"Name: {}\nType: {:?}\nDescription: {}\n",
entity.item.name, entity.item.entity_type, entity.item.description
);
if let Some(chunks) = chunk_by_source.get(entity.item.source_id.as_str()) {
let mut chunk_refs = chunks.clone();
chunk_refs.sort_by(|a, b| {
b.fused
.partial_cmp(&a.fused)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut header_added = false;
for chunk in chunk_refs.into_iter().take(max_chunks_per_entity.max(1)) {
let snippet = chunk.item.chunk.trim();
if snippet.is_empty() {
continue;
}
if !header_added {
doc.push_str("Chunks:\n");
header_added = true;
}
doc.push_str("- ");
doc.push_str(snippet);
doc.push('\n');
}
}
doc
})
.collect()
}
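
For orientation, a document assembled by `build_rerank_documents` is a plain-text block per entity, with the highest-fused chunks appended under a `Chunks:` header. A hypothetical example (entity fields and snippets invented for illustration):

```rust
// Hypothetical output for one entity with two non-empty chunks.
let doc = "Name: Tokio\n\
           Type: Technology\n\
           Description: Async runtime for Rust\n\
           Chunks:\n\
           - Tokio schedules tasks on a multi-threaded runtime.\n\
           - Use spawn_blocking for CPU-bound work.\n";
```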
fn apply_rerank_results(ctx: &mut PipelineContext<'_>, results: Vec<RerankResult>) {
if results.is_empty() || ctx.filtered_entities.is_empty() {
return;
}
let mut remaining: Vec<Option<Scored<KnowledgeEntity>>> =
std::mem::take(&mut ctx.filtered_entities)
.into_iter()
.map(Some)
.collect();
let raw_scores: Vec<f32> = results.iter().map(|r| r.score).collect();
let normalized_scores = min_max_normalize(&raw_scores);
let use_only = ctx.config.tuning.rerank_scores_only;
let blend = if use_only {
1.0
} else {
clamp_unit(ctx.config.tuning.rerank_blend_weight)
};
let mut reranked: Vec<Scored<KnowledgeEntity>> = Vec::with_capacity(remaining.len());
for (result, normalized) in results.into_iter().zip(normalized_scores.into_iter()) {
if let Some(slot) = remaining.get_mut(result.index) {
if let Some(mut candidate) = slot.take() {
let original = candidate.fused;
let blended = if use_only {
clamp_unit(normalized)
} else {
clamp_unit(original * (1.0 - blend) + normalized * blend)
};
candidate.update_fused(blended);
reranked.push(candidate);
}
} else {
warn!(
result_index = result.index,
"Reranker returned out-of-range index; skipping"
);
}
if reranked.len() == remaining.len() {
break;
}
}
for slot in remaining.into_iter() {
if let Some(candidate) = slot {
reranked.push(candidate);
}
}
ctx.filtered_entities = reranked;
let keep_top = ctx.config.tuning.rerank_keep_top;
if keep_top > 0 && ctx.filtered_entities.len() > keep_top {
ctx.filtered_entities.truncate(keep_top);
}
}
fn estimate_tokens(text: &str, avg_chars_per_token: usize) -> usize {
let chars = text.chars().count().max(1);
(chars / avg_chars_per_token).max(1)


@@ -4,18 +4,20 @@ state_machine! {
name: HybridRetrievalMachine,
state: HybridRetrievalState,
initial: Ready,
states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Completed, Failed],
states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Reranked, Completed, Failed],
events {
embed { transition: { from: Ready, to: Embedded } }
collect_candidates { transition: { from: Embedded, to: CandidatesLoaded } }
expand_graph { transition: { from: CandidatesLoaded, to: GraphExpanded } }
attach_chunks { transition: { from: GraphExpanded, to: ChunksAttached } }
assemble { transition: { from: ChunksAttached, to: Completed } }
rerank { transition: { from: ChunksAttached, to: Reranked } }
assemble { transition: { from: Reranked, to: Completed } }
abort {
transition: { from: Ready, to: Failed }
transition: { from: CandidatesLoaded, to: Failed }
transition: { from: GraphExpanded, to: Failed }
transition: { from: ChunksAttached, to: Failed }
transition: { from: Reranked, to: Failed }
}
}
}
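
Changing `assemble` to consume `Reranked` instead of `ChunksAttached` means every pipeline path must now pass through `rerank` (even when it no-ops) before assembling, and the compiler enforces that ordering. A hand-rolled sketch of the same typestate idea, independent of the `state_machine!` macro:

```rust
// `assemble` only exists on the Reranked state, so skipping `rerank()`
// is a compile error rather than a runtime bug.
struct ChunksAttached;
struct Reranked;

impl ChunksAttached {
    fn rerank(self) -> Reranked { Reranked }
}

impl Reranked {
    fn assemble(self) -> Vec<String> { Vec::new() }
}

fn run() -> Vec<String> {
    ChunksAttached.rerank().assemble()
    // ChunksAttached.assemble() would not compile.
}
```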


@@ -0,0 +1,170 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
thread::available_parallelism,
};
use common::{error::AppError, utils::config::AppConfig};
use fastembed::{RerankInitOptions, RerankResult, TextRerank};
use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
use tracing::debug;
static NEXT_ENGINE: AtomicUsize = AtomicUsize::new(0);
fn pick_engine_index(pool_len: usize) -> usize {
let n = NEXT_ENGINE.fetch_add(1, Ordering::Relaxed);
n % pool_len
}
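
Assuming a fresh counter, successive calls walk the pool round-robin; the `Relaxed` ordering is fine here because the counter only spreads load and synchronizes nothing:

```rust
// With pool_len = 2, picks cycle 0, 1, 0, 1, ...
let picks: Vec<usize> = (0..4).map(|_| pick_engine_index(2)).collect();
assert_eq!(picks, vec![0, 1, 0, 1]);
```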
pub struct RerankerPool {
engines: Vec<Arc<Mutex<TextRerank>>>,
semaphore: Arc<Semaphore>,
}
impl RerankerPool {
/// Build the pool at startup.
/// `pool_size` controls max parallel reranks.
pub fn new(pool_size: usize) -> Result<Arc<Self>, AppError> {
Self::new_with_options(pool_size, RerankInitOptions::default())
}
fn new_with_options(
pool_size: usize,
init_options: RerankInitOptions,
) -> Result<Arc<Self>, AppError> {
if pool_size == 0 {
return Err(AppError::Validation(
"RERANKING_POOL_SIZE must be greater than zero".to_string(),
));
}
fs::create_dir_all(&init_options.cache_dir)?;
let mut engines = Vec::with_capacity(pool_size);
for x in 0..pool_size {
debug!("Creating reranking engine: {x}");
let model = TextRerank::try_new(init_options.clone())
.map_err(|e| AppError::InternalError(e.to_string()))?;
engines.push(Arc::new(Mutex::new(model)));
}
Ok(Arc::new(Self {
engines,
semaphore: Arc::new(Semaphore::new(pool_size)),
}))
}
/// Initialize a pool using application configuration.
pub fn maybe_from_config(config: &AppConfig) -> Result<Option<Arc<Self>>, AppError> {
if !config.reranking_enabled {
return Ok(None);
}
let pool_size = config.reranking_pool_size.unwrap_or_else(default_pool_size);
let init_options = build_rerank_init_options(config)?;
Self::new_with_options(pool_size, init_options).map(Some)
}
/// Check out capacity + pick an engine.
/// This returns a lease that can perform rerank().
pub async fn checkout(self: &Arc<Self>) -> RerankerLease {
// Acquire a permit. This enforces backpressure.
let permit = self
.semaphore
.clone()
.acquire_owned()
.await
.expect("semaphore closed");
// Pick an engine.
// This is naive: just pick based on a simple modulo counter.
// We use an atomic counter to avoid always choosing index 0.
let idx = pick_engine_index(self.engines.len());
let engine = self.engines[idx].clone();
RerankerLease {
_permit: permit,
engine,
}
}
}
fn default_pool_size() -> usize {
available_parallelism()
.map(|value| value.get().min(2))
.unwrap_or(2)
.max(1)
}
fn is_truthy(value: &str) -> bool {
matches!(
value.trim().to_ascii_lowercase().as_str(),
"1" | "true" | "yes" | "on"
)
}
fn build_rerank_init_options(config: &AppConfig) -> Result<RerankInitOptions, AppError> {
let mut options = RerankInitOptions::default();
let cache_dir = config
.fastembed_cache_dir
.as_ref()
.map(PathBuf::from)
.or_else(|| env::var("RERANKING_CACHE_DIR").ok().map(PathBuf::from))
.or_else(|| env::var("FASTEMBED_CACHE_DIR").ok().map(PathBuf::from))
.unwrap_or_else(|| {
Path::new(&config.data_dir)
.join("fastembed")
.join("reranker")
});
fs::create_dir_all(&cache_dir)?;
options.cache_dir = cache_dir;
let show_progress = config
.fastembed_show_download_progress
.or_else(|| env_bool("RERANKING_SHOW_DOWNLOAD_PROGRESS"))
.or_else(|| env_bool("FASTEMBED_SHOW_DOWNLOAD_PROGRESS"))
.unwrap_or(true);
options.show_download_progress = show_progress;
if let Some(max_length) = config.fastembed_max_length.or_else(|| {
env::var("RERANKING_MAX_LENGTH")
.ok()
.and_then(|value| value.parse().ok())
}) {
options.max_length = max_length;
}
Ok(options)
}
fn env_bool(key: &str) -> Option<bool> {
env::var(key).ok().map(|value| is_truthy(&value))
}
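
The cache-dir resolution above follows a fixed precedence: the explicit config value, then `RERANKING_CACHE_DIR`, then `FASTEMBED_CACHE_DIR`, and finally `<data_dir>/fastembed/reranker`. Condensed into one function (a sketch; `configured` and `data_dir` stand in for the `AppConfig` fields):

```rust
use std::{env, path::PathBuf};

// Same fallback chain as build_rerank_init_options, isolated.
fn resolve_cache_dir(configured: Option<&str>, data_dir: &str) -> PathBuf {
    configured
        .map(PathBuf::from)
        .or_else(|| env::var("RERANKING_CACHE_DIR").ok().map(PathBuf::from))
        .or_else(|| env::var("FASTEMBED_CACHE_DIR").ok().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from(data_dir).join("fastembed").join("reranker"))
}
```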
/// Active lease on a single TextRerank instance.
pub struct RerankerLease {
// When this drops, the semaphore permit is released.
_permit: OwnedSemaphorePermit,
engine: Arc<Mutex<TextRerank>>,
}
impl RerankerLease {
pub async fn rerank(
&self,
query: &str,
documents: Vec<String>,
) -> Result<Vec<RerankResult>, AppError> {
// Lock this specific engine so we get &mut TextRerank
let mut guard = self.engine.lock().await;
guard
.rerank(query.to_owned(), documents, false, None)
.map_err(|e| AppError::InternalError(e.to_string()))
}
}
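
Putting pool and lease together, the intended call pattern looks roughly like this (a sketch; error handling trimmed, and the query/documents are invented):

```rust
// Build the pool once at startup, check out a lease per request, and
// let the lease drop to release the semaphore permit.
async fn example(pool: std::sync::Arc<RerankerPool>) -> Result<(), AppError> {
    let lease = pool.checkout().await; // waits if all engines are busy
    let results = lease
        .rerank("rust async runtime", vec!["doc a".into(), "doc b".into()])
        .await?;
    for r in results {
        println!("index={} score={}", r.index, r.score);
    }
    Ok(()) // `lease` drops here, releasing the permit
}
```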


@@ -3,10 +3,10 @@
"devenv": {
"locked": {
"dir": "src/modules",
"lastModified": 1746681099,
"lastModified": 1761839147,
"owner": "cachix",
"repo": "devenv",
"rev": "a7f2ea275621391209fd702f5ddced32dd56a4e2",
"rev": "bb7849648b68035f6b910120252c22b28195cf54",
"type": "github"
},
"original": {
@@ -16,13 +16,31 @@
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": "nixpkgs",
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1761893049,
"owner": "nix-community",
"repo": "fenix",
"rev": "c2ac9a5c0d6d16630c3b225b874bd14528d1abe6",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1733328505,
"lastModified": 1761588595,
"owner": "edolstra",
"repo": "flake-compat",
"rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
"rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
"type": "github"
},
"original": {
@@ -40,10 +58,10 @@
]
},
"locked": {
"lastModified": 1746537231,
"lastModified": 1760663237,
"owner": "cachix",
"repo": "git-hooks.nix",
"rev": "fa466640195d38ec97cf0493d6d6882bc4d14969",
"rev": "ca5b894d3e3e151ffc1db040b6ce4dcc75d31c37",
"type": "github"
},
"original": {
@@ -74,10 +92,25 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1746576598,
"lastModified": 1761672384,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "b3582c75c7f21ce0b429898980eddbbf05c68e55",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1761880412,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "a7fc11be66bdfb5cdde611ee5ce381c183da8386",
"type": "github"
},
"original": {
@@ -90,11 +123,48 @@
"root": {
"inputs": {
"devenv": "devenv",
"fenix": "fenix",
"git-hooks": "git-hooks",
"nixpkgs": "nixpkgs",
"nixpkgs": "nixpkgs_2",
"pre-commit-hooks": [
"git-hooks"
],
"rust-overlay": "rust-overlay"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1761849405,
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "f7de8ae045a5fe80f1203c5a1c3015b05f7c3550",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1761878277,
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "6604534e44090c917db714faa58d47861657690c",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
}
},


@@ -11,14 +11,24 @@
pkgs.openssl
pkgs.nodejs
pkgs.vscode-langservers-extracted
pkgs.cargo-dist
pkgs.cargo-xwin
pkgs.clang
pkgs.onnxruntime
];
languages.rust = {
enable = true;
components = ["rustc" "clippy" "rustfmt" "cargo" "rust-analyzer"];
channel = "nightly";
targets = ["x86_64-unknown-linux-gnu" "x86_64-pc-windows-msvc"];
mold.enable = true;
};
env = {
ORT_DYLIB_PATH = "${pkgs.onnxruntime}/lib/libonnxruntime.so";
};
processes = {
surreal_db.exec = "docker run --rm --pull always -p 8000:8000 --net=host --user $(id -u) -v $(pwd)/database:/database surrealdb/surrealdb:latest-dev start rocksdb:/database/database.db --user root_user --pass root_password";
};


@@ -1,15 +1,11 @@
# yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
inputs:
fenix:
url: github:nix-community/fenix
nixpkgs:
url: github:nixos/nixpkgs/nixpkgs-unstable
# If you're using non-OSS software, you can set allowUnfree to true.
rust-overlay:
url: github:oxalica/rust-overlay
inputs:
nixpkgs:
follows: nixpkgs
allowUnfree: true
# If you're willing to use a package that's vulnerable
# permittedInsecurePackages:
# - "openssl-1.1.1w"
# If you have more than one devenv you can merge them
#imports:
# - ./backend


@@ -4,9 +4,11 @@ members = ["cargo:."]
# Config for 'dist'
[dist]
# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.28.0"
cargo-dist-version = "0.30.0"
# CI backends to support
ci = "github"
# Extra static files to include in each App (path relative to this Cargo.toml's dir)
include = ["lib"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)


@@ -1,5 +1,3 @@
version: '3.8'
services:
minne:
build: .
@@ -12,10 +10,11 @@ services:
SURREALDB_PASSWORD: "root_password"
SURREALDB_DATABASE: "test"
SURREALDB_NAMESPACE: "test"
OPENAI_API_KEY: "sk-key"
OPENAI_API_KEY: "sk-add-your-key"
DATA_DIR: "./data"
HTTP_PORT: 3000
RUST_LOG: "info"
RERANKING_ENABLED: false ## Change to true to enable reranking
depends_on:
- surrealdb
networks:
@@ -31,7 +30,7 @@ services:
- ./database:/database # Mounts a 'database' folder from your project directory
command: >
start
--log debug
--log info
--user root_user
--pass root_password
rocksdb:./database/database.db
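
`RERANKING_ENABLED` reaches the application through the `Environment::default()` source in `get_config` (shown at the top of this changeset), which maps environment variables onto config fields by lowercased name. The reranking-related fields, as inferred from `maybe_from_config` and `build_rerank_init_options` (a sketch, not the actual struct):

```rust
// Inferred shape of the reranking-related AppConfig fields; e.g.
// RERANKING_ENABLED=true deserializes into `reranking_enabled`.
#[derive(serde::Deserialize)]
struct RerankingConfigSketch {
    reranking_enabled: bool,              // RERANKING_ENABLED
    reranking_pool_size: Option<usize>,   // RERANKING_POOL_SIZE
    fastembed_cache_dir: Option<String>,  // FASTEMBED_CACHE_DIR
    fastembed_show_download_progress: Option<bool>,
    fastembed_max_length: Option<usize>,
    data_dir: String,                     // fallback cache location
}
```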

flake.lock generated

@@ -1,5 +1,20 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1760924934,
"narHash": "sha256-tuuqY5aU7cUkR71sO2TraVKK2boYrdW3gCSXUkF4i44=",
"owner": "ipetkov",
"repo": "crane",
"rev": "c6b4d5308293d0d04fcfeee92705017537cad02f",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
@@ -20,11 +35,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1746232882,
"narHash": "sha256-MHmBH2rS8KkRRdoU/feC/dKbdlMkcNkB5mwkuipVHeQ=",
"lastModified": 1761672384,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "7a2622e2c0dbad5c4493cb268aba12896e28b008",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
},
"original": {
@@ -36,6 +51,7 @@
},
"root": {
"inputs": {
"crane": "crane",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}

flake.nix

@@ -4,77 +4,83 @@
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
crane.url = "github:ipetkov/crane";
};
outputs = {
self,
nixpkgs,
flake-utils,
crane,
}:
flake-utils.lib.eachDefaultSystem (
system: let
pkgs = nixpkgs.legacyPackages.${system};
# --- Minne Package Definition ---
minne-pkg = pkgs.rustPlatform.buildRustPackage {
pname = "minne";
version = "0.1.0";
src = self;
cargoLock = {
lockFile = ./Cargo.lock;
};
# Skip tests due to testing fs operations
doCheck = false;
nativeBuildInputs = [
pkgs.pkg-config
pkgs.rustfmt
pkgs.makeWrapper # For the postInstall hook
];
buildInputs = [
pkgs.openssl
pkgs.chromium # Runtime dependency for the browser
];
# Wrap the actual executables to provide CHROME at runtime
postInstall = let
chromium_executable = "${pkgs.chromium}/bin/chromium";
in ''
wrapProgram $out/bin/main \
--set CHROME "${chromium_executable}"
wrapProgram $out/bin/worker \
--set CHROME "${chromium_executable}"
'';
meta = with pkgs.lib; {
description = "Minne Application";
license = licenses.mit;
};
};
in {
packages = {
minne = minne-pkg;
default = self.packages.${system}.minne;
flake-utils.lib.eachDefaultSystem (system: let
pkgs = nixpkgs.legacyPackages.${system};
lib = pkgs.lib;
craneLib = crane.mkLib pkgs;
libExt =
if pkgs.stdenv.isDarwin
then "dylib"
else "so";
minne-pkg = craneLib.buildPackage {
src = lib.cleanSourceWith {
src = ./.;
filter = let
extraPaths = [
(toString ./common/migrations)
(toString ./common/schemas)
(toString ./html-router/templates)
(toString ./html-router/assets)
];
in
path: type: let
p = toString path;
in
craneLib.filterCargoSources path type
|| lib.any (x: lib.hasPrefix x p) extraPaths;
};
apps = {
main = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
worker = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "worker";
};
server = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "server";
};
default = self.apps.${system}.main;
pname = "minne";
version = "0.2.6";
doCheck = false;
nativeBuildInputs = [pkgs.pkg-config pkgs.rustfmt pkgs.makeWrapper];
buildInputs = [pkgs.openssl pkgs.chromium pkgs.onnxruntime];
postInstall = ''
wrapProgram $out/bin/main \
--set CHROME ${pkgs.chromium}/bin/chromium \
--set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt}
for b in worker server; do
if [ -x "$out/bin/$b" ]; then
wrapProgram $out/bin/$b \
--set CHROME ${pkgs.chromium}/bin/chromium \
--set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt}
fi
done
'';
};
in {
packages = {
minne-pkg = minne-pkg;
default = minne-pkg;
};
apps = {
main = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
}
);
worker = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "worker";
};
server = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "server";
};
default = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
};
});
}

File diff suppressed because one or more lines are too long


@@ -1,6 +1,7 @@
use common::storage::db::SurrealDbClient;
use common::utils::template_engine::{ProvidesTemplateEngine, TemplateEngine};
use common::{create_template_engine, storage::db::ProvidesDb, utils::config::AppConfig};
use composite_retrieval::reranking::RerankerPool;
use std::sync::Arc;
use tracing::debug;
@@ -13,6 +14,7 @@ pub struct HtmlState {
pub templates: Arc<TemplateEngine>,
pub session_store: Arc<SessionStoreType>,
pub config: AppConfig,
pub reranker_pool: Option<Arc<RerankerPool>>,
}
impl HtmlState {
@@ -21,6 +23,7 @@ impl HtmlState {
openai_client: Arc<OpenAIClientType>,
session_store: Arc<SessionStoreType>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Result<Self, Box<dyn std::error::Error>> {
let template_engine = create_template_engine!("templates");
debug!("Template engine created for html_router.");
@@ -31,6 +34,7 @@ impl HtmlState {
session_store,
templates: Arc::new(template_engine),
config,
reranker_pool,
})
}
}

View File

@@ -118,11 +118,17 @@ pub async fn get_response_stream(
};
// 2. Retrieve knowledge entities
let rerank_lease = match state.reranker_pool.as_ref() {
Some(pool) => Some(pool.checkout().await),
None => None,
};
let entities = match retrieve_entities(
&state.db,
&state.openai_client,
&user_message.content,
&user.id,
rerank_lease,
)
.await
{
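
The two-armed `match` is deliberate: `checkout()` is async, and `.await` is not allowed inside the closure that `Option::map` takes. Since the same dance repeats at every call site, a small helper could centralize it (hypothetical, not part of this changeset):

```rust
// Hypothetical helper; Option::map(|pool| pool.checkout().await) would
// not compile because the closure is not an async context.
async fn maybe_checkout(pool: Option<&std::sync::Arc<RerankerPool>>) -> Option<RerankerLease> {
    match pool {
        Some(p) => Some(p.checkout().await),
        None => None,
    }
}

// Usage: let rerank_lease = maybe_checkout(state.reranker_pool.as_ref()).await;
```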


@@ -195,8 +195,19 @@ pub async fn suggest_knowledge_relationships(
if !query_parts.is_empty() {
let query = query_parts.join(" ");
if let Ok(results) =
retrieve_entities(&state.db, &state.openai_client, &query, &user.id).await
let rerank_lease = match state.reranker_pool.as_ref() {
Some(pool) => Some(pool.checkout().await),
None => None,
};
if let Ok(results) = retrieve_entities(
&state.db,
&state.openai_client,
&query,
&user.id,
rerank_lease,
)
.await
{
for RetrievedEntity { entity, score, .. } in results {
if suggestion_scores.len() >= MAX_RELATIONSHIP_SUGGESTIONS {


@@ -26,6 +26,7 @@ use common::{
},
utils::config::AppConfig,
};
use composite_retrieval::reranking::RerankerPool;
use tracing::{debug, info, warn};
use self::{
@@ -45,9 +46,14 @@ impl IngestionPipeline {
db: Arc<SurrealDbClient>,
openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Result<Self, AppError> {
let services =
DefaultPipelineServices::new(db.clone(), openai_client.clone(), config.clone());
let services = DefaultPipelineServices::new(
db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool,
);
Self::with_services(db, IngestionConfig::default(), Arc::new(services))
}


@@ -18,7 +18,9 @@ use common::{
},
utils::{config::AppConfig, embedding::generate_embedding},
};
use composite_retrieval::{retrieve_entities, retrieved_entities_to_json, RetrievedEntity};
use composite_retrieval::{
reranking::RerankerPool, retrieve_entities, retrieved_entities_to_json, RetrievedEntity,
};
use text_splitter::TextSplitter;
use super::{enrichment_result::LLMEnrichmentResult, preparation::to_text_content};
@@ -62,6 +64,7 @@ pub struct DefaultPipelineServices {
db: Arc<SurrealDbClient>,
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
}
impl DefaultPipelineServices {
@@ -69,11 +72,13 @@ impl DefaultPipelineServices {
db: Arc<SurrealDbClient>,
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Self {
Self {
db,
openai_client,
config,
reranker_pool,
}
}
@@ -151,7 +156,19 @@ impl PipelineServices for DefaultPipelineServices {
content.text, content.category, content.context
);
retrieve_entities(&self.db, &self.openai_client, &input_text, &content.user_id).await
let rerank_lease = match &self.reranker_pool {
Some(pool) => Some(pool.checkout().await),
None => None,
};
retrieve_entities(
&self.db,
&self.openai_client,
&input_text,
&content.user_id,
rerank_lease,
)
.await
}
async fn run_enrichment(


@@ -1,6 +1,6 @@
[package]
name = "main"
version = "0.2.5"
version = "0.2.6"
edition = "2021"
repository = "https://github.com/perstarkse/minne"
license = "AGPL-3.0-or-later"
@@ -25,6 +25,7 @@ ingestion-pipeline = { path = "../ingestion-pipeline" }
api-router = { path = "../api-router" }
html-router = { path = "../html-router" }
common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }
[dev-dependencies]
tower = "0.5"


@@ -1,6 +1,7 @@
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use std::sync::Arc;
@@ -43,8 +44,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db, openai_client, session_store, config.clone())?;
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
config.clone(),
reranker_pool.clone(),
)?;
let api_state = ApiState {
db: html_state.db.clone(),
@@ -102,9 +110,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let ingestion_pipeline = Arc::new(
IngestionPipeline::new(worker_db.clone(), openai_client.clone(), config.clone())
.await
.unwrap(),
IngestionPipeline::new(
worker_db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool.clone(),
)
.await
.unwrap(),
);
info!("Starting worker process");
@@ -152,6 +165,7 @@ mod tests {
openai_base_url: "https://example.com".into(),
storage: StorageKind::Local,
pdf_ingest_mode: PdfIngestMode::LlmFirst,
..Default::default()
}
}
@@ -181,9 +195,14 @@ mod tests {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db.clone(), openai_client, session_store, config.clone())
.expect("failed to build html state");
let html_state = HtmlState::new_with_resources(
db.clone(),
openai_client,
session_store,
config.clone(),
None,
)
.expect("failed to build html state");
let api_state = ApiState {
db: html_state.db.clone(),


@@ -3,6 +3,7 @@ use std::sync::Arc;
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use tracing::info;
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -41,8 +42,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db, openai_client, session_store, config.clone())?;
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
config.clone(),
reranker_pool,
)?;
let api_state = ApiState {
db: html_state.db.clone(),


@@ -1,6 +1,7 @@
use std::sync::Arc;
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -32,8 +33,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let ingestion_pipeline =
Arc::new(IngestionPipeline::new(db.clone(), openai_client.clone(), config).await?);
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let ingestion_pipeline = Arc::new(
IngestionPipeline::new(db.clone(), openai_client.clone(), config, reranker_pool).await?,
);
run_worker_loop(db, ingestion_pipeline).await
}