8 Commits

- Per Stark, 380c900c86 (2025-11-01 21:26:06 +01:00): release: 0.2.6
    dist update
    fix new workflow
    fix
    mkdir
    moved to dist
    fix only dir
    dont verify sha files
    fix verify ci part
    fix
    no checking anymore
- Per Stark, a99e5ada8b (2025-10-31 13:40:06 +01:00): Merge pull request #5 from josephleee/patch-1
    Update README.md
- Per Stark, b0deabaf3f (2025-10-31 13:38:11 +01:00): release: 0.2.6
- Joseph, a8f0d9fa88 (2025-10-30 16:12:39 +09:00): Update README.md
    KaraKeep url is deprecated. link to origin github url
- Per Stark, 56a1dfddb8 (2025-10-29 12:04:39 +01:00): fix: updated docker container for reranking
- Per Stark, 863b921fb4 (2025-10-28 22:28:45 +01:00): fix: updated nix build to work with reranking deps
- Per Stark, 72578296db (2025-10-27 13:05:10 +01:00): feat: reranking with fastembed added
- Per Stark, a0e9387c76 (2025-10-24 23:34:18 +02:00): docs: updated readme
35 changed files with 2108 additions and 652 deletions

.github/build-setup.yml (new file)

@@ -0,0 +1,49 @@
- name: Prepare lib dir
run: mkdir -p lib
# Linux
- name: Fetch ONNX Runtime (Linux)
if: runner.os == 'Linux'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-x64-${ORT_VER}.tgz" ;;
aarch64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-aarch64-${ORT_VER}.tgz" ;;
*) echo "Unsupported arch $ARCH"; exit 1 ;;
esac
curl -fsSL -o ort.tgz "$URL"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime.so* lib/
# macOS
- name: Fetch ONNX Runtime (macOS)
if: runner.os == 'macOS'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
tar -xzf ort.tgz
# copy the main dylib; rename to stable name if needed
cp -v onnxruntime-*/lib/libonnxruntime*.dylib lib/
# optional: ensure a stable name
if [ ! -f lib/libonnxruntime.dylib ]; then
cp -v lib/libonnxruntime*.dylib lib/libonnxruntime.dylib
fi
# Windows
- name: Fetch ONNX Runtime (Windows)
if: runner.os == 'Windows'
shell: pwsh
env:
ORT_VER: 1.22.0
run: |
$url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
Invoke-WebRequest $url -OutFile ort.zip
Expand-Archive ort.zip -DestinationPath ort
$dll = Get-ChildItem -Recurse -Path ort -Filter onnxruntime.dll | Select-Object -First 1
Copy-Item $dll.FullName lib\onnxruntime.dll


@@ -1,44 +1,8 @@
# This file was autogenerated by dist: https://opensource.axo.dev/cargo-dist/
#
# Copyright 2022-2024, axodotdev
# SPDX-License-Identifier: MIT or Apache-2.0
#
# CI that:
#
# * checks for a Git Tag that looks like a release
# * builds artifacts with dist (archives, installers, hashes)
# * uploads those artifacts to temporary workflow zip
# * on success, uploads the artifacts to a GitHub Release
#
# Note that the GitHub Release will be created with a generated
# title/body based on your changelogs.
name: Release
permissions:
"contents": "write"
"packages": "write"
contents: write
packages: write
# This task will run whenever you push a git tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where
# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION
# must be a Cargo-style SemVer Version (must have at least major.minor.patch).
#
# If PACKAGE_NAME is specified, then the announcement will be for that
# package (erroring out if it doesn't have the given version or isn't dist-able).
#
# If PACKAGE_NAME isn't specified, then the announcement will be for all
# (dist-able) packages in the workspace with that version (this mode is
# intended for workspaces with only one dist-able package, or with all dist-able
# packages versioned/released in lockstep).
#
# If you push multiple tags at once, separate instances of this workflow will
# spin up, creating an independent announcement for each one. However, GitHub
# will hard limit this to 3 tags per commit, as it will assume more tags is a
# mistake.
#
# If there's a prerelease-style suffix to the version, then the release(s)
# will be marked as a prerelease.
on:
pull_request:
push:
@@ -46,9 +10,8 @@ on:
- '**[0-9]+.[0-9]+.[0-9]+*'
jobs:
# Run 'dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ !github.event.pull_request && github.ref_name || '' }}
@@ -60,52 +23,36 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.28.0/cargo-dist-installer.sh | sh"
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.0/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
# (PRs run on the *source* but secrets are usually on the *target* -- that's *good*
# but also really annoying to build CI around when it needs secrets to work right.)
- id: plan
run: |
dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "dist ran successfully"
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
echo "manifest=$(jq -c . plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: Upload dist-manifest.json
uses: actions/upload-artifact@v4
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
# Build and packages all the platform-specific things
build-local-artifacts:
name: build-local-artifacts (${{ join(matrix.targets, ', ') }})
# Let the initial task tell us to not run (currently very blunt)
needs:
- plan
needs: [plan]
if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
strategy:
fail-fast: false
# Target platforms/runners are computed by dist in create-release.
# Each member of the matrix has the following arguments:
#
# - runner: the github runner
# - dist-args: cli flags to pass to dist
# - install-dist: expression to run to install dist on the runner
#
# Typically there will be:
# - 1 "global" task that builds universal installers
# - N "local" tasks that build each platform's binaries and platform-specific installers
matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
runs-on: ${{ matrix.runner }}
container: ${{ matrix.container && matrix.container.image || null }}
@@ -114,11 +61,12 @@ jobs:
BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
steps:
- name: enable windows longpaths
run: |
git config --global core.longpaths true
run: git config --global core.longpaths true
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Rust non-interactively if not already installed
if: ${{ matrix.container }}
run: |
@@ -126,37 +74,103 @@ jobs:
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install dist
run: ${{ matrix.install_dist.run }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
# ===== BEGIN: Injected ORT staging for cargo-dist bundling =====
- run: echo "=== BUILD-SETUP START ==="
# Unix shells
- name: Prepare lib dir (Unix)
if: runner.os != 'Windows'
shell: bash
run: |
mkdir -p lib
rm -f lib/*
# Windows PowerShell
- name: Prepare lib dir (Windows)
if: runner.os == 'Windows'
shell: pwsh
run: |
New-Item -ItemType Directory -Force -Path lib | Out-Null
# remove contents if any
Get-ChildItem -Path lib -Force | Remove-Item -Force -Recurse -ErrorAction SilentlyContinue
- name: Fetch ONNX Runtime (Linux)
if: runner.os == 'Linux'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-x64-${ORT_VER}.tgz" ;;
aarch64) URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-linux-aarch64-${ORT_VER}.tgz" ;;
*) echo "Unsupported arch $ARCH"; exit 1 ;;
esac
curl -fsSL -o ort.tgz "$URL"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime.so* lib/
# normalize to stable name if needed
[ -f lib/libonnxruntime.so ] || cp -v lib/libonnxruntime.so.* lib/libonnxruntime.so
- name: Fetch ONNX Runtime (macOS)
if: runner.os == 'macOS'
env:
ORT_VER: 1.22.0
run: |
set -euo pipefail
curl -fsSL -o ort.tgz "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VER}/onnxruntime-osx-universal2-${ORT_VER}.tgz"
tar -xzf ort.tgz
cp -v onnxruntime-*/lib/libonnxruntime*.dylib lib/
[ -f lib/libonnxruntime.dylib ] || cp -v lib/libonnxruntime*.dylib lib/libonnxruntime.dylib
- name: Fetch ONNX Runtime (Windows)
if: runner.os == 'Windows'
shell: pwsh
env:
ORT_VER: 1.22.0
run: |
$url = "https://github.com/microsoft/onnxruntime/releases/download/v$env:ORT_VER/onnxruntime-win-x64-$env:ORT_VER.zip"
Invoke-WebRequest $url -OutFile ort.zip
Expand-Archive ort.zip -DestinationPath ort
$dll = Get-ChildItem -Recurse -Path ort -Filter onnxruntime.dll | Select-Object -First 1
Copy-Item $dll.FullName lib\onnxruntime.dll
- run: |
echo "=== BUILD-SETUP END ==="
echo "lib/ contents:"
ls -l lib || dir lib
# ===== END: Injected ORT staging =====
- name: Install dependencies
run: |
${{ matrix.packages_install }}
- name: Build artifacts
run: |
# Actually do builds and make zips and whatnot
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
- id: cargo-dist
name: Post-build
# We force bash here just because github makes it really hard to get values up
# to "real" actions without writing to env-vars, and writing to env-vars has
# inconsistent syntax between shell and powershell.
shell: bash
run: |
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
@@ -167,16 +181,16 @@ jobs:
build_and_push_docker_image:
name: Build and Push Docker Image
runs-on: ubuntu-latest
needs: [plan]
if: ${{ needs.plan.outputs.publishing == 'true' }}
needs: [plan]
if: ${{ needs.plan.outputs.publishing == 'true' }}
permissions:
contents: read # Permission to checkout the repository
packages: write # Permission to push Docker image to GHCR
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive # Matches your other checkout steps
submodules: recursive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -185,33 +199,28 @@ jobs:
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }} # User triggering the workflow
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository }}
# This action automatically uses the Git tag as the Docker image tag.
# For example, a Git tag 'v1.2.3' will result in Docker tag 'ghcr.io/owner/repo:v1.2.3'.
images: ghcr.io/${{ github.repository }}
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha # Enable Docker layer caching from GitHub Actions cache
cache-to: type=gha,mode=max # Enable Docker layer caching to GitHub Actions cache
cache-from: type=gha
cache-to: type=gha,mode=max
# Build and package all the platform-agnostic(ish) things
build-global-artifacts:
needs:
- plan
- build-local-artifacts
runs-on: "ubuntu-22.04"
needs: [plan, build-local-artifacts]
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
@@ -219,92 +228,90 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: cargo-dist
shell: bash
run: |
dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
echo "dist ran successfully"
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: artifacts-build-global
path: |
${{ steps.cargo-dist.outputs.paths }}
${{ env.BUILD_MANIFEST_NAME }}
# Determines if we should publish/announce
host:
needs:
- plan
- build-local-artifacts
- build-global-artifacts
# Only run if we're "publishing", and only if local and global didn't fail (skipped is fine)
needs: [plan, build-local-artifacts, build-global-artifacts]
if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: host
shell: bash
run: |
dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
echo "artifacts uploaded and released successfully"
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
echo "manifest=$(jq -c . dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: Upload dist-manifest.json
uses: actions/upload-artifact@v4
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
- name: Download GitHub Artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: artifacts
merge-multiple: true
- name: Cleanup
run: |
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
run: rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
env:
PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
@@ -312,20 +319,13 @@ jobs:
ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
announce:
needs:
- plan
- host
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
needs: [plan, host]
if: ${{ always() && needs.host.result == 'success' }}
runs-on: "ubuntu-22.04"
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:


@@ -1,6 +1,9 @@
# Changelog
## Unreleased
## Version 0.2.6 (2025-10-29)
- Added an opt-in FastEmbed-based reranking stage behind `reranking_enabled`. It improves retrieval accuracy by re-scoring hybrid results.
## Version 0.2.5 (2025-10-24)
- Added manual knowledge entity creation flows using a modal, with the option for suggested relationships
- Scratchpad feature, with the ability to convert scratchpads to content.

Cargo.lock (generated)

File diff suppressed because it is too large.


@@ -56,18 +56,56 @@ base64 = "0.22.1"
object_store = { version = "0.11.2" }
bytes = "1.7.1"
state-machines = "0.2.0"
fastembed = { version = "5.2.0", default-features = false, features = ["hf-hub-native-tls", "ort-load-dynamic"] }
[profile.dist]
inherits = "release"
lto = "thin"
[workspace.lints.clippy]
perf = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }
# Performance-focused lints
perf = { level = "warn", priority = -1 }
vec_init_then_push = "warn"
large_stack_frames = "warn"
redundant_allocation = "warn"
single_char_pattern = "warn"
string_extend_chars = "warn"
format_in_format_args = "warn"
slow_vector_initialization = "warn"
inefficient_to_string = "warn"
implicit_clone = "warn"
redundant_clone = "warn"
needless_question_mark = "allow"
single_call_fn = "allow"
# Security-focused lints
integer_arithmetic = "warn"
indexing_slicing = "warn"
unwrap_used = "warn"
expect_used = "warn"
panic = "warn"
unimplemented = "warn"
todo = "warn"
# Async/Network lints
async_yields_async = "warn"
await_holding_invalid_state = "warn"
rc_buffer = "warn"
# Maintainability-focused lints
cargo = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
clone_on_ref_ptr = "warn"
float_cmp = "warn"
manual_string_new = "warn"
uninlined_format_args = "warn"
unused_self = "warn"
must_use_candidate = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "warn"
module_name_repetitions = "warn"
wildcard_dependencies = "warn"
missing_docs_in_private_items = "warn"
# Allow noisy lints that don't add value for this project
manual_must_use = "allow"
needless_raw_string_hashes = "allow"
multiple_bound_locations = "allow"
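
As a concrete illustration of the security-focused group above, `indexing_slicing` flags direct slice indexing that can panic at runtime. A minimal sketch of the warned pattern and a conforming rewrite (hypothetical helper, not part of this diff):

```rust
fn first_byte(bytes: &[u8]) -> Option<u8> {
    // `bytes[0]` would trip clippy::indexing_slicing and can panic on empty input;
    // `first()` surfaces the empty case as None instead.
    bytes.first().copied()
}
```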


@@ -1,7 +1,10 @@
# === Builder Stage ===
FROM clux/muslrust:1.86.0-stable as builder
# === Builder ===
FROM rust:1.86-bookworm AS builder
WORKDIR /usr/src/minne
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config clang cmake git && rm -rf /var/lib/apt/lists/*
# Cache deps
COPY Cargo.toml Cargo.lock ./
RUN mkdir -p api-router common composite-retrieval html-router ingestion-pipeline json-stream-parser main worker
COPY api-router/Cargo.toml ./api-router/
@@ -11,43 +14,38 @@ COPY html-router/Cargo.toml ./html-router/
COPY ingestion-pipeline/Cargo.toml ./ingestion-pipeline/
COPY json-stream-parser/Cargo.toml ./json-stream-parser/
COPY main/Cargo.toml ./main/
RUN cargo build --release --bin main --features ingestion-pipeline/docker || true
# Build with the MUSL target
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker || true
# Copy the rest of the source code
# Build
COPY . .
RUN cargo build --release --bin main --features ingestion-pipeline/docker
# Build the final application binary with the MUSL target
RUN cargo build --release --target x86_64-unknown-linux-musl --bin main --features ingestion-pipeline/docker
# === Runtime ===
FROM debian:bookworm-slim
# === Runtime Stage ===
FROM alpine:latest
# Chromium + runtime deps + OpenMP for ORT
RUN apt-get update && apt-get install -y --no-install-recommends \
chromium libnss3 libasound2 libgbm1 libxshmfence1 \
ca-certificates fonts-dejavu fonts-noto-color-emoji \
libgomp1 libstdc++6 curl \
&& rm -rf /var/lib/apt/lists/*
RUN apk update && apk add --no-cache \
chromium \
nss \
freetype \
harfbuzz \
ca-certificates \
ttf-freefont \
font-noto-emoji \
&& \
rm -rf /var/cache/apk/*
# ONNX Runtime (CPU). Change if you bump ort.
ARG ORT_VERSION=1.22.0
RUN mkdir -p /opt/onnxruntime && \
curl -fsSL -o /tmp/ort.tgz \
"https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
tar -xzf /tmp/ort.tgz -C /opt/onnxruntime --strip-components=1 && rm /tmp/ort.tgz
ENV CHROME_BIN=/usr/bin/chromium-browser \
CHROME_PATH=/usr/lib/chromium/ \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
ENV CHROME_BIN=/usr/bin/chromium \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
ORT_DYLIB_PATH=/opt/onnxruntime/lib/libonnxruntime.so
# Create a non-root user to run the application
RUN adduser -D -h /home/appuser appuser
WORKDIR /home/appuser
# Non-root
RUN useradd -m appuser
USER appuser
WORKDIR /home/appuser
# Copy the compiled binary from the builder stage (note the target path)
COPY --from=builder /usr/src/minne/target/x86_64-unknown-linux-musl/release/main /usr/local/bin/main
COPY --from=builder /usr/src/minne/target/release/main /usr/local/bin/main
EXPOSE 3000
# EXPOSE 8000-9000
CMD ["main"]

README.md

@@ -12,196 +12,142 @@
To test _Minne_ out, try [this](https://minne-demo.stark.pub) read-only demo deployment to view and explore the functionality.
## Noteworthy Features
- **Search & Chat Interface** - Find content or knowledge instantly with full-text search, or use the chat mode and conversational AI to find and reason about content
- **Manual and AI-assisted connections** - Build entities and relationships manually with full control, let AI create entities and relationships automatically, or blend both approaches with AI suggestions for manual approval
- **Hybrid Retrieval System** - Search combining vector similarity, full-text search, and graph traversal for highly relevant results
- **Scratchpad Feature** - Quickly capture thoughts and convert them to permanent content when ready
- **Visual Graph Explorer** - Interactive D3-based navigation of your knowledge entities and connections
- **Multi-Format Support** - Ingest text, URLs, PDFs, audio files, and images into your knowledge base
- **Performance Focus** - Built with Rust and server-side rendering for speed and efficiency
- **Self-Hosted & Privacy-Focused** - Full control over your data, and compatible with any OpenAI-compatible API that supports structured outputs
## The "Why" Behind Minne
For a while I've been fascinated by Zettelkasten-style PKM systems. While tools like Logseq and Obsidian are excellent, I found the manual linking process to be a hindrance for me. I also wanted a centralized storage and easy access across devices.
For a while I've been fascinated by personal knowledge management systems. I wanted something that made it incredibly easy to capture content - snippets of text, URLs, and other media - while automatically discovering connections between ideas. But I also wanted to maintain control over my knowledge structure.
While developing Minne, I discovered [KaraKeep](https://karakeep.com/) (formerly Hoarder), which is an excellent application in a similar space that you probably want to check out! However, if you're interested in a PKM that builds an automatic network between related concepts using AI, offers search and the **possibility to chat with your knowledge resource**, and provides a blend of manual and AI-driven organization, then Minne might be worth testing.
Traditional tools like Logseq and Obsidian are excellent, but the manual linking process often became a hindrance. Meanwhile, fully automated systems sometimes miss important context or create relationships I wouldn't have chosen myself.
## Core Philosophy & Features
So I built Minne to offer the best of both worlds: effortless content capture with AI-assisted relationship discovery, but with the flexibility to manually curate, edit, or override any connections. You can let AI handle the heavy lifting of extracting entities and finding relationships, take full control yourself, or use a hybrid approach where AI suggests connections that you can approve or modify.
Minne is designed to make it incredibly easy to save snippets of text, URLs, and other content (limited, pending demand). Simply send content along with a category tag. Minne then ingests this, leveraging AI to create relevant nodes and relationships within its graph database, alongside your manual categorization. This graph backend allows for discoverable connections between your pieces of knowledge.
While developing Minne, I discovered [KaraKeep](https://github.com/karakeep-app/karakeep) (formerly Hoarder), which is an excellent application in a similar space that you probably want to check out! However, if you're interested in a PKM that offers both intelligent automation and manual curation, with the ability to chat with your knowledge base, then Minne might be worth testing.
You can converse with your knowledge base through an LLM-powered chat interface (via an OpenAI-compatible API, like Ollama or others). For those who like to see the bigger picture, Minne also includes a feature to visually explore your knowledge graph.
## Table of Contents
You may pick and choose between the models used, and have the possibility to change the prompts to your liking. There is also the option to change the embedding length, making it easy to test another embedding model.
- [Quick Start](#quick-start)
- [Features in Detail](#features-in-detail)
- [Configuration](#configuration)
- [Tech Stack](#tech-stack)
- [Application Architecture](#application-architecture)
- [AI Configuration](#ai-configuration--model-selection)
- [Roadmap](#roadmap)
- [Development](#development)
- [Contributing](#contributing)
- [License](#license)
The application is built for speed and efficiency using Rust with a Server-Side Rendered (SSR) frontend (HTMX and minimal JavaScript). It's fully responsive, offering a complete mobile interface for reading, editing, and managing your content, including the graph database itself. **PWA (Progressive Web App) support** means you can "install" Minne to your device for a native-like experience. For quick capture on the go on iOS, a [**Shortcut**](https://www.icloud.com/shortcuts/e433fbd7602f4e2eaa70dca162323477) makes sending content to your Minne instance a breeze.
## Quick Start
A hybrid retrieval layer blends embeddings, full-text search, and graph signals to surface the best context when augmenting chat responses and when analyzing new content during ingestion.
The fastest way to get Minne running is with Docker Compose:
Minne is open source (AGPL), self-hostable, and can be deployed flexibly: via Nix, Docker Compose, pre-built binaries, or by building from source. It can run as a single `main` binary or as separate `server` and `worker` processes for optimized resource allocation.
```bash
# Clone the repository
git clone https://github.com/perstarkse/minne.git
cd minne
# Start Minne and its database
docker compose up -d
# Access at http://localhost:3000
```
**Required Setup:**
- Replace `your_openai_api_key_here` in `docker-compose.yml` with your actual API key
- Configure `OPENAI_BASE_URL` if using a custom AI provider (like Ollama)
For detailed installation options, see [Configuration](#configuration).
## Features in Detail
### Search vs. Chat mode
**Search** - Use when you know roughly what you're looking for. Full-text search finds items quickly by matching your query terms.
**Chat Mode** - Use when you want to explore concepts, find connections, or reason about your knowledge. The AI analyzes your query and finds relevant context across your entire knowledge base.
### Content Processing
Minne automatically processes content you save:
1. **Web scraping** extracts readable text from URLs
2. **Text analysis** identifies key concepts and relationships
3. **Graph creation** builds connections between related content
4. **Embedding generation** enables semantic search capabilities
### Visual Knowledge Graph
Explore your knowledge as an interactive network with flexible curation options:
**Manual Curation** - Create knowledge entities and relationships yourself with full control over your graph structure
**AI Automation** - Let AI automatically extract entities and discover relationships from your content
**Hybrid Approach** - Get AI-suggested relationships and entities that you can manually review, edit, or approve
The graph visualization shows:
- Knowledge entities as nodes (manually created or AI-extracted)
- Relationships as connections (manually defined, AI-discovered, or suggested)
- Interactive navigation for discovery and editing
### Optional FastEmbed Reranking
Minne ships with an opt-in reranking stage powered by [fastembed-rs](https://github.com/Anush008/fastembed-rs). When enabled, the hybrid retrieval results are rescored with a lightweight cross-encoder before being returned to chat or ingestion flows. In practice this often means more relevant results, boosting answer quality and downstream enrichment.
⚠️ **Resource notes**
- Enabling reranking downloads and caches ~1.1GB of model data on first startup (cached under `<data_dir>/fastembed/reranker` by default).
- Initialization takes longer while warming the cache, and each query consumes extra CPU. The default pool size (2) is tuned for a single-user setup, but a pool size of 1 can work as well.
- The feature is disabled by default. Set `reranking_enabled: true` (or `RERANKING_ENABLED=true`) if you're comfortable with the additional footprint.
Example configuration:
```yaml
reranking_enabled: true
reranking_pool_size: 2
fastembed_cache_dir: "/var/lib/minne/fastembed" # optional override; defaults to <data_dir>/fastembed/reranker
```
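
For developers curious how such a rerank pass looks in code, here is a minimal sketch using fastembed-rs directly. It assumes the crate's `TextRerank`/`RerankInitOptions` API and the `BGERerankerBase` model; Minne's actual pool and lease wiring lives in the `composite-retrieval` crate and may differ.

```rust
use fastembed::{RerankInitOptions, RerankerModel, TextRerank};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Downloads and caches the model on first use (see the resource notes above).
    let reranker = TextRerank::try_new(RerankInitOptions::new(RerankerModel::BGERerankerBase))?;

    let query = "rust async concurrency";
    let candidates = vec![
        "Notes on tokio task scheduling",
        "Grocery list for the weekend",
        "Async/await patterns in Rust",
    ];

    // Re-score the hybrid-retrieval candidates against the query with the cross-encoder.
    for hit in reranker.rerank(query, candidates, true, None)? {
        println!("{:.3}  {:?}", hit.score, hit.document);
    }
    Ok(())
}
```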
## Tech Stack
- **Backend:** Rust. Server-Side Rendering (SSR). Axum. Minijinja for templating.
- **Frontend:** HTML. HTMX and plain JavaScript for interactivity.
- **Database:** SurrealDB
- **AI Integration:** OpenAI API compatible endpoint (for chat and content processing), with support for structured outputs.
- **Web Content Processing:** Relies on a Chromium instance for robust webpage fetching/rendering.
## Prerequisites
- **For Docker/Nix:** Docker or Nix installed. These methods handle SurrealDB and Chromium dependencies.
- **For Binaries/Source:**
- A running SurrealDB instance.
- Chromium (or a compatible Chrome browser) installed and accessible in your `PATH`.
- Git (if cloning and building from source).
- Rust toolchain (if building from source).
## Getting Started
You have several options to get Minne up and running:
### 1. Nix (Recommended for ease of dependency management)
If you have Nix installed, you can run Minne directly:
```bash
nix run 'github:perstarkse/minne#main'
```
This command will fetch Minne and its dependencies (including Chromium) and run the `main` (combined server/worker) application.
### 2. Docker Compose (Recommended for containerized environments)
This is a great way to manage Minne and its SurrealDB dependency together.
1. Clone the repository (or just save the `docker-compose.yml` below).
1. Create a `docker-compose.yml` file:
```yaml
version: "3.8"
services:
minne:
image: ghcr.io/perstarkse/minne:latest # Pulls the latest pre-built image
# Or, to build from local source:
# build: .
container_name: minne_app
ports:
- "3000:3000" # Exposes Minne on port 3000
environment:
# These are examples, ensure they match your SurrealDB setup below
# and your actual OpenAI key.
SURREALDB_ADDRESS: "ws://surrealdb:8000"
SURREALDB_USERNAME: "root_user" # Default from SurrealDB service below
SURREALDB_PASSWORD: "root_password" # Default from SurrealDB service below
SURREALDB_DATABASE: "minne_db"
SURREALDB_NAMESPACE: "minne_ns"
OPENAI_API_KEY: "your_openai_api_key_here" # IMPORTANT: Replace with your actual key
#OPENAI_BASE_URL: "your_ollama_address" # Uncomment this and change it to override the default openai base url
HTTP_PORT: 3000
DATA_DIR: "/data" # Data directory inside the container
RUST_LOG: "minne=info,tower_http=info" # Example logging level
volumes:
- ./minne_data:/data # Persists Minne's data (e.g., scraped content) on the host
depends_on:
- surrealdb
networks:
- minne-net
# Waits for SurrealDB to be ready before starting Minne
command: >
sh -c "
echo 'Waiting for SurrealDB to start...' &&
# Adjust sleep time if SurrealDB takes longer to initialize in your environment
until nc -z surrealdb 8000; do echo 'Waiting for SurrealDB...'; sleep 2; done &&
echo 'SurrealDB is up, starting Minne application...' &&
/usr/local/bin/main
"
# For separate server/worker:
# command: /usr/local/bin/server # or /usr/local/bin/worker
surrealdb:
image: surrealdb/surrealdb:latest
container_name: minne_surrealdb
ports:
# Exposes SurrealDB on port 8000 (primarily for direct access/debugging if needed,
# not strictly required for Minne if only accessed internally by the minne service)
- "127.0.0.1:8000:8000" # Bind to localhost only for SurrealDB by default
volumes:
# Persists SurrealDB data on the host in a 'surreal_database' folder
- ./surreal_database:/database
command: >
start
--log info # Consider 'debug' for troubleshooting
--user root_user
--pass root_password
file:/database/minne_v1.db # Using file-based storage for simplicity
networks:
- minne-net
volumes:
minne_data: {} # Defines a named volume for Minne data (can be managed by Docker)
surreal_database: {} # Defines a named volume for SurrealDB data
networks:
minne-net:
driver: bridge
```
1. Run:
```bash
docker compose up -d
```
Minne will be accessible at `http://localhost:3000`.
### 3. Pre-built Binaries (GitHub Releases)
Binaries for Windows, macOS, and Linux (combined `main` version) are available on the [GitHub Releases page](https://github.com/perstarkse/minne/releases/latest).
1. Download the appropriate binary for your system.
1. **You will need to provide and run SurrealDB and have Chromium installed and accessible in your PATH separately.**
1. Set the required [Configuration](#configuration) environment variables or use a `config.yaml`.
1. Run the executable.
### 4. Build from Source
1. Clone the repository:
```bash
git clone https://github.com/perstarkse/minne.git
cd minne
```
1. **You will need to provide and run SurrealDB and have Chromium installed and accessible in your PATH separately.**
1. Set the required [Configuration](#configuration) environment variables or use a `config.yaml`.
1. Build and run:
- For the combined `main` binary:
```bash
cargo run --release --bin main
```
- For the `server` binary:
```bash
cargo run --release --bin server
```
- For the `worker` binary (if you want to run it separately):
```bash
cargo run --release --bin worker
```
The compiled binaries will be in `target/release/`.
- **Backend:** Rust with Axum framework and Server-Side Rendering (SSR)
- **Frontend:** HTML with HTMX and minimal JavaScript for interactivity
- **Database:** SurrealDB (graph, document, and vector search)
- **AI Integration:** OpenAI-compatible API with structured outputs
- **Web Processing:** Headless Chrome for robust webpage content extraction
## Configuration
Minne can be configured using environment variables or a `config.yaml` file placed in the working directory where you run the application. Environment variables take precedence over `config.yaml`.
Minne can be configured using environment variables or a `config.yaml` file. Environment variables take precedence over `config.yaml`.
**Required Configuration:**
### Required Configuration
- `SURREALDB_ADDRESS`: WebSocket address of your SurrealDB instance (e.g., `ws://127.0.0.1:8000` or `ws://surrealdb:8000` for Docker).
- `SURREALDB_USERNAME`: Username for SurrealDB (e.g., `root_user`).
- `SURREALDB_PASSWORD`: Password for SurrealDB (e.g., `root_password`).
- `SURREALDB_DATABASE`: Database name in SurrealDB (e.g., `minne_db`).
- `SURREALDB_NAMESPACE`: Namespace in SurrealDB (e.g., `minne_ns`).
- `OPENAI_API_KEY`: Your API key for OpenAI compatible endpoint (e.g., `sk-YourActualOpenAIKeyGoesHere`).
- `HTTP_PORT`: Port for the Minne server to listen on (Default: `3000`).
- `SURREALDB_ADDRESS`: WebSocket address of your SurrealDB instance (e.g., `ws://127.0.0.1:8000`)
- `SURREALDB_USERNAME`: Username for SurrealDB (e.g., `root_user`)
- `SURREALDB_PASSWORD`: Password for SurrealDB (e.g., `root_password`)
- `SURREALDB_DATABASE`: Database name in SurrealDB (e.g., `minne_db`)
- `SURREALDB_NAMESPACE`: Namespace in SurrealDB (e.g., `minne_ns`)
- `OPENAI_API_KEY`: Your API key for OpenAI compatible endpoint
- `HTTP_PORT`: Port for the Minne server (Default: `3000`)
**Optional Configuration:**
### Optional Configuration
- `RUST_LOG`: Controls logging level (e.g., `minne=info,tower_http=debug`).
- `DATA_DIR`: Directory to store local data like fetched webpage content (e.g., `./data`).
- `OPENAI_BASE_URL`: Base URL to a OpenAI API provider, such as Ollama.
- `RUST_LOG`: Controls logging level (e.g., `minne=info,tower_http=debug`)
- `DATA_DIR`: Directory to store local data (e.g., `./data`)
- `OPENAI_BASE_URL`: Base URL for custom AI providers (like Ollama)
- `RERANKING_ENABLED` / `reranking_enabled`: Set to `true` to enable the FastEmbed reranking stage (default `false`)
- `RERANKING_POOL_SIZE` / `reranking_pool_size`: Maximum concurrent reranker workers (defaults to `2`)
- `FASTEMBED_CACHE_DIR` / `fastembed_cache_dir`: Directory for cached FastEmbed models (defaults to `<data_dir>/fastembed/reranker`)
- `FASTEMBED_SHOW_DOWNLOAD_PROGRESS` / `fastembed_show_download_progress`: Show model download progress when warming the cache (default `true`)
**Example `config.yaml`:**
### Example config.yaml
```yaml
surrealdb_address: "ws://127.0.0.1:8000"
@@ -215,66 +161,105 @@ http_port: 3000
# rust_log: "info"
```
## Application Architecture (Binaries)
## Installation Options
Minne offers flexibility in deployment:
### 1. Docker Compose (Recommended)
- **`main`**: A combined binary running both server (API, web UI) and worker (background tasks) in one process. Ideal for simpler setups.
- **`server`**: Runs only the server component.
- **`worker`**: Runs only the worker component, suitable for deployment on a machine with more resources for intensive tasks.
```bash
# Clone and run
git clone https://github.com/perstarkse/minne.git
cd minne
docker compose up -d
```
This modularity allows scaling and resource optimization. The `main` binary or the Docker Compose setup (using `main`) is sufficient for most users.
The included `docker-compose.yml` handles SurrealDB and Chromium dependencies automatically.
### 2. Nix
```bash
nix run 'github:perstarkse/minne#main'
```
This fetches Minne and all dependencies, including Chromium.
### 3. Pre-built Binaries
Download binaries for Windows, macOS, and Linux from the [GitHub Releases](https://github.com/perstarkse/minne/releases/latest).
**Requirements:** You'll need to provide SurrealDB and Chromium separately.
### 4. Build from Source
```bash
git clone https://github.com/perstarkse/minne.git
cd minne
cargo run --release --bin main
```
**Requirements:** SurrealDB and Chromium must be installed and accessible in your PATH.
## Application Architecture
Minne offers flexible deployment options:
- **`main`**: Combined server and worker in one process (recommended for most users)
- **`server`**: Web interface and API only
- **`worker`**: Background processing only (for resource optimization)
## Usage
Once Minne is running:
Once Minne is running at `http://localhost:3000`:
1. Access the web interface at `http://localhost:3000` (or your configured port).
1. On iOS, consider setting up the [Minne iOS Shortcut](https://www.icloud.com/shortcuts/9aa960600ec14329837ba4169f57a166) for effortless content sending. **Add the shortcut, replace the [insert_url] and the [insert_api_key] snippets**.
1. Add notes, URLs, **audio files**, and explore your growing knowledge graph.
1. Engage with the chat interface to query your saved content.
1. Try the experimental visual graph explorer to see connections.
1. **Web Interface**: Full-featured experience for desktop and mobile
2. **iOS Shortcut**: Use the [Minne iOS Shortcut](https://www.icloud.com/shortcuts/e433fbd7602f4e2eaa70dca162323477) for quick content capture
3. **Content Types**: Save notes, URLs, audio files, and more
4. **Knowledge Graph**: Explore automatic connections between your content
5. **Chat Interface**: Query your knowledge base conversationally
## AI Configuration & Model Selection
Minne relies on an OpenAI-compatible API for processing content, generating graph relationships, and powering the chat feature.
### Setting Up AI Providers
**Environment Variables / `config.yaml` keys:**
Minne uses OpenAI-compatible APIs. Configure via environment variables or `config.yaml`:
- `OPENAI_API_KEY` (required): Your API key for the chosen AI provider.
- `OPENAI_BASE_URL` (optional): Use this to override the default OpenAI API URL (`https://api.openai.com/v1`). This is essential for using local models via services like Ollama, or other API providers.
- **Example for Ollama:** `http://<your-ollama-ip>:11434/v1`
- `OPENAI_API_KEY` (required): Your API key
- `OPENAI_BASE_URL` (optional): Custom provider URL (e.g., Ollama: `http://localhost:11434/v1`)
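
Under the hood, requests to this endpoint go through an OpenAI-compatible client. A sketch of how a custom base URL is typically wired with the async-openai crate (illustrative; not necessarily Minne's exact construction):

```rust
use async_openai::{config::OpenAIConfig, Client};

fn make_client(api_key: &str, base_url: Option<&str>) -> Client<OpenAIConfig> {
    // Mirrors the OPENAI_API_KEY / OPENAI_BASE_URL settings above.
    let mut cfg = OpenAIConfig::new().with_api_key(api_key);
    if let Some(url) = base_url {
        cfg = cfg.with_api_base(url); // e.g. http://localhost:11434/v1 for Ollama
    }
    Client::with_config(cfg)
}
```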
### Changing Models
### Model Selection
Once you have configured the `OPENAI_BASE_URL` to point to your desired provider, you can select the specific models Minne should use.
1. Navigate to the `/admin` page in your Minne instance.
1. The page will list the models available from your configured endpoint. You can select different models for processing content and for chat.
1. **Important:** For content processing, Minne relies on structured outputs (function calling). The model and provider you select for this task **must** support this feature.
1. **Embedding Dimensions:** If you change the embedding model, you **must** update the "Embedding Dimensions" setting in the admin panel to match the output dimensions of your new model (e.g., `text-embedding-3-small` uses 1536, `nomic-embed-text` uses 768). Mismatched dimensions will cause errors. Some newer models accept a dimensions argument; for these, setting the dimensions to any supported value should work.
1. Access the `/admin` page in your Minne instance
2. Select models for content processing and chat from your configured provider
3. **Content Processing Requirements**: The model must support structured outputs
4. **Embedding Dimensions**: Update this setting when changing embedding models (e.g., 1536 for `text-embedding-3-small`, 768 for `nomic-embed-text`)
## Roadmap
I've developed Minne primarily for my own use, but having been in the self-hosted space for a long time and benefited from the efforts of others, I thought I'd share it with the community. Feature requests are welcome.
The roadmap as of now is:
Current development focus:
~~- Handle uploaded images wisely.~~
~~- An updated explorer of the graph database.~~
- A TUI frontend which opens your system default editor for improved writing and document management.
- TUI frontend with system editor integration
- Enhanced reranking for improved retrieval recall
- Additional content type support
## Contributing
Contributions are welcome! Whether it's bug reports, feature suggestions, documentation improvements, or code contributions, please feel free to open an issue or submit a pull request.
Feature requests and contributions are welcome!
## Development
Run tests with
```rust
```bash
# Run tests
cargo test
# Development build
cargo build
# Comprehensive linting
cargo clippy --workspace --all-targets --all-features
```
There is currently a variety of unit tests for commonly used functions. Additional tests, especially integration tests, would be very welcome.
The codebase includes extensive unit tests. Integration tests and additional contributions are welcome.
## Contributing
I've developed Minne primarily for my own use, but having been in the self-hosted space for a long time and benefited from the efforts of others, I thought I'd share it with the community. Feature requests are welcome.
## License
Minne is licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**. See the [LICENSE](LICENSE) file for details. This means if you run a modified version of Minne as a network service, you must also offer the source code of that modified version to its users.
Minne is licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**. See the [LICENSE](LICENSE) file for details.


@@ -214,6 +214,7 @@ mod tests {
openai_base_url: "..".into(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}


@@ -270,12 +270,29 @@ impl FileInfo {
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::config::{PdfIngestMode::LlmFirst, StorageKind};
use crate::utils::config::{AppConfig, PdfIngestMode::LlmFirst, StorageKind};
use axum::http::HeaderMap;
use axum_typed_multipart::FieldMetadata;
use std::io::Write;
use tempfile::NamedTempFile;
fn test_config(data_dir: &str) -> AppConfig {
AppConfig {
data_dir: data_dir.to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
..Default::default()
}
}
/// Creates a test temporary file with the given content
fn create_test_file(content: &[u8], file_name: &str) -> FieldData<NamedTempFile> {
let mut temp_file = NamedTempFile::new().expect("Failed to create temp file");
@@ -314,19 +331,7 @@ mod tests {
// Create a FileInfo instance with data_dir in /tmp
let user_id = "test_user";
let config = AppConfig {
data_dir: "/tmp/minne_test_data".to_string(), // Using /tmp which is typically on a different filesystem
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("/tmp/minne_test_data");
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)
@@ -375,19 +380,7 @@ mod tests {
// Create a FileInfo instance with data_dir in /tmp
let user_id = "test_user";
let config = AppConfig {
data_dir: "/tmp/minne_test_data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("/tmp/minne_test_data");
// Store the original file
let original_file_info = FileInfo::new(field_data, &db, user_id, &config)
@@ -432,19 +425,7 @@ mod tests {
// Create a FileInfo instance
let user_id = "test_user";
let config = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("./data");
let file_info = FileInfo::new(field_data, &db, user_id, &config).await;
// We can't fully test persistence to disk in unit tests,
@@ -490,19 +471,7 @@ mod tests {
let file_name = "original.txt";
let user_id = "test_user";
let config = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config("./data");
let field_data1 = create_test_file(content, file_name);
let original_file_info = FileInfo::new(field_data1, &db, user_id, &config)
@@ -655,19 +624,7 @@ mod tests {
// Create and persist a test file via FileInfo::new
let user_id = "user123";
let cfg = AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "".to_string(),
surrealdb_address: "".to_string(),
surrealdb_username: "".to_string(),
surrealdb_password: "".to_string(),
surrealdb_namespace: "".to_string(),
surrealdb_database: "".to_string(),
http_port: 0,
openai_base_url: "".to_string(),
storage: crate::utils::config::StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let cfg = test_config("./data");
let temp = create_test_file(b"test content", "test_file.txt");
let file_info = FileInfo::new(temp, &db, user_id, &cfg)
.await
@@ -710,19 +667,7 @@ mod tests {
let result = FileInfo::delete_by_id(
"nonexistent_id",
&db,
&AppConfig {
data_dir: "./data".to_string(),
openai_api_key: "".to_string(),
surrealdb_address: "".to_string(),
surrealdb_username: "".to_string(),
surrealdb_password: "".to_string(),
surrealdb_namespace: "".to_string(),
surrealdb_database: "".to_string(),
http_port: 0,
openai_base_url: "".to_string(),
storage: crate::utils::config::StorageKind::Local,
pdf_ingest_mode: LlmFirst,
},
&test_config("./data"),
)
.await;
@@ -813,19 +758,7 @@ mod tests {
// Create a FileInfo instance with a custom data directory
let user_id = "test_user";
let custom_data_dir = "/tmp/minne_custom_data_dir";
let config = AppConfig {
data_dir: custom_data_dir.to_string(),
openai_api_key: "test_key".to_string(),
surrealdb_address: "test_address".to_string(),
surrealdb_username: "test_user".to_string(),
surrealdb_password: "test_pass".to_string(),
surrealdb_namespace: "test_ns".to_string(),
surrealdb_database: "test_db".to_string(),
http_port: 3000,
openai_base_url: "..".to_string(),
storage: StorageKind::Local,
pdf_ingest_mode: LlmFirst,
};
let config = test_config(custom_data_dir);
// Test file creation
let file_info = FileInfo::new(field_data, &db, user_id, &config)


@@ -1,5 +1,6 @@
use config::{Config, ConfigError, Environment, File};
use serde::Deserialize;
use std::env;
#[derive(Clone, Deserialize, Debug)]
#[serde(rename_all = "lowercase")]
@@ -42,6 +43,16 @@ pub struct AppConfig {
pub storage: StorageKind,
#[serde(default = "default_pdf_ingest_mode")]
pub pdf_ingest_mode: PdfIngestMode,
#[serde(default = "default_reranking_enabled")]
pub reranking_enabled: bool,
#[serde(default)]
pub reranking_pool_size: Option<usize>,
#[serde(default)]
pub fastembed_cache_dir: Option<String>,
#[serde(default)]
pub fastembed_show_download_progress: Option<bool>,
#[serde(default)]
pub fastembed_max_length: Option<usize>,
}
fn default_data_dir() -> String {
@@ -52,7 +63,66 @@ fn default_base_url() -> String {
"https://api.openai.com/v1".to_string()
}
fn default_reranking_enabled() -> bool {
false
}
pub fn ensure_ort_path() {
// Respect an explicit override from the environment.
if env::var_os("ORT_DYLIB_PATH").is_some() {
return;
}
// Otherwise probe next to the current executable for the runtime staged
// by the release workflow (the lib/ directory beside the binary).
if let Ok(mut exe) = env::current_exe() {
exe.pop();
if cfg!(target_os = "windows") {
for p in [
exe.join("onnxruntime.dll"),
exe.join("lib").join("onnxruntime.dll"),
] {
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
return;
}
}
}
let name = if cfg!(target_os = "macos") {
"libonnxruntime.dylib"
} else {
"libonnxruntime.so"
};
let p = exe.join("lib").join(name);
if p.exists() {
env::set_var("ORT_DYLIB_PATH", p);
}
}
}
impl Default for AppConfig {
fn default() -> Self {
Self {
openai_api_key: String::new(),
surrealdb_address: String::new(),
surrealdb_username: String::new(),
surrealdb_password: String::new(),
surrealdb_namespace: String::new(),
surrealdb_database: String::new(),
data_dir: default_data_dir(),
http_port: 0,
openai_base_url: default_base_url(),
storage: default_storage_kind(),
pdf_ingest_mode: default_pdf_ingest_mode(),
reranking_enabled: default_reranking_enabled(),
reranking_pool_size: None,
fastembed_cache_dir: None,
fastembed_show_download_progress: None,
fastembed_max_length: None,
}
}
}
pub fn get_config() -> Result<AppConfig, ConfigError> {
ensure_ort_path();
let config = Config::builder()
.add_source(File::with_name("config").required(false))
.add_source(Environment::default())
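
The net effect of `ensure_ort_path` is a simple precedence: an explicit `ORT_DYLIB_PATH` always wins, otherwise the probe falls back to the `lib/` directory staged next to the binary by the release workflow. A short sketch, assuming the module path `common::utils::config` used elsewhere in this diff (the `/opt/...` path is purely illustrative):

```rust
use std::env;

fn main() {
    // With an explicit override, ensure_ort_path() returns immediately.
    env::set_var("ORT_DYLIB_PATH", "/opt/onnxruntime/lib/libonnxruntime.so");
    common::utils::config::ensure_ort_path();

    // Without the override, the probe checks <exe_dir>/lib/libonnxruntime.{so,dylib}
    // (or onnxruntime.dll beside the exe on Windows) and sets the variable if found.
    println!("ORT_DYLIB_PATH = {:?}", env::var_os("ORT_DYLIB_PATH"));
}
```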


@@ -19,6 +19,7 @@ surrealdb = { workspace = true }
futures = { workspace = true }
async-openai = { workspace = true }
uuid = { workspace = true }
fastembed = { workspace = true }
common = { path = "../common", features = ["test-utils"] }
state-machines = { workspace = true }


@@ -8,19 +8,14 @@ use async_openai::{
};
use common::{
error::AppError,
storage::{
db::SurrealDbClient,
types::{
message::{format_history, Message},
system_settings::SystemSettings,
},
storage::types::{
message::{format_history, Message},
system_settings::SystemSettings,
},
};
use serde::Deserialize;
use serde_json::Value;
use crate::{retrieve_entities, retrieved_entities_to_json};
use super::answer_retrieval_helper::get_query_response_schema;
#[derive(Debug, Deserialize)]
@@ -36,53 +31,12 @@ pub struct LLMResponseFormat {
pub references: Vec<Reference>,
}
/// Orchestrates query processing and returns an answer with references
///
/// Takes a query and uses the provided clients to generate an answer with supporting references.
///
/// # Arguments
///
/// * `surreal_db_client` - Client for `SurrealDB` interactions
/// * `openai_client` - Client for `OpenAI` API calls
/// * `query` - The user's query string
/// * `user_id` - The user's id
///
/// # Returns
///
/// Returns a tuple of the answer and its references, or an API error
#[derive(Debug)]
pub struct Answer {
pub content: String,
pub references: Vec<String>,
}
pub async fn get_answer_with_references(
surreal_db_client: &SurrealDbClient,
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
query: &str,
user_id: &str,
) -> Result<Answer, AppError> {
let entities = retrieve_entities(surreal_db_client, openai_client, query, user_id).await?;
let settings = SystemSettings::get_current(surreal_db_client).await?;
let entities_json = retrieved_entities_to_json(&entities);
let user_message = create_user_message(&entities_json, query);
let request = create_chat_request(user_message, &settings)?;
let response = openai_client.chat().create(request).await?;
let llm_response = process_llm_response(response).await?;
Ok(Answer {
content: llm_response.answer,
references: llm_response
.references
.into_iter()
.map(|r| r.reference)
.collect(),
})
}
pub fn create_user_message(entities_json: &Value, query: &str) -> String {
format!(
r"


@@ -3,6 +3,7 @@ pub mod answer_retrieval_helper;
pub mod fts;
pub mod graph;
pub mod pipeline;
pub mod reranking;
pub mod scoring;
pub mod vector;
@@ -13,6 +14,7 @@ use common::{
types::{knowledge_entity::KnowledgeEntity, text_chunk::TextChunk},
},
};
use reranking::RerankerLease;
use tracing::instrument;
pub use pipeline::{retrieved_entities_to_json, RetrievalConfig, RetrievalTuning};
@@ -39,6 +41,7 @@ pub async fn retrieve_entities(
openai_client: &async_openai::Client<async_openai::config::OpenAIConfig>,
input_text: &str,
user_id: &str,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
pipeline::run_pipeline(
db_client,
@@ -46,6 +49,7 @@ pub async fn retrieve_entities(
input_text,
user_id,
RetrievalConfig::default(),
reranker,
)
.await
}
@@ -142,6 +146,7 @@ mod tests {
"Rust concurrency async tasks",
user_id,
RetrievalConfig::default(),
None,
)
.await
.expect("Hybrid retrieval failed");
@@ -232,6 +237,7 @@ mod tests {
"Rust concurrency async tasks",
user_id,
RetrievalConfig::default(),
None,
)
.await
.expect("Hybrid retrieval failed");


@@ -17,6 +17,9 @@ pub struct RetrievalTuning {
pub graph_score_decay: f32,
pub graph_seed_min_score: f32,
pub graph_vector_inheritance: f32,
pub rerank_blend_weight: f32,
pub rerank_scores_only: bool,
pub rerank_keep_top: usize,
}
impl Default for RetrievalTuning {
@@ -36,6 +39,9 @@ impl Default for RetrievalTuning {
graph_score_decay: 0.75,
graph_seed_min_score: 0.4,
graph_vector_inheritance: 0.6,
rerank_blend_weight: 0.65,
rerank_scores_only: false,
rerank_keep_top: 8,
}
}
}
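
The three new fields feed the blending step in `apply_rerank_results` further down: `rerank_blend_weight` mixes the original fused score with the normalized reranker score, `rerank_scores_only` drops the fused score entirely, and `rerank_keep_top` truncates the candidate list afterwards. A minimal sketch of the blend, assuming both scores are already normalized to [0, 1]:

```rust
// Sketch of the per-candidate blend; names mirror the tuning fields.
fn blend(original_fused: f32, rerank_normalized: f32, weight: f32, scores_only: bool) -> f32 {
    let w = if scores_only { 1.0 } else { weight.clamp(0.0, 1.0) };
    (original_fused * (1.0 - w) + rerank_normalized * w).clamp(0.0, 1.0)
}

// With the default weight of 0.65: blend(0.4, 0.9, 0.65, false) == 0.14 + 0.585 == 0.725,
// i.e. 65% reranker signal, 35% original hybrid score.
```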


@@ -4,7 +4,7 @@ mod state;
pub use config::{RetrievalConfig, RetrievalTuning};
use crate::RetrievedEntity;
use crate::{reranking::RerankerLease, RetrievedEntity};
use async_openai::Client;
use common::{error::AppError, storage::db::SurrealDbClient};
use tracing::info;
@@ -16,6 +16,7 @@ pub async fn run_pipeline(
input_text: &str,
user_id: &str,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
let machine = state::ready();
let input_chars = input_text.chars().count();
@@ -35,11 +36,13 @@ pub async fn run_pipeline(
input_text.to_owned(),
user_id.to_owned(),
config,
reranker,
);
let machine = stages::embed(machine, &mut ctx).await?;
let machine = stages::collect_candidates(machine, &mut ctx).await?;
let machine = stages::expand_graph(machine, &mut ctx).await?;
let machine = stages::attach_chunks(machine, &mut ctx).await?;
let machine = stages::rerank(machine, &mut ctx).await?;
let results = stages::assemble(machine, &mut ctx)?;
Ok(results)
@@ -53,6 +56,7 @@ pub async fn run_pipeline_with_embedding(
input_text: &str,
user_id: &str,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Result<Vec<RetrievedEntity>, AppError> {
let machine = state::ready();
let mut ctx = stages::PipelineContext::with_embedding(
@@ -62,11 +66,13 @@ pub async fn run_pipeline_with_embedding(
input_text.to_owned(),
user_id.to_owned(),
config,
reranker,
);
let machine = stages::embed(machine, &mut ctx).await?;
let machine = stages::collect_candidates(machine, &mut ctx).await?;
let machine = stages::expand_graph(machine, &mut ctx).await?;
let machine = stages::attach_chunks(machine, &mut ctx).await?;
let machine = stages::rerank(machine, &mut ctx).await?;
let results = stages::assemble(machine, &mut ctx)?;
Ok(results)


@@ -7,6 +7,7 @@ use common::{
},
utils::embedding::generate_embedding,
};
use fastembed::RerankResult;
use futures::{stream::FuturesUnordered, StreamExt};
use state_machines::core::GuardError;
use std::collections::{HashMap, HashSet};
@@ -15,6 +16,7 @@ use tracing::{debug, instrument, warn};
use crate::{
fts::find_items_by_fts,
graph::{find_entities_by_relationship_by_id, find_entities_by_source_ids},
reranking::RerankerLease,
scoring::{
clamp_unit, fuse_scores, merge_scored_by_id, min_max_normalize, sort_by_fused_desc,
FusionWeights, Scored,
@@ -27,6 +29,7 @@ use super::{
config::RetrievalConfig,
state::{
CandidatesLoaded, ChunksAttached, Embedded, GraphExpanded, HybridRetrievalMachine, Ready,
Reranked,
},
};
@@ -41,6 +44,7 @@ pub struct PipelineContext<'a> {
pub chunk_candidates: HashMap<String, Scored<TextChunk>>,
pub filtered_entities: Vec<Scored<KnowledgeEntity>>,
pub chunk_values: Vec<Scored<TextChunk>>,
pub reranker: Option<RerankerLease>,
}
impl<'a> PipelineContext<'a> {
@@ -50,6 +54,7 @@ impl<'a> PipelineContext<'a> {
input_text: String,
user_id: String,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Self {
Self {
db_client,
@@ -62,6 +67,7 @@ impl<'a> PipelineContext<'a> {
chunk_candidates: HashMap::new(),
filtered_entities: Vec::new(),
chunk_values: Vec::new(),
reranker,
}
}
@@ -73,8 +79,16 @@ impl<'a> PipelineContext<'a> {
input_text: String,
user_id: String,
config: RetrievalConfig,
reranker: Option<RerankerLease>,
) -> Self {
let mut ctx = Self::new(db_client, openai_client, input_text, user_id, config);
let mut ctx = Self::new(
db_client,
openai_client,
input_text,
user_id,
config,
reranker,
);
ctx.query_embedding = Some(query_embedding);
ctx
}
@@ -327,9 +341,58 @@ pub async fn attach_chunks(
}
#[instrument(level = "trace", skip_all)]
pub fn assemble(
pub async fn rerank(
machine: HybridRetrievalMachine<(), ChunksAttached>,
ctx: &mut PipelineContext<'_>,
) -> Result<HybridRetrievalMachine<(), Reranked>, AppError> {
let mut applied = false;
if let Some(reranker) = ctx.reranker.as_ref() {
if ctx.filtered_entities.len() > 1 {
let documents = build_rerank_documents(ctx, ctx.config.tuning.max_chunks_per_entity);
if documents.len() > 1 {
match reranker.rerank(&ctx.input_text, documents).await {
Ok(results) if !results.is_empty() => {
apply_rerank_results(ctx, results);
applied = true;
}
Ok(_) => {
debug!("Reranker returned no results; retaining original ordering");
}
Err(err) => {
warn!(
error = %err,
"Reranking failed; continuing with original ordering"
);
}
}
} else {
debug!(
document_count = documents.len(),
"Skipping reranking stage; insufficient document context"
);
}
} else {
debug!("Skipping reranking stage; less than two entities available");
}
} else {
debug!("No reranker lease provided; skipping reranking stage");
}
if applied {
debug!("Applied reranking adjustments to candidate ordering");
}
machine
.rerank()
.map_err(|(_, guard)| map_guard_error("rerank", guard))
}
#[instrument(level = "trace", skip_all)]
pub fn assemble(
machine: HybridRetrievalMachine<(), Reranked>,
ctx: &mut PipelineContext<'_>,
) -> Result<Vec<RetrievedEntity>, AppError> {
debug!("Assembling final retrieved entities");
let tuning = &ctx.config.tuning;
@@ -561,6 +624,113 @@ async fn enrich_chunks_from_entities(
Ok(())
}
fn build_rerank_documents(ctx: &PipelineContext<'_>, max_chunks_per_entity: usize) -> Vec<String> {
if ctx.filtered_entities.is_empty() {
return Vec::new();
}
let mut chunk_by_source: HashMap<&str, Vec<&Scored<TextChunk>>> = HashMap::new();
for chunk in &ctx.chunk_values {
chunk_by_source
.entry(chunk.item.source_id.as_str())
.or_default()
.push(chunk);
}
ctx.filtered_entities
.iter()
.map(|entity| {
let mut doc = format!(
"Name: {}\nType: {:?}\nDescription: {}\n",
entity.item.name, entity.item.entity_type, entity.item.description
);
if let Some(chunks) = chunk_by_source.get(entity.item.source_id.as_str()) {
let mut chunk_refs = chunks.clone();
chunk_refs.sort_by(|a, b| {
b.fused
.partial_cmp(&a.fused)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut header_added = false;
for chunk in chunk_refs.into_iter().take(max_chunks_per_entity.max(1)) {
let snippet = chunk.item.chunk.trim();
if snippet.is_empty() {
continue;
}
if !header_added {
doc.push_str("Chunks:\n");
header_added = true;
}
doc.push_str("- ");
doc.push_str(snippet);
doc.push('\n');
}
}
doc
})
.collect()
}
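
For orientation, a document assembled by `build_rerank_documents` is a plain-text block per entity, with the highest-fused chunks appended under a `Chunks:` header. A hypothetical example (entity fields and snippets invented for illustration):

```rust
// Hypothetical output for one entity with two non-empty chunks.
let doc = "Name: Tokio\n\
           Type: Technology\n\
           Description: Async runtime for Rust\n\
           Chunks:\n\
           - Tokio schedules tasks on a multi-threaded runtime.\n\
           - Use spawn_blocking for CPU-bound work.\n";
```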
fn apply_rerank_results(ctx: &mut PipelineContext<'_>, results: Vec<RerankResult>) {
if results.is_empty() || ctx.filtered_entities.is_empty() {
return;
}
let mut remaining: Vec<Option<Scored<KnowledgeEntity>>> =
std::mem::take(&mut ctx.filtered_entities)
.into_iter()
.map(Some)
.collect();
let raw_scores: Vec<f32> = results.iter().map(|r| r.score).collect();
let normalized_scores = min_max_normalize(&raw_scores);
let use_only = ctx.config.tuning.rerank_scores_only;
let blend = if use_only {
1.0
} else {
clamp_unit(ctx.config.tuning.rerank_blend_weight)
};
let mut reranked: Vec<Scored<KnowledgeEntity>> = Vec::with_capacity(remaining.len());
for (result, normalized) in results.into_iter().zip(normalized_scores.into_iter()) {
if let Some(slot) = remaining.get_mut(result.index) {
if let Some(mut candidate) = slot.take() {
let original = candidate.fused;
let blended = if use_only {
clamp_unit(normalized)
} else {
clamp_unit(original * (1.0 - blend) + normalized * blend)
};
candidate.update_fused(blended);
reranked.push(candidate);
}
} else {
warn!(
result_index = result.index,
"Reranker returned out-of-range index; skipping"
);
}
if reranked.len() == remaining.len() {
break;
}
}
for slot in remaining.into_iter() {
if let Some(candidate) = slot {
reranked.push(candidate);
}
}
ctx.filtered_entities = reranked;
let keep_top = ctx.config.tuning.rerank_keep_top;
if keep_top > 0 && ctx.filtered_entities.len() > keep_top {
ctx.filtered_entities.truncate(keep_top);
}
}
fn estimate_tokens(text: &str, avg_chars_per_token: usize) -> usize {
let chars = text.chars().count().max(1);
(chars / avg_chars_per_token).max(1)


@@ -4,18 +4,20 @@ state_machine! {
name: HybridRetrievalMachine,
state: HybridRetrievalState,
initial: Ready,
states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Completed, Failed],
states: [Ready, Embedded, CandidatesLoaded, GraphExpanded, ChunksAttached, Reranked, Completed, Failed],
events {
embed { transition: { from: Ready, to: Embedded } }
collect_candidates { transition: { from: Embedded, to: CandidatesLoaded } }
expand_graph { transition: { from: CandidatesLoaded, to: GraphExpanded } }
attach_chunks { transition: { from: GraphExpanded, to: ChunksAttached } }
assemble { transition: { from: ChunksAttached, to: Completed } }
rerank { transition: { from: ChunksAttached, to: Reranked } }
assemble { transition: { from: Reranked, to: Completed } }
abort {
transition: { from: Ready, to: Failed }
transition: { from: CandidatesLoaded, to: Failed }
transition: { from: GraphExpanded, to: Failed }
transition: { from: ChunksAttached, to: Failed }
transition: { from: Reranked, to: Failed }
}
}
}
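
Changing `assemble` to consume `Reranked` instead of `ChunksAttached` means every pipeline path must now pass through `rerank` (even when it no-ops) before assembling, and the compiler enforces that ordering. A hand-rolled sketch of the same typestate idea, independent of the `state_machine!` macro:

```rust
// `assemble` only exists on the Reranked state, so skipping `rerank()`
// is a compile error rather than a runtime bug.
struct ChunksAttached;
struct Reranked;

impl ChunksAttached {
    fn rerank(self) -> Reranked { Reranked }
}

impl Reranked {
    fn assemble(self) -> Vec<String> { Vec::new() }
}

fn run() -> Vec<String> {
    ChunksAttached.rerank().assemble()
    // ChunksAttached.assemble() would not compile.
}
```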


@@ -0,0 +1,170 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
thread::available_parallelism,
};
use common::{error::AppError, utils::config::AppConfig};
use fastembed::{RerankInitOptions, RerankResult, TextRerank};
use tokio::sync::{Mutex, OwnedSemaphorePermit, Semaphore};
use tracing::debug;
static NEXT_ENGINE: AtomicUsize = AtomicUsize::new(0);
fn pick_engine_index(pool_len: usize) -> usize {
let n = NEXT_ENGINE.fetch_add(1, Ordering::Relaxed);
n % pool_len
}
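
Assuming a fresh counter, successive calls walk the pool round-robin; the `Relaxed` ordering is fine here because the counter only spreads load and synchronizes nothing:

```rust
// With pool_len = 2, picks cycle 0, 1, 0, 1, ...
let picks: Vec<usize> = (0..4).map(|_| pick_engine_index(2)).collect();
assert_eq!(picks, vec![0, 1, 0, 1]);
```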
pub struct RerankerPool {
engines: Vec<Arc<Mutex<TextRerank>>>,
semaphore: Arc<Semaphore>,
}
impl RerankerPool {
/// Build the pool at startup.
/// `pool_size` controls max parallel reranks.
pub fn new(pool_size: usize) -> Result<Arc<Self>, AppError> {
Self::new_with_options(pool_size, RerankInitOptions::default())
}
fn new_with_options(
pool_size: usize,
init_options: RerankInitOptions,
) -> Result<Arc<Self>, AppError> {
if pool_size == 0 {
return Err(AppError::Validation(
"RERANKING_POOL_SIZE must be greater than zero".to_string(),
));
}
fs::create_dir_all(&init_options.cache_dir)?;
let mut engines = Vec::with_capacity(pool_size);
for x in 0..pool_size {
debug!("Creating reranking engine: {x}");
let model = TextRerank::try_new(init_options.clone())
.map_err(|e| AppError::InternalError(e.to_string()))?;
engines.push(Arc::new(Mutex::new(model)));
}
Ok(Arc::new(Self {
engines,
semaphore: Arc::new(Semaphore::new(pool_size)),
}))
}
/// Initialize a pool using application configuration.
pub fn maybe_from_config(config: &AppConfig) -> Result<Option<Arc<Self>>, AppError> {
if !config.reranking_enabled {
return Ok(None);
}
let pool_size = config.reranking_pool_size.unwrap_or_else(default_pool_size);
let init_options = build_rerank_init_options(config)?;
Self::new_with_options(pool_size, init_options).map(Some)
}
/// Check out capacity + pick an engine.
/// This returns a lease that can perform rerank().
pub async fn checkout(self: &Arc<Self>) -> RerankerLease {
// Acquire a permit. This enforces backpressure.
let permit = self
.semaphore
.clone()
.acquire_owned()
.await
.expect("semaphore closed");
// Pick an engine.
// This is naive: just pick based on a simple modulo counter.
// We use an atomic counter to avoid always choosing index 0.
let idx = pick_engine_index(self.engines.len());
let engine = self.engines[idx].clone();
RerankerLease {
_permit: permit,
engine,
}
}
}
fn default_pool_size() -> usize {
available_parallelism()
.map(|value| value.get().min(2))
.unwrap_or(2)
.max(1)
}
fn is_truthy(value: &str) -> bool {
matches!(
value.trim().to_ascii_lowercase().as_str(),
"1" | "true" | "yes" | "on"
)
}
fn build_rerank_init_options(config: &AppConfig) -> Result<RerankInitOptions, AppError> {
let mut options = RerankInitOptions::default();
let cache_dir = config
.fastembed_cache_dir
.as_ref()
.map(PathBuf::from)
.or_else(|| env::var("RERANKING_CACHE_DIR").ok().map(PathBuf::from))
.or_else(|| env::var("FASTEMBED_CACHE_DIR").ok().map(PathBuf::from))
.unwrap_or_else(|| {
Path::new(&config.data_dir)
.join("fastembed")
.join("reranker")
});
fs::create_dir_all(&cache_dir)?;
options.cache_dir = cache_dir;
let show_progress = config
.fastembed_show_download_progress
.or_else(|| env_bool("RERANKING_SHOW_DOWNLOAD_PROGRESS"))
.or_else(|| env_bool("FASTEMBED_SHOW_DOWNLOAD_PROGRESS"))
.unwrap_or(true);
options.show_download_progress = show_progress;
if let Some(max_length) = config.fastembed_max_length.or_else(|| {
env::var("RERANKING_MAX_LENGTH")
.ok()
.and_then(|value| value.parse().ok())
}) {
options.max_length = max_length;
}
Ok(options)
}
fn env_bool(key: &str) -> Option<bool> {
env::var(key).ok().map(|value| is_truthy(&value))
}
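
The cache-dir resolution above follows a fixed precedence: the explicit config value, then `RERANKING_CACHE_DIR`, then `FASTEMBED_CACHE_DIR`, and finally `<data_dir>/fastembed/reranker`. Condensed into one function (a sketch; `configured` and `data_dir` stand in for the `AppConfig` fields):

```rust
use std::{env, path::PathBuf};

// Same fallback chain as build_rerank_init_options, isolated.
fn resolve_cache_dir(configured: Option<&str>, data_dir: &str) -> PathBuf {
    configured
        .map(PathBuf::from)
        .or_else(|| env::var("RERANKING_CACHE_DIR").ok().map(PathBuf::from))
        .or_else(|| env::var("FASTEMBED_CACHE_DIR").ok().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from(data_dir).join("fastembed").join("reranker"))
}
```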
/// Active lease on a single TextRerank instance.
pub struct RerankerLease {
// When this drops, the semaphore permit is released.
_permit: OwnedSemaphorePermit,
engine: Arc<Mutex<TextRerank>>,
}
impl RerankerLease {
pub async fn rerank(
&self,
query: &str,
documents: Vec<String>,
) -> Result<Vec<RerankResult>, AppError> {
// Lock this specific engine so we get &mut TextRerank
let mut guard = self.engine.lock().await;
guard
.rerank(query.to_owned(), documents, false, None)
.map_err(|e| AppError::InternalError(e.to_string()))
}
}
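
Putting pool and lease together, the intended call pattern looks roughly like this (a sketch; error handling trimmed, and the query/documents are invented):

```rust
// Build the pool once at startup, check out a lease per request, and
// let the lease drop to release the semaphore permit.
async fn example(pool: std::sync::Arc<RerankerPool>) -> Result<(), AppError> {
    let lease = pool.checkout().await; // waits if all engines are busy
    let results = lease
        .rerank("rust async runtime", vec!["doc a".into(), "doc b".into()])
        .await?;
    for r in results {
        println!("index={} score={}", r.index, r.score);
    }
    Ok(()) // `lease` drops here, releasing the permit
}
```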


@@ -3,10 +3,10 @@
"devenv": {
"locked": {
"dir": "src/modules",
"lastModified": 1746681099,
"lastModified": 1761839147,
"owner": "cachix",
"repo": "devenv",
"rev": "a7f2ea275621391209fd702f5ddced32dd56a4e2",
"rev": "bb7849648b68035f6b910120252c22b28195cf54",
"type": "github"
},
"original": {
@@ -16,13 +16,31 @@
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": "nixpkgs",
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1761893049,
"owner": "nix-community",
"repo": "fenix",
"rev": "c2ac9a5c0d6d16630c3b225b874bd14528d1abe6",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1733328505,
"lastModified": 1761588595,
"owner": "edolstra",
"repo": "flake-compat",
"rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
"rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
"type": "github"
},
"original": {
@@ -40,10 +58,10 @@
]
},
"locked": {
"lastModified": 1746537231,
"lastModified": 1760663237,
"owner": "cachix",
"repo": "git-hooks.nix",
"rev": "fa466640195d38ec97cf0493d6d6882bc4d14969",
"rev": "ca5b894d3e3e151ffc1db040b6ce4dcc75d31c37",
"type": "github"
},
"original": {
@@ -74,10 +92,25 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1746576598,
"lastModified": 1761672384,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "b3582c75c7f21ce0b429898980eddbbf05c68e55",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1761880412,
"owner": "nixos",
"repo": "nixpkgs",
"rev": "a7fc11be66bdfb5cdde611ee5ce381c183da8386",
"type": "github"
},
"original": {
@@ -90,11 +123,48 @@
"root": {
"inputs": {
"devenv": "devenv",
"fenix": "fenix",
"git-hooks": "git-hooks",
"nixpkgs": "nixpkgs",
"nixpkgs": "nixpkgs_2",
"pre-commit-hooks": [
"git-hooks"
],
"rust-overlay": "rust-overlay"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1761849405,
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "f7de8ae045a5fe80f1203c5a1c3015b05f7c3550",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1761878277,
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "6604534e44090c917db714faa58d47861657690c",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
}
},


@@ -11,14 +11,24 @@
pkgs.openssl
pkgs.nodejs
pkgs.vscode-langservers-extracted
pkgs.cargo-dist
pkgs.cargo-xwin
pkgs.clang
pkgs.onnxruntime
];
languages.rust = {
enable = true;
components = ["rustc" "clippy" "rustfmt" "cargo" "rust-analyzer"];
channel = "nightly";
targets = ["x86_64-unknown-linux-gnu" "x86_64-pc-windows-msvc"];
mold.enable = true;
};
env = {
ORT_DYLIB_PATH = "${pkgs.onnxruntime}/lib/libonnxruntime.so";
};
processes = {
surreal_db.exec = "docker run --rm --pull always -p 8000:8000 --net=host --user $(id -u) -v $(pwd)/database:/database surrealdb/surrealdb:latest-dev start rocksdb:/database/database.db --user root_user --pass root_password";
};


@@ -1,15 +1,11 @@
# yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
inputs:
fenix:
url: github:nix-community/fenix
nixpkgs:
url: github:nixos/nixpkgs/nixpkgs-unstable
# If you're using non-OSS software, you can set allowUnfree to true.
rust-overlay:
url: github:oxalica/rust-overlay
inputs:
nixpkgs:
follows: nixpkgs
allowUnfree: true
# If you're willing to use a package that's vulnerable
# permittedInsecurePackages:
# - "openssl-1.1.1w"
# If you have more than one devenv you can merge them
#imports:
# - ./backend


@@ -4,9 +4,11 @@ members = ["cargo:."]
# Config for 'dist'
[dist]
# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.28.0"
cargo-dist-version = "0.30.0"
# CI backends to support
ci = "github"
# Extra static files to include in each App (path relative to this Cargo.toml's dir)
include = ["lib"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)


@@ -1,5 +1,3 @@
version: '3.8'
services:
minne:
build: .
@@ -12,10 +10,11 @@ services:
SURREALDB_PASSWORD: "root_password"
SURREALDB_DATABASE: "test"
SURREALDB_NAMESPACE: "test"
OPENAI_API_KEY: "sk-key"
OPENAI_API_KEY: "sk-add-your-key"
DATA_DIR: "./data"
HTTP_PORT: 3000
RUST_LOG: "info"
RERANKING_ENABLED: false ## Change to true to enable reranking
depends_on:
- surrealdb
networks:
@@ -31,7 +30,7 @@ services:
- ./database:/database # Mounts a 'database' folder from your project directory
command: >
start
--log debug
--log info
--user root_user
--pass root_password
rocksdb:./database/database.db
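
`RERANKING_ENABLED` reaches the application through the `Environment::default()` source in `get_config` (shown at the top of this changeset), which maps environment variables onto config fields by lowercased name. The reranking-related fields, as inferred from `maybe_from_config` and `build_rerank_init_options` (a sketch, not the actual struct):

```rust
// Inferred shape of the reranking-related AppConfig fields; e.g.
// RERANKING_ENABLED=true deserializes into `reranking_enabled`.
#[derive(serde::Deserialize)]
struct RerankingConfigSketch {
    reranking_enabled: bool,              // RERANKING_ENABLED
    reranking_pool_size: Option<usize>,   // RERANKING_POOL_SIZE
    fastembed_cache_dir: Option<String>,  // FASTEMBED_CACHE_DIR
    fastembed_show_download_progress: Option<bool>,
    fastembed_max_length: Option<usize>,
    data_dir: String,                     // fallback cache location
}
```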

flake.lock generated

@@ -1,5 +1,20 @@
{
"nodes": {
"crane": {
"locked": {
"lastModified": 1760924934,
"narHash": "sha256-tuuqY5aU7cUkR71sO2TraVKK2boYrdW3gCSXUkF4i44=",
"owner": "ipetkov",
"repo": "crane",
"rev": "c6b4d5308293d0d04fcfeee92705017537cad02f",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
@@ -20,11 +35,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1746232882,
"narHash": "sha256-MHmBH2rS8KkRRdoU/feC/dKbdlMkcNkB5mwkuipVHeQ=",
"lastModified": 1761672384,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "7a2622e2c0dbad5c4493cb268aba12896e28b008",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c",
"type": "github"
},
"original": {
@@ -36,6 +51,7 @@
},
"root": {
"inputs": {
"crane": "crane",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}

flake.nix

@@ -4,77 +4,83 @@
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
crane.url = "github:ipetkov/crane";
};
outputs = {
self,
nixpkgs,
flake-utils,
crane,
}:
flake-utils.lib.eachDefaultSystem (
system: let
pkgs = nixpkgs.legacyPackages.${system};
# --- Minne Package Definition ---
minne-pkg = pkgs.rustPlatform.buildRustPackage {
pname = "minne";
version = "0.1.0";
src = self;
cargoLock = {
lockFile = ./Cargo.lock;
};
# Skip tests due to testing fs operations
doCheck = false;
nativeBuildInputs = [
pkgs.pkg-config
pkgs.rustfmt
pkgs.makeWrapper # For the postInstall hook
];
buildInputs = [
pkgs.openssl
pkgs.chromium # Runtime dependency for the browser
];
# Wrap the actual executables to provide CHROME at runtime
postInstall = let
chromium_executable = "${pkgs.chromium}/bin/chromium";
in ''
wrapProgram $out/bin/main \
--set CHROME "${chromium_executable}"
wrapProgram $out/bin/worker \
--set CHROME "${chromium_executable}"
'';
meta = with pkgs.lib; {
description = "Minne Application";
license = licenses.mit;
};
};
in {
packages = {
minne = minne-pkg;
default = self.packages.${system}.minne;
flake-utils.lib.eachDefaultSystem (system: let
pkgs = nixpkgs.legacyPackages.${system};
lib = pkgs.lib;
craneLib = crane.mkLib pkgs;
libExt =
if pkgs.stdenv.isDarwin
then "dylib"
else "so";
minne-pkg = craneLib.buildPackage {
src = lib.cleanSourceWith {
src = ./.;
filter = let
extraPaths = [
(toString ./common/migrations)
(toString ./common/schemas)
(toString ./html-router/templates)
(toString ./html-router/assets)
];
in
path: type: let
p = toString path;
in
craneLib.filterCargoSources path type
|| lib.any (x: lib.hasPrefix x p) extraPaths;
};
apps = {
main = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
worker = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "worker";
};
server = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "server";
};
default = self.apps.${system}.main;
pname = "minne";
version = "0.2.6";
doCheck = false;
nativeBuildInputs = [pkgs.pkg-config pkgs.rustfmt pkgs.makeWrapper];
buildInputs = [pkgs.openssl pkgs.chromium pkgs.onnxruntime];
postInstall = ''
wrapProgram $out/bin/main \
--set CHROME ${pkgs.chromium}/bin/chromium \
--set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt}
for b in worker server; do
if [ -x "$out/bin/$b" ]; then
wrapProgram $out/bin/$b \
--set CHROME ${pkgs.chromium}/bin/chromium \
--set ORT_DYLIB_PATH ${pkgs.onnxruntime}/lib/libonnxruntime.${libExt}
fi
done
'';
};
in {
packages = {
minne-pkg = minne-pkg;
default = minne-pkg;
};
apps = {
main = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
}
);
worker = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "worker";
};
server = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "server";
};
default = flake-utils.lib.mkApp {
drv = minne-pkg;
name = "main";
};
};
});
}

File diff suppressed because one or more lines are too long


@@ -1,6 +1,7 @@
use common::storage::db::SurrealDbClient;
use common::utils::template_engine::{ProvidesTemplateEngine, TemplateEngine};
use common::{create_template_engine, storage::db::ProvidesDb, utils::config::AppConfig};
use composite_retrieval::reranking::RerankerPool;
use std::sync::Arc;
use tracing::debug;
@@ -13,6 +14,7 @@ pub struct HtmlState {
pub templates: Arc<TemplateEngine>,
pub session_store: Arc<SessionStoreType>,
pub config: AppConfig,
pub reranker_pool: Option<Arc<RerankerPool>>,
}
impl HtmlState {
@@ -21,6 +23,7 @@ impl HtmlState {
openai_client: Arc<OpenAIClientType>,
session_store: Arc<SessionStoreType>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Result<Self, Box<dyn std::error::Error>> {
let template_engine = create_template_engine!("templates");
debug!("Template engine created for html_router.");
@@ -31,6 +34,7 @@ impl HtmlState {
session_store,
templates: Arc::new(template_engine),
config,
reranker_pool,
})
}
}

View File

@@ -118,11 +118,17 @@ pub async fn get_response_stream(
};
// 2. Retrieve knowledge entities
let rerank_lease = match state.reranker_pool.as_ref() {
Some(pool) => Some(pool.checkout().await),
None => None,
};
let entities = match retrieve_entities(
&state.db,
&state.openai_client,
&user_message.content,
&user.id,
rerank_lease,
)
.await
{
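
The two-armed `match` is deliberate: `checkout()` is async, and `.await` is not allowed inside the closure that `Option::map` takes. Since the same dance repeats at every call site, a small helper could centralize it (hypothetical, not part of this changeset):

```rust
// Hypothetical helper; Option::map(|pool| pool.checkout().await) would
// not compile because the closure is not an async context.
async fn maybe_checkout(pool: Option<&std::sync::Arc<RerankerPool>>) -> Option<RerankerLease> {
    match pool {
        Some(p) => Some(p.checkout().await),
        None => None,
    }
}

// Usage: let rerank_lease = maybe_checkout(state.reranker_pool.as_ref()).await;
```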


@@ -195,8 +195,19 @@ pub async fn suggest_knowledge_relationships(
if !query_parts.is_empty() {
let query = query_parts.join(" ");
if let Ok(results) =
retrieve_entities(&state.db, &state.openai_client, &query, &user.id).await
let rerank_lease = match state.reranker_pool.as_ref() {
Some(pool) => Some(pool.checkout().await),
None => None,
};
if let Ok(results) = retrieve_entities(
&state.db,
&state.openai_client,
&query,
&user.id,
rerank_lease,
)
.await
{
for RetrievedEntity { entity, score, .. } in results {
if suggestion_scores.len() >= MAX_RELATIONSHIP_SUGGESTIONS {


@@ -26,6 +26,7 @@ use common::{
},
utils::config::AppConfig,
};
use composite_retrieval::reranking::RerankerPool;
use tracing::{debug, info, warn};
use self::{
@@ -45,9 +46,14 @@ impl IngestionPipeline {
db: Arc<SurrealDbClient>,
openai_client: Arc<Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Result<Self, AppError> {
let services =
DefaultPipelineServices::new(db.clone(), openai_client.clone(), config.clone());
let services = DefaultPipelineServices::new(
db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool,
);
Self::with_services(db, IngestionConfig::default(), Arc::new(services))
}


@@ -18,7 +18,9 @@ use common::{
},
utils::{config::AppConfig, embedding::generate_embedding},
};
use composite_retrieval::{retrieve_entities, retrieved_entities_to_json, RetrievedEntity};
use composite_retrieval::{
reranking::RerankerPool, retrieve_entities, retrieved_entities_to_json, RetrievedEntity,
};
use text_splitter::TextSplitter;
use super::{enrichment_result::LLMEnrichmentResult, preparation::to_text_content};
@@ -62,6 +64,7 @@ pub struct DefaultPipelineServices {
db: Arc<SurrealDbClient>,
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
}
impl DefaultPipelineServices {
@@ -69,11 +72,13 @@ impl DefaultPipelineServices {
db: Arc<SurrealDbClient>,
openai_client: Arc<async_openai::Client<async_openai::config::OpenAIConfig>>,
config: AppConfig,
reranker_pool: Option<Arc<RerankerPool>>,
) -> Self {
Self {
db,
openai_client,
config,
reranker_pool,
}
}
@@ -151,7 +156,19 @@ impl PipelineServices for DefaultPipelineServices {
content.text, content.category, content.context
);
retrieve_entities(&self.db, &self.openai_client, &input_text, &content.user_id).await
let rerank_lease = match &self.reranker_pool {
Some(pool) => Some(pool.checkout().await),
None => None,
};
retrieve_entities(
&self.db,
&self.openai_client,
&input_text,
&content.user_id,
rerank_lease,
)
.await
}
async fn run_enrichment(


@@ -1,6 +1,6 @@
[package]
name = "main"
version = "0.2.5"
version = "0.2.6"
edition = "2021"
repository = "https://github.com/perstarkse/minne"
license = "AGPL-3.0-or-later"
@@ -25,6 +25,7 @@ ingestion-pipeline = { path = "../ingestion-pipeline" }
api-router = { path = "../api-router" }
html-router = { path = "../html-router" }
common = { path = "../common" }
composite-retrieval = { path = "../composite-retrieval" }
[dev-dependencies]
tower = "0.5"


@@ -1,6 +1,7 @@
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use std::sync::Arc;
@@ -43,8 +44,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db, openai_client, session_store, config.clone())?;
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
config.clone(),
reranker_pool.clone(),
)?;
let api_state = ApiState {
db: html_state.db.clone(),
@@ -102,9 +110,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let ingestion_pipeline = Arc::new(
IngestionPipeline::new(worker_db.clone(), openai_client.clone(), config.clone())
.await
.unwrap(),
IngestionPipeline::new(
worker_db.clone(),
openai_client.clone(),
config.clone(),
reranker_pool.clone(),
)
.await
.unwrap(),
);
info!("Starting worker process");
@@ -152,6 +165,7 @@ mod tests {
openai_base_url: "https://example.com".into(),
storage: StorageKind::Local,
pdf_ingest_mode: PdfIngestMode::LlmFirst,
..Default::default()
}
}
@@ -181,9 +195,14 @@ mod tests {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db.clone(), openai_client, session_store, config.clone())
.expect("failed to build html state");
let html_state = HtmlState::new_with_resources(
db.clone(),
openai_client,
session_store,
config.clone(),
None,
)
.expect("failed to build html state");
let api_state = ApiState {
db: html_state.db.clone(),


@@ -3,6 +3,7 @@ use std::sync::Arc;
use api_router::{api_routes_v1, api_state::ApiState};
use axum::{extract::FromRef, Router};
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use html_router::{html_routes, html_state::HtmlState};
use tracing::info;
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -41,8 +42,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let html_state =
HtmlState::new_with_resources(db, openai_client, session_store, config.clone())?;
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let html_state = HtmlState::new_with_resources(
db,
openai_client,
session_store,
config.clone(),
reranker_pool,
)?;
let api_state = ApiState {
db: html_state.db.clone(),


@@ -1,6 +1,7 @@
use std::sync::Arc;
use common::{storage::db::SurrealDbClient, utils::config::get_config};
use composite_retrieval::reranking::RerankerPool;
use ingestion_pipeline::{pipeline::IngestionPipeline, run_worker_loop};
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
@@ -32,8 +33,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_api_base(&config.openai_base_url),
));
let ingestion_pipeline =
Arc::new(IngestionPipeline::new(db.clone(), openai_client.clone(), config).await?);
let reranker_pool = RerankerPool::maybe_from_config(&config)?;
let ingestion_pipeline = Arc::new(
IngestionPipeline::new(db.clone(), openai_client.clone(), config, reranker_pool).await?,
);
run_worker_loop(db, ingestion_pipeline).await
}